Skip to content

Commit 48fb8ee

Browse files
[Clang][SME2] Add multi-vector add/sub builtins (#69725)
Adds the following SME2 builtins: - sv(add|sub) - sv(add|sub)_za32/za64 - sv(add|sub)_write_za32/za64 Other changes in this patch: - CGBuiltin.cpp: The GetAArch64SMEProcessedOperands function is created to avoid duplicating existing code from EmitAArch64SVEBuiltinExpr. - arm_sve.td: The add/sub SME2 builtins which do not operate on ZA have been added to arm_sve.td, matching the corresponding LLVM IR intrinsic names which start with @llvm.aarch64.sve for this reason. - SveEmitter.cpp: Adds the createCoreHeaderIntrinsics function to remove duplicated code in createHeader & createSMEHeader. Uses a new enum (ACLEKind) to choose either "__builtin_sme_" or "__builtin_sve_" when emitting the intrinsics. See https://github.com/ARM-software/acle/pull/217/files
1 parent d180cfb commit 48fb8ee

File tree

8 files changed

+1936
-51
lines changed

8 files changed

+1936
-51
lines changed

clang/include/clang/Basic/arm_sme.td

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,3 +263,38 @@ multiclass ZAFPOuterProd<string n_suffix> {
263263

264264
defm SVMOPA : ZAFPOuterProd<"mopa">;
265265
defm SVMOPS : ZAFPOuterProd<"mops">;
266+
267+
////////////////////////////////////////////////////////////////////////////////
268+
// SME2 - ADD, SUB
269+
270+
multiclass ZAAddSub<string n_suffix> {
271+
let TargetGuard = "sme2" in {
272+
def NAME # _WRITE_SINGLE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x2", "vm2d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsSharedZA], []>;
273+
def NAME # _WRITE_SINGLE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write[_single]_za32[_{d}]_vg1x4", "vm4d", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsSharedZA], []>;
274+
275+
def NAME # _WRITE_ZA32_VG1X2_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x2", "vm22", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsSharedZA], []>;
276+
def NAME # _WRITE_ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_write_za32[_{d}]_vg1x4", "vm44", "iUi", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsSharedZA], []>;
277+
278+
def NAME # _ZA32_VG1x2_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x2", "vm2", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x2", [IsStreaming, IsSharedZA], []>;
279+
def NAME # _ZA32_VG1X4_I32 : Inst<"sv" # n_suffix # "_za32[_{d}]_vg1x4", "vm4", "iUif", MergeNone, "aarch64_sme_" # n_suffix # "_za32_vg1x4", [IsStreaming, IsSharedZA], []>;
280+
281+
let TargetGuard = "sme-i16i64" in {
282+
def NAME # _WRITE_SINGLE_ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x2", "vm2d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x2", [IsStreaming, IsSharedZA], []>;
283+
def NAME # _WRITE_SINGLE_ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_write[_single]_za64[_{d}]_vg1x4", "vm4d", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_single_za_vg1x4", [IsStreaming, IsSharedZA], []>;
284+
285+
def NAME # _WRITE_ZA64_VG1x2_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x2", "vm22", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x2", [IsStreaming, IsSharedZA], []>;
286+
def NAME # _WRITE_ZA64_VG1x4_I64 : Inst<"sv" # n_suffix # "_write_za64[_{d}]_vg1x4", "vm44", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_write_za_vg1x4", [IsStreaming, IsSharedZA], []>;
287+
288+
def NAME # _ZA64_VG1X2_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>;
289+
def NAME # _ZA64_VG1X4_I64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "lUl", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>;
290+
}
291+
292+
let TargetGuard = "sme-f64f64" in {
293+
def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsSharedZA], []>;
294+
def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsSharedZA], []>;
295+
}
296+
}
297+
}
298+
299+
defm SVADD : ZAAddSub<"add">;
300+
defm SVSUB : ZAAddSub<"sub">;

clang/include/clang/Basic/arm_sve.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1980,3 +1980,15 @@ def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sv
19801980

19811981
defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">;
19821982
}
1983+
1984+
////////////////////////////////////////////////////////////////////////////////
1985+
// SME2
1986+
1987+
// SME intrinsics which operate only on vectors and do not require ZA should be added here,
1988+
// as they could possibly become SVE instructions in the future.
1989+
1990+
let TargetGuard = "sme2" in {
1991+
// == ADD (vectors) ==
1992+
def SVADD_SINGLE_X2 : SInst<"svadd[_single_{d}_x2]", "22d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x2", [IsStreaming], []>;
1993+
def SVADD_SINGLE_X4 : SInst<"svadd[_single_{d}_x4]", "44d", "cUcsUsiUilUl", MergeNone, "aarch64_sve_add_single_x4", [IsStreaming], []>;
1994+
}

clang/include/clang/Basic/arm_sve_sme_incl.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ class ImmCheck<int arg, ImmCheckType kind, int eltSizeArg = -1> {
257257
}
258258

259259
class Inst<string n, string p, string t, MergeType mt, string i,
260-
list<FlagType> ft, list<ImmCheck> ch, MemEltType met> {
260+
list<FlagType> ft, list<ImmCheck> ch, MemEltType met = MemEltTyDefault> {
261261
string Name = n;
262262
string Prototype = p;
263263
string Types = t;

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4297,6 +4297,7 @@ class CodeGenFunction : public CodeGenTypeCache {
42974297
/// the wider vector. This avoids the error when allocating space in llvm
42984298
/// for struct of scalable vectors if a function returns struct.
42994299
llvm::Value *FormSVEBuiltinResult(llvm::Value *Call);
4300+
43004301
llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
43014302

43024303
llvm::Value *EmitSMELd1St1(const SVETypeFlags &TypeFlags,

0 commit comments

Comments
 (0)