Skip to content

Commit 179ba12

Browse files
authored
[AArch64][FMV] Support feature MOPS in Function Multi Versioning. (#78788)
This patch adds support for FEAT_MOPS (the Memory Copy and Memory Set instructions) to Function Multi Versioning. Bits [19:16] of the system register ID_AA64ISAR2_EL1 indicate whether FEAT_MOPS is implemented in AArch64 state. This information is accessible via the ELF hwcaps (the HWCAP2_MOPS bit).
1 parent 8c41e3f commit 179ba12

File tree

7 files changed

+25
-17
lines changed

7 files changed

+25
-17
lines changed

clang/test/CodeGen/attr-target-version.c

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ inline int __attribute__((target_version("sve+sve-bf16"))) fmv_inline(void) { re
3535
inline int __attribute__((target_version("sve2-aes+sve2-sha3"))) fmv_inline(void) { return 5; }
3636
inline int __attribute__((target_version("sve2+sve2-pmull128+sve2-bitperm"))) fmv_inline(void) { return 9; }
3737
inline int __attribute__((target_version("sve2-sm4+memtag2"))) fmv_inline(void) { return 10; }
38-
inline int __attribute__((target_version("memtag3+rcpc3"))) fmv_inline(void) { return 11; }
38+
inline int __attribute__((target_version("memtag3+rcpc3+mops"))) fmv_inline(void) { return 11; }
3939
inline int __attribute__((target_version("default"))) fmv_inline(void) { return 3; }
4040

4141
__attribute__((target_version("ls64"))) int fmv_e(void);
@@ -272,36 +272,36 @@ int hoo(void) {
272272
// CHECK-NEXT: ret ptr @fmv_inline._Mfp16Mfp16MfcmaMsme
273273
// CHECK: resolver_else:
274274
// CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
275-
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 893353197568
276-
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 893353197568
275+
// CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 864726312827224064
276+
// CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 864726312827224064
277277
// CHECK-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
278278
// CHECK-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
279279
// CHECK: resolver_return1:
280-
// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
280+
// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3Mmops
281281
// CHECK: resolver_else2:
282282
// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
283-
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 34359773184
284-
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 34359773184
283+
// CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 893353197568
284+
// CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 893353197568
285285
// CHECK-NEXT: [[TMP11:%.*]] = and i1 true, [[TMP10]]
286286
// CHECK-NEXT: br i1 [[TMP11]], label [[RESOLVER_RETURN3:%.*]], label [[RESOLVER_ELSE4:%.*]]
287287
// CHECK: resolver_return3:
288-
// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm
288+
// CHECK-NEXT: ret ptr @fmv_inline._Msve2Msve2-pmull128Msve2-bitperm
289289
// CHECK: resolver_else4:
290290
// CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
291-
// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 17246986240
292-
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 17246986240
291+
// CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 34359773184
292+
// CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[TMP13]], 34359773184
293293
// CHECK-NEXT: [[TMP15:%.*]] = and i1 true, [[TMP14]]
294294
// CHECK-NEXT: br i1 [[TMP15]], label [[RESOLVER_RETURN5:%.*]], label [[RESOLVER_ELSE6:%.*]]
295295
// CHECK: resolver_return5:
296-
// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm
296+
// CHECK-NEXT: ret ptr @fmv_inline._Msha1MpmullMf64mm
297297
// CHECK: resolver_else6:
298298
// CHECK-NEXT: [[TMP16:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
299-
// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 288265560523800576
300-
// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 288265560523800576
299+
// CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 17246986240
300+
// CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[TMP17]], 17246986240
301301
// CHECK-NEXT: [[TMP19:%.*]] = and i1 true, [[TMP18]]
302302
// CHECK-NEXT: br i1 [[TMP19]], label [[RESOLVER_RETURN7:%.*]], label [[RESOLVER_ELSE8:%.*]]
303303
// CHECK: resolver_return7:
304-
// CHECK-NEXT: ret ptr @fmv_inline._Mrcpc3Mmemtag3
304+
// CHECK-NEXT: ret ptr @fmv_inline._Msha3Mi8mmMf32mm
305305
// CHECK: resolver_else8:
306306
// CHECK-NEXT: [[TMP20:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
307307
// CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 19791209299968
@@ -609,7 +609,7 @@ int hoo(void) {
609609
//
610610
//
611611
// CHECK: Function Attrs: noinline nounwind optnone
612-
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3
612+
// CHECK-LABEL: define {{[^@]+}}@fmv_inline._Mrcpc3Mmemtag3Mmops
613613
// CHECK-SAME: () #[[ATTR23:[0-9]+]] {
614614
// CHECK-NEXT: entry:
615615
// CHECK-NEXT: ret i32 11
@@ -768,7 +768,7 @@ int hoo(void) {
768768
// CHECK: attributes #[[ATTR20]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3" }
769769
// CHECK: attributes #[[ATTR21]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm" }
770770
// CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+ls64,+mte,+neon,+sve,+sve2,+sve2-sm4" }
771-
// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mte,+rcpc,+rcpc3" }
771+
// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+mops,+mte,+rcpc,+rcpc3" }
772772
// CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+ls64,+sb" }
773773
//.
774774
// CHECK-NOFMV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }

clang/test/Sema/attr-target-clones-aarch64.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// RUN: %clang_cc1 -triple aarch64-linux-gnu -fsyntax-only -verify %s
22

3-
void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3"))) no_def(void);
3+
void __attribute__((target_clones("fp16+sve2-aes", "sb+sve2-sha3+rcpc3+mops"))) no_def(void);
44

55
// expected-warning@+1 {{unsupported 'default' in the 'target_clones' attribute string; 'target_clones' attribute ignored}}
66
void __attribute__((target_clones("default+sha3"))) warn1(void);

clang/test/SemaCXX/attr-target-version.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ void __attribute__((target_version("vmull"))) wrong_tv(void);
66
void __attribute__((target_version("dotprod"))) no_def(void);
77
void __attribute__((target_version("rdm+fp"))) no_def(void);
88
void __attribute__((target_version("rcpc3"))) no_def(void);
9+
void __attribute__((target_version("mops"))) no_def(void);
910

1011
// expected-error@+1 {{no matching function for call to 'no_def'}}
1112
void foo(void) { no_def(); }

compiler-rt/lib/builtins/cpu_model/aarch64.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ enum CPUFeatures {
115115
FEAT_SME_I64,
116116
FEAT_SME2,
117117
FEAT_RCPC3,
118+
FEAT_MOPS,
118119
FEAT_MAX,
119120
FEAT_EXT = 62, // Reserved to indicate presence of additional features field
120121
// in __aarch64_cpu_features

compiler-rt/lib/builtins/cpu_model/aarch64/fmv/mrs.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
109109
setCPUFeature(FEAT_SME_I64);
110110
if (hwcap2 & HWCAP2_SME_F64F64)
111111
setCPUFeature(FEAT_SME_F64);
112+
if (hwcap2 & HWCAP2_MOPS)
113+
setCPUFeature(FEAT_MOPS);
112114
if (hwcap & HWCAP_CPUID) {
113115
unsigned long ftr;
114116
getCPUFeature(ID_AA64PFR1_EL1, ftr);

compiler-rt/lib/builtins/cpu_model/aarch64/hwcap.inc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,3 +178,6 @@
178178
#ifndef HWCAP2_SVE_EBF16
179179
#define HWCAP2_SVE_EBF16 (1ULL << 33)
180180
#endif
181+
#ifndef HWCAP2_MOPS
182+
#define HWCAP2_MOPS (1ULL << 43)
183+
#endif

llvm/include/llvm/TargetParser/AArch64TargetParser.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ enum CPUFeatures {
9494
FEAT_SME_I64,
9595
FEAT_SME2,
9696
FEAT_RCPC3,
97+
FEAT_MOPS,
9798
FEAT_MAX,
9899
FEAT_EXT = 62,
99100
FEAT_INIT
@@ -246,7 +247,7 @@ inline constexpr ExtensionInfo Extensions[] = {
246247
{"memtag", AArch64::AEK_MTE, "+mte", "-mte", FEAT_MEMTAG, "", 440},
247248
{"memtag2", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG2, "+mte", 450},
248249
{"memtag3", AArch64::AEK_NONE, {}, {}, FEAT_MEMTAG3, "+mte", 460},
249-
{"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_INIT, "", 0},
250+
{"mops", AArch64::AEK_MOPS, "+mops", "-mops", FEAT_MOPS, "+mops", 650},
250251
{"pauth", AArch64::AEK_PAUTH, "+pauth", "-pauth", FEAT_INIT, "", 0},
251252
{"pmull", AArch64::AEK_NONE, {}, {}, FEAT_PMULL, "+aes,+fp-armv8,+neon", 160},
252253
{"pmuv3", AArch64::AEK_PERFMON, "+perfmon", "-perfmon", FEAT_INIT, "", 0},

0 commit comments

Comments
 (0)