Skip to content

Commit 8042022

Browse files
authored
[FMV][AArch64] Don't optimize backward compatible features in resolver. (#90928)
For AArch64 features such as Branch Target Identification (BTI) or the Memory Tagging Extension (MTE), which are compatible with targets that lack support for them, we may encounter scenarios where a binary compiled with, for example, MTE enabled is executed on both MTE and non-MTE hardware. In that case we still need to detect at runtime whether the MTE feature is available in order to choose the appropriate function version. Therefore, we cannot optimize the function multi-versioning resolver by removing the checks for these features, even when they are enabled for the target during compilation.
1 parent 8a0073a commit 8042022

File tree

2 files changed

+35
-4
lines changed

2 files changed

+35
-4
lines changed

clang/lib/CodeGen/CodeGenFunction.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2761,8 +2761,14 @@ llvm::Value *CodeGenFunction::FormAArch64ResolverCondition(
27612761
const MultiVersionResolverOption &RO) {
27622762
llvm::SmallVector<StringRef, 8> CondFeatures;
27632763
for (const StringRef &Feature : RO.Conditions.Features) {
2764-
// Form condition for features which are not yet enabled in target
2765-
if (!getContext().getTargetInfo().hasFeature(Feature))
2764+
// Optimize the Function Multi Versioning resolver by creating conditions
2765+
// only for features that are not enabled in the target. The exception is
2766+
// for features whose extension instructions are executed as NOP on targets
2767+
// without extension support.
2768+
if (!getContext().getTargetInfo().hasFeature(Feature) ||
2769+
Feature.equals("bti") || Feature.equals("memtag") ||
2770+
Feature.equals("memtag2") || Feature.equals("memtag3") ||
2771+
Feature.equals("dgh"))
27662772
CondFeatures.push_back(Feature);
27672773
}
27682774
if (!CondFeatures.empty()) {

clang/test/CodeGen/attr-target-clones-aarch64.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -526,8 +526,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
526526
// CHECK-MTE-BTI-NEXT: resolver_entry:
527527
// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver()
528528
// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
529-
// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 4096
530-
// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 4096
529+
// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 17592186048512
530+
// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 17592186048512
531531
// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
532532
// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
533533
// CHECK-MTE-BTI: resolver_return:
@@ -604,7 +604,24 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
604604
//
605605
// CHECK-MTE-BTI-LABEL: @ftc_dup3.resolver(
606606
// CHECK-MTE-BTI-NEXT: resolver_entry:
607+
// CHECK-MTE-BTI-NEXT: call void @__init_cpu_features_resolver()
608+
// CHECK-MTE-BTI-NEXT: [[TMP0:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
609+
// CHECK-MTE-BTI-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], 1125899906842624
610+
// CHECK-MTE-BTI-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 1125899906842624
611+
// CHECK-MTE-BTI-NEXT: [[TMP3:%.*]] = and i1 true, [[TMP2]]
612+
// CHECK-MTE-BTI-NEXT: br i1 [[TMP3]], label [[RESOLVER_RETURN:%.*]], label [[RESOLVER_ELSE:%.*]]
613+
// CHECK-MTE-BTI: resolver_return:
607614
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mbti
615+
// CHECK-MTE-BTI: resolver_else:
616+
// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
617+
// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 17592186044416
618+
// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 17592186044416
619+
// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
620+
// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
621+
// CHECK-MTE-BTI: resolver_return1:
622+
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3._Mmemtag2
623+
// CHECK-MTE-BTI: resolver_else2:
624+
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_dup3.default
608625
//
609626
//
610627
// CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone
@@ -712,7 +729,15 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
712729
// CHECK-MTE-BTI: resolver_return:
713730
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._MsbMsve
714731
// CHECK-MTE-BTI: resolver_else:
732+
// CHECK-MTE-BTI-NEXT: [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
733+
// CHECK-MTE-BTI-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 1125899906842624
734+
// CHECK-MTE-BTI-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 1125899906842624
735+
// CHECK-MTE-BTI-NEXT: [[TMP7:%.*]] = and i1 true, [[TMP6]]
736+
// CHECK-MTE-BTI-NEXT: br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
737+
// CHECK-MTE-BTI: resolver_return1:
715738
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3._Mbti
739+
// CHECK-MTE-BTI: resolver_else2:
740+
// CHECK-MTE-BTI-NEXT: ret ptr @ftc_inline3.default
716741
//
717742
//
718743
// CHECK-MTE-BTI: Function Attrs: noinline nounwind optnone

0 commit comments

Comments
 (0)