-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[ctx_prof] Handle select
and its step
instrumentation
#109185
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
select
select
and its step
instrumentation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-pgo Author: Mircea Trofin (mtrofin) ChangesThe Full diff: https://github.com/llvm/llvm-project/pull/109185.diff 5 Files Affected:
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index b3e64b26ee543c..0a5beb92fcbcc0 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -117,6 +117,9 @@ class CtxProfAnalysis : public AnalysisInfoMixin<CtxProfAnalysis> {
/// Get the instruction instrumenting a BB, or nullptr if not present.
static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB);
+
+ /// Get the step instrumentation associated with a `select`
+ static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
};
class CtxProfAnalysisPrinterPass
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 3df72983862d98..7517011395a7d6 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -254,6 +254,15 @@ InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
return nullptr;
}
+InstrProfIncrementInstStep *
+CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
+ Instruction *Prev = &SI;
+ while ((Prev = Prev->getPrevNode()))
+ if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
+ return Step;
+ return nullptr;
+}
+
template <class ProfilesTy, class ProfTy>
static void preorderVisit(ProfilesTy &Profiles,
function_ref<void(ProfTy &)> Visitor,
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index 91f950e2ba4c3e..30bb251364fdef 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -154,6 +154,8 @@ class ProfileAnnotator final {
bool hasCount() const { return Count.has_value(); }
+ uint64_t getCount() const { return *Count;}
+
bool trySetSingleUnknownInEdgeCount() {
if (UnknownCountInEdges == 1) {
setSingleUnknownEdgeCount(InEdges);
@@ -266,6 +268,21 @@ class ProfileAnnotator final {
return HitExit;
}
+ bool allNonColdSelectsHaveProfile() const {
+ for (const auto &BB : F) {
+ if (getBBInfo(BB).getCount() > 0) {
+ for (const auto &I : BB) {
+ if (const auto *SI = dyn_cast<SelectInst>(&I)) {
+ if (!SI->getMetadata(LLVMContext::MD_prof)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+ return true;
+ }
+
public:
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
InstrProfSummaryBuilder &PB)
@@ -324,12 +341,33 @@ class ProfileAnnotator final {
PB.addEntryCount(Counters[0]);
for (auto &BB : F) {
+ const auto &BBInfo = getBBInfo(BB);
+ if (BBInfo.getCount() > 0) {
+ for (auto &I : BB) {
+ if (auto *SI = dyn_cast<SelectInst>(&I)) {
+ if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
+ auto Index = Step->getIndex()->getZExtValue();
+ assert(Index < Counters.size() &&
+ "The index of the step instruction must be inside the "
+ "counters vector by "
+ "construction - tripping this assertion indicates a bug in "
+ "how the contextual profile is managed by IPO transforms");
+ auto TotalCount = BBInfo.getCount();
+ auto TrueCount = Counters[Index];
+ auto FalseCount =
+ (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
+ setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
+ std::max(TrueCount, FalseCount));
+ }
+ }
+ }
+ }
if (succ_size(&BB) < 2)
continue;
auto *Term = BB.getTerminator();
SmallVector<uint64_t, 2> EdgeCounts(Term->getNumSuccessors(), 0);
uint64_t MaxCount = 0;
- const auto &BBInfo = getBBInfo(BB);
+
for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
++SuccIdx) {
uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
@@ -343,12 +381,15 @@ class ProfileAnnotator final {
setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
}
assert(allCountersAreAssigned() &&
- "Expected all counters have been assigned.");
+ "[ctx-prof] Expected all counters have been assigned.");
assert(allTakenPathsExit() &&
"[ctx-prof] Encountered a BB with more than one successor, where "
"all outgoing edges have a 0 count. This occurs in non-exiting "
"functions (message pumps, usually) which are not supported in the "
"contextual profiling case");
+ assert(allNonColdSelectsHaveProfile() &&
+ "[ctx-prof] All non-cold select instructions were expected to have "
+ "a profile.");
}
};
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 2e05fa80464b8d..257850f7e6d2af 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -2211,7 +2211,15 @@ remapIndices(Function &Caller, BasicBlock *StartBB,
}
for (auto &I : llvm::make_early_inc_range(*BB)) {
if (auto *Inc = dyn_cast<InstrProfIncrementInst>(&I)) {
- if (Inc != BBID) {
+ if (isa<InstrProfIncrementInstStep>(Inc)) {
+ if (isa<Constant>(Inc->getStep())) {
+ assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode()));
+ Inc->eraseFromParent();
+ } else {
+ assert(isa_and_nonnull<SelectInst>(Inc->getNextNode()));
+ RewriteInstrIfNeeded(*Inc);
+ }
+ } else if (Inc != BBID) {
// If we're here it means that the BB had more than 1 IDs, presumably
// some coming from the callee. We "made up our mind" to keep the
// first one (which may or may not have been originally the caller's).
diff --git a/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll b/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll
new file mode 100644
index 00000000000000..e740466a03f3e9
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll
@@ -0,0 +1,76 @@
+; Check that we handle `step` instrumentations. These addorn `select`s.
+; We don't want to confuse the `step` with normal increments, the latter of which
+; we use for BB ID-ing: we want to keep the `step`s after inlining, except if
+; the `select` is elided.
+;
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
+;
+; RUN: opt -passes=ctx-instr-gen %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=INSTR
+; RUN: opt -passes=ctx-instr-gen,module-inline %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=POST-INL
+; RUN: opt -passes=ctx-instr-gen,module-inline,ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=FLATTEN
+
+; INSTR-LABEL: yes:
+; INSTR-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)
+; INSTR-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 [[#]], i32 2, i32 0, ptr @bar)
+
+; INSTR-LABEL: no:
+; INSTR-NEXT: call void @llvm.instrprof.callsite(ptr @foo, i64 [[#]], i32 2, i32 1, ptr @bar)
+
+; INSTR-LABEL: define i32 @bar
+; INSTR-NEXT: call void @llvm.instrprof.increment(ptr @bar, i64 [[#]], i32 2, i32 0)
+; INSTR-NEXT: %inc =
+; INSTR: %test = icmp eq i32 %t, 0
+; INSTR-NEXT: %1 = zext i1 %test to i64
+; INSTR-NEXT: call void @llvm.instrprof.increment.step(ptr @bar, i64 [[#]], i32 2, i32 1, i64 %1)
+; INSTR-NEXT: %res = select
+
+; POST-INL-LABEL: yes:
+; POST-INL-NEXT: call void @llvm.instrprof.increment
+; POST-INL: call void @llvm.instrprof.increment.step
+; POST-INL-NEXT: %res.i = select
+
+; POST-INL-LABEL: no:
+; POST-INL-NEXT: call void @llvm.instrprof.increment
+; POST-INL-NEXT: br label
+
+; POST-INL-LABEL: exit:
+; POST-INL-NEXT: %res = phi i32 [ %res.i, %yes ], [ 1, %no ]
+
+; FLATTEN-LABEL: yes:
+; FLATTEN: %res.i = select i1 %test.i, i32 %inc.i, i32 %dec.i, !prof ![[SELPROF:[0-9]+]]
+; FLATTEN-LABEL: no:
+;
+; See the profile, in the "yes" case we set the step counter's value, in @bar, to 3. The total
+; entry count of that BB is 4.
+; ![[SELPROF]] = !{!"branch_weights", i32 3, i32 1}
+
+;--- example.ll
+define i32 @foo(i32 %t) !guid !0 {
+ %test = icmp slt i32 %t, 0
+ br i1 %test, label %yes, label %no
+yes:
+ %res1 = call i32 @bar(i32 %t) alwaysinline
+ br label %exit
+no:
+ ; this will result in eliding the select in @bar, when inlined.
+ %res2 = call i32 @bar(i32 0) alwaysinline
+ br label %exit
+exit:
+ %res = phi i32 [%res1, %yes], [%res2, %no]
+ ret i32 %res
+}
+
+define i32 @bar(i32 %t) !guid !1 {
+ %inc = add i32 %t, 1
+ %dec = sub i32 %t, 1
+ %test = icmp eq i32 %t, 0
+ %res = select i1 %test, i32 %inc, i32 %dec
+ ret i32 %res
+}
+
+!0 = !{i64 1234}
+!1 = !{i64 5678}
+
+;--- profile.json
+[{"Guid":1234, "Counters":[10, 4], "Callsites":[[{"Guid": 5678, "Counters":[4,3]}],[{"Guid": 5678, "Counters":[6,6]}]]}]
|
987562a
to
152a2a9
Compare
09642a4
to
7e92883
Compare
152a2a9
to
f654c77
Compare
7e92883
to
b68a12f
Compare
f654c77
to
241afaa
Compare
b68a12f
to
dfde003
Compare
17a2ba8
to
54fd9b5
Compare
54fd9b5
to
8ed7812
Compare
The
step
instrumentation shouldn't be treated, during use, like anincrement
. The latter is treated as a BB ID. The step isn't that, it's more of a type of value profiling. We need to distinguish between the 2 when really looking for BB IDs (==increments), and handle appropriatelystep
s. In particular, we need to know when to elide them becauseselect
s may get elided by function cloning, if the condition of the select is statically known.