Skip to content

Commit fbd89bc

Browse files
authored
Reland "[LTO] Run Argument Promotion before IPSCCP" (#111853)
Run ArgumentPromotion before IPSCCP in the LTO pipeline, to expose more constants to be propagated. We also run PostOrderFunctionAttrs to improve the information available to ArgumentPromotion's alias analysis, and SROA to clean up allocas. Relands #111163.
1 parent 2904f80 commit fbd89bc

File tree

3 files changed

+92
-19
lines changed

3 files changed

+92
-19
lines changed

llvm/lib/Passes/PassBuilderPipelines.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1859,6 +1859,15 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
18591859
MPM.addPass(PGOIndirectCallPromotion(
18601860
true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
18611861

1862+
// Promoting by-reference arguments to by-value exposes more constants to
1863+
// IPSCCP.
1864+
CGSCCPassManager CGPM;
1865+
CGPM.addPass(PostOrderFunctionAttrsPass());
1866+
CGPM.addPass(ArgumentPromotionPass());
1867+
CGPM.addPass(
1868+
createCGSCCToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG)));
1869+
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
1870+
18621871
// Propagate constants at call sites into the functions they call. This
18631872
// opens opportunities for globalopt (and inlining) by substituting function
18641873
// pointers passed as arguments to direct uses of functions.
@@ -1871,10 +1880,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
18711880
MPM.addPass(CalledValuePropagationPass());
18721881
}
18731882

1874-
// Now deduce any function attributes based on the current code.
1875-
MPM.addPass(
1876-
createModuleToPostOrderCGSCCPassAdaptor(PostOrderFunctionAttrsPass()));
1877-
18781883
// Do RPO function attribute inference across the module to forward-propagate
18791884
// attributes where applicable.
18801885
// FIXME: Is this really an optimization rather than a canonicalization?

llvm/test/Other/new-pm-lto-defaults.ll

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -41,24 +41,24 @@
4141
; CHECK-O23SZ-NEXT: PGOIndirectCallPromotion
4242
; CHECK-O23SZ-NEXT: Running analysis: ProfileSummaryAnalysis
4343
; CHECK-O23SZ-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
44-
; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
44+
; CHECK-O23SZ-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
45+
; CHECK-O23SZ-NEXT: Running analysis: LazyCallGraphAnalysis
46+
; CHECK-O23SZ-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
47+
; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
48+
; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass
49+
; CHECK-O23SZ-NEXT: Running analysis: AAManager
50+
; CHECK-O23SZ-NEXT: Running analysis: BasicAA
4551
; CHECK-O23SZ-NEXT: Running analysis: AssumptionAnalysis on foo
52+
; CHECK-O23SZ-NEXT: Running analysis: ScopedNoAliasAA
53+
; CHECK-O23SZ-NEXT: Running analysis: TypeBasedAA
54+
; CHECK-O23SZ-NEXT: Running analysis: OuterAnalysisManagerProxy
55+
; CHECK-O23SZ-NEXT: Running pass: ArgumentPromotionPass
56+
; CHECK-O23SZ-NEXT: Running pass: SROAPass
57+
; CHECK-O23SZ-NEXT: Running pass: IPSCCPPass
4658
; CHECK-O23SZ-NEXT: Running pass: CalledValuePropagationPass
47-
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC
48-
; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
49-
; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
50-
; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
51-
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph{{.*}}>
52-
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
53-
; CHECK-O-NEXT: Running analysis: AAManager
54-
; CHECK-O-NEXT: Running analysis: BasicAA
55-
; CHECK-O1-NEXT: Running analysis: AssumptionAnalysis on foo
56-
; CHECK-O1-NEXT: Running analysis: TargetIRAnalysis
57-
; CHECK-O1-NEXT: Running analysis: DominatorTreeAnalysis
58-
; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA
59-
; CHECK-O-NEXT: Running analysis: TypeBasedAA
60-
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
6159
; CHECK-O-NEXT: Running pass: ReversePostOrderFunctionAttrsPass
60+
; CHECK-O1-NEXT: Running analysis: LazyCallGraphAnalysis
61+
; CHECK-O1-NEXT: Running analysis: TargetLibraryAnalysis
6262
; CHECK-O-NEXT: Running pass: GlobalSplitPass
6363
; CHECK-O-NEXT: Running pass: WholeProgramDevirtPass
6464
; CHECK-O1-NEXT: Running pass: LowerTypeTestsPass
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes='lto<O3>' -S < %s | FileCheck %s
3+
4+
; We should be able to propagate the constants from @parent to @child.
5+
6+
define void @parent(ptr %p) {
7+
; CHECK-LABEL: define void @parent(
8+
; CHECK-SAME: ptr nocapture [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: tail call fastcc void @child(ptr [[P]])
10+
; CHECK-NEXT: ret void
11+
;
12+
%c = alloca i32
13+
store i32 5, ptr %c
14+
%n = alloca i32
15+
store i32 1024, ptr %n
16+
call void @child(ptr %p, ptr %n, ptr %c)
17+
ret void
18+
}
19+
20+
define internal void @child(ptr %p, ptr %n, ptr %c) noinline {
21+
; CHECK-LABEL: define internal fastcc void @child(
22+
; CHECK-SAME: ptr nocapture [[P:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] {
23+
; CHECK-NEXT: [[ENTRY:.*]]:
24+
; CHECK-NEXT: br label %[[FOR_COND:.*]]
25+
; CHECK: [[FOR_COND]]:
26+
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_INC:.*]] ]
27+
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], 1024
28+
; CHECK-NEXT: br i1 [[CMP_NOT]], label %[[FOR_END:.*]], label %[[FOR_INC]]
29+
; CHECK: [[FOR_INC]]:
30+
; CHECK-NEXT: [[IDXPROM:%.*]] = zext nneg i32 [[I_0]] to i64
31+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[IDXPROM]]
32+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
33+
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[TMP0]], 5
34+
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX]], align 4
35+
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
36+
; CHECK-NEXT: br label %[[FOR_COND]]
37+
; CHECK: [[FOR_END]]:
38+
; CHECK-NEXT: ret void
39+
;
40+
entry:
41+
br label %for.cond
42+
43+
for.cond:
44+
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
45+
%n.val = load i32, ptr %n
46+
%cmp = icmp ne i32 %i.0, %n.val
47+
br i1 %cmp, label %for.body, label %for.cond.cleanup
48+
49+
for.cond.cleanup:
50+
br label %for.end
51+
52+
for.body:
53+
%idxprom = sext i32 %i.0 to i64
54+
%arrayidx = getelementptr inbounds i32, ptr %p, i64 %idxprom
55+
%0 = load i32, ptr %arrayidx, align 4
56+
%c.val = load i32, ptr %c
57+
%mul = mul i32 %0, %c.val
58+
store i32 %mul, ptr %arrayidx, align 4
59+
br label %for.inc
60+
61+
for.inc:
62+
%inc = add nsw i32 %i.0, 1
63+
br label %for.cond
64+
65+
for.end:
66+
ret void
67+
}
68+

0 commit comments

Comments
 (0)