Skip to content

Commit 9ee8e38

Browse files
committed
[VPlan] Also propagate versioned strides to users via sext/zext.
The versioned value may not be used in the loop directly, but only through a sext/zext of it. Add new live-ins in those cases.
1 parent a4c21d1 commit 9ee8e38

File tree

4 files changed

+35
-38
lines changed

4 files changed

+35
-38
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8817,12 +8817,24 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
88178817
// Only handle constant strides for now.
88188818
if (!ScevStride)
88198819
continue;
8820-
Constant *CI = ConstantInt::get(Stride->getType(), ScevStride->getAPInt());
88218820

8822-
auto *ConstVPV = Plan->getOrAddLiveIn(CI);
8823-
// The versioned value may not be used in the loop directly, so just add a
8824-
// new live-in in those cases.
8825-
Plan->getOrAddLiveIn(StrideV)->replaceAllUsesWith(ConstVPV);
8821+
auto *CI = Plan->getOrAddLiveIn(
8822+
ConstantInt::get(Stride->getType(), ScevStride->getAPInt()));
8823+
if (VPValue *StrideVPV = Plan->getLiveIn(StrideV))
8824+
StrideVPV->replaceAllUsesWith(CI);
8825+
8826+
// The versioned value may not be used in the loop directly but through a
8827+
// sext/zext. Add new live-ins in those cases.
8828+
for (Value *U : StrideV->users()) {
8829+
if (!isa<SExtInst, ZExtInst>(U))
8830+
continue;
8831+
VPValue *StrideVPV = Plan->getLiveIn(U);
8832+
if (!StrideVPV)
8833+
continue;
8834+
VPValue *CI = Plan->getOrAddLiveIn(ConstantInt::get(
8835+
U->getType(), ScevStride->getAPInt().getSExtValue()));
8836+
StrideVPV->replaceAllUsesWith(CI);
8837+
}
88268838
}
88278839

88288840
VPlanTransforms::dropPoisonGeneratingRecipes(*Plan, [this](BasicBlock *BB) {

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3215,6 +3215,9 @@ class VPlan {
32153215
return Value2VPValue[V];
32163216
}
32173217

3218+
/// Return the live-in VPValue for \p V if there is one, or nullptr otherwise.
3219+
VPValue *getLiveIn(Value *V) const { return Value2VPValue.lookup(V); }
3220+
32183221
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
32193222
/// Print the live-ins of this VPlan to \p O.
32203223
void printLiveIns(raw_ostream &O) const;

llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1328,13 +1328,11 @@ define void @unknown_inner_stride(ptr nocapture noundef %dst, ptr nocapture noun
13281328
; CHECK: vector.body:
13291329
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
13301330
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 0
1331-
; CHECK-NEXT: [[TMP15:%.*]] = mul nsw i64 [[TMP14]], [[TMP0]]
1332-
; CHECK-NEXT: [[TMP16:%.*]] = add nsw i64 [[TMP15]], [[TMP11]]
1331+
; CHECK-NEXT: [[TMP16:%.*]] = add nsw i64 [[TMP14]], [[TMP11]]
13331332
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP16]]
13341333
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
13351334
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP18]], align 4, !alias.scope [[META60:![0-9]+]]
1336-
; CHECK-NEXT: [[TMP19:%.*]] = mul nsw i64 [[TMP14]], [[TMP1]]
1337-
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[TMP19]], [[TMP12]]
1335+
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[TMP14]], [[TMP12]]
13381336
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP20]]
13391337
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[TMP21]], i32 0
13401338
; CHECK-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i32>, ptr [[TMP22]], align 4, !alias.scope [[META63:![0-9]+]], !noalias [[META60]]

llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll

Lines changed: 13 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ define void @test_versioned_with_sext_use(i32 %offset, ptr %dst) {
3434
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
3535
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
3636
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 8
37-
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP3]], [[OFFSET_EXT]]
37+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP3]], 1
3838
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3939
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
4040
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -110,7 +110,7 @@ define void @test_versioned_with_zext_use(i32 %offset, ptr %dst) {
110110
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
111111
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
112112
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP5]], align 8
113-
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP3]], [[OFFSET_EXT]]
113+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP3]], 1
114114
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
115115
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
116116
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
@@ -165,38 +165,23 @@ define void @versioned_sext_use_in_gep(i32 %scale, ptr %dst, i64 %scale.2) {
165165
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[SCALE]], 1
166166
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
167167
; CHECK: vector.ph:
168-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[SCALE_EXT]], i64 0
169-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
170-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[SCALE_2]], i64 0
171-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer
172168
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
173169
; CHECK: vector.body:
174170
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
175-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
176-
; CHECK-NEXT: [[TMP0:%.*]] = mul <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
177-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[TMP0]], i32 0
178-
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP1]]
179-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP0]], i32 1
180-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
181-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[TMP0]], i32 2
182-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
183-
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP0]], i32 3
184-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
185-
; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
186-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP9]], i32 0
171+
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0
172+
; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1
173+
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 2
174+
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 3
187175
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP10]]
188-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP9]], i32 1
189176
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP12]]
190-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP9]], i32 2
191177
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
192-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP9]], i32 3
193178
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP16]]
194-
; CHECK-NEXT: store ptr [[TMP11]], ptr [[TMP2]], align 8
195-
; CHECK-NEXT: store ptr [[TMP13]], ptr [[TMP4]], align 8
196-
; CHECK-NEXT: store ptr [[TMP15]], ptr [[TMP6]], align 8
197-
; CHECK-NEXT: store ptr [[TMP17]], ptr [[TMP8]], align 8
179+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[SCALE_2]]
180+
; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP11]], align 8
181+
; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP13]], align 8
182+
; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP15]], align 8
183+
; CHECK-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8
198184
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
199-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
200185
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
201186
; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
202187
; CHECK: middle.block:
@@ -282,7 +267,7 @@ define void @test_versioned_with_different_uses(i32 %offset, ptr noalias %dst.1,
282267
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[DST_2]], i64 [[TMP3]]
283268
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0
284269
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP13]], align 8
285-
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP3]], [[OFFSET_EXT]]
270+
; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP3]], 1
286271
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
287272
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
288273
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -376,8 +361,7 @@ define void @test_versioned_with_non_ex_use(i32 %offset, ptr noalias %dst.1, ptr
376361
; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 8
377362
; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 8
378363
; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 8
379-
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP9]], [[OFFSET_EXT]]
380-
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST_2]], i64 [[TMP19]]
364+
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST_2]], i64 [[TMP9]]
381365
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP20]], i32 0
382366
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP21]], align 8
383367
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

0 commit comments

Comments
 (0)