Skip to content

Commit f9369cc

Browse files
committed
[VPlan] Make sure last IV increment value is available if needed.
Legalize extract-from-ends using uniform VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so the correct end value is available. Fixes #121745.
1 parent 3874c64 commit f9369cc

File tree

2 files changed

+49
-19
lines changed

2 files changed

+49
-19
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 46 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -562,21 +562,63 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
562562
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step);
563563
}
564564

565+
/// Collect the users of \p V together with the users of every value defined
/// by those users, transitively, and return them as a vector.
///
/// Header phi recipes that are reached are kept in the result, but the users
/// of the values they define are not followed.
static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
566+
// Seed the worklist with the direct users of V. The same container is both
// the worklist and the result.
SetVector<VPUser *> Users(V->user_begin(), V->user_end());
567+
// Index-based loop with the size re-read on every iteration: entries
// inserted below are appended to Users and visited by later iterations.
for (unsigned I = 0; I != Users.size(); ++I) {
568+
// Every collected user is treated as a recipe here.
// NOTE(review): cast<> presumably asserts if a user is not a VPRecipeBase —
// confirm that all users reachable from V are recipes.
VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
569+
// Do not continue the traversal through header phis; Cur itself stays in
// Users.
if (isa<VPHeaderPHIRecipe>(Cur))
570+
continue;
571+
// Note: this loop variable V shadows the function parameter V.
for (VPValue *V : Cur->definedValues())
572+
Users.insert(V->user_begin(), V->user_end());
573+
}
574+
// Hand the collected users back as a plain vector.
return Users.takeVector();
575+
}
576+
565577
/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
566578
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
567579
/// VPWidenPointerInductionRecipe will generate vectors only. If some users
568580
/// require vectors while others require scalars, the scalar uses need to extract
569581
/// the scalars from the generated vectors (Note that this is different to how
570-
/// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
571-
/// if any of its users needs scalar values, by providing them scalar steps
572-
/// built on the canonical scalar IV and update the original IV's users. This is
573-
/// an optional optimization to reduce the needs of vector extracts.
582+
/// int/fp inductions are handled). Legalize extract-from-ends using uniform
583+
/// VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so
584+
/// the correct end value is available. Also optimize
585+
/// VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by
586+
/// providing them scalar steps built on the canonical scalar IV and update the
587+
/// original IV's users. This is an optional optimization to reduce the needs of
588+
/// vector extracts.
574589
static void legalizeAndOptimizeInductions(VPlan &Plan) {
590+
using namespace llvm::VPlanPatternMatch;
575591
SmallVector<VPRecipeBase *> ToRemove;
576592
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
577593
bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
578594
VPBuilder Builder(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
579595
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
596+
auto *PhiR = dyn_cast<VPHeaderPHIRecipe>(&Phi);
597+
if (!PhiR)
598+
break;
599+
600+
// Check if any uniform VPReplicateRecipes using the phi recipe are used by
601+
// ExtractFromEnd. Those must be replaced by a regular VPReplicateRecipe to
602+
// ensure the final value is available.
603+
// TODO: Remove once uniformity analysis is done on VPlan.
604+
for (VPUser *U : collectUsersRecursively(PhiR)) {
605+
auto *ExitIRI = dyn_cast<VPIRInstruction>(U);
606+
VPValue *Op;
607+
if (!ExitIRI || !match(ExitIRI->getOperand(0),
608+
m_VPInstruction<VPInstruction::ExtractFromEnd>(
609+
m_VPValue(Op), m_VPValue())))
610+
continue;
611+
auto *RepR = dyn_cast<VPReplicateRecipe>(Op);
612+
if (!RepR || !RepR->isUniform())
613+
continue;
614+
assert(!RepR->isPredicated() && "RepR must not be predicated");
615+
Instruction *I = RepR->getUnderlyingInstr();
616+
auto *Clone =
617+
new VPReplicateRecipe(I, RepR->operands(), /*IsUniform*/ false);
618+
Clone->insertAfter(RepR);
619+
RepR->replaceAllUsesWith(Clone);
620+
}
621+
580622
// Replace wide pointer inductions which have only their scalars used by
581623
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
582624
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
@@ -1086,18 +1128,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
10861128
return true;
10871129
}
10881130

1089-
static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
1090-
SetVector<VPUser *> Users(V->user_begin(), V->user_end());
1091-
for (unsigned I = 0; I != Users.size(); ++I) {
1092-
VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
1093-
if (isa<VPHeaderPHIRecipe>(Cur))
1094-
continue;
1095-
for (VPValue *V : Cur->definedValues())
1096-
Users.insert(V->user_begin(), V->user_end());
1097-
}
1098-
return Users.takeVector();
1099-
}
1100-
11011131
void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
11021132
for (VPRecipeBase &R :
11031133
Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {

llvm/test/Transforms/LoopVectorize/iv_outside_user.ll

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -981,7 +981,6 @@ exit:
981981
}
982982

983983
; Test case for https://github.com/llvm/llvm-project/issues/121745.
984-
; FIXME: At the moment an incorrect exit value is used for %iv.next.
985984
define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
986985
; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
987986
; VEC-SAME: ptr [[DST:%.*]]) {
@@ -994,10 +993,12 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
994993
; VEC: [[VECTOR_BODY]]:
995994
; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
996995
; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
996+
; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1
997997
; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
998998
; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 0
999999
; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP2]], align 2
10001000
; VEC-NEXT: [[TMP4:%.*]] = add i32 [[STEP_2]], [[TMP0]]
1001+
; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]]
10011002
; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
10021003
; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
10031004
; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
@@ -1014,7 +1015,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) {
10141015
; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8
10151016
; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}}
10161017
; VEC: [[E_EXIT]]:
1017-
; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP4]], %[[MIDDLE_BLOCK]] ]
1018+
; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ]
10181019
; VEC-NEXT: ret i32 [[RES]]
10191020
;
10201021
; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification(
@@ -1071,7 +1072,6 @@ e.exit:
10711072
ret i32 %res
10721073
}
10731074

1074-
; FIXME: At the moment an incorrect exit value is used for %iv.next.
10751075
define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
10761076
; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
10771077
; VEC-SAME: ptr [[DST:%.*]]) {

0 commit comments

Comments
 (0)