@@ -562,21 +562,63 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
562
562
return Builder.createScalarIVSteps (InductionOpcode, FPBinOp, BaseIV, Step);
563
563
}
564
564
565
+ static SmallVector<VPUser *> collectUsersRecursively (VPValue *V) {
566
+ SetVector<VPUser *> Users (V->user_begin (), V->user_end ());
567
+ for (unsigned I = 0 ; I != Users.size (); ++I) {
568
+ VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
569
+ if (isa<VPHeaderPHIRecipe>(Cur))
570
+ continue ;
571
+ for (VPValue *V : Cur->definedValues ())
572
+ Users.insert (V->user_begin (), V->user_end ());
573
+ }
574
+ return Users.takeVector ();
575
+ }
576
+
565
577
// / Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
566
578
// / (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
567
579
// / VPWidenPointerInductionRecipe will generate vectors only. If some users
568
580
// / require vectors while others require scalars, the scalar uses need to extract
569
581
// / the scalars from the generated vectors (Note that this is different to how
570
- // / int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
571
- // / if any of its users needs scalar values, by providing them scalar steps
572
- // / built on the canonical scalar IV and update the original IV's users. This is
573
- // / an optional optimization to reduce the needs of vector extracts.
582
+ // / int/fp inductions are handled). Legalize extract-from-ends using uniform
583
+ // / VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so
584
+ // / the correct end value is available. Also optimize
585
+ // / VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by
586
+ // / providing them scalar steps built on the canonical scalar IV and update the
587
+ // / original IV's users. This is an optional optimization to reduce the need for
588
+ // / vector extracts.
574
589
static void legalizeAndOptimizeInductions (VPlan &Plan) {
590
+ using namespace llvm ::VPlanPatternMatch;
575
591
SmallVector<VPRecipeBase *> ToRemove;
576
592
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
577
593
bool HasOnlyVectorVFs = !Plan.hasVF (ElementCount::getFixed (1 ));
578
594
VPBuilder Builder (HeaderVPBB, HeaderVPBB->getFirstNonPhi ());
579
595
for (VPRecipeBase &Phi : HeaderVPBB->phis ()) {
596
+ auto *PhiR = dyn_cast<VPHeaderPHIRecipe>(&Phi);
597
+ if (!PhiR)
598
+ break ;
599
+
600
+ // Check if any uniform VPReplicateRecipes using the phi recipe are used by
601
+ // ExtractFromEnd. Those must be replaced by a regular VPReplicateRecipe to
602
+ // ensure the final value is available.
603
+ // TODO: Remove once uniformity analysis is done on VPlan.
604
+ for (VPUser *U : collectUsersRecursively (PhiR)) {
605
+ auto *ExitIRI = dyn_cast<VPIRInstruction>(U);
606
+ VPValue *Op;
607
+ if (!ExitIRI || !match (ExitIRI->getOperand (0 ),
608
+ m_VPInstruction<VPInstruction::ExtractFromEnd>(
609
+ m_VPValue (Op), m_VPValue ())))
610
+ continue ;
611
+ auto *RepR = dyn_cast<VPReplicateRecipe>(Op);
612
+ if (!RepR || !RepR->isUniform ())
613
+ continue ;
614
+ assert (!RepR->isPredicated () && " RepR must not be predicated" );
615
+ Instruction *I = RepR->getUnderlyingInstr ();
616
+ auto *Clone =
617
+ new VPReplicateRecipe (I, RepR->operands (), /* IsUniform*/ false );
618
+ Clone->insertAfter (RepR);
619
+ RepR->replaceAllUsesWith (Clone);
620
+ }
621
+
580
622
// Replace wide pointer inductions which have only their scalars used by
581
623
// PtrAdd(IndStart, ScalarIVSteps (0, Step)).
582
624
if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
@@ -1086,18 +1128,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
1086
1128
return true ;
1087
1129
}
1088
1130
1089
// Removed side of this hunk: the helper below is deleted from this position;
// an identical definition is added earlier in the file by the same patch.
//
// Collects the users of V plus, transitively, the users of every value
// defined by those users. Header phi recipes that are reached stay in the
// result but the values they define are not followed.
- static SmallVector<VPUser *> collectUsersRecursively (VPValue *V) {
1090
// Seed the set with the direct users of V.
- SetVector<VPUser *> Users (V->user_begin (), V->user_end ());
1091
// Users grows while it is being indexed, so the size is re-read each iteration.
- for (unsigned I = 0 ; I != Users.size (); ++I) {
1092
- VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
1093
// Stop at header phis: keep them as users, but do not walk past them.
- if (isa<VPHeaderPHIRecipe>(Cur))
1094
- continue ;
1095
// NOTE: this inner V shadows the function parameter V.
- for (VPValue *V : Cur->definedValues ())
1096
- Users.insert (V->user_begin (), V->user_end ());
1097
- }
1098
- return Users.takeVector ();
1099
- }
1100
-
1101
1131
void VPlanTransforms::clearReductionWrapFlags (VPlan &Plan) {
1102
1132
for (VPRecipeBase &R :
1103
1133
Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
0 commit comments