@@ -519,14 +519,14 @@ class InnerLoopVectorizer {
519
519
// / the induction resume value, and the value for the bypass block, if needed.
520
520
// / \p Step is the SCEV-expanded induction step to use. In cases where the
521
521
// / loop skeleton is more complicated (i.e., epilogue vectorization) and the
522
- // / resume values can come from an additional bypass block, the \p
523
- // / AdditionalBypass pair provides this additional bypass block along with the
524
- // / resume value coming from it .
525
- void createInductionResumeVPValue (
526
- VPIRInstruction *InductionPhiIRI, const InductionDescriptor &ID,
527
- Value *Step, ArrayRef<BasicBlock *> BypassBlocks,
528
- VPBuilder &ScalarPHBuilder,
529
- std::pair<BasicBlock *, Value *> AdditionalBypass = { nullptr , nullptr } );
522
+ // / resume values can come from an additional bypass block, \p
523
+ // / AdditionalBypassValue provides the end value on the edge from bypass to
524
+ // / this loop .
525
+ void createInductionResumeVPValue (VPIRInstruction *InductionPhiIRI,
526
+ const InductionDescriptor &ID, Value *Step ,
527
+ ArrayRef<BasicBlock *> BypassBlocks,
528
+ VPBuilder &ScalarPHBuilder,
529
+ Value *AdditionalBypassValue = nullptr );
530
530
531
531
// / Returns the original loop trip count, i.e. the value currently held in the
// / TripCount member. Does not modify the vectorizer state (const accessor).
532
532
Value *getTripCount () const { return TripCount; }
@@ -539,12 +539,14 @@ class InnerLoopVectorizer {
539
539
// / Retrieve the bypass value associated with an original induction header
540
540
// / phi. The lookup goes through .at(), so \p OrigPhi is expected to already
// / have an entry in the map; callers must not query phis for which no
// / additional bypass value was stored.
541
541
Value *getInductionAdditionalBypassValue (PHINode *OrigPhi) const {
542
- return Induction2AdditionalBypass .at (OrigPhi). second ;
542
+ return Induction2AdditionalBypassValue .at (OrigPhi);
543
543
}
544
544
545
545
// / Return the additional bypass block. Must only be called once
// / AdditionalBypassBlock has been set (it is initialized to nullptr); this
// / precondition is checked by an assertion.
546
- BasicBlock *getInductionAdditionalBypassBlock () const {
547
- return Induction2AdditionalBypass.begin ()->second .first ;
546
+ BasicBlock *getAdditionalBypassBlock () const {
547
+ assert (AdditionalBypassBlock &&
548
+ " Trying to access AdditionalBypassBlock but it has not been set" );
549
+ return AdditionalBypassBlock;
548
550
}
549
551
550
552
protected:
@@ -584,11 +586,10 @@ class InnerLoopVectorizer {
584
586
// / in the scalar epilogue, from where the vectorized loop left off.
585
587
// / In cases where the loop skeleton is more complicated (e.g. epilogue
586
588
// / vectorization) and the resume values can come from an additional bypass
587
- // / block, the \p AdditionalBypass pair provides information about the bypass
588
- // / block and the end value on the edge from bypass to this loop.
589
- void createInductionResumeVPValues (
590
- const SCEV2ValueTy &ExpandedSCEVs,
591
- std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr , nullptr });
589
+ // / block, the \p AdditionalBypassValue provides the end value on the edge
590
+ // / from bypass to this loop.
591
+ void createInductionResumeVPValues (const SCEV2ValueTy &ExpandedSCEVs,
592
+ Value *AdditionalBypassValue = nullptr );
592
593
593
594
// / Allow subclasses to override and print debug traces before/after vplan
594
595
// / execution, when trace information is requested.
@@ -678,11 +679,15 @@ class InnerLoopVectorizer {
678
679
// / for cleaning the checks, if vectorization turns out unprofitable.
679
680
GeneratedRTChecks &RTChecks;
680
681
681
- // / Mapping of induction phis to their bypass values and bypass blocks. They
682
+ // / The additional bypass block which conditionally skips over the epilogue
683
+ // / loop after executing the main loop. Needed to resume inductions and
684
+ // / reductions during epilogue vectorization.
685
+ BasicBlock *AdditionalBypassBlock = nullptr ;
686
+
687
+ // / Mapping of induction phis to their additional bypass values. They
682
688
// / need to be added as operands to phi nodes in the scalar loop preheader
683
689
// / after the epilogue skeleton has been created.
684
- DenseMap<PHINode *, std::pair<BasicBlock *, Value *>>
685
- Induction2AdditionalBypass;
690
+ DenseMap<PHINode *, Value *> Induction2AdditionalBypassValue;
686
691
687
692
VPlan &Plan;
688
693
};
@@ -2603,14 +2608,14 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
2603
2608
void InnerLoopVectorizer::createInductionResumeVPValue (
2604
2609
VPIRInstruction *InductionPhiRI, const InductionDescriptor &II, Value *Step,
2605
2610
ArrayRef<BasicBlock *> BypassBlocks, VPBuilder &ScalarPHBuilder,
2606
- std::pair<BasicBlock *, Value *> AdditionalBypass ) {
2611
+ Value *AdditionalBypassValue ) {
2607
2612
auto *OrigPhi = cast<PHINode>(&InductionPhiRI->getInstruction ());
2608
2613
Value *VectorTripCount = getOrCreateVectorTripCount (LoopVectorPreHeader);
2609
2614
assert (VectorTripCount && " Expected valid arguments" );
2610
2615
2611
2616
Instruction *OldInduction = Legal->getPrimaryInduction ();
2612
2617
Value *EndValue = nullptr ;
2613
- Value *EndValueFromAdditionalBypass = AdditionalBypass. second ;
2618
+ Value *EndValueFromAdditionalBypass = AdditionalBypassValue ;
2614
2619
if (OrigPhi == OldInduction) {
2615
2620
// We know what the end value is.
2616
2621
EndValue = VectorTripCount;
@@ -2626,11 +2631,11 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
2626
2631
EndValue->setName (" ind.end" );
2627
2632
2628
2633
// Compute the end value for the additional bypass (if applicable).
2629
- if (AdditionalBypass. first ) {
2630
- B.SetInsertPoint (AdditionalBypass. first ,
2631
- AdditionalBypass. first ->getFirstInsertionPt ());
2634
+ if (AdditionalBypassValue ) {
2635
+ B.SetInsertPoint (getAdditionalBypassBlock () ,
2636
+ getAdditionalBypassBlock () ->getFirstInsertionPt ());
2632
2637
EndValueFromAdditionalBypass =
2633
- emitTransformedIndex (B, AdditionalBypass. second , II.getStartValue (),
2638
+ emitTransformedIndex (B, AdditionalBypassValue , II.getStartValue (),
2634
2639
Step, II.getKind (), II.getInductionBinOp ());
2635
2640
EndValueFromAdditionalBypass->setName (" ind.end" );
2636
2641
}
@@ -2644,14 +2649,13 @@ void InnerLoopVectorizer::createInductionResumeVPValue(
2644
2649
" InductionPhiRI should not have any operands" );
2645
2650
InductionPhiRI->addOperand (ResumePhiRecipe);
2646
2651
2647
- if (AdditionalBypass. first ) {
2652
+ if (AdditionalBypassValue ) {
2648
2653
// Store the bypass value here, as it needs to be added as operand to its
2649
2654
// scalar preheader phi node after the epilogue skeleton has been created.
2650
2655
// TODO: Directly add as extra operand to the VPResumePHI recipe.
2651
- assert (!Induction2AdditionalBypass .contains (OrigPhi) &&
2656
+ assert (!Induction2AdditionalBypassValue .contains (OrigPhi) &&
2652
2657
" entry for OrigPhi already exits" );
2653
- Induction2AdditionalBypass[OrigPhi] = {AdditionalBypass.first ,
2654
- EndValueFromAdditionalBypass};
2658
+ Induction2AdditionalBypassValue[OrigPhi] = EndValueFromAdditionalBypass;
2655
2659
}
2656
2660
}
2657
2661
@@ -2670,19 +2674,13 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
2670
2674
}
2671
2675
2672
2676
void InnerLoopVectorizer::createInductionResumeVPValues (
2673
- const SCEV2ValueTy &ExpandedSCEVs,
2674
- std::pair<BasicBlock *, Value *> AdditionalBypass) {
2675
- assert (((AdditionalBypass.first && AdditionalBypass.second ) ||
2676
- (!AdditionalBypass.first && !AdditionalBypass.second )) &&
2677
- " Inconsistent information about additional bypass." );
2677
+ const SCEV2ValueTy &ExpandedSCEVs, Value *AdditionalBypassValue) {
2678
2678
// We are going to resume the execution of the scalar loop.
2679
2679
// Go over all of the induction variable PHIs of the scalar loop header and
2680
2680
// fix their starting values, which depend on the counter of the last
2681
- // iteration of the vectorized loop. The starting values of PHI nodes depend
2682
- // on the counter of the last iteration in the vectorized loop. If we come
2683
- // from one of the LoopBypassBlocks then we need to start from the original
2684
- // start value. If we come from the AdditionalBypass then we need to start
2685
- // from its value.
2681
+ // iteration of the vectorized loop. If we come from one of the
2682
+ // LoopBypassBlocks then we need to start from the original start value. If we
2683
+ // come from the additional bypass block then we need to start from its value.
2686
2684
VPBasicBlock *ScalarPHVPBB = Plan.getScalarPreheader ();
2687
2685
VPBuilder ScalarPHBuilder (ScalarPHVPBB, ScalarPHVPBB->begin ());
2688
2686
for (VPRecipeBase &R : *Plan.getScalarHeader ()) {
@@ -2695,7 +2693,7 @@ void InnerLoopVectorizer::createInductionResumeVPValues(
2695
2693
const InductionDescriptor &II = Legal->getInductionVars ().find (Phi)->second ;
2696
2694
createInductionResumeVPValue (PhiR, II, getExpandedStep (II, ExpandedSCEVs),
2697
2695
LoopBypassBlocks, ScalarPHBuilder,
2698
- AdditionalBypass );
2696
+ AdditionalBypassValue );
2699
2697
}
2700
2698
}
2701
2699
@@ -7744,7 +7742,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
7744
7742
// 2.5 When vectorizing the epilogue, fix reduction and induction resume
7745
7743
// values from the additional bypass block.
7746
7744
if (VectorizingEpilogue) {
7747
- BasicBlock *BypassBlock = ILV.getInductionAdditionalBypassBlock ();
7745
+ BasicBlock *BypassBlock = ILV.getAdditionalBypassBlock ();
7748
7746
for (VPRecipeBase &R : *ExitVPBB) {
7749
7747
fixReductionScalarResumeWhenVectorizingEpilog (
7750
7748
&R, State, State.CFG .VPBB2IRBB [ExitVPBB], BypassBlock);
@@ -7941,6 +7939,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
7941
7939
nullptr , " vec.epilog.iter.check" , true );
7942
7940
emitMinimumVectorEpilogueIterCountCheck (LoopScalarPreHeader,
7943
7941
VecEpilogueIterationCountCheck);
7942
+ AdditionalBypassBlock = VecEpilogueIterationCountCheck;
7944
7943
7945
7944
// Adjust the control flow taking the state info from the main loop
7946
7945
// vectorization into account.
@@ -8017,12 +8016,13 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
8017
8016
// preheader.
8018
8017
PHINode *EPResumeVal = nullptr ;
8019
8018
Type *IdxTy = Legal->getWidestInductionType ();
8019
+ Value *TC = EPI.VectorTripCount ;
8020
+ Constant *Init = ConstantInt::get (IdxTy, 0 );
8021
+
8020
8022
for (PHINode &P : LoopVectorPreHeader->phis ()) {
8021
8023
if (P.getType () == IdxTy &&
8022
- P.getIncomingValueForBlock (VecEpilogueIterationCountCheck) ==
8023
- EPI.VectorTripCount &&
8024
- P.getIncomingValueForBlock (EPI.MainLoopIterationCountCheck ) ==
8025
- ConstantInt::get (IdxTy, 0 )) {
8024
+ P.getIncomingValueForBlock (VecEpilogueIterationCountCheck) == TC &&
8025
+ P.getIncomingValueForBlock (EPI.MainLoopIterationCountCheck ) == Init) {
8026
8026
EPResumeVal = &P;
8027
8027
EPResumeVal->setName (" vec.epilog.resume.val" );
8028
8028
break ;
@@ -8031,22 +8031,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
8031
8031
if (!EPResumeVal) {
8032
8032
EPResumeVal = PHINode::Create (IdxTy, 2 , " vec.epilog.resume.val" );
8033
8033
EPResumeVal->insertBefore (LoopVectorPreHeader->getFirstNonPHIIt ());
8034
- EPResumeVal->addIncoming (EPI.VectorTripCount ,
8035
- VecEpilogueIterationCountCheck);
8036
- EPResumeVal->addIncoming (ConstantInt::get (IdxTy, 0 ),
8037
- EPI.MainLoopIterationCountCheck );
8034
+ EPResumeVal->addIncoming (TC, VecEpilogueIterationCountCheck);
8035
+ EPResumeVal->addIncoming (Init, EPI.MainLoopIterationCountCheck );
8038
8036
}
8039
8037
8040
8038
// Generate induction resume values. These variables save the new starting
8041
8039
// indexes for the scalar loop. They are used to test if there are any tail
8042
8040
// iterations left once the vector loop has completed.
8043
8041
// Note that when the vectorized epilogue is skipped due to iteration count
8044
8042
// check, then the resume value for the induction variable comes from
8045
- // the trip count of the main vector loop, hence passing the AdditionalBypass
8046
- // argument.
8047
- createInductionResumeVPValues (ExpandedSCEVs,
8048
- {VecEpilogueIterationCountCheck,
8049
- EPI.VectorTripCount } /* AdditionalBypass */ );
8043
+ // the trip count of the main vector loop, hence passing the
8044
+ // AdditionalBypassValue argument.
8045
+ createInductionResumeVPValues (
8046
+ ExpandedSCEVs, EPI.VectorTripCount /* AdditionalBypassValue */ );
8050
8047
8051
8048
return {LoopVectorPreHeader, EPResumeVal};
8052
8049
}
@@ -10358,6 +10355,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10358
10355
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
10359
10356
IndPhi = WidenInd->getPHINode ();
10360
10357
}
10358
+ // Hook up to the PHINode generated by a ResumePhi recipe of the main
10359
+ // loop VPlan, which feeds the scalar loop.
10361
10360
ResumeV = IndPhi->getIncomingValueForBlock (L->getLoopPreheader ());
10362
10361
}
10363
10362
assert (ResumeV && " Must have a resume value" );
0 commit comments