48
48
#include " llvm/Analysis/MustExecute.h"
49
49
#include " llvm/Analysis/PostDominators.h"
50
50
#include " llvm/Analysis/TargetLibraryInfo.h"
51
+ #include " llvm/Analysis/TargetTransformInfo.h"
51
52
#include " llvm/Analysis/ValueTracking.h"
52
53
#include " llvm/IR/Argument.h"
53
54
#include " llvm/IR/BasicBlock.h"
@@ -560,7 +561,8 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
560
561
561
562
static bool tryToShorten (Instruction *DeadI, int64_t &DeadStart,
562
563
uint64_t &DeadSize, int64_t KillingStart,
563
- uint64_t KillingSize, bool IsOverwriteEnd) {
564
+ uint64_t KillingSize, bool IsOverwriteEnd,
565
+ const TargetTransformInfo &TTI) {
564
566
auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
565
567
Align PrefAlign = DeadIntrinsic->getDestAlign ().valueOrOne ();
566
568
@@ -612,6 +614,24 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
612
614
assert (DeadSize > ToRemoveSize && " Can't remove more than original size" );
613
615
614
616
uint64_t NewSize = DeadSize - ToRemoveSize;
617
+
618
+ // Check that we aren't going to pessimize codegen by lowering the length. E.g.
619
+ // a memcpy(dst, src, 8) is more efficient than memcpy(dst, src, 7).
620
+ // These checks are relatively conservative. We bail out only if all hold:
621
+ // 1) We are removing less than 1 store (measured by target's load/store vector
622
+ // width).
623
+ // 2) We would need more loads/stores (assuming loads/stores occur per pow2 block)
624
+ // 3) We aren't bringing the size from above the inline thresh to at or below it
625
+ // 4) We are shrinking by less than half of the initial size.
626
+ uint64_t PrefVecWidth =
627
+ TTI.getLoadStoreVecRegBitWidth (DeadIntrinsic->getDestAddressSpace ()) / 8U ;
628
+ uint64_t InlineThresh = TTI.getMaxMemIntrinsicInlineSizeThreshold ();
629
+ if (ToRemoveSize < PrefVecWidth &&
630
+ popcount (DeadSize) < popcount (DeadSize - ToRemoveSize) &&
631
+ (DeadSize <= InlineThresh) == (DeadSize - ToRemoveSize <= InlineThresh) &&
632
+ ToRemoveSize < DeadSize / 2U )
633
+ return false ;
634
+
615
635
if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
616
636
// When shortening an atomic memory intrinsic, the newly shortened
617
637
// length must remain an integer multiple of the element size.
@@ -654,7 +674,8 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
654
674
}
655
675
656
676
static bool tryToShortenEnd (Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
657
- int64_t &DeadStart, uint64_t &DeadSize) {
677
+ int64_t &DeadStart, uint64_t &DeadSize,
678
+ const TargetTransformInfo &TTI) {
658
679
if (IntervalMap.empty () || !isShortenableAtTheEnd (DeadI))
659
680
return false ;
660
681
@@ -672,7 +693,7 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
672
693
// be non negative due to preceding checks.
673
694
KillingSize >= DeadSize - (uint64_t )(KillingStart - DeadStart)) {
674
695
if (tryToShorten (DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
675
- true )) {
696
+ true , TTI )) {
676
697
IntervalMap.erase (OII);
677
698
return true ;
678
699
}
@@ -682,7 +703,8 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
682
703
683
704
static bool tryToShortenBegin (Instruction *DeadI,
684
705
OverlapIntervalsTy &IntervalMap,
685
- int64_t &DeadStart, uint64_t &DeadSize) {
706
+ int64_t &DeadStart, uint64_t &DeadSize,
707
+ const TargetTransformInfo &TTI) {
686
708
if (IntervalMap.empty () || !isShortenableAtTheBeginning (DeadI))
687
709
return false ;
688
710
@@ -701,7 +723,7 @@ static bool tryToShortenBegin(Instruction *DeadI,
701
723
assert (KillingSize - (uint64_t )(DeadStart - KillingStart) < DeadSize &&
702
724
" Should have been handled as OW_Complete" );
703
725
if (tryToShorten (DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
704
- false )) {
726
+ false , TTI )) {
705
727
IntervalMap.erase (OII);
706
728
return true ;
707
729
}
@@ -824,6 +846,7 @@ struct DSEState {
824
846
DominatorTree &DT;
825
847
PostDominatorTree &PDT;
826
848
const TargetLibraryInfo &TLI;
849
+ const TargetTransformInfo &TTI;
827
850
const DataLayout &DL;
828
851
const LoopInfo &LI;
829
852
@@ -868,9 +891,9 @@ struct DSEState {
868
891
869
892
DSEState (Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
870
893
PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
871
- const LoopInfo &LI)
894
+ const TargetTransformInfo &TTI, const LoopInfo &LI)
872
895
: F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
873
- PDT (PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
896
+ PDT (PDT), TLI(TLI), TTI(TTI), DL(F.getDataLayout()), LI(LI) {
874
897
// Collect blocks with throwing instructions not modeled in MemorySSA and
875
898
// alloc-like objects.
876
899
unsigned PO = 0 ;
@@ -2066,10 +2089,10 @@ struct DSEState {
2066
2089
uint64_t DeadSize = Loc.Size .getValue ();
2067
2090
GetPointerBaseWithConstantOffset (Ptr , DeadStart, DL);
2068
2091
OverlapIntervalsTy &IntervalMap = OI.second ;
2069
- Changed |= tryToShortenEnd (DeadI, IntervalMap, DeadStart, DeadSize);
2092
+ Changed |= tryToShortenEnd (DeadI, IntervalMap, DeadStart, DeadSize, TTI );
2070
2093
if (IntervalMap.empty ())
2071
2094
continue ;
2072
- Changed |= tryToShortenBegin (DeadI, IntervalMap, DeadStart, DeadSize);
2095
+ Changed |= tryToShortenBegin (DeadI, IntervalMap, DeadStart, DeadSize, TTI );
2073
2096
}
2074
2097
return Changed;
2075
2098
}
@@ -2137,10 +2160,11 @@ struct DSEState {
2137
2160
static bool eliminateDeadStores (Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2138
2161
DominatorTree &DT, PostDominatorTree &PDT,
2139
2162
const TargetLibraryInfo &TLI,
2163
+ const TargetTransformInfo &TTI,
2140
2164
const LoopInfo &LI) {
2141
2165
bool MadeChange = false ;
2142
2166
2143
- DSEState State (F, AA, MSSA, DT, PDT, TLI, LI);
2167
+ DSEState State (F, AA, MSSA, DT, PDT, TLI, TTI, LI);
2144
2168
// For each store:
2145
2169
for (unsigned I = 0 ; I < State.MemDefs .size (); I++) {
2146
2170
MemoryDef *KillingDef = State.MemDefs [I];
@@ -2332,12 +2356,13 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
2332
2356
PreservedAnalyses DSEPass::run (Function &F, FunctionAnalysisManager &AM) {
2333
2357
AliasAnalysis &AA = AM.getResult <AAManager>(F);
2334
2358
const TargetLibraryInfo &TLI = AM.getResult <TargetLibraryAnalysis>(F);
2359
+ const TargetTransformInfo &TTI = AM.getResult <TargetIRAnalysis>(F);
2335
2360
DominatorTree &DT = AM.getResult <DominatorTreeAnalysis>(F);
2336
2361
MemorySSA &MSSA = AM.getResult <MemorySSAAnalysis>(F).getMSSA ();
2337
2362
PostDominatorTree &PDT = AM.getResult <PostDominatorTreeAnalysis>(F);
2338
2363
LoopInfo &LI = AM.getResult <LoopAnalysis>(F);
2339
2364
2340
- bool Changed = eliminateDeadStores (F, AA, MSSA, DT, PDT, TLI, LI);
2365
+ bool Changed = eliminateDeadStores (F, AA, MSSA, DT, PDT, TLI, TTI, LI);
2341
2366
2342
2367
#ifdef LLVM_ENABLE_STATS
2343
2368
if (AreStatisticsEnabled ())
0 commit comments