Commit 3023278
[DSE] Don't shrink memory intrinsic if it's liable to make codegen worse

Currently, for the following snippet: `memcpy(dst, src, 8); dst[7] = 0`, DSE will transform it to: `memcpy(dst, src, 7); dst[7] = 0`. However, it is typically better to emit the full 8-byte `memcpy` followed by an overwrite of byte 7, rather than truncating the `memcpy` to 7 bytes, because an 8-byte `memcpy` can be lowered to a single load/store pair, whereas a 7-byte `memcpy` needs two. The same applies to `memset`. This patch changes the behavior to check whether shrinking the `memcpy` is liable to pessimize codegen, which avoids the above and other similar cases.
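
To make the codegen trade-off concrete, here is a minimal C++ sketch of the two shapes being compared (illustrative only; the function names are not part of the patch). On a typical 64-bit target the second form lowers to a single 8-byte load/store pair plus one byte store, while the first needs multiple narrower accesses (e.g. 4 + 2 + 1, or two overlapping 4-byte copies) to cover 7 bytes:

#include <cstdint>
#include <cstring>

// Shape DSE used to emit: the shortened 7-byte memcpy typically lowers to
// several narrower loads/stores.
void copy_then_clear_shrunk(uint8_t *dst, const uint8_t *src) {
  std::memcpy(dst, src, 7);
  dst[7] = 0;
}

// Shape this patch prefers: keep the full 8-byte memcpy (one load/store pair)
// and let the trailing byte store overwrite dst[7].
void copy_then_clear_full(uint8_t *dst, const uint8_t *src) {
  std::memcpy(dst, src, 8);
  dst[7] = 0;
}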
1 parent 216f7de commit 3023278

4 files changed: +61 −51 lines

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 36 additions & 11 deletions
@@ -48,6 +48,7 @@
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/BasicBlock.h"
@@ -560,7 +561,8 @@ static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
 
 static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
                          uint64_t &DeadSize, int64_t KillingStart,
-                         uint64_t KillingSize, bool IsOverwriteEnd) {
+                         uint64_t KillingSize, bool IsOverwriteEnd,
+                         const TargetTransformInfo &TTI) {
   auto *DeadIntrinsic = cast<AnyMemIntrinsic>(DeadI);
   Align PrefAlign = DeadIntrinsic->getDestAlign().valueOrOne();
 
@@ -612,6 +614,24 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
   assert(DeadSize > ToRemoveSize && "Can't remove more than original size");
 
   uint64_t NewSize = DeadSize - ToRemoveSize;
+
+  // Check that we aren't going to pessimize codegen by lowering the length. I.e
+  // a memcpy(dst, src, 8) is more efficient than memcpy(dst, src, 7).
+  // These checks are relatively conservative. We bail out if:
+  //  1) We are removing less than 1 store (measured by targets load/store Vec
+  //     width).
+  //  2) We are saving a load/store (assuming loads/stores occur per pow2 block)
+  //  3) We aren't preventing this from going below inline thresh
+  //  4) We are shrinking by less than half of the initial size.
+  uint64_t PrefVecWidth =
+      TTI.getLoadStoreVecRegBitWidth(DeadIntrinsic->getDestAddressSpace()) / 8U;
+  uint64_t InlineThresh = TTI.getMaxMemIntrinsicInlineSizeThreshold();
+  if (ToRemoveSize < PrefVecWidth &&
+      popcount(DeadSize) < popcount(DeadSize - ToRemoveSize) &&
+      (DeadSize <= InlineThresh) == (DeadSize - ToRemoveSize <= InlineThresh) &&
+      ToRemoveSize < DeadSize / 2U)
+    return false;
+
   if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(DeadI)) {
     // When shortening an atomic memory intrinsic, the newly shortened
     // length must remain an integer multiple of the element size.
@@ -654,7 +674,8 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
 }
 
 static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
-                            int64_t &DeadStart, uint64_t &DeadSize) {
+                            int64_t &DeadStart, uint64_t &DeadSize,
+                            const TargetTransformInfo &TTI) {
   if (IntervalMap.empty() || !isShortenableAtTheEnd(DeadI))
     return false;
 
@@ -672,7 +693,7 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
       // be non negative due to preceding checks.
       KillingSize >= DeadSize - (uint64_t)(KillingStart - DeadStart)) {
     if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
-                     true)) {
+                     true, TTI)) {
       IntervalMap.erase(OII);
       return true;
     }
@@ -682,7 +703,8 @@ static bool tryToShortenEnd(Instruction *DeadI, OverlapIntervalsTy &IntervalMap,
 
 static bool tryToShortenBegin(Instruction *DeadI,
                               OverlapIntervalsTy &IntervalMap,
-                              int64_t &DeadStart, uint64_t &DeadSize) {
+                              int64_t &DeadStart, uint64_t &DeadSize,
+                              const TargetTransformInfo &TTI) {
   if (IntervalMap.empty() || !isShortenableAtTheBeginning(DeadI))
     return false;
 
@@ -701,7 +723,7 @@ static bool tryToShortenBegin(Instruction *DeadI,
   assert(KillingSize - (uint64_t)(DeadStart - KillingStart) < DeadSize &&
          "Should have been handled as OW_Complete");
   if (tryToShorten(DeadI, DeadStart, DeadSize, KillingStart, KillingSize,
-                   false)) {
+                   false, TTI)) {
     IntervalMap.erase(OII);
     return true;
   }
@@ -824,6 +846,7 @@ struct DSEState {
   DominatorTree &DT;
   PostDominatorTree &PDT;
   const TargetLibraryInfo &TLI;
+  const TargetTransformInfo &TTI;
   const DataLayout &DL;
   const LoopInfo &LI;
 
@@ -868,9 +891,9 @@ struct DSEState {
 
   DSEState(Function &F, AliasAnalysis &AA, MemorySSA &MSSA, DominatorTree &DT,
            PostDominatorTree &PDT, const TargetLibraryInfo &TLI,
-           const LoopInfo &LI)
+           const TargetTransformInfo &TTI, const LoopInfo &LI)
       : F(F), AA(AA), EI(DT, &LI), BatchAA(AA, &EI), MSSA(MSSA), DT(DT),
-        PDT(PDT), TLI(TLI), DL(F.getDataLayout()), LI(LI) {
+        PDT(PDT), TLI(TLI), TTI(TTI), DL(F.getDataLayout()), LI(LI) {
     // Collect blocks with throwing instructions not modeled in MemorySSA and
     // alloc-like objects.
     unsigned PO = 0;
@@ -2066,10 +2089,10 @@ struct DSEState {
       uint64_t DeadSize = Loc.Size.getValue();
       GetPointerBaseWithConstantOffset(Ptr, DeadStart, DL);
       OverlapIntervalsTy &IntervalMap = OI.second;
-      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize);
+      Changed |= tryToShortenEnd(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
       if (IntervalMap.empty())
        continue;
-      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize);
+      Changed |= tryToShortenBegin(DeadI, IntervalMap, DeadStart, DeadSize, TTI);
     }
     return Changed;
   }
@@ -2137,10 +2160,11 @@ struct DSEState {
 static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
                                 DominatorTree &DT, PostDominatorTree &PDT,
                                 const TargetLibraryInfo &TLI,
+                                const TargetTransformInfo &TTI,
                                 const LoopInfo &LI) {
   bool MadeChange = false;
 
-  DSEState State(F, AA, MSSA, DT, PDT, TLI, LI);
+  DSEState State(F, AA, MSSA, DT, PDT, TLI, TTI, LI);
   // For each store:
   for (unsigned I = 0; I < State.MemDefs.size(); I++) {
     MemoryDef *KillingDef = State.MemDefs[I];
@@ -2332,12 +2356,13 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
 PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
   AliasAnalysis &AA = AM.getResult<AAManager>(F);
   const TargetLibraryInfo &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
   DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
   PostDominatorTree &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
 
-  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, LI);
+  bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, TLI, TTI, LI);
 
 #ifdef LLVM_ENABLE_STATS
   if (AreStatisticsEnabled())
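
The bail-out added to tryToShorten above can be read as a standalone predicate over the byte sizes involved. Below is a small, self-contained C++ sketch of that check, with the motivating 8-byte case worked through. It is not the patch's code: PrefVecWidth and InlineThresh are made-up stand-ins for the values DSE obtains from TTI (getLoadStoreVecRegBitWidth and getMaxMemIntrinsicInlineSizeThreshold).

#include <bit>
#include <cassert>
#include <cstdint>

// Assumed target properties for illustration; the real pass queries TTI.
constexpr uint64_t PrefVecWidth = 16; // preferred load/store width in bytes
constexpr uint64_t InlineThresh = 64; // largest size still expanded inline

// Returns true when shrinking the intrinsic from DeadSize to
// DeadSize - ToRemoveSize is liable to pessimize codegen, i.e. when
// tryToShorten should keep the original length.
bool shouldKeepOriginalSize(uint64_t DeadSize, uint64_t ToRemoveSize) {
  uint64_t NewSize = DeadSize - ToRemoveSize;
  return ToRemoveSize < PrefVecWidth &&                      // removes less than one full store
         std::popcount(DeadSize) < std::popcount(NewSize) && // new size splits into more pow2 blocks
         (DeadSize <= InlineThresh) == (NewSize <= InlineThresh) && // inline decision unchanged
         ToRemoveSize < DeadSize / 2;                        // shrinking by less than half
}

int main() {
  // memcpy(dst, src, 8); dst[7] = 0:
  // popcount(8) = 1 < popcount(7) = 3, so keep the full 8-byte copy.
  assert(shouldKeepOriginalSize(8, 1));
  // Removing 24 of 32 bytes saves at least one full store (24 >= PrefVecWidth),
  // so shrinking still goes ahead.
  assert(!shouldKeepOriginalSize(32, 24));
  return 0;
}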

llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll

Lines changed: 11 additions & 20 deletions
@@ -33,18 +33,13 @@ define dso_local void @_Z1fv() local_unnamed_addr !dbg !7 {
 ; CHECK-NEXT:    [[ARRAYIDX3_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 1, !dbg [[DBG35:![0-9]+]]
 ; CHECK-NEXT:    #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), [[META34]], ptr [[ARRAYIDX3_I]], !DIExpression(), [[META25]])
 ; CHECK-NEXT:    [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 0, !dbg [[DBG36:![0-9]+]]
-; CHECK-NEXT:    #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), [[META37:![0-9]+]], ptr poison, !DIExpression(), [[META25]])
-; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[DBG38:![0-9]+]]
-; CHECK-NEXT:    #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), [[META39:![0-9]+]], ptr poison, !DIExpression(), [[META25]])
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast ptr [[ARRAYIDX5_I]] to ptr, !dbg [[DBG40:![0-9]+]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i64 4, !dbg [[DBG41:![0-9]+]]
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 8, i1 false), !dbg [[DBG41]], !DIAssignID [[META34]]
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[META25]]
-; CHECK-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX7]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID42:![0-9]+]]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 0, !dbg [[META25]]
-; CHECK-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX]], align 4, !dbg [[META25]], !DIAssignID [[DIASSIGNID43:![0-9]+]]
-; CHECK-NEXT:    call void @_Z3escP1v(ptr nonnull [[G]]), !dbg [[DBG40]]
-; CHECK-NEXT:    ret void, !dbg [[DBG44:![0-9]+]]
+; CHECK-NEXT:    #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), [[META34]], ptr [[ARRAYIDX5_I]], !DIExpression(), [[META25]])
+; CHECK-NEXT:    [[ARRAYIDX7_I:%.*]] = getelementptr inbounds [[STRUCT_V]], ptr [[G]], i64 0, i32 0, i64 3, !dbg [[DBG37:![0-9]+]]
+; CHECK-NEXT:    #dbg_assign(float 0.000000e+00, [[META12]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), [[META34]], ptr [[ARRAYIDX7_I]], !DIExpression(), [[META25]])
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast ptr [[ARRAYIDX5_I]] to ptr, !dbg [[DBG38:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 16, i1 false), !dbg [[DBG39:![0-9]+]], !DIAssignID [[META34]]
+; CHECK-NEXT:    call void @_Z3escP1v(ptr nonnull [[G]]), !dbg [[DBG38]]
+; CHECK-NEXT:    ret void, !dbg [[DBG40:![0-9]+]]
 ;
 entry:
   %g = alloca %struct.v, align 4, !DIAssignID !23
@@ -174,12 +169,8 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 ; CHECK: [[META34]] = distinct !DIAssignID()
 ; CHECK: [[DBG35]] = !DILocation(line: 5, column: 12, scope: [[META27]], inlinedAt: [[META33]])
 ; CHECK: [[DBG36]] = !DILocation(line: 5, column: 5, scope: [[META27]], inlinedAt: [[META33]])
-; CHECK: [[META37]] = distinct !DIAssignID()
-; CHECK: [[DBG38]] = !DILocation(line: 6, column: 5, scope: [[META27]], inlinedAt: [[META33]])
-; CHECK: [[META39]] = distinct !DIAssignID()
-; CHECK: [[DBG40]] = !DILocation(line: 14, column: 3, scope: [[DBG8]])
-; CHECK: [[DBG41]] = !DILocation(line: 5, column: 17, scope: [[META27]], inlinedAt: [[META33]])
-; CHECK: [[DIASSIGNID42]] = distinct !DIAssignID()
-; CHECK: [[DIASSIGNID43]] = distinct !DIAssignID()
-; CHECK: [[DBG44]] = !DILocation(line: 15, column: 1, scope: [[DBG8]])
+; CHECK: [[DBG37]] = !DILocation(line: 6, column: 5, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[DBG38]] = !DILocation(line: 14, column: 3, scope: [[DBG8]])
+; CHECK: [[DBG39]] = !DILocation(line: 5, column: 17, scope: [[META27]], inlinedAt: [[META33]])
+; CHECK: [[DBG40]] = !DILocation(line: 15, column: 1, scope: [[DBG8]])
 ;.

llvm/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll

Lines changed: 8 additions & 14 deletions
@@ -23,8 +23,8 @@ define void @write4to7_weird_element_type(ptr nocapture %p) {
 ; CHECK-LABEL: @write4to7_weird_element_type(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP1]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 1
 ; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    ret void
@@ -119,8 +119,7 @@ entry:
 define void @write0to7(ptr nocapture %p) {
 ; CHECK-LABEL: @write0to7(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    store i64 1, ptr [[P]], align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -135,8 +134,7 @@ entry:
 define void @write0to7_atomic(ptr nocapture %p) {
 ; CHECK-LABEL: @write0to7_atomic(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 8
-; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 24, i32 4)
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i32 4)
 ; CHECK-NEXT:    store atomic i64 1, ptr [[P]] unordered, align 8
 ; CHECK-NEXT:    ret void
 ;
@@ -236,8 +234,7 @@ define void @write2to10(ptr nocapture %p) {
 ; CHECK-LABEL: @write2to10(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[ARRAYIDX0]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
 ; CHECK-NEXT:    store i64 1, ptr [[ARRAYIDX2]], align 8
 ; CHECK-NEXT:    ret void
@@ -254,8 +251,7 @@ define void @write2to10_atomic(ptr nocapture %p) {
 ; CHECK-LABEL: @write2to10_atomic(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[ARRAYIDX0]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[ARRAYIDX0]], i8 0, i64 32, i32 4)
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
 ; CHECK-NEXT:    store atomic i64 1, ptr [[ARRAYIDX2]] unordered, align 8
 ; CHECK-NEXT:    ret void
@@ -360,8 +356,7 @@ define void @ow_begin_align1(ptr nocapture %p) {
 ; CHECK-LABEL: @ow_begin_align1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 7
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[TMP0]], i8 0, i64 25, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
 ; CHECK-NEXT:    ret void
 ;
@@ -376,8 +371,7 @@ define void @ow_end_align4(ptr nocapture %p) {
 ; CHECK-LABEL: @ow_end_align4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 4
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP0]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    store i64 1, ptr [[P]], align 1
 ; CHECK-NEXT:    ret void
 ;

llvm/test/Transforms/DeadStoreElimination/OverwriteStoreEnd.ll

Lines changed: 6 additions & 6 deletions
@@ -63,7 +63,7 @@ entry:
 define void @write28to32(ptr nocapture %p) nounwind uwtable ssp {
 ; CHECK-LABEL: @write28to32(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
 ; CHECK-NEXT:    store i32 1, ptr [[ARRAYIDX1]], align 4
 ; CHECK-NEXT:    ret void
@@ -78,7 +78,7 @@ entry:
 define void @write28to32_atomic(ptr nocapture %p) nounwind uwtable ssp {
 ; CHECK-LABEL: @write28to32_atomic(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 28, i32 4)
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 4 [[P:%.*]], i8 0, i64 32, i32 4)
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 7
 ; CHECK-NEXT:    store atomic i32 1, ptr [[ARRAYIDX1]] unordered, align 4
 ; CHECK-NEXT:    ret void
@@ -259,8 +259,8 @@ define void @write16To23AndThen24To31(ptr nocapture %P, i64 %n64, i32 %n32, i16
 ; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr align 8 [[P:%.*]], i8 0, i64 16, i1 false)
 ; CHECK-NEXT:    [[BASE64_2:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 2
 ; CHECK-NEXT:    [[BASE64_3:%.*]] = getelementptr inbounds i64, ptr [[P]], i64 3
-; CHECK-NEXT:    store i64 3, ptr [[BASE64_2]]
-; CHECK-NEXT:    store i64 3, ptr [[BASE64_3]]
+; CHECK-NEXT:    store i64 3, ptr [[BASE64_2]], align 8
+; CHECK-NEXT:    store i64 3, ptr [[BASE64_3]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -345,7 +345,7 @@ define void @ow_end_align1(ptr nocapture %p) {
 ; CHECK-LABEL: @ow_end_align1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 27, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[P1]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
 ; CHECK-NEXT:    store i64 1, ptr [[P2]], align 1
 ; CHECK-NEXT:    ret void
@@ -362,7 +362,7 @@ define void @ow_end_align4(ptr nocapture %p) {
 ; CHECK-LABEL: @ow_end_align4(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 1
-; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 28, i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[P1]], i8 0, i64 32, i1 false)
 ; CHECK-NEXT:    [[P2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 27
 ; CHECK-NEXT:    store i64 1, ptr [[P2]], align 1
 ; CHECK-NEXT:    ret void
