Skip to content

Commit b1f9440

Browse files
author
Thorsten Schütt
authored
[GlobalIsel] Import GEP flags (#93850)
#90824
1 parent 2ecb1ab commit b1f9440

23 files changed

+258
-127
lines changed

llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ namespace llvm {
2626

2727
/// A base class for all GenericMachineInstrs.
2828
class GenericMachineInstr : public MachineInstr {
29+
constexpr static unsigned PoisonFlags = NoUWrap | NoSWrap | NoUSWrap |
30+
IsExact | Disjoint | NonNeg |
31+
FmNoNans | FmNoInfs;
32+
2933
public:
3034
GenericMachineInstr() = delete;
3135

@@ -37,14 +41,10 @@ class GenericMachineInstr : public MachineInstr {
3741
return isPreISelGenericOpcode(MI->getOpcode());
3842
}
3943

40-
bool hasPoisonGeneratingFlags() const {
41-
return getFlags() & (NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg |
42-
FmNoNans | FmNoInfs);
43-
}
44+
bool hasPoisonGeneratingFlags() const { return getFlags() & PoisonFlags; }
4445

4546
void dropPoisonGeneratingFlags() {
46-
clearFlags(NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg | FmNoNans |
47-
FmNoInfs);
47+
clearFlags(PoisonFlags);
4848
assert(!hasPoisonGeneratingFlags());
4949
}
5050
};

llvm/include/llvm/CodeGen/MachineInstr.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,8 @@ class MachineInstr
117117
NoConvergent = 1 << 17, // Call does not require convergence guarantees.
118118
NonNeg = 1 << 18, // The operand is non-negative.
119119
Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs.
120+
NoUSWrap = 1 << 20, // GEP-derived instruction has the
121+
// no-unsigned-signed-wrap (nusw) flag.
120122
};
121123

122124
private:

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1581,10 +1581,8 @@ bool IRTranslator::translateGetElementPtr(const User &U,
15811581
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
15821582

15831583
uint32_t Flags = 0;
1584-
if (isa<Instruction>(U)) {
1585-
const Instruction &I = cast<Instruction>(U);
1586-
Flags = MachineInstr::copyFlagsFromInstruction(I);
1587-
}
1584+
if (const Instruction *I = dyn_cast<Instruction>(&U))
1585+
Flags = MachineInstr::copyFlagsFromInstruction(*I);
15881586

15891587
// Normalize Vector GEP - all scalar operands should be converted to the
15901588
// splat vector.

llvm/lib/CodeGen/MIRParser/MILexer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
212212
.Case("reassoc", MIToken::kw_reassoc)
213213
.Case("nuw", MIToken::kw_nuw)
214214
.Case("nsw", MIToken::kw_nsw)
215+
.Case("nusw", MIToken::kw_nusw)
215216
.Case("exact", MIToken::kw_exact)
216217
.Case("nneg", MIToken::kw_nneg)
217218
.Case("disjoint", MIToken::kw_disjoint)

llvm/lib/CodeGen/MIRParser/MILexer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ struct MIToken {
6969
kw_contract,
7070
kw_afn,
7171
kw_reassoc,
72+
kw_nusw,
7273
kw_nuw,
7374
kw_nsw,
7475
kw_exact,

llvm/lib/CodeGen/MIRPrinter.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -813,6 +813,8 @@ void MIPrinter::print(const MachineInstr &MI) {
813813
OS << "nneg ";
814814
if (MI.getFlag(MachineInstr::Disjoint))
815815
OS << "disjoint ";
816+
if (MI.getFlag(MachineInstr::NoUSWrap))
817+
OS << "nusw ";
816818

817819
OS << TII->getName(MI.getOpcode());
818820
if (I < E)

llvm/lib/CodeGen/MachineInstr.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,11 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
576576
MIFlags |= MachineInstr::MIFlag::NoSWrap;
577577
if (TI->hasNoUnsignedWrap())
578578
MIFlags |= MachineInstr::MIFlag::NoUWrap;
579+
} else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) {
580+
if (GEP->hasNoUnsignedSignedWrap())
581+
MIFlags |= MachineInstr::MIFlag::NoUSWrap;
582+
if (GEP->hasNoUnsignedWrap())
583+
MIFlags |= MachineInstr::MIFlag::NoUWrap;
579584
}
580585

581586
// Copy the nonneg flag.

llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ define i32 @cse_gep(ptr %ptr, i32 %idx) {
2020
; O0-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
2121
; O0-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
2222
; O0-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
23-
; O0-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
24-
; O0-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
23+
; O0-NEXT: %11:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
24+
; O0-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2)
2525
; O0-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
2626
; O0-NEXT: $w0 = COPY [[ADD]](s32)
2727
; O0-NEXT: RET_ReallyLR implicit $w0
@@ -39,8 +39,8 @@ define i32 @cse_gep(ptr %ptr, i32 %idx) {
3939
; O3-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
4040
; O3-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
4141
; O3-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
42-
; O3-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
43-
; O3-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.gep2)
42+
; O3-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C1]](s64)
43+
; O3-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %9(p0) :: (load (s32) from %ir.gep2)
4444
; O3-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
4545
; O3-NEXT: $w0 = COPY [[ADD]](s32)
4646
; O3-NEXT: RET_ReallyLR implicit $w0

llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -795,8 +795,8 @@ define void @jt_multiple_jump_tables(ptr %arg, i32 %arg1, ptr %arg2) {
795795
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[PHI]], [[C111]]
796796
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[MUL]](s64)
797797
; CHECK-NEXT: [[C112:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
798-
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD]], [[C112]](s64)
799-
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0) from %ir.tmp59)
798+
; CHECK-NEXT: %120:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C112]](s64)
799+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD %120(p0) :: (load (p0) from %ir.tmp59)
800800
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
801801
; CHECK-NEXT: $x0 = COPY [[COPY]](p0)
802802
; CHECK-NEXT: $x1 = COPY [[LOAD]](p0)
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
3+
4+
define i32 @gep_nusw_nuw(ptr %ptr, i32 %idx) {
5+
; CHECK-LABEL: name: gep_nusw_nuw
6+
; CHECK: bb.1 (%ir-block.0):
7+
; CHECK-NEXT: liveins: $w1, $x0
8+
; CHECK-NEXT: {{ $}}
9+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
10+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
11+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
12+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
13+
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
14+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
15+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
16+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
17+
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
18+
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
19+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
20+
; CHECK-NEXT: %11:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
21+
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2)
22+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
23+
; CHECK-NEXT: $w0 = COPY [[ADD]](s32)
24+
; CHECK-NEXT: RET_ReallyLR implicit $w0
25+
%sidx = sext i32 %idx to i64
26+
%gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
27+
%v1 = load i32, ptr %gep1
28+
%gep2 = getelementptr nusw nuw [4 x i32], ptr %ptr, i64 %sidx, i64 1
29+
%v2 = load i32, ptr %gep2
30+
%res = add i32 %v1, %v2
31+
ret i32 %res
32+
}
33+
34+
define i32 @gep_nuw(ptr %ptr, i32 %idx) {
35+
; CHECK-LABEL: name: gep_nuw
36+
; CHECK: bb.1 (%ir-block.0):
37+
; CHECK-NEXT: liveins: $w1, $x0
38+
; CHECK-NEXT: {{ $}}
39+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
40+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
41+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
42+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
43+
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
44+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
45+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
46+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
47+
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
48+
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
49+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
50+
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
51+
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
52+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
53+
; CHECK-NEXT: $w0 = COPY [[ADD]](s32)
54+
; CHECK-NEXT: RET_ReallyLR implicit $w0
55+
%sidx = sext i32 %idx to i64
56+
%gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
57+
%v1 = load i32, ptr %gep1
58+
%gep2 = getelementptr nuw [4 x i32], ptr %ptr, i64 %sidx, i64 1
59+
%v2 = load i32, ptr %gep2
60+
%res = add i32 %v1, %v2
61+
ret i32 %res
62+
}
63+
64+
define i32 @gep_nusw(ptr %ptr, i32 %idx) {
65+
; CHECK-LABEL: name: gep_nusw
66+
; CHECK: bb.1 (%ir-block.0):
67+
; CHECK-NEXT: liveins: $w1, $x0
68+
; CHECK-NEXT: {{ $}}
69+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
70+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
71+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
72+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
73+
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
74+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
75+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
76+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
77+
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
78+
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
79+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
80+
; CHECK-NEXT: %11:_(p0) = nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
81+
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2)
82+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
83+
; CHECK-NEXT: $w0 = COPY [[ADD]](s32)
84+
; CHECK-NEXT: RET_ReallyLR implicit $w0
85+
%sidx = sext i32 %idx to i64
86+
%gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
87+
%v1 = load i32, ptr %gep1
88+
%gep2 = getelementptr nusw [4 x i32], ptr %ptr, i64 %sidx, i64 1
89+
%v2 = load i32, ptr %gep2
90+
%res = add i32 %v1, %v2
91+
ret i32 %res
92+
}
93+
94+
define i32 @gep_none(ptr %ptr, i32 %idx) {
95+
; CHECK-LABEL: name: gep_none
96+
; CHECK: bb.1 (%ir-block.0):
97+
; CHECK-NEXT: liveins: $w1, $x0
98+
; CHECK-NEXT: {{ $}}
99+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
100+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
101+
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32)
102+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
103+
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
104+
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64)
105+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0)
106+
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1)
107+
; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]]
108+
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64)
109+
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
110+
; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64)
111+
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2)
112+
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]]
113+
; CHECK-NEXT: $w0 = COPY [[ADD]](s32)
114+
; CHECK-NEXT: RET_ReallyLR implicit $w0
115+
%sidx = sext i32 %idx to i64
116+
%gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0
117+
%v1 = load i32, ptr %gep1
118+
%gep2 = getelementptr [4 x i32], ptr %ptr, i64 %sidx, i64 1
119+
%v2 = load i32, ptr %gep2
120+
%res = add i32 %v1, %v2
121+
ret i32 %res
122+
}

llvm/test/CodeGen/AArch64/arm64-this-return.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ define ptr @E_ctor_base(ptr %this, i32 %x) {
148148
; GISEL-MIR: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
149149
; GISEL-MIR: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY]](p0)
150150
; GISEL-MIR: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
151-
; GISEL-MIR: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C]](s64)
151+
; GISEL-MIR: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s64)
152152
; GISEL-MIR: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
153153
; GISEL-MIR: $x0 = COPY [[PTR_ADD]](p0)
154154
; GISEL-MIR: $w1 = COPY [[COPY1]](s32)

llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -952,9 +952,9 @@ define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %ar
952952
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `ptr addrspace(1) undef`, addrspace 1)
953953
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (volatile load (s32) from `ptr addrspace(1) undef`, addrspace 1)
954954
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
955-
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[COPY]], [[C]](s32)
955+
; CHECK-NEXT: %5:_(p5) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s32)
956956
; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.arg0, addrspace 5)
957-
; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5)
957+
; CHECK-NEXT: G_STORE [[LOAD1]](s32), %5(p5) :: (store (s32) into %ir.gep1, addrspace 5)
958958
; CHECK-NEXT: SI_RETURN
959959
%val0 = load volatile i8, ptr addrspace(1) undef
960960
%val1 = load volatile i32, ptr addrspace(1) undef

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2914,16 +2914,16 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
29142914
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
29152915
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4)
29162916
; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
2917-
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[INT]], [[C]](s64)
2918-
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4)
2917+
; GCN-NEXT: %17:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64)
2918+
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %17(p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4)
29192919
; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0
29202920
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
29212921
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32
29222922
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
29232923
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
29242924
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
29252925
; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
2926-
; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
2926+
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64)
29272927
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
29282928
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
29292929
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -2947,7 +2947,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa
29472947
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
29482948
; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
29492949
; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
2950-
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
2950+
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
29512951
; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
29522952
; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
29532953
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)

llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,16 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
2424
; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.out.val
2525
; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
2626
; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
27-
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
27+
; GCN-NEXT: %17:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32)
2828
; GCN-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.val, addrspace 5)
29-
; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5)
29+
; GCN-NEXT: G_STORE [[C1]](s32), %17(p5) :: (store (s32) into %ir.in.gep1, addrspace 5)
3030
; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
3131
; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
3232
; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
3333
; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
3434
; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4)
3535
; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
36-
; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
36+
; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64)
3737
; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]]
3838
; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]]
3939
; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]]
@@ -50,15 +50,15 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
5050
; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
5151
; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg
5252
; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
53-
; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
53+
; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
5454
; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
55-
; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
55+
; GCN-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5)
5656
; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5)
5757
; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg
5858
; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>)
5959
; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4)
6060
; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4)
61-
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4)
61+
; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4)
6262
; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64)
6363
; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32)
6464
; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32)
@@ -67,9 +67,9 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval
6767
; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32)
6868
; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31
6969
; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc
70-
; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
70+
; GCN-NEXT: %45:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32)
7171
; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.val, addrspace 5)
72-
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5)
72+
; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %45(p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5)
7373
; GCN-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1)
7474
; GCN-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
7575
; GCN-NEXT: S_ENDPGM 0

0 commit comments

Comments
 (0)