Skip to content

Commit dd47be0

Browse files
committed
[AMDGPU] Handle lowering addrspace casts from LDS to FLAT address.
1 parent 711419e commit dd47be0

8 files changed

+425
-286
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSwLowerLDS.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,10 @@ void AMDGPUSwLowerLDS::getLDSMemoryInstructions(
655655
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(&Inst)) {
656656
if (XCHG->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
657657
LDSInstructions.insert(&Inst);
658+
} else if (AddrSpaceCastInst *AscI = dyn_cast<AddrSpaceCastInst>(&Inst)) {
659+
if ((AscI->getSrcAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) &&
660+
(AscI->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS))
661+
LDSInstructions.insert(&Inst);
658662
} else
659663
continue;
660664
}
@@ -722,6 +726,16 @@ void AMDGPUSwLowerLDS::translateLDSMemoryOperationsToGlobalMemory(
722726
AsanInfo.Instructions.insert(NewXCHG);
723727
XCHG->replaceAllUsesWith(NewXCHG);
724728
XCHG->eraseFromParent();
729+
} else if (AddrSpaceCastInst *AscI = dyn_cast<AddrSpaceCastInst>(Inst)) {
730+
Value *AIOperand = AscI->getPointerOperand();
731+
Value *Gep =
732+
getTranslatedGlobalMemoryGEPOfLDSPointer(LoadMallocPtr, AIOperand);
733+
Value *NewAI = IRB.CreateAddrSpaceCast(Gep, AscI->getType());
734+
// Note: There is no need to add this instruction to the AsanInfo list of
735+
// instructions to be instrumented. The FLAT_ADDRESS pointer will already
736+
// have been instrumented by the asan pass, which runs prior to this pass.
737+
AscI->replaceAllUsesWith(NewAI);
738+
AscI->eraseFromParent();
725739
} else
726740
report_fatal_error("Unimplemented LDS lowering instruction");
727741
}

llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-non-kernel-declaration.ll

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,12 @@ define void @non_kernel_function() sanitize_address {
2020
; CHECK-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP5]], align 8
2121
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) [[TMP6]], align 4
2222
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[TMP7]]
23-
; CHECK-NEXT: [[Y:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
24-
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(3) [[TMP8]] to ptr
23+
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
24+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP14]]
25+
; CHECK-NEXT: [[TMP11:%.*]] = addrspacecast ptr addrspace(1) [[TMP10]] to ptr
26+
; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr addrspace(3) [[TMP8]] to i32
27+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP4]], i32 [[TMP12]]
28+
; CHECK-NEXT: [[TMP9:%.*]] = addrspacecast ptr addrspace(1) [[TMP13]] to ptr
2529
; CHECK-NEXT: store i8 5, ptr [[TMP9]], align 8
2630
; CHECK-NEXT: ret void
2731
;

llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-indirect-access-asan.ll

Lines changed: 51 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 4
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
22
; RUN: opt < %s -passes=amdgpu-sw-lower-lds -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
33

44
; Test that static LDS is lowered correctly when a non-kernel function with LDS accesses is called from a kernel.
@@ -28,8 +28,12 @@ define void @use_variables() sanitize_address {
2828
; CHECK-NEXT: [[TMP12:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP11]], align 8
2929
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) [[TMP12]], align 4
3030
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[TMP10]]
31-
; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
32-
; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(3) [[TMP9]] to ptr
31+
; CHECK-NEXT: [[TMP13:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
32+
; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP13]]
33+
; CHECK-NEXT: [[TMP34:%.*]] = addrspacecast ptr addrspace(1) [[TMP33]] to ptr
34+
; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr addrspace(3) [[TMP9]] to i32
35+
; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP35]]
36+
; CHECK-NEXT: [[TMP16:%.*]] = addrspacecast ptr addrspace(1) [[TMP36]] to ptr
3337
; CHECK-NEXT: store i8 3, ptr [[TMP16]], align 4
3438
; CHECK-NEXT: [[TMP14:%.*]] = ptrtoint ptr addrspace(3) [[TMP15]] to i32
3539
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP7]], i32 [[TMP14]]
@@ -45,16 +49,16 @@ define void @use_variables() sanitize_address {
4549
; CHECK-NEXT: [[TMP25:%.*]] = and i1 [[TMP21]], [[TMP24]]
4650
; CHECK-NEXT: [[TMP26:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP25]])
4751
; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i64 [[TMP26]], 0
48-
; CHECK-NEXT: br i1 [[TMP27]], label [[ASAN_REPORT:%.*]], label [[TMP30:%.*]], !prof [[PROF2:![0-9]+]]
49-
; CHECK: asan.report:
50-
; CHECK-NEXT: br i1 [[TMP25]], label [[TMP28:%.*]], label [[TMP29:%.*]]
51-
; CHECK: 28:
52+
; CHECK-NEXT: br i1 [[TMP27]], label %[[ASAN_REPORT:.*]], label %[[BB35:.*]], !prof [[PROF2:![0-9]+]]
53+
; CHECK: [[ASAN_REPORT]]:
54+
; CHECK-NEXT: br i1 [[TMP25]], label %[[BB33:.*]], label %[[BB34:.*]]
55+
; CHECK: [[BB33]]:
5256
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7:[0-9]+]]
5357
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
54-
; CHECK-NEXT: br label [[TMP29]]
55-
; CHECK: 29:
56-
; CHECK-NEXT: br label [[TMP30]]
57-
; CHECK: 30:
58+
; CHECK-NEXT: br label %[[BB34]]
59+
; CHECK: [[BB34]]:
60+
; CHECK-NEXT: br label %[[BB35]]
61+
; CHECK: [[BB35]]:
5862
; CHECK-NEXT: store i8 3, ptr addrspace(1) [[TMP31]], align 8
5963
; CHECK-NEXT: ret void
6064
;
@@ -67,15 +71,15 @@ define void @use_variables() sanitize_address {
6771
define amdgpu_kernel void @k0() sanitize_address {
6872
; CHECK-LABEL: define amdgpu_kernel void @k0(
6973
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] {
70-
; CHECK-NEXT: WId:
74+
; CHECK-NEXT: [[WID:.*]]:
7175
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
7276
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
7377
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
7478
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
7579
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
7680
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
77-
; CHECK-NEXT: br i1 [[TMP5]], label [[MALLOC:%.*]], label [[TMP7:%.*]]
78-
; CHECK: Malloc:
81+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB24:.*]]
82+
; CHECK: [[MALLOC]]:
7983
; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4
8084
; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4
8185
; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[TMP13]], [[TMP14]]
@@ -100,9 +104,9 @@ define amdgpu_kernel void @k0() sanitize_address {
100104
; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP6]], i64 132
101105
; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64
102106
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP68]], i64 28)
103-
; CHECK-NEXT: br label [[TMP7]]
104-
; CHECK: 24:
105-
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, [[WID:%.*]] ], [ true, [[MALLOC]] ]
107+
; CHECK-NEXT: br label %[[BB24]]
108+
; CHECK: [[BB24]]:
109+
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
106110
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
107111
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
108112
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
@@ -124,16 +128,16 @@ define amdgpu_kernel void @k0() sanitize_address {
124128
; CHECK-NEXT: [[TMP41:%.*]] = and i1 [[TMP37]], [[TMP40]]
125129
; CHECK-NEXT: [[TMP42:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP41]])
126130
; CHECK-NEXT: [[TMP43:%.*]] = icmp ne i64 [[TMP42]], 0
127-
; CHECK-NEXT: br i1 [[TMP43]], label [[ASAN_REPORT:%.*]], label [[TMP46:%.*]], !prof [[PROF2]]
128-
; CHECK: asan.report:
129-
; CHECK-NEXT: br i1 [[TMP41]], label [[TMP44:%.*]], label [[CONDFREE:%.*]]
130-
; CHECK: 44:
131+
; CHECK-NEXT: br i1 [[TMP43]], label %[[ASAN_REPORT:.*]], label %[[BB46:.*]], !prof [[PROF2]]
132+
; CHECK: [[ASAN_REPORT]]:
133+
; CHECK-NEXT: br i1 [[TMP41]], label %[[BB44:.*]], label %[[BB45:.*]]
134+
; CHECK: [[BB44]]:
131135
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP32]]) #[[ATTR7]]
132136
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
133-
; CHECK-NEXT: br label [[CONDFREE]]
134-
; CHECK: 45:
135-
; CHECK-NEXT: br label [[TMP46]]
136-
; CHECK: 46:
137+
; CHECK-NEXT: br label %[[BB45]]
138+
; CHECK: [[BB45]]:
139+
; CHECK-NEXT: br label %[[BB46]]
140+
; CHECK: [[BB46]]:
137141
; CHECK-NEXT: store i8 7, ptr addrspace(1) [[TMP31]], align 1
138142
; CHECK-NEXT: [[TMP47:%.*]] = ptrtoint ptr addrspace(3) [[TMP18]] to i32
139143
; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP47]]
@@ -152,16 +156,16 @@ define amdgpu_kernel void @k0() sanitize_address {
152156
; CHECK-NEXT: [[TMP59:%.*]] = and i1 [[TMP54]], [[TMP58]]
153157
; CHECK-NEXT: [[TMP60:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP59]])
154158
; CHECK-NEXT: [[TMP61:%.*]] = icmp ne i64 [[TMP60]], 0
155-
; CHECK-NEXT: br i1 [[TMP61]], label [[ASAN_REPORT1:%.*]], label [[TMP64:%.*]], !prof [[PROF2]]
156-
; CHECK: asan.report1:
157-
; CHECK-NEXT: br i1 [[TMP59]], label [[TMP62:%.*]], label [[TMP63:%.*]]
158-
; CHECK: 64:
159+
; CHECK-NEXT: br i1 [[TMP61]], label %[[ASAN_REPORT1:.*]], label %[[BB66:.*]], !prof [[PROF2]]
160+
; CHECK: [[ASAN_REPORT1]]:
161+
; CHECK-NEXT: br i1 [[TMP59]], label %[[BB64:.*]], label %[[BB65:.*]]
162+
; CHECK: [[BB64]]:
159163
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP83]]) #[[ATTR7]]
160164
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
161-
; CHECK-NEXT: br label [[TMP63]]
162-
; CHECK: 65:
163-
; CHECK-NEXT: br label [[TMP64]]
164-
; CHECK: 66:
165+
; CHECK-NEXT: br label %[[BB65]]
166+
; CHECK: [[BB65]]:
167+
; CHECK-NEXT: br label %[[BB66]]
168+
; CHECK: [[BB66]]:
165169
; CHECK-NEXT: [[TMP84:%.*]] = ptrtoint ptr addrspace(1) [[TMP82]] to i64
166170
; CHECK-NEXT: [[TMP85:%.*]] = lshr i64 [[TMP84]], 3
167171
; CHECK-NEXT: [[TMP69:%.*]] = add i64 [[TMP85]], 2147450880
@@ -174,28 +178,28 @@ define amdgpu_kernel void @k0() sanitize_address {
174178
; CHECK-NEXT: [[TMP76:%.*]] = and i1 [[TMP72]], [[TMP75]]
175179
; CHECK-NEXT: [[TMP77:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP76]])
176180
; CHECK-NEXT: [[TMP78:%.*]] = icmp ne i64 [[TMP77]], 0
177-
; CHECK-NEXT: br i1 [[TMP78]], label [[ASAN_REPORT2:%.*]], label [[TMP81:%.*]], !prof [[PROF2]]
178-
; CHECK: asan.report2:
179-
; CHECK-NEXT: br i1 [[TMP76]], label [[TMP79:%.*]], label [[TMP80:%.*]]
180-
; CHECK: 79:
181+
; CHECK-NEXT: br i1 [[TMP78]], label %[[ASAN_REPORT2:.*]], label %[[BB81:.*]], !prof [[PROF2]]
182+
; CHECK: [[ASAN_REPORT2]]:
183+
; CHECK-NEXT: br i1 [[TMP76]], label %[[BB79:.*]], label %[[BB80:.*]]
184+
; CHECK: [[BB79]]:
181185
; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP84]]) #[[ATTR7]]
182186
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
183-
; CHECK-NEXT: br label [[TMP80]]
184-
; CHECK: 80:
185-
; CHECK-NEXT: br label [[TMP81]]
186-
; CHECK: 81:
187+
; CHECK-NEXT: br label %[[BB80]]
188+
; CHECK: [[BB80]]:
189+
; CHECK-NEXT: br label %[[BB81]]
190+
; CHECK: [[BB81]]:
187191
; CHECK-NEXT: store i32 8, ptr addrspace(1) [[TMP48]], align 2
188-
; CHECK-NEXT: br label [[CONDFREE1:%.*]]
189-
; CHECK: CondFree:
192+
; CHECK-NEXT: br label %[[CONDFREE:.*]]
193+
; CHECK: [[CONDFREE]]:
190194
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
191-
; CHECK-NEXT: br i1 [[XYZCOND]], label [[FREE:%.*]], label [[END:%.*]]
192-
; CHECK: Free:
195+
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
196+
; CHECK: [[FREE]]:
193197
; CHECK-NEXT: [[TMP20:%.*]] = call ptr @llvm.returnaddress(i32 0)
194198
; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
195199
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
196200
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP22]], i64 [[TMP21]])
197-
; CHECK-NEXT: br label [[END]]
198-
; CHECK: End:
201+
; CHECK-NEXT: br label %[[END]]
202+
; CHECK: [[END]]:
199203
; CHECK-NEXT: ret void
200204
;
201205
call void @use_variables()

0 commit comments

Comments
 (0)