Skip to content

[AMDGPU][ASAN] Move infer-address-spaces before amdgpu-sw-lower-lds in pass pipeline #120375

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1174,6 +1174,9 @@ void AMDGPUPassConfig::addIRPasses() {
// Replace OpenCL enqueued block function pointers with global variables.
addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());

if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(createInferAddressSpacesPass());

// Lower LDS accesses to global memory pass if address sanitizer is enabled.
if (EnableSwLowerLDS)
addPass(createAMDGPUSwLowerLDSLegacyPass(&TM));
Expand All @@ -1183,9 +1186,6 @@ void AMDGPUPassConfig::addIRPasses() {
addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
}

if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(createInferAddressSpacesPass());

// Run atomic optimizer before Atomic Expand
if ((TM.getTargetTriple().getArch() == Triple::amdgcn) &&
(TM.getOptLevel() >= CodeGenOptLevel::Less) &&
Expand Down Expand Up @@ -1941,13 +1941,17 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {

// TODO: Missing OpenCLEnqueuedBlockLowering

if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(InferAddressSpacesPass());

// Lower LDS accesses to global memory pass if address sanitizer is enabled.
if (EnableSwLowerLDS)
addPass(AMDGPUSwLowerLDSPass(TM));

// Runs before PromoteAlloca so the latter can account for function uses
if (EnableLowerModuleLDS)
addPass(AMDGPULowerModuleLDSPass(TM));

if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(InferAddressSpacesPass());

// Run atomic optimizer before Atomic Expand
if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
(AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
Expand Down
102 changes: 102 additions & 0 deletions llvm/test/CodeGen/AMDGPU/amdgpu-sw-lower-lds-static-lds-test-O0.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
; RUN: opt < %s -passes='function(infer-address-spaces),amdgpu-sw-lower-lds' -S -mtriple=amdgcn-amd-amdhsa | FileCheck %s
@lds = internal addrspace(3) global [5 x i32] poison, align 16

;.
; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 16, !absolute_symbol [[META0:![0-9]+]]
; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 32 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 20, i32 64 } }, no_sanitize_address
;.
define amdgpu_kernel void @k0() sanitize_address {
; CHECK-LABEL: define amdgpu_kernel void @k0(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[WID:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0
; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB18:.*]]
; CHECK: [[MALLOC]]:
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE:%.*]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]]
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]])
; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1)
; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 8
; CHECK-NEXT: [[TMP15:%.*]] = ptrtoint ptr addrspace(1) [[TMP14]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP15]], i64 24)
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 52
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64
; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 44)
; CHECK-NEXT: br label %[[BB18]]
; CHECK: [[BB18]]:
; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ]
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8
; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[LLVM_AMDGCN_SW_LDS_K0_MD_TYPE]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP20]]
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [5 x i32], ptr addrspace(3) [[TMP21]], i64 0, i64 0
; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(3) [[GEP]] to i32
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP19]], i32 [[TMP22]]
; CHECK-NEXT: [[TMP24:%.*]] = ptrtoint ptr addrspace(1) [[TMP23]] to i64
; CHECK-NEXT: [[TMP25:%.*]] = lshr i64 [[TMP24]], 3
; CHECK-NEXT: [[TMP26:%.*]] = add i64 [[TMP25]], 2147450880
; CHECK-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr
; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[TMP27]], align 1
; CHECK-NEXT: [[TMP29:%.*]] = icmp ne i8 [[TMP28]], 0
; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP24]], 7
; CHECK-NEXT: [[TMP31:%.*]] = add i64 [[TMP30]], 3
; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[TMP31]] to i8
; CHECK-NEXT: [[TMP33:%.*]] = icmp sge i8 [[TMP32]], [[TMP28]]
; CHECK-NEXT: [[TMP34:%.*]] = and i1 [[TMP29]], [[TMP33]]
; CHECK-NEXT: [[TMP35:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP34]])
; CHECK-NEXT: [[TMP36:%.*]] = icmp ne i64 [[TMP35]], 0
; CHECK-NEXT: br i1 [[TMP36]], label %[[ASAN_REPORT:.*]], label %[[BB39:.*]], !prof [[PROF2:![0-9]+]]
; CHECK: [[ASAN_REPORT]]:
; CHECK-NEXT: br i1 [[TMP34]], label %[[BB37:.*]], label %[[BB38:.*]]
; CHECK: [[BB37]]:
; CHECK-NEXT: call void @__asan_report_store4(i64 [[TMP24]]) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
; CHECK-NEXT: br label %[[BB38]]
; CHECK: [[BB38]]:
; CHECK-NEXT: br label %[[BB39]]
; CHECK: [[BB39]]:
; CHECK-NEXT: store i32 1, ptr addrspace(1) [[TMP23]], align 4
; CHECK-NEXT: br label %[[CONDFREE:.*]]
; CHECK: [[CONDFREE]]:
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier()
; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]]
; CHECK: [[FREE]]:
; CHECK-NEXT: [[TMP40:%.*]] = call ptr @llvm.returnaddress(i32 0)
; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr [[TMP40]] to i64
; CHECK-NEXT: [[TMP42:%.*]] = ptrtoint ptr addrspace(1) [[TMP19]] to i64
; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP42]], i64 [[TMP41]])
; CHECK-NEXT: br label %[[END]]
; CHECK: [[END]]:
; CHECK-NEXT: ret void
;
%gep = getelementptr inbounds [5 x i32], ptr addrspacecast (ptr addrspace(3) @lds to ptr), i64 0, i64 0
store i32 1, ptr %gep, align 4
ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 4, !"nosanitize_address", i32 1}
;.
; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="16" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind }
; CHECK: attributes #[[ATTR6]] = { nomerge }
;.
; CHECK: [[META0]] = !{i32 0, i32 1}
; CHECK: [[META1:![0-9]+]] = !{i32 4, !"nosanitize_address", i32 1}
; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575}
;.
12 changes: 8 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -187,10 +187,11 @@
; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O1-NEXT: Function Alias Analysis Results
; GCN-O1-NEXT: Lower OpenCL enqueued blocks
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Infer address spaces
; GCN-O1-NEXT: AMDGPU Software lowering of LDS
; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O1-NEXT: FunctionPass Manager
; GCN-O1-NEXT: Infer address spaces
; GCN-O1-NEXT: Dominator Tree Construction
; GCN-O1-NEXT: Cycle Info Analysis
; GCN-O1-NEXT: Uniformity Analysis
Expand Down Expand Up @@ -467,10 +468,11 @@
; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O1-OPTS-NEXT: Function Alias Analysis Results
; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Infer address spaces
; GCN-O1-OPTS-NEXT: AMDGPU Software lowering of LDS
; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O1-OPTS-NEXT: FunctionPass Manager
; GCN-O1-OPTS-NEXT: Infer address spaces
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
; GCN-O1-OPTS-NEXT: Cycle Info Analysis
; GCN-O1-OPTS-NEXT: Uniformity Analysis
Expand Down Expand Up @@ -777,10 +779,11 @@
; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O2-NEXT: Function Alias Analysis Results
; GCN-O2-NEXT: Lower OpenCL enqueued blocks
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Infer address spaces
; GCN-O2-NEXT: AMDGPU Software lowering of LDS
; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O2-NEXT: FunctionPass Manager
; GCN-O2-NEXT: Infer address spaces
; GCN-O2-NEXT: Dominator Tree Construction
; GCN-O2-NEXT: Cycle Info Analysis
; GCN-O2-NEXT: Uniformity Analysis
Expand Down Expand Up @@ -1091,10 +1094,11 @@
; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
; GCN-O3-NEXT: Function Alias Analysis Results
; GCN-O3-NEXT: Lower OpenCL enqueued blocks
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Infer address spaces
; GCN-O3-NEXT: AMDGPU Software lowering of LDS
; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O3-NEXT: FunctionPass Manager
; GCN-O3-NEXT: Infer address spaces
; GCN-O3-NEXT: Dominator Tree Construction
; GCN-O3-NEXT: Cycle Info Analysis
; GCN-O3-NEXT: Uniformity Analysis
Expand Down
Loading