Skip to content

Commit 53979d1

Browse files
committed
[AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id
This provides access to the special scalar source value SRC_POPS_EXITING_WAVE_ID on GFX9 and GFX10.
1 parent eb6097a commit 53979d1

File tree

4 files changed

+52
-0
lines changed

4 files changed

+52
-0
lines changed

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2485,6 +2485,11 @@ class AMDGPUGlobalLoadLDS : Intrinsic <
24852485
"", [SDNPMemOperand]>;
24862486
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
24872487

2488+
// Returns the special scalar source value SRC_POPS_EXITING_WAVE_ID (available
// on GFX9 and GFX10) as an i32; takes no arguments.
// Use read/write of inaccessible memory to model the fact that this reads a
2489+
// volatile value.
2490+
def int_amdgcn_pops_exiting_wave_id :
2491+
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
2492+
24882493
//===----------------------------------------------------------------------===//
24892494
// GFX10 Intrinsics
24902495
//===----------------------------------------------------------------------===//

llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5132,6 +5132,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
51325132
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
51335133
break;
51345134
}
5135+
case Intrinsic::amdgcn_pops_exiting_wave_id:
5136+
return getDefaultMappingSOP(MI);
51355137
default:
51365138
return getInvalidInstructionMapping();
51375139
}

llvm/lib/Target/AMDGPU/SOPInstructions.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,11 @@ let isMoveImm = 1 in {
213213
} // End Uses = [SCC]
214214
} // End isMoveImm = 1
215215

216+
// Variant of S_MOV_B32 used for reading volatile source values like
217+
// SRC_POPS_EXITING_WAVE_ID.
// It prints with the same "s_mov_b32" mnemonic, but it is defined outside the
// isMoveImm block and is marked mayLoad/mayStore.
// NOTE(review): presumably the memory flags are what keep it from being
// treated like an ordinary, freely movable move -- confirm.
// No selection pattern is attached to the definition itself.
218+
let mayLoad = 1, mayStore = 1, maybeAtomic = 0 in
219+
def S_MOV_B32_loadstore : SOP1_32 <"s_mov_b32">;
220+
216221
let Defs = [SCC] in {
217222
def S_NOT_B32 : SOP1_32 <"s_not_b32",
218223
[(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
@@ -1865,6 +1870,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
18651870
def : GetFPModePat<fpmode_mask_gfx6plus>;
18661871
}
18671872

1873+
// Select llvm.amdgcn.pops.exiting.wave.id to S_MOV_B32_loadstore with
// SRC_POPS_EXITING_WAVE_ID as its i32 source operand. The pattern is only
// enabled under the isGFX9GFX10 subtarget predicate.
let SubtargetPredicate = isGFX9GFX10 in
1874+
def : GCNPat<
1875+
(int_amdgcn_pops_exiting_wave_id),
1876+
(S_MOV_B32_loadstore (i32 SRC_POPS_EXITING_WAVE_ID))
1877+
>;
1878+
18681879
//===----------------------------------------------------------------------===//
18691880
// SOP2 Patterns
18701881
//===----------------------------------------------------------------------===//
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
3+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
4+
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
5+
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL
6+
7+
; Reads the POPS exiting wave ID via the intrinsic and stores it to %ptr.
define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
8+
; SDAG-LABEL: test:
9+
; SDAG: ; %bb.0:
10+
; SDAG-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
11+
; SDAG-NEXT: v_mov_b32_e32 v0, 0
12+
; SDAG-NEXT: v_mov_b32_e32 v1, s2
13+
; SDAG-NEXT: global_store_dword v0, v1, s[0:1]
14+
; SDAG-NEXT: s_endpgm
15+
;
16+
; GFX9-GISEL-LABEL: test:
17+
; GFX9-GISEL: ; %bb.0:
18+
; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
19+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
20+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
21+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
22+
; GFX9-GISEL-NEXT: s_endpgm
23+
;
24+
; GFX10-GISEL-LABEL: test:
25+
; GFX10-GISEL: ; %bb.0:
26+
; GFX10-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
27+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
28+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
29+
; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
30+
; GFX10-GISEL-NEXT: s_endpgm
31+
%id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
32+
store i32 %id, ptr addrspace(1) %ptr
33+
ret void
34+
}

0 commit comments

Comments
 (0)