Skip to content

Commit 6cf37dd

Browse files
authored
[AMDGPU] Enable architected SGPRs for GFX12 (#79160)
1 parent 2856db0 commit 6cf37dd

File tree

4 files changed: 176 additions (+176) and 85 deletions (-85).

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1496,6 +1496,7 @@ def FeatureISAVersion12 : FeatureSet<
14961496
FeatureWavefrontSize32,
14971497
FeatureShaderCyclesHiLoRegisters,
14981498
FeatureArchitectedFlatScratch,
1499+
FeatureArchitectedSGPRs,
14991500
FeatureAtomicFaddRtnInsts,
15001501
FeatureAtomicFaddNoRtnInsts,
15011502
FeatureAtomicDsPkAdd16Insts,

llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -41,30 +41,30 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
4141
;
4242
; GFX12-LABEL: indirect_call_known_no_special_inputs:
4343
; GFX12: ; %bb.0: ; %bb
44+
; GFX12-NEXT: s_getpc_b64 s[2:3]
45+
; GFX12-NEXT: s_sext_i32_i16 s3, s3
46+
; GFX12-NEXT: s_add_co_u32 s2, s2, snork@gotpcrel32@lo+8
47+
; GFX12-NEXT: s_add_co_ci_u32 s3, s3, snork@gotpcrel32@hi+16
48+
; GFX12-NEXT: s_mov_b64 s[0:1], 0
4449
; GFX12-NEXT: s_getpc_b64 s[4:5]
4550
; GFX12-NEXT: s_sext_i32_i16 s5, s5
46-
; GFX12-NEXT: s_add_co_u32 s4, s4, snork@gotpcrel32@lo+8
47-
; GFX12-NEXT: s_add_co_ci_u32 s5, s5, snork@gotpcrel32@hi+16
48-
; GFX12-NEXT: s_mov_b64 s[2:3], 0
49-
; GFX12-NEXT: s_getpc_b64 s[6:7]
50-
; GFX12-NEXT: s_sext_i32_i16 s7, s7
51-
; GFX12-NEXT: s_add_co_u32 s6, s6, wobble@gotpcrel32@lo+8
52-
; GFX12-NEXT: s_add_co_ci_u32 s7, s7, wobble@gotpcrel32@hi+16
53-
; GFX12-NEXT: s_load_u8 s1, s[2:3], 0x0
51+
; GFX12-NEXT: s_add_co_u32 s4, s4, wobble@gotpcrel32@lo+8
52+
; GFX12-NEXT: s_add_co_ci_u32 s5, s5, wobble@gotpcrel32@hi+16
53+
; GFX12-NEXT: s_load_u8 s6, s[0:1], 0x0
54+
; GFX12-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
5455
; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
55-
; GFX12-NEXT: s_load_b64 s[4:5], s[6:7], 0x0
5656
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v4, 0
5757
; GFX12-NEXT: v_mov_b32_e32 v31, v0
58+
; GFX12-NEXT: s_mov_b32 s12, ttmp9
5859
; GFX12-NEXT: s_mov_b64 s[8:9], 0
59-
; GFX12-NEXT: s_mov_b32 s12, s0
6060
; GFX12-NEXT: s_mov_b32 s32, 0
6161
; GFX12-NEXT: s_wait_kmcnt 0x0
62-
; GFX12-NEXT: s_and_b32 s1, 1, s1
62+
; GFX12-NEXT: s_and_b32 s4, 1, s6
6363
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
64-
; GFX12-NEXT: s_cmp_eq_u32 s1, 1
65-
; GFX12-NEXT: s_cselect_b32 s3, s5, s3
66-
; GFX12-NEXT: s_cselect_b32 s2, s4, s2
67-
; GFX12-NEXT: s_swappc_b64 s[30:31], s[2:3]
64+
; GFX12-NEXT: s_cmp_eq_u32 s4, 1
65+
; GFX12-NEXT: s_cselect_b32 s1, s3, s1
66+
; GFX12-NEXT: s_cselect_b32 s0, s2, s0
67+
; GFX12-NEXT: s_swappc_b64 s[30:31], s[0:1]
6868
; GFX12-NEXT: s_endpgm
6969

7070
bb:

llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics.ll

Lines changed: 49 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-SDAG %s
3-
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GFX9-GISEL %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
4+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
5+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
46

57
define amdgpu_cs void @_amdgpu_cs_main() {
68
; GFX9-SDAG-LABEL: _amdgpu_cs_main:
@@ -23,6 +25,30 @@ define amdgpu_cs void @_amdgpu_cs_main() {
2325
; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s2
2426
; GFX9-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
2527
; GFX9-GISEL-NEXT: s_endpgm
28+
;
29+
; GFX12-SDAG-LABEL: _amdgpu_cs_main:
30+
; GFX12-SDAG: ; %bb.0: ; %.entry
31+
; GFX12-SDAG-NEXT: s_lshr_b32 s2, ttmp7, 16
32+
; GFX12-SDAG-NEXT: s_and_b32 s1, ttmp7, 0xffff
33+
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
34+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, s1
35+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
36+
; GFX12-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
37+
; GFX12-SDAG-NEXT: s_nop 0
38+
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
39+
; GFX12-SDAG-NEXT: s_endpgm
40+
;
41+
; GFX12-GISEL-LABEL: _amdgpu_cs_main:
42+
; GFX12-GISEL: ; %bb.0: ; %.entry
43+
; GFX12-GISEL-NEXT: s_mov_b32 s0, ttmp9
44+
; GFX12-GISEL-NEXT: s_and_b32 s1, ttmp7, 0xffff
45+
; GFX12-GISEL-NEXT: s_lshr_b32 s2, ttmp7, 16
46+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
47+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v2, s2
48+
; GFX12-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], null
49+
; GFX12-GISEL-NEXT: s_nop 0
50+
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
51+
; GFX12-GISEL-NEXT: s_endpgm
2652
.entry:
2753
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
2854
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -68,6 +94,24 @@ define amdgpu_cs void @caller() {
6894
; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[10:11]
6995
; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[4:5]
7096
; GFX9-GISEL-NEXT: s_endpgm
97+
;
98+
; GFX12-SDAG-LABEL: caller:
99+
; GFX12-SDAG: ; %bb.0:
100+
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9
101+
; GFX12-SDAG-NEXT: s_mov_b32 s1, callee@abs32@hi
102+
; GFX12-SDAG-NEXT: s_mov_b32 s0, callee@abs32@lo
103+
; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
104+
; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[0:1]
105+
; GFX12-SDAG-NEXT: s_endpgm
106+
;
107+
; GFX12-GISEL-LABEL: caller:
108+
; GFX12-GISEL: ; %bb.0:
109+
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
110+
; GFX12-GISEL-NEXT: s_mov_b32 s0, callee@abs32@lo
111+
; GFX12-GISEL-NEXT: s_mov_b32 s1, callee@abs32@hi
112+
; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
113+
; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
114+
; GFX12-GISEL-NEXT: s_endpgm
71115
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
72116
call amdgpu_gfx void @callee(i32 %idx)
73117
ret void
@@ -79,3 +123,6 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
79123
declare i32 @llvm.amdgcn.workgroup.id.y()
80124
declare i32 @llvm.amdgcn.workgroup.id.z()
81125
declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
126+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
127+
; GFX12: {{.*}}
128+
; GFX9: {{.*}}

llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll

Lines changed: 111 additions & 68 deletions
Original file line number | Diff line number | Diff line change
@@ -1,53 +1,77 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-SDAG %s
3-
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel --verify-machineinstrs < %s | FileCheck -check-prefix=GCN-GISEL %s
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
3+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
4+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
5+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
46

57
define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
6-
; GCN-SDAG-LABEL: workgroup_id_x:
7-
; GCN-SDAG: ; %bb.0:
8-
; GCN-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
9-
; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
10-
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
11-
; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
12-
; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
13-
; GCN-SDAG-NEXT: s_endpgm
8+
; GFX9-SDAG-LABEL: workgroup_id_x:
9+
; GFX9-SDAG: ; %bb.0:
10+
; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
11+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
12+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
13+
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
14+
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
15+
; GFX9-SDAG-NEXT: s_endpgm
1416
;
15-
; GCN-GISEL-LABEL: workgroup_id_x:
16-
; GCN-GISEL: ; %bb.0:
17-
; GCN-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
18-
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
19-
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0
20-
; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
21-
; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
22-
; GCN-GISEL-NEXT: s_endpgm
17+
; GFX9-GISEL-LABEL: workgroup_id_x:
18+
; GFX9-GISEL: ; %bb.0:
19+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
20+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
21+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
22+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
23+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
24+
; GFX9-GISEL-NEXT: s_endpgm
25+
;
26+
; GFX12-SDAG-LABEL: workgroup_id_x:
27+
; GFX12-SDAG: ; %bb.0:
28+
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
29+
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
30+
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
31+
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
32+
; GFX12-SDAG-NEXT: s_nop 0
33+
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
34+
; GFX12-SDAG-NEXT: s_endpgm
35+
;
36+
; GFX12-GISEL-LABEL: workgroup_id_x:
37+
; GFX12-GISEL: ; %bb.0:
38+
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
39+
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
40+
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
41+
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
42+
; GFX12-GISEL-NEXT: s_nop 0
43+
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
44+
; GFX12-GISEL-NEXT: s_endpgm
2345
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
2446
store i32 %idx, ptr addrspace(1) %ptrx
2547

2648
ret void
2749
}
2850

2951
define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry) {
30-
; GCN-SDAG-LABEL: workgroup_id_xy:
31-
; GCN-SDAG: ; %bb.0:
32-
; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
33-
; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
34-
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
35-
; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
36-
; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
37-
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp7
38-
; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
39-
; GCN-SDAG-NEXT: s_endpgm
52+
; GFX9-LABEL: workgroup_id_xy:
53+
; GFX9: ; %bb.0:
54+
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
55+
; GFX9-NEXT: v_mov_b32_e32 v0, 0
56+
; GFX9-NEXT: v_mov_b32_e32 v1, ttmp9
57+
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
58+
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
59+
; GFX9-NEXT: v_mov_b32_e32 v1, ttmp7
60+
; GFX9-NEXT: global_store_dword v0, v1, s[2:3]
61+
; GFX9-NEXT: s_endpgm
4062
;
41-
; GCN-GISEL-LABEL: workgroup_id_xy:
42-
; GCN-GISEL: ; %bb.0:
43-
; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
44-
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, 0
45-
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp9
46-
; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
47-
; GCN-GISEL-NEXT: global_store_dword v0, v1, s[0:1]
48-
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, ttmp7
49-
; GCN-GISEL-NEXT: global_store_dword v0, v1, s[2:3]
50-
; GCN-GISEL-NEXT: s_endpgm
63+
; GFX12-LABEL: workgroup_id_xy:
64+
; GFX12: ; %bb.0:
65+
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
66+
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
67+
; GFX12-NEXT: v_mov_b32_e32 v2, ttmp7
68+
; GFX12-NEXT: s_wait_kmcnt 0x0
69+
; GFX12-NEXT: s_clause 0x1
70+
; GFX12-NEXT: global_store_b32 v0, v1, s[0:1]
71+
; GFX12-NEXT: global_store_b32 v0, v2, s[2:3]
72+
; GFX12-NEXT: s_nop 0
73+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
74+
; GFX12-NEXT: s_endpgm
5175
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
5276
store i32 %idx, ptr addrspace(1) %ptrx
5377
%idy = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -57,37 +81,56 @@ define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace
5781
}
5882

5983
define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry, ptr addrspace(1) %ptrz) {
60-
; GCN-SDAG-LABEL: workgroup_id_xyz:
61-
; GCN-SDAG: ; %bb.0:
62-
; GCN-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
63-
; GCN-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
64-
; GCN-SDAG-NEXT: v_mov_b32_e32 v0, 0
65-
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
66-
; GCN-SDAG-NEXT: s_waitcnt lgkmcnt(0)
67-
; GCN-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
68-
; GCN-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
69-
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0
70-
; GCN-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
71-
; GCN-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
72-
; GCN-SDAG-NEXT: v_mov_b32_e32 v1, s0
73-
; GCN-SDAG-NEXT: global_store_dword v0, v1, s[6:7]
74-
; GCN-SDAG-NEXT: s_endpgm
84+
; GFX9-SDAG-LABEL: workgroup_id_xyz:
85+
; GFX9-SDAG: ; %bb.0:
86+
; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
87+
; GFX9-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
88+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0
89+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, ttmp9
90+
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
91+
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[0:1]
92+
; GFX9-SDAG-NEXT: s_and_b32 s0, ttmp7, 0xffff
93+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
94+
; GFX9-SDAG-NEXT: s_lshr_b32 s0, ttmp7, 16
95+
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[2:3]
96+
; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s0
97+
; GFX9-SDAG-NEXT: global_store_dword v0, v1, s[6:7]
98+
; GFX9-SDAG-NEXT: s_endpgm
99+
;
100+
; GFX9-GISEL-LABEL: workgroup_id_xyz:
101+
; GFX9-GISEL: ; %bb.0:
102+
; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
103+
; GFX9-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
104+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
105+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
106+
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
107+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
108+
; GFX9-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
109+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
110+
; GFX9-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
111+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
112+
; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
113+
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[6:7]
114+
; GFX9-GISEL-NEXT: s_endpgm
75115
;
76-
; GCN-GISEL-LABEL: workgroup_id_xyz:
77-
; GCN-GISEL: ; %bb.0:
78-
; GCN-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
79-
; GCN-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10
80-
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9
81-
; GCN-GISEL-NEXT: v_mov_b32_e32 v1, 0
82-
; GCN-GISEL-NEXT: s_waitcnt lgkmcnt(0)
83-
; GCN-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
84-
; GCN-GISEL-NEXT: s_and_b32 s0, ttmp7, 0xffff
85-
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
86-
; GCN-GISEL-NEXT: s_lshr_b32 s0, ttmp7, 16
87-
; GCN-GISEL-NEXT: global_store_dword v1, v0, s[2:3]
88-
; GCN-GISEL-NEXT: v_mov_b32_e32 v0, s0
89-
; GCN-GISEL-NEXT: global_store_dword v1, v0, s[6:7]
90-
; GCN-GISEL-NEXT: s_endpgm
116+
; GFX12-LABEL: workgroup_id_xyz:
117+
; GFX12: ; %bb.0:
118+
; GFX12-NEXT: s_clause 0x1
119+
; GFX12-NEXT: s_load_b128 s[4:7], s[0:1], 0x0
120+
; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x10
121+
; GFX12-NEXT: s_and_b32 s2, ttmp7, 0xffff
122+
; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, ttmp9
123+
; GFX12-NEXT: s_lshr_b32 s3, ttmp7, 16
124+
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
125+
; GFX12-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
126+
; GFX12-NEXT: s_wait_kmcnt 0x0
127+
; GFX12-NEXT: s_clause 0x2
128+
; GFX12-NEXT: global_store_b32 v0, v1, s[4:5]
129+
; GFX12-NEXT: global_store_b32 v0, v2, s[6:7]
130+
; GFX12-NEXT: global_store_b32 v0, v3, s[0:1]
131+
; GFX12-NEXT: s_nop 0
132+
; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
133+
; GFX12-NEXT: s_endpgm
91134
%idx = call i32 @llvm.amdgcn.workgroup.id.x()
92135
store i32 %idx, ptr addrspace(1) %ptrx
93136
%idy = call i32 @llvm.amdgcn.workgroup.id.y()

0 commit comments

Comments
 (0)