xref: /llvm-project/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
5; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
6
; workgroup_id_x: kernel that stores the result of llvm.amdgcn.workgroup.id.x
; through %ptrx. In the expected output for both gfx900 (+architected-sgprs)
; and gfx1200, the stored value is moved directly out of ttmp9. The SelectionDAG
; and GlobalISel run lines share one set of checks per target.
7define amdgpu_kernel void @workgroup_id_x(ptr addrspace(1) %ptrx) {
8;
9; GFX9-LABEL: workgroup_id_x:
10; GFX9:       ; %bb.0:
11; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
12; GFX9-NEXT:    v_mov_b32_e32 v0, ttmp9
13; GFX9-NEXT:    v_mov_b32_e32 v1, 0
14; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
15; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
16; GFX9-NEXT:    s_endpgm
17;
18; GFX12-LABEL: workgroup_id_x:
19; GFX12:       ; %bb.0:
20; GFX12-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
21; GFX12-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
22; GFX12-NEXT:    s_wait_kmcnt 0x0
23; GFX12-NEXT:    global_store_b32 v1, v0, s[0:1]
24; GFX12-NEXT:    s_endpgm
; IR under test: a single intrinsic call whose i32 result is stored to %ptrx.
25  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
26  store i32 %idx, ptr addrspace(1) %ptrx
27
28  ret void
29}
30
; workgroup_id_xy: kernel that stores the X workgroup ID through %ptrx and the
; Y workgroup ID through %ptry. The expected output on both targets takes X
; from ttmp9 and computes Y as (ttmp7 & 0xffff) before storing it.
31define amdgpu_kernel void @workgroup_id_xy(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry) {
32; GFX9-LABEL: workgroup_id_xy:
33; GFX9:       ; %bb.0:
34; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
35; GFX9-NEXT:    v_mov_b32_e32 v0, ttmp9
36; GFX9-NEXT:    v_mov_b32_e32 v1, 0
37; GFX9-NEXT:    s_and_b32 s4, ttmp7, 0xffff
38; GFX9-NEXT:    v_mov_b32_e32 v2, s4
39; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
40; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
41; GFX9-NEXT:    global_store_dword v1, v2, s[2:3]
42; GFX9-NEXT:    s_endpgm
43;
44; GFX12-LABEL: workgroup_id_xy:
45; GFX12:       ; %bb.0:
46; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
47; GFX12-NEXT:    s_and_b32 s4, ttmp7, 0xffff
48; GFX12-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
49; GFX12-NEXT:    v_mov_b32_e32 v2, s4
50; GFX12-NEXT:    s_wait_kmcnt 0x0
51; GFX12-NEXT:    s_clause 0x1
52; GFX12-NEXT:    global_store_b32 v1, v0, s[0:1]
53; GFX12-NEXT:    global_store_b32 v1, v2, s[2:3]
54; GFX12-NEXT:    s_endpgm
; IR under test: query X then Y, storing each i32 to its own output pointer.
55  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
56  store i32 %idx, ptr addrspace(1) %ptrx
57  %idy = call i32 @llvm.amdgcn.workgroup.id.y()
58  store i32 %idy, ptr addrspace(1) %ptry
59
60  ret void
61}
62
; workgroup_id_xyz: kernel that stores the X, Y and Z workgroup IDs through
; %ptrx, %ptry and %ptrz respectively. The expected output on both targets
; takes X from ttmp9 and derives the other two from ttmp7: Y is
; (ttmp7 & 0xffff) and Z is (ttmp7 >> 16, logical shift).
63define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspace(1) %ptry, ptr addrspace(1) %ptrz) {
64; GFX9-LABEL: workgroup_id_xyz:
65; GFX9:       ; %bb.0:
66; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
67; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x10
68; GFX9-NEXT:    v_mov_b32_e32 v0, ttmp9
69; GFX9-NEXT:    v_mov_b32_e32 v1, 0
70; GFX9-NEXT:    s_and_b32 s6, ttmp7, 0xffff
71; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
72; GFX9-NEXT:    global_store_dword v1, v0, s[0:1]
73; GFX9-NEXT:    v_mov_b32_e32 v0, s6
74; GFX9-NEXT:    s_lshr_b32 s0, ttmp7, 16
75; GFX9-NEXT:    global_store_dword v1, v0, s[2:3]
76; GFX9-NEXT:    v_mov_b32_e32 v0, s0
77; GFX9-NEXT:    global_store_dword v1, v0, s[4:5]
78; GFX9-NEXT:    s_endpgm
79;
80; GFX12-LABEL: workgroup_id_xyz:
81; GFX12:       ; %bb.0:
82; GFX12-NEXT:    s_clause 0x1
83; GFX12-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
84; GFX12-NEXT:    s_load_b64 s[4:5], s[4:5], 0x10
85; GFX12-NEXT:    s_and_b32 s6, ttmp7, 0xffff
86; GFX12-NEXT:    v_dual_mov_b32 v0, ttmp9 :: v_dual_mov_b32 v1, 0
87; GFX12-NEXT:    s_lshr_b32 s7, ttmp7, 16
88; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
89; GFX12-NEXT:    v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7
90; GFX12-NEXT:    s_wait_kmcnt 0x0
91; GFX12-NEXT:    s_clause 0x2
92; GFX12-NEXT:    global_store_b32 v1, v0, s[0:1]
93; GFX12-NEXT:    global_store_b32 v1, v2, s[2:3]
94; GFX12-NEXT:    global_store_b32 v1, v3, s[4:5]
95; GFX12-NEXT:    s_endpgm
; IR under test: query X, Y and Z in order, storing each i32 to its own
; output pointer.
96  %idx = call i32 @llvm.amdgcn.workgroup.id.x()
97  store i32 %idx, ptr addrspace(1) %ptrx
98  %idy = call i32 @llvm.amdgcn.workgroup.id.y()
99  store i32 %idy, ptr addrspace(1) %ptry
100  %idz = call i32 @llvm.amdgcn.workgroup.id.z()
101  store i32 %idz, ptr addrspace(1) %ptrz
102
103  ret void
104}
105
; Declarations of the workgroup-ID intrinsics called by the kernels in this
; file; each takes no arguments and returns an i32.
106declare i32 @llvm.amdgcn.workgroup.id.x()
107declare i32 @llvm.amdgcn.workgroup.id.y()
108declare i32 @llvm.amdgcn.workgroup.id.z()
109;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
110; GFX12-GISEL: {{.*}}
111; GFX12-SDAG: {{.*}}
112; GFX9-GISEL: {{.*}}
113; GFX9-SDAG: {{.*}}
114