; RUN: opt -passes=amdgpu-attributor -mcpu=kaveri < %s | llc -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: opt -passes=amdgpu-attributor -mcpu=gfx900 < %s | llc -enable-ipra=0 | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

target triple = "amdgcn-amd-amdhsa"
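
; Under the fixed ABI exercised here, the special inputs are passed to callees
; in fixed SGPRs, as the checks below expect: the dispatch pointer in s[4:5],
; the queue pointer in s[6:7], the implicit argument pointer in s[8:9], the
; dispatch id in s[10:11], and the workgroup IDs x/y/z in s12, s13 and s14.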

; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
define hidden void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
  %value = load volatile i32, ptr addrspace(4) %dispatch_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN-NOT: s[4:5]
; GCN-NOT: s4
; GCN-NOT: s5
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
define hidden void @use_queue_ptr() #1 {
  %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
  %value = load volatile i32, ptr addrspace(4) %queue_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: s_swappc_b64 s[30:31], s[10:11]
; GCN: .amdhsa_user_sgpr_queue_ptr 1
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: s_load_dword [[APERTURE_LOAD:s[0-9]+]], s[4:5], 0x0
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; CIVI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]

; GFX9: s_mov_b64 s[{{[0-9]+}}:[[HI:[0-9]+]]], src_shared_base
; GFX9-DAG: v_mov_b32_e32 v[[VGPR_HI:[0-9]+]], s[[HI]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[VGPR_HI]]]

; CIVI: {{flat|global}}_store_dword v[[[LO]]:[[HI]]]
define hidden void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast ptr addrspace(3) inttoptr (i32 16 to ptr addrspace(3)) to ptr
  store volatile i32 0, ptr %asc
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: s_swappc_b64 s[30:31], s[4:5]
; CIVI: .amdhsa_user_sgpr_queue_ptr 0

; GFX9-NOT: s_mov_b64 s[6:7]
; GFX9: .amdhsa_user_sgpr_queue_ptr 0
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

; Not really supported in callable functions.
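; As checked below, the kernarg segment pointer is simply lowered to a null
; pointer in this callee.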
; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: s_mov_b64 [[PTR:s\[[0-9]+:[0-9]+\]]], 0
; GCN: s_load_dword s{{[0-9]+}}, [[PTR]], 0x0
define hidden void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
  %value = load volatile i32, ptr addrspace(4) %kernarg_segment_ptr
  ret void
}

; GCN-LABEL: {{^}}use_implicitarg_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
define hidden void @use_implicitarg_ptr() #1 {
  %implicit.arg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
  %value = load volatile i32, ptr addrspace(4) %implicit.arg.ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[10:11]
define hidden void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; There is no kernarg segment, so there is a mov to check. With the kernarg
; pointer enabled, the dispatch id happens to end up in the right place anyway.
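; As the s_mov_b64 check below shows, the dispatch id arrives in s[4:5] here
; and is copied into s[10:11], where the callee expects it.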

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: s_mov_b64 s[10:11], s[4:5]
; GCN: .amdhsa_user_sgpr_dispatch_id 1
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s12
define hidden void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN-NOT: s32
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: ; use s12
; GCN: s_setpc_b64
define hidden void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s13
define hidden void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s14
define hidden void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s12
; GCN: ; use s13
define hidden void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s12
; GCN: ; use s14
define hidden void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN-NOT: s6
; GCN: s_mov_b32 s12, s6
; GCN: s_mov_b32 s32, 0
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, use_workgroup_id_x@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, use_workgroup_id_x@rel32@hi+12
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN-NOT: s12
; GCN: s_mov_b32 s13, s7
; GCN-NOT: s12
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN: s_mov_b32 s14, s7
; GCN-NOT: s12
; GCN-NOT: s13

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN-NOT: s14
; GCN: s_mov_b32 s12, s6
; GCN-NEXT: s_mov_b32 s13, s7
; GCN-NOT: s14

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: s_mov_b32 s12, s6
; GCN: s_mov_b32 s13, s7
; GCN: s_mov_b32 s14, s8
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:

; GCN-NOT: s13
; GCN: s_mov_b32 s12, s6
; GCN-NEXT: s_mov_b32 s14, s7
; GCN-NOT: s13

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:

; GCN: s_mov_b32 s13, s7
; GCN: s_mov_b32 s14, s8

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; The argument is already in the right place.
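; Under this ABI the caller already receives the workgroup ID x in s12, the
; same SGPR the callee reads, so no copies of s12-s14 are expected before the
; call.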
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: v_readlane_b32 s30, v40, 0
define hidden void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; The argument is already in the right place. We are free to clobber the
; other SGPR arguments.
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
define hidden void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
define hidden void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s12
define hidden void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, ptr addrspace(1) undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s13
define hidden void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, ptr addrspace(1) undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; CIVI: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0, off
; GCN: ; use s14
define hidden void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, ptr addrspace(1) undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:

; GCN-NOT: s13
; GCN-NOT: s14
; GCN-DAG: s_mov_b32 s12, s6
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-NOT: s13
; GCN-NOT: s14

; GCN-DAG: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s13, s7

; GCN-DAG: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s14, s7

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 0
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}

; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32{{$}}
; GCN: s_load_dword s{{[0-9]+}}, s[4:5]
; GCN: s_load_dword s{{[0-9]+}}, s[6:7]
; GCN: s_load_dword s{{[0-9]+}}, s[8:9]
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14
define hidden void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca

  %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
  %val0 = load volatile i32, ptr addrspace(4) %dispatch_ptr

  %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
  %val1 = load volatile i32, ptr addrspace(4) %queue_ptr

  %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
  %val2 = load volatile i32, ptr addrspace(4) %implicitarg.ptr

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
; GCN: .amdhsa_user_sgpr_queue_ptr 1
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 1
; GCN: .amdhsa_user_sgpr_dispatch_id 1
; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
; GCN: .amdhsa_user_sgpr_private_segment_size 0
; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN: .amdhsa_system_sgpr_workgroup_info 0
; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input(i8) #1 {
  call void @use_every_sgpr_input()
  ret void
}

; We have to pass the kernarg segment, but there are no kernel
; arguments, so null is passed.
; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input_no_kernargs:
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_user_sgpr_private_segment_buffer 1
; GCN: .amdhsa_user_sgpr_dispatch_ptr 1
; GCN: .amdhsa_user_sgpr_queue_ptr 1
; GCN: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GCN: .amdhsa_user_sgpr_dispatch_id 1
; GCN: .amdhsa_user_sgpr_flat_scratch_init 1
; GCN: .amdhsa_user_sgpr_private_segment_size 0
; GCN: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GCN: .amdhsa_system_sgpr_workgroup_id_x 1
; GCN: .amdhsa_system_sgpr_workgroup_id_y 1
; GCN: .amdhsa_system_sgpr_workgroup_id_z 1
; GCN: .amdhsa_system_sgpr_workgroup_info 0
; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input_no_kernargs() #2 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
; GCN-NOT: s14
; GCN: s_or_saveexec_b64 s[16:17], -1
define hidden void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s14
; GCN: ; use s[10:11]
; GCN: ; use s12
; GCN: ; use s13
; GCN: ; use s14

; GCN: s_swappc_b64
define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca

  %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
  %val0 = load volatile i32, ptr addrspace(4) %dispatch_ptr

  %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
  %val1 = load volatile i32, ptr addrspace(4) %queue_ptr

  %kernarg_segment_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
  %val2 = load volatile i32, ptr addrspace(4) %kernarg_segment_ptr

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
declare noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}