xref: /llvm-project/llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s
3
4; LDS is allocated per-kernel. Module scope variables are gathered into a struct which is
5; allocated at address zero, if used by the kernel. Kernel scope variables are gathered into
6; a per-kernel struct and allocated immediately after the module scope.
7; This test checks that the module and kernel scope variables are allocated in a deterministic
8; order, without spurious alignment padding between the two.
9
10; External LDS is checked because it influences LDS padding in general and because it will
11; not be moved into either the module or the kernel struct.
12
; i16 LDS (addrspace(3)) variable. In this file it is stored to by the non-kernel
; function @use_module and by the module_1_* kernels.
13@module_variable = addrspace(3) global i16 undef
14
15; Variables are allocated into module scope block when used by a non-kernel function
; Non-kernel helper (attribute group #0): stores i16 0 to @module_variable.
; In the expected assembly v0 = 0 serves as both the LDS address and the data.
16define void @use_module() #0 {
17; CHECK-LABEL: use_module:
18; CHECK:       ; %bb.0:
19; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; CHECK-NEXT:    v_mov_b32_e32 v0, 0
21; CHECK-NEXT:    ds_write_b16 v0, v0
22; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
23; CHECK-NEXT:    s_setpc_b64 s[30:31]
24  store i16 0, ptr addrspace(3) @module_variable
25  ret void
26}
27
28; Variables only used by kernels are specialised and allocated per-kernel
; Two i16 LDS variables that are only stored to from amdgpu_kernel functions in
; this file; the second one requests align 4.
29@kernel_normal = addrspace(3) global i16 undef
30@kernel_overalign = addrspace(3) global i16 undef, align 4
31
32; External LDS shall not introduce padding between module and kernel scope variables
; Both externs are zero-length float arrays; the second one requests align 8.
33@extern_normal = external addrspace(3) global [0 x float]
34@extern_overalign = external addrspace(3) global [0 x float], align 8
35
36
37; External LDS does not influence the frame when called indirectly either
; Non-kernel helper (attribute group #0): stores a float constant to element 0
; of @extern_normal.
; In the expected assembly the LDS address is not a constant: a dword is loaded
; from llvm.amdgcn.dynlds.offset.table at index s15 (scaled by 4) and used as
; the address of the ds_write_b32.
38define void @use_extern_normal() #0 {
39; CHECK-LABEL: use_extern_normal:
40; CHECK:       ; %bb.0:
41; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42; CHECK-NEXT:    s_getpc_b64 s[6:7]
43; CHECK-NEXT:    s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
44; CHECK-NEXT:    s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
45; CHECK-NEXT:    s_mov_b32 s4, s15
46; CHECK-NEXT:    s_ashr_i32 s5, s15, 31
47; CHECK-NEXT:    v_mov_b32_e32 v0, 0x4048f5c3
48; CHECK-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
49; CHECK-NEXT:    s_add_u32 s4, s4, s6
50; CHECK-NEXT:    s_addc_u32 s5, s5, s7
51; CHECK-NEXT:    s_load_dword s4, s[4:5], 0x0
52; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
53; CHECK-NEXT:    v_mov_b32_e32 v1, s4
54; CHECK-NEXT:    ds_write_b32 v1, v0
55; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
56; CHECK-NEXT:    s_setpc_b64 s[30:31]
; Index 0 of the zero-length array, i.e. the base address of @extern_normal.
57  %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 0
58  store float 0x40091EB860000000, ptr addrspace(3) %arrayidx
59  ret void
60}
61
; Non-kernel helper (attribute group #0): stores 42.0 to element 1 of
; @extern_overalign.
; The expected assembly matches use_extern_normal except for the stored constant
; and the offset:4 on the ds_write_b32 (element 1 of a float array).
62define void @use_extern_overalign() #0 {
63; CHECK-LABEL: use_extern_overalign:
64; CHECK:       ; %bb.0:
65; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; CHECK-NEXT:    s_getpc_b64 s[6:7]
67; CHECK-NEXT:    s_add_u32 s6, s6, llvm.amdgcn.dynlds.offset.table@rel32@lo+4
68; CHECK-NEXT:    s_addc_u32 s7, s7, llvm.amdgcn.dynlds.offset.table@rel32@hi+12
69; CHECK-NEXT:    s_mov_b32 s4, s15
70; CHECK-NEXT:    s_ashr_i32 s5, s15, 31
71; CHECK-NEXT:    v_mov_b32_e32 v0, 0x42280000
72; CHECK-NEXT:    s_lshl_b64 s[4:5], s[4:5], 2
73; CHECK-NEXT:    s_add_u32 s4, s4, s6
74; CHECK-NEXT:    s_addc_u32 s5, s5, s7
75; CHECK-NEXT:    s_load_dword s4, s[4:5], 0x0
76; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
77; CHECK-NEXT:    v_mov_b32_e32 v1, s4
78; CHECK-NEXT:    ds_write_b32 v1, v0 offset:4
79; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
80; CHECK-NEXT:    s_setpc_b64 s[30:31]
81  %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 1
82  store float 4.200000e+01, ptr addrspace(3) %arrayidx
83  ret void
84}
85
86
87; First 2^3 of the 2^4 cases, encoded into the function names:
88; the extern variable is accessed directly by the kernel (not from a nested function)
89; module_variable used/not-used
90; kernel variable normal/overaligned
91; extern variable normal/overaligned
92
; Kernel that stores to @kernel_normal and to @extern_normal[%idx]; it does not
; reference @module_variable.
; Expected assembly: the i16 value 2 is written at LDS address 0 and the float
; store goes to address (idx << 2) + 4.
93define amdgpu_kernel void @module_0_kernel_normal_extern_normal(i32 %idx) {
94; CHECK-LABEL: module_0_kernel_normal_extern_normal:
95; CHECK:       ; %bb.0:
96; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0
97; CHECK-NEXT:    v_mov_b32_e32 v0, 2
98; CHECK-NEXT:    v_mov_b32_e32 v1, 0
99; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
100; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
101; CHECK-NEXT:    s_add_i32 s0, s0, 4
102; CHECK-NEXT:    v_mov_b32_e32 v2, s0
103; CHECK-NEXT:    ds_write_b16 v1, v0
104; CHECK-NEXT:    ds_write_b32 v2, v1
105; CHECK-NEXT:    s_endpgm
106  store i16 2, ptr addrspace(3) @kernel_normal
107
108  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
109  store float 0.0, ptr addrspace(3) %arrayidx1
110  ret void
111}
112
; Kernel that calls @use_module, then stores to @module_variable, @kernel_normal
; and @extern_normal[%idx].
; Expected assembly after the call: value 1 is written at LDS address 0, value 2
; at offset 2, and the float store goes to address (idx << 2) + 4.
113define amdgpu_kernel void @module_1_kernel_normal_extern_normal(i32 %idx) {
114; CHECK-LABEL: module_1_kernel_normal_extern_normal:
115; CHECK:       ; %bb.0:
116; CHECK-NEXT:    s_add_u32 s12, s12, s17
117; CHECK-NEXT:    s_mov_b32 s32, 0
118; CHECK-NEXT:    s_addc_u32 s13, s13, 0
119; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
120; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
121; CHECK-NEXT:    s_add_u32 s0, s0, s17
122; CHECK-NEXT:    s_addc_u32 s1, s1, 0
123; CHECK-NEXT:    s_add_u32 s12, s8, 8
124; CHECK-NEXT:    s_addc_u32 s13, s9, 0
125; CHECK-NEXT:    s_getpc_b64 s[18:19]
126; CHECK-NEXT:    s_add_u32 s18, s18, use_module@gotpcrel32@lo+4
127; CHECK-NEXT:    s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12
128; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
129; CHECK-NEXT:    s_load_dwordx2 s[20:21], s[18:19], 0x0
130; CHECK-NEXT:    s_load_dword s17, s[8:9], 0x0
131; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
132; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
133; CHECK-NEXT:    s_mov_b32 s12, s14
134; CHECK-NEXT:    s_mov_b32 s13, s15
135; CHECK-NEXT:    s_mov_b32 s14, s16
136; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
137; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
138; CHECK-NEXT:    s_swappc_b64 s[30:31], s[20:21]
139; CHECK-NEXT:    s_lshl_b32 s4, s17, 2
140; CHECK-NEXT:    v_mov_b32_e32 v0, 1
141; CHECK-NEXT:    v_mov_b32_e32 v1, 0
142; CHECK-NEXT:    s_add_i32 s4, s4, 4
143; CHECK-NEXT:    v_mov_b32_e32 v2, 2
144; CHECK-NEXT:    v_mov_b32_e32 v3, s4
145; CHECK-NEXT:    ds_write_b16 v1, v0
146; CHECK-NEXT:    ds_write_b16 v1, v2 offset:2
147; CHECK-NEXT:    ds_write_b32 v3, v1
148; CHECK-NEXT:    s_endpgm
149  call void @use_module()
150  store i16 1, ptr addrspace(3) @module_variable
151
152  store i16 2, ptr addrspace(3) @kernel_normal
153
154  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
155  store float 0.0, ptr addrspace(3) %arrayidx1
156  ret void
157}
158
; Kernel that stores to @kernel_overalign (align 4) and to @extern_normal[%idx];
; it does not reference @module_variable.
; Expected assembly: the i16 value 2 is written at LDS address 0 and the float
; store goes to address (idx << 2) + 4.
159define amdgpu_kernel void @module_0_kernel_overalign_extern_normal(i32 %idx) {
160; CHECK-LABEL: module_0_kernel_overalign_extern_normal:
161; CHECK:       ; %bb.0:
162; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0
163; CHECK-NEXT:    v_mov_b32_e32 v0, 2
164; CHECK-NEXT:    v_mov_b32_e32 v1, 0
165; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
166; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
167; CHECK-NEXT:    s_add_i32 s0, s0, 4
168; CHECK-NEXT:    v_mov_b32_e32 v2, s0
169; CHECK-NEXT:    ds_write_b16 v1, v0
170; CHECK-NEXT:    ds_write_b32 v2, v1
171; CHECK-NEXT:    s_endpgm
172  store i16 2, ptr addrspace(3) @kernel_overalign
173
174  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
175  store float 0.0, ptr addrspace(3) %arrayidx1
176  ret void
177}
178
; Kernel that calls @use_module, then stores to @module_variable,
; @kernel_overalign (align 4) and @extern_normal[%idx].
; Expected assembly after the call: value 1 is written at LDS address 0, value 2
; at offset 4, and the float store goes to address (idx << 2) + 8.
179define amdgpu_kernel void @module_1_kernel_overalign_extern_normal(i32 %idx) {
180; CHECK-LABEL: module_1_kernel_overalign_extern_normal:
181; CHECK:       ; %bb.0:
182; CHECK-NEXT:    s_add_u32 s12, s12, s17
183; CHECK-NEXT:    s_mov_b32 s32, 0
184; CHECK-NEXT:    s_addc_u32 s13, s13, 0
185; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
186; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
187; CHECK-NEXT:    s_add_u32 s0, s0, s17
188; CHECK-NEXT:    s_addc_u32 s1, s1, 0
189; CHECK-NEXT:    s_add_u32 s12, s8, 8
190; CHECK-NEXT:    s_addc_u32 s13, s9, 0
191; CHECK-NEXT:    s_getpc_b64 s[18:19]
192; CHECK-NEXT:    s_add_u32 s18, s18, use_module@gotpcrel32@lo+4
193; CHECK-NEXT:    s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12
194; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
195; CHECK-NEXT:    s_load_dwordx2 s[20:21], s[18:19], 0x0
196; CHECK-NEXT:    s_load_dword s17, s[8:9], 0x0
197; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
198; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
199; CHECK-NEXT:    s_mov_b32 s12, s14
200; CHECK-NEXT:    s_mov_b32 s13, s15
201; CHECK-NEXT:    s_mov_b32 s14, s16
202; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
203; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
204; CHECK-NEXT:    s_swappc_b64 s[30:31], s[20:21]
205; CHECK-NEXT:    s_lshl_b32 s4, s17, 2
206; CHECK-NEXT:    v_mov_b32_e32 v0, 1
207; CHECK-NEXT:    v_mov_b32_e32 v1, 0
208; CHECK-NEXT:    s_add_i32 s4, s4, 8
209; CHECK-NEXT:    v_mov_b32_e32 v2, 2
210; CHECK-NEXT:    v_mov_b32_e32 v3, s4
211; CHECK-NEXT:    ds_write_b16 v1, v0
212; CHECK-NEXT:    ds_write_b16 v1, v2 offset:4
213; CHECK-NEXT:    ds_write_b32 v3, v1
214; CHECK-NEXT:    s_endpgm
215  call void @use_module()
216  store i16 1, ptr addrspace(3) @module_variable
217
218  store i16 2, ptr addrspace(3) @kernel_overalign
219
220  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_normal, i32 0, i32 %idx
221  store float 0.0, ptr addrspace(3) %arrayidx1
222  ret void
223}
224
; Kernel that stores to @kernel_normal and to @extern_overalign[%idx] (align 8);
; it does not reference @module_variable.
; Expected assembly: the i16 value 2 is written at LDS address 0 and the float
; store goes to address (idx << 2) + 8.
225define amdgpu_kernel void @module_0_kernel_normal_extern_overalign(i32 %idx) {
226; CHECK-LABEL: module_0_kernel_normal_extern_overalign:
227; CHECK:       ; %bb.0:
228; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0
229; CHECK-NEXT:    v_mov_b32_e32 v0, 2
230; CHECK-NEXT:    v_mov_b32_e32 v1, 0
231; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
232; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
233; CHECK-NEXT:    s_add_i32 s0, s0, 8
234; CHECK-NEXT:    v_mov_b32_e32 v2, s0
235; CHECK-NEXT:    ds_write_b16 v1, v0
236; CHECK-NEXT:    ds_write_b32 v2, v1
237; CHECK-NEXT:    s_endpgm
238  store i16 2, ptr addrspace(3) @kernel_normal
239
240  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
241  store float 0.0, ptr addrspace(3) %arrayidx1
242  ret void
243}
244
; Kernel that calls @use_module, then stores to @module_variable, @kernel_normal
; and @extern_overalign[%idx] (align 8).
; Expected assembly after the call: value 1 is written at LDS address 0, value 2
; at offset 2, and the float store goes to address (idx << 2) + 8.
245define amdgpu_kernel void @module_1_kernel_normal_extern_overalign(i32 %idx) {
246; CHECK-LABEL: module_1_kernel_normal_extern_overalign:
247; CHECK:       ; %bb.0:
248; CHECK-NEXT:    s_add_u32 s12, s12, s17
249; CHECK-NEXT:    s_mov_b32 s32, 0
250; CHECK-NEXT:    s_addc_u32 s13, s13, 0
251; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
252; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
253; CHECK-NEXT:    s_add_u32 s0, s0, s17
254; CHECK-NEXT:    s_addc_u32 s1, s1, 0
255; CHECK-NEXT:    s_add_u32 s12, s8, 8
256; CHECK-NEXT:    s_addc_u32 s13, s9, 0
257; CHECK-NEXT:    s_getpc_b64 s[18:19]
258; CHECK-NEXT:    s_add_u32 s18, s18, use_module@gotpcrel32@lo+4
259; CHECK-NEXT:    s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12
260; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
261; CHECK-NEXT:    s_load_dwordx2 s[20:21], s[18:19], 0x0
262; CHECK-NEXT:    s_load_dword s17, s[8:9], 0x0
263; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
264; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
265; CHECK-NEXT:    s_mov_b32 s12, s14
266; CHECK-NEXT:    s_mov_b32 s13, s15
267; CHECK-NEXT:    s_mov_b32 s14, s16
268; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
269; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
270; CHECK-NEXT:    s_swappc_b64 s[30:31], s[20:21]
271; CHECK-NEXT:    s_lshl_b32 s4, s17, 2
272; CHECK-NEXT:    v_mov_b32_e32 v0, 1
273; CHECK-NEXT:    v_mov_b32_e32 v1, 0
274; CHECK-NEXT:    s_add_i32 s4, s4, 8
275; CHECK-NEXT:    v_mov_b32_e32 v2, 2
276; CHECK-NEXT:    v_mov_b32_e32 v3, s4
277; CHECK-NEXT:    ds_write_b16 v1, v0
278; CHECK-NEXT:    ds_write_b16 v1, v2 offset:2
279; CHECK-NEXT:    ds_write_b32 v3, v1
280; CHECK-NEXT:    s_endpgm
281  call void @use_module()
282  store i16 1, ptr addrspace(3) @module_variable
283
284  store i16 2, ptr addrspace(3) @kernel_normal
285
286  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
287  store float 0.0, ptr addrspace(3) %arrayidx1
288  ret void
289}
290
; Kernel that stores to @kernel_overalign (align 4) and to
; @extern_overalign[%idx] (align 8); it does not reference @module_variable.
; Expected assembly: the i16 value 2 is written at LDS address 0 and the float
; store goes to address (idx << 2) + 8.
291define amdgpu_kernel void @module_0_kernel_overalign_extern_overalign(i32 %idx) {
292; CHECK-LABEL: module_0_kernel_overalign_extern_overalign:
293; CHECK:       ; %bb.0:
294; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0
295; CHECK-NEXT:    v_mov_b32_e32 v0, 2
296; CHECK-NEXT:    v_mov_b32_e32 v1, 0
297; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
298; CHECK-NEXT:    s_lshl_b32 s0, s0, 2
299; CHECK-NEXT:    s_add_i32 s0, s0, 8
300; CHECK-NEXT:    v_mov_b32_e32 v2, s0
301; CHECK-NEXT:    ds_write_b16 v1, v0
302; CHECK-NEXT:    ds_write_b32 v2, v1
303; CHECK-NEXT:    s_endpgm
304  store i16 2, ptr addrspace(3) @kernel_overalign
305
306  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
307  store float 0.0, ptr addrspace(3) %arrayidx1
308  ret void
309}
310
; Kernel that calls @use_module, then stores to @module_variable,
; @kernel_overalign (align 4) and @extern_overalign[%idx] (align 8).
; Expected assembly after the call: value 1 is written at LDS address 0, value 2
; at offset 4, and the float store goes to address (idx << 2) + 8.
311define amdgpu_kernel void @module_1_kernel_overalign_extern_overalign(i32 %idx) {
312; CHECK-LABEL: module_1_kernel_overalign_extern_overalign:
313; CHECK:       ; %bb.0:
314; CHECK-NEXT:    s_add_u32 s12, s12, s17
315; CHECK-NEXT:    s_mov_b32 s32, 0
316; CHECK-NEXT:    s_addc_u32 s13, s13, 0
317; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
318; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
319; CHECK-NEXT:    s_add_u32 s0, s0, s17
320; CHECK-NEXT:    s_addc_u32 s1, s1, 0
321; CHECK-NEXT:    s_add_u32 s12, s8, 8
322; CHECK-NEXT:    s_addc_u32 s13, s9, 0
323; CHECK-NEXT:    s_getpc_b64 s[18:19]
324; CHECK-NEXT:    s_add_u32 s18, s18, use_module@gotpcrel32@lo+4
325; CHECK-NEXT:    s_addc_u32 s19, s19, use_module@gotpcrel32@hi+12
326; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
327; CHECK-NEXT:    s_load_dwordx2 s[20:21], s[18:19], 0x0
328; CHECK-NEXT:    s_load_dword s17, s[8:9], 0x0
329; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
330; CHECK-NEXT:    s_mov_b64 s[8:9], s[12:13]
331; CHECK-NEXT:    s_mov_b32 s12, s14
332; CHECK-NEXT:    s_mov_b32 s13, s15
333; CHECK-NEXT:    s_mov_b32 s14, s16
334; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
335; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
336; CHECK-NEXT:    s_swappc_b64 s[30:31], s[20:21]
337; CHECK-NEXT:    s_lshl_b32 s4, s17, 2
338; CHECK-NEXT:    v_mov_b32_e32 v0, 1
339; CHECK-NEXT:    v_mov_b32_e32 v1, 0
340; CHECK-NEXT:    s_add_i32 s4, s4, 8
341; CHECK-NEXT:    v_mov_b32_e32 v2, 2
342; CHECK-NEXT:    v_mov_b32_e32 v3, s4
343; CHECK-NEXT:    ds_write_b16 v1, v0
344; CHECK-NEXT:    ds_write_b16 v1, v2 offset:4
345; CHECK-NEXT:    ds_write_b32 v3, v1
346; CHECK-NEXT:    s_endpgm
347  call void @use_module()
348  store i16 1, ptr addrspace(3) @module_variable
349
350  store i16 2, ptr addrspace(3) @kernel_overalign
351
352  %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @extern_overalign, i32 0, i32 %idx
353  store float 0.0, ptr addrspace(3) %arrayidx1
354  ret void
355}
356
357
358; Second 2^3 of the 2^4 cases, encoded into the function names:
359; the extern variable is accessed from a nested (called) function
360; module_variable used/not-used
361; kernel variable normal/overaligned
362; extern variable normal/overaligned
363
; Kernel that stores to @kernel_normal and then calls @use_extern_normal; the
; %idx parameter is not referenced in the body.
; Expected assembly: the i16 value 2 is written at LDS address 0 and s15 is set
; to 0 before the call (use_extern_normal indexes the dynlds offset table by s15).
364define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_normal(i32 %idx) {
365; CHECK-LABEL: module_0_kernel_normal_indirect_extern_normal:
366; CHECK:       ; %bb.0:
367; CHECK-NEXT:    s_add_u32 s12, s12, s17
368; CHECK-NEXT:    s_mov_b32 s32, 0
369; CHECK-NEXT:    s_addc_u32 s13, s13, 0
370; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
371; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
372; CHECK-NEXT:    s_add_u32 s0, s0, s17
373; CHECK-NEXT:    s_addc_u32 s1, s1, 0
374; CHECK-NEXT:    s_add_u32 s8, s8, 8
375; CHECK-NEXT:    s_addc_u32 s9, s9, 0
376; CHECK-NEXT:    s_mov_b32 s13, s15
377; CHECK-NEXT:    s_mov_b32 s12, s14
378; CHECK-NEXT:    s_getpc_b64 s[14:15]
379; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4
380; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12
381; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
382; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
383; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
384; CHECK-NEXT:    v_mov_b32_e32 v3, 2
385; CHECK-NEXT:    v_mov_b32_e32 v4, 0
386; CHECK-NEXT:    s_mov_b32 s14, s16
387; CHECK-NEXT:    s_mov_b32 s15, 0
388; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
389; CHECK-NEXT:    ds_write_b16 v4, v3
390; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
391; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
392; CHECK-NEXT:    s_endpgm
393  store i16 2, ptr addrspace(3) @kernel_normal
394
395  call void @use_extern_normal()
396  ret void
397}
398
; Kernel that calls @use_module, stores to @module_variable and @kernel_normal,
; then calls @use_extern_normal; the %idx parameter is not referenced in the body.
; Expected assembly: s15 is set to 4 before each of the two calls; value 1 is
; written at LDS address 0 and value 2 at offset 2.
399define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_normal(i32 %idx) {
400; CHECK-LABEL: module_1_kernel_normal_indirect_extern_normal:
401; CHECK:       ; %bb.0:
402; CHECK-NEXT:    s_add_u32 s12, s12, s17
403; CHECK-NEXT:    s_mov_b32 s32, 0
404; CHECK-NEXT:    s_addc_u32 s13, s13, 0
405; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
406; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
407; CHECK-NEXT:    s_add_u32 s0, s0, s17
408; CHECK-NEXT:    s_addc_u32 s1, s1, 0
409; CHECK-NEXT:    s_add_u32 s8, s8, 8
410; CHECK-NEXT:    s_addc_u32 s9, s9, 0
411; CHECK-NEXT:    s_mov_b32 s13, s15
412; CHECK-NEXT:    s_mov_b32 s12, s14
413; CHECK-NEXT:    s_getpc_b64 s[14:15]
414; CHECK-NEXT:    s_add_u32 s14, s14, use_module@gotpcrel32@lo+4
415; CHECK-NEXT:    s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12
416; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
417; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
418; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
419; CHECK-NEXT:    s_mov_b32 s14, s16
420; CHECK-NEXT:    s_mov_b32 s15, 4
421; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
422; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
423; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
424; CHECK-NEXT:    s_getpc_b64 s[14:15]
425; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4
426; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12
427; CHECK-NEXT:    v_mov_b32_e32 v0, 1
428; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
429; CHECK-NEXT:    v_mov_b32_e32 v1, 0
430; CHECK-NEXT:    v_mov_b32_e32 v2, 2
431; CHECK-NEXT:    s_mov_b32 s14, s16
432; CHECK-NEXT:    s_mov_b32 s15, 4
433; CHECK-NEXT:    ds_write_b16 v1, v0
434; CHECK-NEXT:    ds_write_b16 v1, v2 offset:2
435; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
436; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
437; CHECK-NEXT:    s_endpgm
438  call void @use_module()
439  store i16 1, ptr addrspace(3) @module_variable
440
441  store i16 2, ptr addrspace(3) @kernel_normal
442
443  call void @use_extern_normal()
444  ret void
445}
446
; Kernel that stores to @kernel_overalign (align 4) and then calls
; @use_extern_normal; the %idx parameter is not referenced in the body.
; Expected assembly: the i16 value 2 is written at LDS address 0 and s15 is set
; to 2 before the call.
447define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_normal(i32 %idx) {
448; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_normal:
449; CHECK:       ; %bb.0:
450; CHECK-NEXT:    s_add_u32 s12, s12, s17
451; CHECK-NEXT:    s_mov_b32 s32, 0
452; CHECK-NEXT:    s_addc_u32 s13, s13, 0
453; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
454; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
455; CHECK-NEXT:    s_add_u32 s0, s0, s17
456; CHECK-NEXT:    s_addc_u32 s1, s1, 0
457; CHECK-NEXT:    s_add_u32 s8, s8, 8
458; CHECK-NEXT:    s_addc_u32 s9, s9, 0
459; CHECK-NEXT:    s_mov_b32 s13, s15
460; CHECK-NEXT:    s_mov_b32 s12, s14
461; CHECK-NEXT:    s_getpc_b64 s[14:15]
462; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4
463; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12
464; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
465; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
466; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
467; CHECK-NEXT:    v_mov_b32_e32 v3, 2
468; CHECK-NEXT:    v_mov_b32_e32 v4, 0
469; CHECK-NEXT:    s_mov_b32 s14, s16
470; CHECK-NEXT:    s_mov_b32 s15, 2
471; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
472; CHECK-NEXT:    ds_write_b16 v4, v3
473; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
474; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
475; CHECK-NEXT:    s_endpgm
476  store i16 2, ptr addrspace(3) @kernel_overalign
477
478  call void @use_extern_normal()
479  ret void
480}
481
; Kernel that calls @use_module, stores to @module_variable and
; @kernel_overalign (align 4), then calls @use_extern_normal; the %idx
; parameter is not referenced in the body.
; Expected assembly: s15 is set to 6 before each of the two calls; value 1 is
; written at LDS address 0 and value 2 at offset 4.
482define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_normal(i32 %idx) {
483; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_normal:
484; CHECK:       ; %bb.0:
485; CHECK-NEXT:    s_add_u32 s12, s12, s17
486; CHECK-NEXT:    s_mov_b32 s32, 0
487; CHECK-NEXT:    s_addc_u32 s13, s13, 0
488; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
489; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
490; CHECK-NEXT:    s_add_u32 s0, s0, s17
491; CHECK-NEXT:    s_addc_u32 s1, s1, 0
492; CHECK-NEXT:    s_add_u32 s8, s8, 8
493; CHECK-NEXT:    s_addc_u32 s9, s9, 0
494; CHECK-NEXT:    s_mov_b32 s13, s15
495; CHECK-NEXT:    s_mov_b32 s12, s14
496; CHECK-NEXT:    s_getpc_b64 s[14:15]
497; CHECK-NEXT:    s_add_u32 s14, s14, use_module@gotpcrel32@lo+4
498; CHECK-NEXT:    s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12
499; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
500; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
501; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
502; CHECK-NEXT:    s_mov_b32 s14, s16
503; CHECK-NEXT:    s_mov_b32 s15, 6
504; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
505; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
506; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
507; CHECK-NEXT:    s_getpc_b64 s[14:15]
508; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_normal@gotpcrel32@lo+4
509; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_normal@gotpcrel32@hi+12
510; CHECK-NEXT:    v_mov_b32_e32 v0, 1
511; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
512; CHECK-NEXT:    v_mov_b32_e32 v1, 0
513; CHECK-NEXT:    v_mov_b32_e32 v2, 2
514; CHECK-NEXT:    s_mov_b32 s14, s16
515; CHECK-NEXT:    s_mov_b32 s15, 6
516; CHECK-NEXT:    ds_write_b16 v1, v0
517; CHECK-NEXT:    ds_write_b16 v1, v2 offset:4
518; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
519; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
520; CHECK-NEXT:    s_endpgm
521  call void @use_module()
522  store i16 1, ptr addrspace(3) @module_variable
523
524  store i16 2, ptr addrspace(3) @kernel_overalign
525
526  call void @use_extern_normal()
527  ret void
528}
529
; Kernel that stores to @kernel_normal and then calls @use_extern_overalign; the
; %idx parameter is not referenced in the body.
; Expected assembly: the i16 value 2 is written at LDS address 0 and s15 is set
; to 1 before the call.
530define amdgpu_kernel void @module_0_kernel_normal_indirect_extern_overalign(i32 %idx) {
531; CHECK-LABEL: module_0_kernel_normal_indirect_extern_overalign:
532; CHECK:       ; %bb.0:
533; CHECK-NEXT:    s_add_u32 s12, s12, s17
534; CHECK-NEXT:    s_mov_b32 s32, 0
535; CHECK-NEXT:    s_addc_u32 s13, s13, 0
536; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
537; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
538; CHECK-NEXT:    s_add_u32 s0, s0, s17
539; CHECK-NEXT:    s_addc_u32 s1, s1, 0
540; CHECK-NEXT:    s_add_u32 s8, s8, 8
541; CHECK-NEXT:    s_addc_u32 s9, s9, 0
542; CHECK-NEXT:    s_mov_b32 s13, s15
543; CHECK-NEXT:    s_mov_b32 s12, s14
544; CHECK-NEXT:    s_getpc_b64 s[14:15]
545; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4
546; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12
547; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
548; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
549; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
550; CHECK-NEXT:    v_mov_b32_e32 v3, 2
551; CHECK-NEXT:    v_mov_b32_e32 v4, 0
552; CHECK-NEXT:    s_mov_b32 s14, s16
553; CHECK-NEXT:    s_mov_b32 s15, 1
554; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
555; CHECK-NEXT:    ds_write_b16 v4, v3
556; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
557; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
558; CHECK-NEXT:    s_endpgm
559  store i16 2, ptr addrspace(3) @kernel_normal
560
561  call void @use_extern_overalign()
562  ret void
563}
564
; Kernel that calls @use_module, stores to @module_variable and @kernel_normal,
; then calls @use_extern_overalign; the %idx parameter is not referenced in the
; body.
; Expected assembly: s15 is set to 5 before each of the two calls; value 1 is
; written at LDS address 0 and value 2 at offset 2.
565define amdgpu_kernel void @module_1_kernel_normal_indirect_extern_overalign(i32 %idx) {
566; CHECK-LABEL: module_1_kernel_normal_indirect_extern_overalign:
567; CHECK:       ; %bb.0:
568; CHECK-NEXT:    s_add_u32 s12, s12, s17
569; CHECK-NEXT:    s_mov_b32 s32, 0
570; CHECK-NEXT:    s_addc_u32 s13, s13, 0
571; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
572; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
573; CHECK-NEXT:    s_add_u32 s0, s0, s17
574; CHECK-NEXT:    s_addc_u32 s1, s1, 0
575; CHECK-NEXT:    s_add_u32 s8, s8, 8
576; CHECK-NEXT:    s_addc_u32 s9, s9, 0
577; CHECK-NEXT:    s_mov_b32 s13, s15
578; CHECK-NEXT:    s_mov_b32 s12, s14
579; CHECK-NEXT:    s_getpc_b64 s[14:15]
580; CHECK-NEXT:    s_add_u32 s14, s14, use_module@gotpcrel32@lo+4
581; CHECK-NEXT:    s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12
582; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
583; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
584; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
585; CHECK-NEXT:    s_mov_b32 s14, s16
586; CHECK-NEXT:    s_mov_b32 s15, 5
587; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
588; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
589; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
590; CHECK-NEXT:    s_getpc_b64 s[14:15]
591; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4
592; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12
593; CHECK-NEXT:    v_mov_b32_e32 v0, 1
594; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
595; CHECK-NEXT:    v_mov_b32_e32 v1, 0
596; CHECK-NEXT:    v_mov_b32_e32 v2, 2
597; CHECK-NEXT:    s_mov_b32 s14, s16
598; CHECK-NEXT:    s_mov_b32 s15, 5
599; CHECK-NEXT:    ds_write_b16 v1, v0
600; CHECK-NEXT:    ds_write_b16 v1, v2 offset:2
601; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
602; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
603; CHECK-NEXT:    s_endpgm
604  call void @use_module()
605  store i16 1, ptr addrspace(3) @module_variable
606
607  store i16 2, ptr addrspace(3) @kernel_normal
608
609  call void @use_extern_overalign()
610  ret void
611}
612
; Kernel that stores to @kernel_overalign (align 4) and then calls
; @use_extern_overalign; the %idx parameter is not referenced in the body.
; Expected assembly: the i16 value 2 is written at LDS address 0 and s15 is set
; to 3 before the call.
613define amdgpu_kernel void @module_0_kernel_overalign_indirect_extern_overalign(i32 %idx) {
614; CHECK-LABEL: module_0_kernel_overalign_indirect_extern_overalign:
615; CHECK:       ; %bb.0:
616; CHECK-NEXT:    s_add_u32 s12, s12, s17
617; CHECK-NEXT:    s_mov_b32 s32, 0
618; CHECK-NEXT:    s_addc_u32 s13, s13, 0
619; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
620; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
621; CHECK-NEXT:    s_add_u32 s0, s0, s17
622; CHECK-NEXT:    s_addc_u32 s1, s1, 0
623; CHECK-NEXT:    s_add_u32 s8, s8, 8
624; CHECK-NEXT:    s_addc_u32 s9, s9, 0
625; CHECK-NEXT:    s_mov_b32 s13, s15
626; CHECK-NEXT:    s_mov_b32 s12, s14
627; CHECK-NEXT:    s_getpc_b64 s[14:15]
628; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4
629; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12
630; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
631; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
632; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
633; CHECK-NEXT:    v_mov_b32_e32 v3, 2
634; CHECK-NEXT:    v_mov_b32_e32 v4, 0
635; CHECK-NEXT:    s_mov_b32 s14, s16
636; CHECK-NEXT:    s_mov_b32 s15, 3
637; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
638; CHECK-NEXT:    ds_write_b16 v4, v3
639; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
640; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
641; CHECK-NEXT:    s_endpgm
642  store i16 2, ptr addrspace(3) @kernel_overalign
643
644  call void @use_extern_overalign()
645  ret void
646}
647
; Kernel that calls @use_module, stores to @module_variable and
; @kernel_overalign (align 4), then calls @use_extern_overalign; the %idx
; parameter is not referenced in the body.
; Expected assembly: s15 is set to 7 before each of the two calls; value 1 is
; written at LDS address 0 and value 2 at offset 4.
648define amdgpu_kernel void @module_1_kernel_overalign_indirect_extern_overalign(i32 %idx) {
649; CHECK-LABEL: module_1_kernel_overalign_indirect_extern_overalign:
650; CHECK:       ; %bb.0:
651; CHECK-NEXT:    s_add_u32 s12, s12, s17
652; CHECK-NEXT:    s_mov_b32 s32, 0
653; CHECK-NEXT:    s_addc_u32 s13, s13, 0
654; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s12
655; CHECK-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s13
656; CHECK-NEXT:    s_add_u32 s0, s0, s17
657; CHECK-NEXT:    s_addc_u32 s1, s1, 0
658; CHECK-NEXT:    s_add_u32 s8, s8, 8
659; CHECK-NEXT:    s_addc_u32 s9, s9, 0
660; CHECK-NEXT:    s_mov_b32 s13, s15
661; CHECK-NEXT:    s_mov_b32 s12, s14
662; CHECK-NEXT:    s_getpc_b64 s[14:15]
663; CHECK-NEXT:    s_add_u32 s14, s14, use_module@gotpcrel32@lo+4
664; CHECK-NEXT:    s_addc_u32 s15, s15, use_module@gotpcrel32@hi+12
665; CHECK-NEXT:    v_lshlrev_b32_e32 v2, 20, v2
666; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
667; CHECK-NEXT:    v_lshlrev_b32_e32 v1, 10, v1
668; CHECK-NEXT:    s_mov_b32 s14, s16
669; CHECK-NEXT:    s_mov_b32 s15, 7
670; CHECK-NEXT:    v_or3_b32 v31, v0, v1, v2
671; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
672; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
673; CHECK-NEXT:    s_getpc_b64 s[14:15]
674; CHECK-NEXT:    s_add_u32 s14, s14, use_extern_overalign@gotpcrel32@lo+4
675; CHECK-NEXT:    s_addc_u32 s15, s15, use_extern_overalign@gotpcrel32@hi+12
676; CHECK-NEXT:    v_mov_b32_e32 v0, 1
677; CHECK-NEXT:    s_load_dwordx2 s[18:19], s[14:15], 0x0
678; CHECK-NEXT:    v_mov_b32_e32 v1, 0
679; CHECK-NEXT:    v_mov_b32_e32 v2, 2
680; CHECK-NEXT:    s_mov_b32 s14, s16
681; CHECK-NEXT:    s_mov_b32 s15, 7
682; CHECK-NEXT:    ds_write_b16 v1, v0
683; CHECK-NEXT:    ds_write_b16 v1, v2 offset:4
684; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
685; CHECK-NEXT:    s_swappc_b64 s[30:31], s[18:19]
686; CHECK-NEXT:    s_endpgm
687  call void @use_module()
688  store i16 1, ptr addrspace(3) @module_variable
689
690  store i16 2, ptr addrspace(3) @kernel_overalign
691
692  call void @use_extern_overalign()
693  ret void
694}
695
696
; Attribute group #0 is attached to the three non-kernel helpers (@use_module,
; @use_extern_normal, @use_extern_overalign) and marks them noinline.
697attributes #0 = { noinline }
698
; Module flag setting "amdhsa_code_object_version" to 500.
699!llvm.module.flags = !{!0}
700!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
701