xref: /llvm-project/llvm/test/CodeGen/AMDGPU/call-argument-types.ll (revision 11b040192640ef3b1f481124c440f464ed6ec86a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
3; RUN: llc -mtriple=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CI %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX9 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
6; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=HSA %s
7
; External callees used by the call tests in this file. They are declared
; only (no bodies), with hidden visibility, and all share attribute group #0.
; The name suffix spells out the parameter type each callee takes.

; i1 arguments: plain, signext and zeroext variants.
8declare hidden void @external_void_func_i1(i1) #0
9declare hidden void @external_void_func_i1_signext(i1 signext) #0
10declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0
11
; i8 arguments: plain, signext and zeroext variants.
12declare hidden void @external_void_func_i8(i8) #0
13declare hidden void @external_void_func_i8_signext(i8 signext) #0
14declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0
15
; i16 arguments: plain, signext and zeroext variants.
16declare hidden void @external_void_func_i16(i16) #0
17declare hidden void @external_void_func_i16_signext(i16 signext) #0
18declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0
19
; 32-bit and 64-bit integers, plus vectors of i64.
20declare hidden void @external_void_func_i32(i32) #0
21declare hidden void @external_void_func_i64(i64) #0
22declare hidden void @external_void_func_v2i64(<2 x i64>) #0
23declare hidden void @external_void_func_v3i64(<3 x i64>) #0
24declare hidden void @external_void_func_v4i64(<4 x i64>) #0
25
; Floating-point scalars and vectors of float / double.
26declare hidden void @external_void_func_f16(half) #0
27declare hidden void @external_void_func_f32(float) #0
28declare hidden void @external_void_func_f64(double) #0
29declare hidden void @external_void_func_v2f32(<2 x float>) #0
30declare hidden void @external_void_func_v2f64(<2 x double>) #0
31declare hidden void @external_void_func_v3f32(<3 x float>) #0
32declare hidden void @external_void_func_v3f64(<3 x double>) #0
33declare hidden void @external_void_func_v5f32(<5 x float>) #0
34
; Vectors of 16-bit elements (i16 and half).
35declare hidden void @external_void_func_v2i16(<2 x i16>) #0
36declare hidden void @external_void_func_v2f16(<2 x half>) #0
37declare hidden void @external_void_func_v3i16(<3 x i16>) #0
38declare hidden void @external_void_func_v3f16(<3 x half>) #0
39declare hidden void @external_void_func_v4i16(<4 x i16>) #0
40declare hidden void @external_void_func_v4f16(<4 x half>) #0
41
; Vectors of i32, from 2 up to 32 elements; two variants take an extra
; trailing i32 after the vector.
42declare hidden void @external_void_func_v2i32(<2 x i32>) #0
43declare hidden void @external_void_func_v3i32(<3 x i32>) #0
44declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0
45declare hidden void @external_void_func_v4i32(<4 x i32>) #0
46declare hidden void @external_void_func_v5i32(<5 x i32>) #0
47declare hidden void @external_void_func_v8i32(<8 x i32>) #0
48declare hidden void @external_void_func_v16i32(<16 x i32>) #0
49declare hidden void @external_void_func_v32i32(<32 x i32>) #0
50declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0
51
52; return value and argument
53declare hidden i32 @external_i32_func_i32(i32) #0
54
55; Structs
; One callee takes the { i8, i32 } struct by value as a first-class aggregate,
; one takes it through a byval pointer in addrspace(5), and one combines an
; sret result pointer with a byval argument pointer.
56declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0
57declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0
58declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0
59
; Vector of sixteen i8 elements.
60declare hidden void @external_void_func_v16i8(<16 x i8>) #0
61
62; FIXME: Should be passing -1
; Kernel with no arguments that calls @external_void_func_i1 with the
; constant i1 true. The autogenerated checks below pin the current lowering:
; every configuration materialises the argument as "v_mov_b32_e32 v0, 1" and
; then reaches the callee through s_getpc_b64 / s_add_u32 / s_addc_u32 /
; s_swappc_b64. The FIXME above records that 1 is what is emitted today.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
63define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 {
64; VI-LABEL: test_call_external_void_func_i1_imm:
65; VI:       ; %bb.0:
66; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
67; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
68; VI-NEXT:    s_mov_b32 s38, -1
69; VI-NEXT:    s_mov_b32 s39, 0xe80000
70; VI-NEXT:    s_add_u32 s36, s36, s3
71; VI-NEXT:    s_addc_u32 s37, s37, 0
72; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
73; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
74; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
75; VI-NEXT:    v_mov_b32_e32 v0, 1
76; VI-NEXT:    s_mov_b32 s32, 0
77; VI-NEXT:    s_getpc_b64 s[4:5]
78; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
79; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
80; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
81; VI-NEXT:    s_endpgm
82;
83; CI-LABEL: test_call_external_void_func_i1_imm:
84; CI:       ; %bb.0:
85; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
86; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
87; CI-NEXT:    s_mov_b32 s38, -1
88; CI-NEXT:    s_mov_b32 s39, 0xe8f000
89; CI-NEXT:    s_add_u32 s36, s36, s3
90; CI-NEXT:    s_addc_u32 s37, s37, 0
91; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
92; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
93; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
94; CI-NEXT:    v_mov_b32_e32 v0, 1
95; CI-NEXT:    s_mov_b32 s32, 0
96; CI-NEXT:    s_getpc_b64 s[4:5]
97; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
98; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
99; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
100; CI-NEXT:    s_endpgm
101;
102; GFX9-LABEL: test_call_external_void_func_i1_imm:
103; GFX9:       ; %bb.0:
104; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
105; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
106; GFX9-NEXT:    s_mov_b32 s38, -1
107; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
108; GFX9-NEXT:    s_add_u32 s36, s36, s3
109; GFX9-NEXT:    s_addc_u32 s37, s37, 0
110; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
111; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
112; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
113; GFX9-NEXT:    v_mov_b32_e32 v0, 1
114; GFX9-NEXT:    s_mov_b32 s32, 0
115; GFX9-NEXT:    s_getpc_b64 s[4:5]
116; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i1@rel32@lo+4
117; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i1@rel32@hi+12
118; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
119; GFX9-NEXT:    s_endpgm
120;
121; GFX11-LABEL: test_call_external_void_func_i1_imm:
122; GFX11:       ; %bb.0:
123; GFX11-NEXT:    v_mov_b32_e32 v0, 1
124; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
125; GFX11-NEXT:    s_mov_b32 s32, 0
126; GFX11-NEXT:    s_getpc_b64 s[2:3]
127; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i1@rel32@lo+4
128; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i1@rel32@hi+12
129; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
130; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
131; GFX11-NEXT:    s_endpgm
132;
133; HSA-LABEL: test_call_external_void_func_i1_imm:
134; HSA:       ; %bb.0:
135; HSA-NEXT:    s_add_i32 s6, s6, s9
136; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
137; HSA-NEXT:    s_add_u32 s0, s0, s9
138; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
139; HSA-NEXT:    s_addc_u32 s1, s1, 0
140; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
141; HSA-NEXT:    v_mov_b32_e32 v0, 1
142; HSA-NEXT:    s_mov_b32 s32, 0
143; HSA-NEXT:    s_getpc_b64 s[8:9]
144; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i1@rel32@lo+4
145; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i1@rel32@hi+12
146; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
147; HSA-NEXT:    s_endpgm
148  call void @external_void_func_i1(i1 true)
149  ret void
150}
151
; Kernel that passes a loaded i1 to a callee whose parameter is marked
; signext. The value comes from a volatile load through an undef addrspace(1)
; pointer (presumably so the argument is not a compile-time constant). The
; kernel's own unnamed i32 parameter is not referenced in the body.
; In every configuration the checks show an unsigned byte load followed by
; "v_bfe_i32 v0, v0, 0, 1" on v0 before the s_swappc_b64 call.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
152define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 {
153; VI-LABEL: test_call_external_void_func_i1_signext:
154; VI:       ; %bb.0:
155; VI-NEXT:    s_mov_b32 s3, 0xf000
156; VI-NEXT:    s_mov_b32 s2, -1
157; VI-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
158; VI-NEXT:    s_waitcnt vmcnt(0)
159; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
160; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
161; VI-NEXT:    s_mov_b32 s38, -1
162; VI-NEXT:    s_mov_b32 s39, 0xe80000
163; VI-NEXT:    s_add_u32 s36, s36, s5
164; VI-NEXT:    s_addc_u32 s37, s37, 0
165; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
166; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
167; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
168; VI-NEXT:    s_mov_b32 s32, 0
169; VI-NEXT:    s_getpc_b64 s[4:5]
170; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
171; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
172; VI-NEXT:    v_bfe_i32 v0, v0, 0, 1
173; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
174; VI-NEXT:    s_endpgm
175;
176; CI-LABEL: test_call_external_void_func_i1_signext:
177; CI:       ; %bb.0:
178; CI-NEXT:    s_mov_b32 s3, 0xf000
179; CI-NEXT:    s_mov_b32 s2, -1
180; CI-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
181; CI-NEXT:    s_waitcnt vmcnt(0)
182; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
183; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
184; CI-NEXT:    s_mov_b32 s38, -1
185; CI-NEXT:    s_mov_b32 s39, 0xe8f000
186; CI-NEXT:    s_add_u32 s36, s36, s5
187; CI-NEXT:    s_addc_u32 s37, s37, 0
188; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
189; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
190; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
191; CI-NEXT:    s_mov_b32 s32, 0
192; CI-NEXT:    s_getpc_b64 s[4:5]
193; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
194; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
195; CI-NEXT:    v_bfe_i32 v0, v0, 0, 1
196; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
197; CI-NEXT:    s_endpgm
198;
199; GFX9-LABEL: test_call_external_void_func_i1_signext:
200; GFX9:       ; %bb.0:
201; GFX9-NEXT:    s_mov_b32 s3, 0xf000
202; GFX9-NEXT:    s_mov_b32 s2, -1
203; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
204; GFX9-NEXT:    s_waitcnt vmcnt(0)
205; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
206; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
207; GFX9-NEXT:    s_mov_b32 s38, -1
208; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
209; GFX9-NEXT:    s_add_u32 s36, s36, s5
210; GFX9-NEXT:    s_addc_u32 s37, s37, 0
211; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
212; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
213; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
214; GFX9-NEXT:    s_mov_b32 s32, 0
215; GFX9-NEXT:    s_getpc_b64 s[4:5]
216; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i1_signext@rel32@lo+4
217; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_signext@rel32@hi+12
218; GFX9-NEXT:    v_bfe_i32 v0, v0, 0, 1
219; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
220; GFX9-NEXT:    s_endpgm
221;
222; GFX11-LABEL: test_call_external_void_func_i1_signext:
223; GFX11:       ; %bb.0:
224; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
225; GFX11-NEXT:    s_mov_b32 s2, -1
226; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
227; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0 glc dlc
228; GFX11-NEXT:    s_waitcnt vmcnt(0)
229; GFX11-NEXT:    s_mov_b32 s32, 0
230; GFX11-NEXT:    s_getpc_b64 s[2:3]
231; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i1_signext@rel32@lo+4
232; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i1_signext@rel32@hi+12
233; GFX11-NEXT:    v_bfe_i32 v0, v0, 0, 1
234; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
235; GFX11-NEXT:    s_endpgm
236;
237; HSA-LABEL: test_call_external_void_func_i1_signext:
238; HSA:       ; %bb.0:
239; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
240; HSA-NEXT:    s_mov_b32 s6, -1
241; HSA-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0 glc
242; HSA-NEXT:    s_waitcnt vmcnt(0)
243; HSA-NEXT:    s_add_i32 s8, s8, s11
244; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
245; HSA-NEXT:    s_add_u32 s0, s0, s11
246; HSA-NEXT:    s_addc_u32 s1, s1, 0
247; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
248; HSA-NEXT:    s_mov_b32 s32, 0
249; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
250; HSA-NEXT:    s_getpc_b64 s[8:9]
251; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i1_signext@rel32@lo+4
252; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i1_signext@rel32@hi+12
253; HSA-NEXT:    v_bfe_i32 v0, v0, 0, 1
254; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
255; HSA-NEXT:    s_endpgm
256  %var = load volatile i1, ptr addrspace(1) undef
257  call void @external_void_func_i1_signext(i1 signext %var)
258  ret void
259}
260
261; FIXME: load should be scheduled before getpc
; NOTE(review): in all five check groups below the buffer load already appears
; before s_getpc_b64, so the FIXME above may be stale - confirm before relying
; on it.
; Kernel that passes a loaded i1 to a callee whose parameter is marked
; zeroext. The value comes from a volatile load through an undef addrspace(1)
; pointer (presumably so the argument is not a compile-time constant). The
; kernel's own unnamed i32 parameter is not referenced in the body.
; In every configuration the checks show an unsigned byte load followed by
; "v_and_b32_e32 v0, 1, v0" before the s_swappc_b64 call.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
262define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
263; VI-LABEL: test_call_external_void_func_i1_zeroext:
264; VI:       ; %bb.0:
265; VI-NEXT:    s_mov_b32 s3, 0xf000
266; VI-NEXT:    s_mov_b32 s2, -1
267; VI-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
268; VI-NEXT:    s_waitcnt vmcnt(0)
269; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
270; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
271; VI-NEXT:    s_mov_b32 s38, -1
272; VI-NEXT:    s_mov_b32 s39, 0xe80000
273; VI-NEXT:    s_add_u32 s36, s36, s5
274; VI-NEXT:    s_addc_u32 s37, s37, 0
275; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
276; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
277; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
278; VI-NEXT:    s_mov_b32 s32, 0
279; VI-NEXT:    s_getpc_b64 s[4:5]
280; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
281; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
282; VI-NEXT:    v_and_b32_e32 v0, 1, v0
283; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
284; VI-NEXT:    s_endpgm
285;
286; CI-LABEL: test_call_external_void_func_i1_zeroext:
287; CI:       ; %bb.0:
288; CI-NEXT:    s_mov_b32 s3, 0xf000
289; CI-NEXT:    s_mov_b32 s2, -1
290; CI-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
291; CI-NEXT:    s_waitcnt vmcnt(0)
292; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
293; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
294; CI-NEXT:    s_mov_b32 s38, -1
295; CI-NEXT:    s_mov_b32 s39, 0xe8f000
296; CI-NEXT:    s_add_u32 s36, s36, s5
297; CI-NEXT:    s_addc_u32 s37, s37, 0
298; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
299; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
300; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
301; CI-NEXT:    s_mov_b32 s32, 0
302; CI-NEXT:    s_getpc_b64 s[4:5]
303; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
304; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
305; CI-NEXT:    v_and_b32_e32 v0, 1, v0
306; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
307; CI-NEXT:    s_endpgm
308;
309; GFX9-LABEL: test_call_external_void_func_i1_zeroext:
310; GFX9:       ; %bb.0:
311; GFX9-NEXT:    s_mov_b32 s3, 0xf000
312; GFX9-NEXT:    s_mov_b32 s2, -1
313; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
314; GFX9-NEXT:    s_waitcnt vmcnt(0)
315; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
316; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
317; GFX9-NEXT:    s_mov_b32 s38, -1
318; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
319; GFX9-NEXT:    s_add_u32 s36, s36, s5
320; GFX9-NEXT:    s_addc_u32 s37, s37, 0
321; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
322; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
323; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
324; GFX9-NEXT:    s_mov_b32 s32, 0
325; GFX9-NEXT:    s_getpc_b64 s[4:5]
326; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i1_zeroext@rel32@lo+4
327; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i1_zeroext@rel32@hi+12
328; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
329; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
330; GFX9-NEXT:    s_endpgm
331;
332; GFX11-LABEL: test_call_external_void_func_i1_zeroext:
333; GFX11:       ; %bb.0:
334; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
335; GFX11-NEXT:    s_mov_b32 s2, -1
336; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
337; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0 glc dlc
338; GFX11-NEXT:    s_waitcnt vmcnt(0)
339; GFX11-NEXT:    s_mov_b32 s32, 0
340; GFX11-NEXT:    s_getpc_b64 s[2:3]
341; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i1_zeroext@rel32@lo+4
342; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i1_zeroext@rel32@hi+12
343; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
344; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
345; GFX11-NEXT:    s_endpgm
346;
347; HSA-LABEL: test_call_external_void_func_i1_zeroext:
348; HSA:       ; %bb.0:
349; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
350; HSA-NEXT:    s_mov_b32 s6, -1
351; HSA-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0 glc
352; HSA-NEXT:    s_waitcnt vmcnt(0)
353; HSA-NEXT:    s_add_i32 s8, s8, s11
354; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
355; HSA-NEXT:    s_add_u32 s0, s0, s11
356; HSA-NEXT:    s_addc_u32 s1, s1, 0
357; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
358; HSA-NEXT:    s_mov_b32 s32, 0
359; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
360; HSA-NEXT:    s_getpc_b64 s[8:9]
361; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i1_zeroext@rel32@lo+4
362; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i1_zeroext@rel32@hi+12
363; HSA-NEXT:    v_and_b32_e32 v0, 1, v0
364; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
365; HSA-NEXT:    s_endpgm
366  %var = load volatile i1, ptr addrspace(1) undef
367  call void @external_void_func_i1_zeroext(i1 zeroext %var)
368  ret void
369}
370
; Kernel that calls @external_void_func_i8 with the constant i8 123. The
; kernel's own unnamed i32 parameter is not referenced in the body.
; In every configuration the checks show the argument materialised as
; "v_mov_b32_e32 v0, 0x7b" (0x7b == 123) ahead of the s_swappc_b64 call.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
371define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
372; VI-LABEL: test_call_external_void_func_i8_imm:
373; VI:       ; %bb.0:
374; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
375; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
376; VI-NEXT:    s_mov_b32 s38, -1
377; VI-NEXT:    s_mov_b32 s39, 0xe80000
378; VI-NEXT:    s_add_u32 s36, s36, s5
379; VI-NEXT:    s_addc_u32 s37, s37, 0
380; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
381; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
382; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
383; VI-NEXT:    v_mov_b32_e32 v0, 0x7b
384; VI-NEXT:    s_mov_b32 s32, 0
385; VI-NEXT:    s_getpc_b64 s[4:5]
386; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
387; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
388; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
389; VI-NEXT:    s_endpgm
390;
391; CI-LABEL: test_call_external_void_func_i8_imm:
392; CI:       ; %bb.0:
393; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
394; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
395; CI-NEXT:    s_mov_b32 s38, -1
396; CI-NEXT:    s_mov_b32 s39, 0xe8f000
397; CI-NEXT:    s_add_u32 s36, s36, s5
398; CI-NEXT:    s_addc_u32 s37, s37, 0
399; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
400; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
401; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
402; CI-NEXT:    v_mov_b32_e32 v0, 0x7b
403; CI-NEXT:    s_mov_b32 s32, 0
404; CI-NEXT:    s_getpc_b64 s[4:5]
405; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
406; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
407; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
408; CI-NEXT:    s_endpgm
409;
410; GFX9-LABEL: test_call_external_void_func_i8_imm:
411; GFX9:       ; %bb.0:
412; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
413; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
414; GFX9-NEXT:    s_mov_b32 s38, -1
415; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
416; GFX9-NEXT:    s_add_u32 s36, s36, s5
417; GFX9-NEXT:    s_addc_u32 s37, s37, 0
418; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
419; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
420; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
421; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
422; GFX9-NEXT:    s_mov_b32 s32, 0
423; GFX9-NEXT:    s_getpc_b64 s[4:5]
424; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i8@rel32@lo+4
425; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i8@rel32@hi+12
426; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
427; GFX9-NEXT:    s_endpgm
428;
429; GFX11-LABEL: test_call_external_void_func_i8_imm:
430; GFX11:       ; %bb.0:
431; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7b
432; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
433; GFX11-NEXT:    s_mov_b32 s32, 0
434; GFX11-NEXT:    s_getpc_b64 s[2:3]
435; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i8@rel32@lo+4
436; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i8@rel32@hi+12
437; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
438; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
439; GFX11-NEXT:    s_endpgm
440;
441; HSA-LABEL: test_call_external_void_func_i8_imm:
442; HSA:       ; %bb.0:
443; HSA-NEXT:    s_add_i32 s8, s8, s11
444; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
445; HSA-NEXT:    s_add_u32 s0, s0, s11
446; HSA-NEXT:    s_addc_u32 s1, s1, 0
447; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
448; HSA-NEXT:    v_mov_b32_e32 v0, 0x7b
449; HSA-NEXT:    s_mov_b32 s32, 0
450; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
451; HSA-NEXT:    s_getpc_b64 s[8:9]
452; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i8@rel32@lo+4
453; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i8@rel32@hi+12
454; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
455; HSA-NEXT:    s_endpgm
456  call void @external_void_func_i8(i8 123)
457  ret void
458}
459
460; FIXME: don't wait before call
; Kernel that passes a loaded i8 to a callee whose parameter is marked
; signext. The value comes from a volatile load through an undef addrspace(1)
; pointer (presumably so the argument is not a compile-time constant). The
; kernel's own unnamed i32 parameter is not referenced in the body.
; The checks show a signed byte load straight into v0 (buffer_load_sbyte, or
; buffer_load_i8 in the GFX11 checks) with no separate extension instruction,
; and an "s_waitcnt vmcnt(0)" right after the load, which is what the FIXME
; above refers to.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
461define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
462; VI-LABEL: test_call_external_void_func_i8_signext:
463; VI:       ; %bb.0:
464; VI-NEXT:    s_mov_b32 s3, 0xf000
465; VI-NEXT:    s_mov_b32 s2, -1
466; VI-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0 glc
467; VI-NEXT:    s_waitcnt vmcnt(0)
468; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
469; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
470; VI-NEXT:    s_mov_b32 s38, -1
471; VI-NEXT:    s_mov_b32 s39, 0xe80000
472; VI-NEXT:    s_add_u32 s36, s36, s5
473; VI-NEXT:    s_addc_u32 s37, s37, 0
474; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
475; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
476; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
477; VI-NEXT:    s_mov_b32 s32, 0
478; VI-NEXT:    s_getpc_b64 s[4:5]
479; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
480; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
481; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
482; VI-NEXT:    s_endpgm
483;
484; CI-LABEL: test_call_external_void_func_i8_signext:
485; CI:       ; %bb.0:
486; CI-NEXT:    s_mov_b32 s3, 0xf000
487; CI-NEXT:    s_mov_b32 s2, -1
488; CI-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0 glc
489; CI-NEXT:    s_waitcnt vmcnt(0)
490; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
491; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
492; CI-NEXT:    s_mov_b32 s38, -1
493; CI-NEXT:    s_mov_b32 s39, 0xe8f000
494; CI-NEXT:    s_add_u32 s36, s36, s5
495; CI-NEXT:    s_addc_u32 s37, s37, 0
496; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
497; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
498; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
499; CI-NEXT:    s_mov_b32 s32, 0
500; CI-NEXT:    s_getpc_b64 s[4:5]
501; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
502; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
503; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
504; CI-NEXT:    s_endpgm
505;
506; GFX9-LABEL: test_call_external_void_func_i8_signext:
507; GFX9:       ; %bb.0:
508; GFX9-NEXT:    s_mov_b32 s3, 0xf000
509; GFX9-NEXT:    s_mov_b32 s2, -1
510; GFX9-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0 glc
511; GFX9-NEXT:    s_waitcnt vmcnt(0)
512; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
513; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
514; GFX9-NEXT:    s_mov_b32 s38, -1
515; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
516; GFX9-NEXT:    s_add_u32 s36, s36, s5
517; GFX9-NEXT:    s_addc_u32 s37, s37, 0
518; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
519; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
520; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
521; GFX9-NEXT:    s_mov_b32 s32, 0
522; GFX9-NEXT:    s_getpc_b64 s[4:5]
523; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i8_signext@rel32@lo+4
524; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_signext@rel32@hi+12
525; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
526; GFX9-NEXT:    s_endpgm
527;
528; GFX11-LABEL: test_call_external_void_func_i8_signext:
529; GFX11:       ; %bb.0:
530; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
531; GFX11-NEXT:    s_mov_b32 s2, -1
532; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
533; GFX11-NEXT:    buffer_load_i8 v0, off, s[0:3], 0 glc dlc
534; GFX11-NEXT:    s_waitcnt vmcnt(0)
535; GFX11-NEXT:    s_mov_b32 s32, 0
536; GFX11-NEXT:    s_getpc_b64 s[2:3]
537; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i8_signext@rel32@lo+4
538; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i8_signext@rel32@hi+12
539; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
540; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
541; GFX11-NEXT:    s_endpgm
542;
543; HSA-LABEL: test_call_external_void_func_i8_signext:
544; HSA:       ; %bb.0:
545; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
546; HSA-NEXT:    s_mov_b32 s6, -1
547; HSA-NEXT:    buffer_load_sbyte v0, off, s[4:7], 0 glc
548; HSA-NEXT:    s_waitcnt vmcnt(0)
549; HSA-NEXT:    s_add_i32 s8, s8, s11
550; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
551; HSA-NEXT:    s_add_u32 s0, s0, s11
552; HSA-NEXT:    s_addc_u32 s1, s1, 0
553; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
554; HSA-NEXT:    s_mov_b32 s32, 0
555; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
556; HSA-NEXT:    s_getpc_b64 s[8:9]
557; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i8_signext@rel32@lo+4
558; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i8_signext@rel32@hi+12
559; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
560; HSA-NEXT:    s_endpgm
561  %var = load volatile i8, ptr addrspace(1) undef
562  call void @external_void_func_i8_signext(i8 signext %var)
563  ret void
564}
565
; Kernel that passes a loaded i8 to a callee whose parameter is marked
; zeroext. The value comes from a volatile load through an undef addrspace(1)
; pointer (presumably so the argument is not a compile-time constant). The
; kernel's own unnamed i32 parameter is not referenced in the body.
; The checks show an unsigned byte load straight into v0 (buffer_load_ubyte,
; or buffer_load_u8 in the GFX11 checks) with no separate extension
; instruction before the s_swappc_b64 call.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
566define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 {
567; VI-LABEL: test_call_external_void_func_i8_zeroext:
568; VI:       ; %bb.0:
569; VI-NEXT:    s_mov_b32 s3, 0xf000
570; VI-NEXT:    s_mov_b32 s2, -1
571; VI-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
572; VI-NEXT:    s_waitcnt vmcnt(0)
573; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
574; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
575; VI-NEXT:    s_mov_b32 s38, -1
576; VI-NEXT:    s_mov_b32 s39, 0xe80000
577; VI-NEXT:    s_add_u32 s36, s36, s5
578; VI-NEXT:    s_addc_u32 s37, s37, 0
579; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
580; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
581; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
582; VI-NEXT:    s_mov_b32 s32, 0
583; VI-NEXT:    s_getpc_b64 s[4:5]
584; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
585; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
586; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
587; VI-NEXT:    s_endpgm
588;
589; CI-LABEL: test_call_external_void_func_i8_zeroext:
590; CI:       ; %bb.0:
591; CI-NEXT:    s_mov_b32 s3, 0xf000
592; CI-NEXT:    s_mov_b32 s2, -1
593; CI-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
594; CI-NEXT:    s_waitcnt vmcnt(0)
595; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
596; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
597; CI-NEXT:    s_mov_b32 s38, -1
598; CI-NEXT:    s_mov_b32 s39, 0xe8f000
599; CI-NEXT:    s_add_u32 s36, s36, s5
600; CI-NEXT:    s_addc_u32 s37, s37, 0
601; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
602; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
603; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
604; CI-NEXT:    s_mov_b32 s32, 0
605; CI-NEXT:    s_getpc_b64 s[4:5]
606; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
607; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
608; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
609; CI-NEXT:    s_endpgm
610;
611; GFX9-LABEL: test_call_external_void_func_i8_zeroext:
612; GFX9:       ; %bb.0:
613; GFX9-NEXT:    s_mov_b32 s3, 0xf000
614; GFX9-NEXT:    s_mov_b32 s2, -1
615; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 glc
616; GFX9-NEXT:    s_waitcnt vmcnt(0)
617; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
618; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
619; GFX9-NEXT:    s_mov_b32 s38, -1
620; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
621; GFX9-NEXT:    s_add_u32 s36, s36, s5
622; GFX9-NEXT:    s_addc_u32 s37, s37, 0
623; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
624; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
625; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
626; GFX9-NEXT:    s_mov_b32 s32, 0
627; GFX9-NEXT:    s_getpc_b64 s[4:5]
628; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i8_zeroext@rel32@lo+4
629; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i8_zeroext@rel32@hi+12
630; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
631; GFX9-NEXT:    s_endpgm
632;
633; GFX11-LABEL: test_call_external_void_func_i8_zeroext:
634; GFX11:       ; %bb.0:
635; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
636; GFX11-NEXT:    s_mov_b32 s2, -1
637; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
638; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0 glc dlc
639; GFX11-NEXT:    s_waitcnt vmcnt(0)
640; GFX11-NEXT:    s_mov_b32 s32, 0
641; GFX11-NEXT:    s_getpc_b64 s[2:3]
642; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i8_zeroext@rel32@lo+4
643; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i8_zeroext@rel32@hi+12
644; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
645; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
646; GFX11-NEXT:    s_endpgm
647;
648; HSA-LABEL: test_call_external_void_func_i8_zeroext:
649; HSA:       ; %bb.0:
650; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
651; HSA-NEXT:    s_mov_b32 s6, -1
652; HSA-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0 glc
653; HSA-NEXT:    s_waitcnt vmcnt(0)
654; HSA-NEXT:    s_add_i32 s8, s8, s11
655; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
656; HSA-NEXT:    s_add_u32 s0, s0, s11
657; HSA-NEXT:    s_addc_u32 s1, s1, 0
658; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
659; HSA-NEXT:    s_mov_b32 s32, 0
660; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
661; HSA-NEXT:    s_getpc_b64 s[8:9]
662; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i8_zeroext@rel32@lo+4
663; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i8_zeroext@rel32@hi+12
664; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
665; HSA-NEXT:    s_endpgm
666  %var = load volatile i8, ptr addrspace(1) undef
667  call void @external_void_func_i8_zeroext(i8 zeroext %var)
668  ret void
669}
670
; Kernel with no arguments that calls @external_void_func_i16 with the
; constant i16 123.
; In every configuration the checks show the argument materialised as
; "v_mov_b32_e32 v0, 0x7b" (0x7b == 123) ahead of the s_swappc_b64 call.
; The check lines are produced by utils/update_llc_test_checks.py; regenerate
; them with that script instead of editing them by hand.
671define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
672; VI-LABEL: test_call_external_void_func_i16_imm:
673; VI:       ; %bb.0:
674; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
675; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
676; VI-NEXT:    s_mov_b32 s38, -1
677; VI-NEXT:    s_mov_b32 s39, 0xe80000
678; VI-NEXT:    s_add_u32 s36, s36, s3
679; VI-NEXT:    s_addc_u32 s37, s37, 0
680; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
681; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
682; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
683; VI-NEXT:    v_mov_b32_e32 v0, 0x7b
684; VI-NEXT:    s_mov_b32 s32, 0
685; VI-NEXT:    s_getpc_b64 s[4:5]
686; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
687; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
688; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
689; VI-NEXT:    s_endpgm
690;
691; CI-LABEL: test_call_external_void_func_i16_imm:
692; CI:       ; %bb.0:
693; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
694; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
695; CI-NEXT:    s_mov_b32 s38, -1
696; CI-NEXT:    s_mov_b32 s39, 0xe8f000
697; CI-NEXT:    s_add_u32 s36, s36, s3
698; CI-NEXT:    s_addc_u32 s37, s37, 0
699; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
700; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
701; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
702; CI-NEXT:    v_mov_b32_e32 v0, 0x7b
703; CI-NEXT:    s_mov_b32 s32, 0
704; CI-NEXT:    s_getpc_b64 s[4:5]
705; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
706; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
707; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
708; CI-NEXT:    s_endpgm
709;
710; GFX9-LABEL: test_call_external_void_func_i16_imm:
711; GFX9:       ; %bb.0:
712; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
713; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
714; GFX9-NEXT:    s_mov_b32 s38, -1
715; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
716; GFX9-NEXT:    s_add_u32 s36, s36, s3
717; GFX9-NEXT:    s_addc_u32 s37, s37, 0
718; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
719; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
720; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
721; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
722; GFX9-NEXT:    s_mov_b32 s32, 0
723; GFX9-NEXT:    s_getpc_b64 s[4:5]
724; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i16@rel32@lo+4
725; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i16@rel32@hi+12
726; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
727; GFX9-NEXT:    s_endpgm
728;
729; GFX11-LABEL: test_call_external_void_func_i16_imm:
730; GFX11:       ; %bb.0:
731; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7b
732; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
733; GFX11-NEXT:    s_mov_b32 s32, 0
734; GFX11-NEXT:    s_getpc_b64 s[2:3]
735; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i16@rel32@lo+4
736; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i16@rel32@hi+12
737; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
738; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
739; GFX11-NEXT:    s_endpgm
740;
741; HSA-LABEL: test_call_external_void_func_i16_imm:
742; HSA:       ; %bb.0:
743; HSA-NEXT:    s_add_i32 s6, s6, s9
744; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
745; HSA-NEXT:    s_add_u32 s0, s0, s9
746; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
747; HSA-NEXT:    s_addc_u32 s1, s1, 0
748; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
749; HSA-NEXT:    v_mov_b32_e32 v0, 0x7b
750; HSA-NEXT:    s_mov_b32 s32, 0
751; HSA-NEXT:    s_getpc_b64 s[8:9]
752; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i16@rel32@lo+4
753; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i16@rel32@hi+12
754; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
755; HSA-NEXT:    s_endpgm
756  call void @external_void_func_i16(i16 123)
757  ret void
758}
759
; Kernel that does a volatile i16 load through an undef ptr addrspace(1) and
; passes the result to external_void_func_i16_signext as a signext argument.
; The checks expect a signed 16-bit buffer load (buffer_load_sshort, or
; buffer_load_i16 in the gfx1100 run) straight into v0, with no separate
; extension instruction before the call. The kernel's unnamed i32 parameter
; is never referenced in the body.
760define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
761; VI-LABEL: test_call_external_void_func_i16_signext:
762; VI:       ; %bb.0:
763; VI-NEXT:    s_mov_b32 s3, 0xf000
764; VI-NEXT:    s_mov_b32 s2, -1
765; VI-NEXT:    buffer_load_sshort v0, off, s[0:3], 0 glc
766; VI-NEXT:    s_waitcnt vmcnt(0)
767; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
768; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
769; VI-NEXT:    s_mov_b32 s38, -1
770; VI-NEXT:    s_mov_b32 s39, 0xe80000
771; VI-NEXT:    s_add_u32 s36, s36, s5
772; VI-NEXT:    s_addc_u32 s37, s37, 0
773; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
774; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
775; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
776; VI-NEXT:    s_mov_b32 s32, 0
777; VI-NEXT:    s_getpc_b64 s[4:5]
778; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
779; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
780; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
781; VI-NEXT:    s_endpgm
782;
783; CI-LABEL: test_call_external_void_func_i16_signext:
784; CI:       ; %bb.0:
785; CI-NEXT:    s_mov_b32 s3, 0xf000
786; CI-NEXT:    s_mov_b32 s2, -1
787; CI-NEXT:    buffer_load_sshort v0, off, s[0:3], 0 glc
788; CI-NEXT:    s_waitcnt vmcnt(0)
789; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
790; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
791; CI-NEXT:    s_mov_b32 s38, -1
792; CI-NEXT:    s_mov_b32 s39, 0xe8f000
793; CI-NEXT:    s_add_u32 s36, s36, s5
794; CI-NEXT:    s_addc_u32 s37, s37, 0
795; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
796; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
797; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
798; CI-NEXT:    s_mov_b32 s32, 0
799; CI-NEXT:    s_getpc_b64 s[4:5]
800; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
801; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
802; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
803; CI-NEXT:    s_endpgm
804;
805; GFX9-LABEL: test_call_external_void_func_i16_signext:
806; GFX9:       ; %bb.0:
807; GFX9-NEXT:    s_mov_b32 s3, 0xf000
808; GFX9-NEXT:    s_mov_b32 s2, -1
809; GFX9-NEXT:    buffer_load_sshort v0, off, s[0:3], 0 glc
810; GFX9-NEXT:    s_waitcnt vmcnt(0)
811; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
812; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
813; GFX9-NEXT:    s_mov_b32 s38, -1
814; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
815; GFX9-NEXT:    s_add_u32 s36, s36, s5
816; GFX9-NEXT:    s_addc_u32 s37, s37, 0
817; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
818; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
819; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
820; GFX9-NEXT:    s_mov_b32 s32, 0
821; GFX9-NEXT:    s_getpc_b64 s[4:5]
822; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i16_signext@rel32@lo+4
823; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_signext@rel32@hi+12
824; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
825; GFX9-NEXT:    s_endpgm
826;
827; GFX11-LABEL: test_call_external_void_func_i16_signext:
828; GFX11:       ; %bb.0:
829; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
830; GFX11-NEXT:    s_mov_b32 s2, -1
831; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
832; GFX11-NEXT:    buffer_load_i16 v0, off, s[0:3], 0 glc dlc
833; GFX11-NEXT:    s_waitcnt vmcnt(0)
834; GFX11-NEXT:    s_mov_b32 s32, 0
835; GFX11-NEXT:    s_getpc_b64 s[2:3]
836; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i16_signext@rel32@lo+4
837; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i16_signext@rel32@hi+12
838; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
839; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
840; GFX11-NEXT:    s_endpgm
841;
842; HSA-LABEL: test_call_external_void_func_i16_signext:
843; HSA:       ; %bb.0:
844; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
845; HSA-NEXT:    s_mov_b32 s6, -1
846; HSA-NEXT:    buffer_load_sshort v0, off, s[4:7], 0 glc
847; HSA-NEXT:    s_waitcnt vmcnt(0)
848; HSA-NEXT:    s_add_i32 s8, s8, s11
849; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
850; HSA-NEXT:    s_add_u32 s0, s0, s11
851; HSA-NEXT:    s_addc_u32 s1, s1, 0
852; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
853; HSA-NEXT:    s_mov_b32 s32, 0
854; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
855; HSA-NEXT:    s_getpc_b64 s[8:9]
856; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i16_signext@rel32@lo+4
857; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i16_signext@rel32@hi+12
858; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
859; HSA-NEXT:    s_endpgm
860  %var = load volatile i16, ptr addrspace(1) undef
861  call void @external_void_func_i16_signext(i16 signext %var)
862  ret void
863}
864
; Kernel that does a volatile i16 load through an undef ptr addrspace(1) and
; passes the result to external_void_func_i16_zeroext as a zeroext argument.
; The checks expect an unsigned 16-bit buffer load (buffer_load_ushort, or
; buffer_load_u16 in the gfx1100 run) straight into v0, with no separate
; extension instruction before the call. The kernel's unnamed i32 parameter
; is never referenced in the body.
865define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
866; VI-LABEL: test_call_external_void_func_i16_zeroext:
867; VI:       ; %bb.0:
868; VI-NEXT:    s_mov_b32 s3, 0xf000
869; VI-NEXT:    s_mov_b32 s2, -1
870; VI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 glc
871; VI-NEXT:    s_waitcnt vmcnt(0)
872; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
873; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
874; VI-NEXT:    s_mov_b32 s38, -1
875; VI-NEXT:    s_mov_b32 s39, 0xe80000
876; VI-NEXT:    s_add_u32 s36, s36, s5
877; VI-NEXT:    s_addc_u32 s37, s37, 0
878; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
879; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
880; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
881; VI-NEXT:    s_mov_b32 s32, 0
882; VI-NEXT:    s_getpc_b64 s[4:5]
883; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
884; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
885; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
886; VI-NEXT:    s_endpgm
887;
888; CI-LABEL: test_call_external_void_func_i16_zeroext:
889; CI:       ; %bb.0:
890; CI-NEXT:    s_mov_b32 s3, 0xf000
891; CI-NEXT:    s_mov_b32 s2, -1
892; CI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 glc
893; CI-NEXT:    s_waitcnt vmcnt(0)
894; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
895; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
896; CI-NEXT:    s_mov_b32 s38, -1
897; CI-NEXT:    s_mov_b32 s39, 0xe8f000
898; CI-NEXT:    s_add_u32 s36, s36, s5
899; CI-NEXT:    s_addc_u32 s37, s37, 0
900; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
901; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
902; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
903; CI-NEXT:    s_mov_b32 s32, 0
904; CI-NEXT:    s_getpc_b64 s[4:5]
905; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
906; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
907; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
908; CI-NEXT:    s_endpgm
909;
910; GFX9-LABEL: test_call_external_void_func_i16_zeroext:
911; GFX9:       ; %bb.0:
912; GFX9-NEXT:    s_mov_b32 s3, 0xf000
913; GFX9-NEXT:    s_mov_b32 s2, -1
914; GFX9-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 glc
915; GFX9-NEXT:    s_waitcnt vmcnt(0)
916; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
917; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
918; GFX9-NEXT:    s_mov_b32 s38, -1
919; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
920; GFX9-NEXT:    s_add_u32 s36, s36, s5
921; GFX9-NEXT:    s_addc_u32 s37, s37, 0
922; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
923; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
924; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
925; GFX9-NEXT:    s_mov_b32 s32, 0
926; GFX9-NEXT:    s_getpc_b64 s[4:5]
927; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i16_zeroext@rel32@lo+4
928; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i16_zeroext@rel32@hi+12
929; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
930; GFX9-NEXT:    s_endpgm
931;
932; GFX11-LABEL: test_call_external_void_func_i16_zeroext:
933; GFX11:       ; %bb.0:
934; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
935; GFX11-NEXT:    s_mov_b32 s2, -1
936; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
937; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0 glc dlc
938; GFX11-NEXT:    s_waitcnt vmcnt(0)
939; GFX11-NEXT:    s_mov_b32 s32, 0
940; GFX11-NEXT:    s_getpc_b64 s[2:3]
941; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i16_zeroext@rel32@lo+4
942; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i16_zeroext@rel32@hi+12
943; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
944; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
945; GFX11-NEXT:    s_endpgm
946;
947; HSA-LABEL: test_call_external_void_func_i16_zeroext:
948; HSA:       ; %bb.0:
949; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
950; HSA-NEXT:    s_mov_b32 s6, -1
951; HSA-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 glc
952; HSA-NEXT:    s_waitcnt vmcnt(0)
953; HSA-NEXT:    s_add_i32 s8, s8, s11
954; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
955; HSA-NEXT:    s_add_u32 s0, s0, s11
956; HSA-NEXT:    s_addc_u32 s1, s1, 0
957; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
958; HSA-NEXT:    s_mov_b32 s32, 0
959; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
960; HSA-NEXT:    s_getpc_b64 s[8:9]
961; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i16_zeroext@rel32@lo+4
962; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i16_zeroext@rel32@hi+12
963; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
964; HSA-NEXT:    s_endpgm
965  %var = load volatile i16, ptr addrspace(1) undef
966  call void @external_void_func_i16_zeroext(i16 zeroext %var)
967  ret void
968}
969
; Kernel that passes the i32 constant 42 to external_void_func_i32.
; The checks expect the constant to be moved into v0 as an inline 42 before
; the s_swappc_b64 call. The kernel's unnamed i32 parameter is never
; referenced in the body.
970define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 {
971; VI-LABEL: test_call_external_void_func_i32_imm:
972; VI:       ; %bb.0:
973; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
974; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
975; VI-NEXT:    s_mov_b32 s38, -1
976; VI-NEXT:    s_mov_b32 s39, 0xe80000
977; VI-NEXT:    s_add_u32 s36, s36, s5
978; VI-NEXT:    s_addc_u32 s37, s37, 0
979; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
980; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
981; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
982; VI-NEXT:    v_mov_b32_e32 v0, 42
983; VI-NEXT:    s_mov_b32 s32, 0
984; VI-NEXT:    s_getpc_b64 s[4:5]
985; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
986; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
987; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
988; VI-NEXT:    s_endpgm
989;
990; CI-LABEL: test_call_external_void_func_i32_imm:
991; CI:       ; %bb.0:
992; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
993; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
994; CI-NEXT:    s_mov_b32 s38, -1
995; CI-NEXT:    s_mov_b32 s39, 0xe8f000
996; CI-NEXT:    s_add_u32 s36, s36, s5
997; CI-NEXT:    s_addc_u32 s37, s37, 0
998; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
999; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1000; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1001; CI-NEXT:    v_mov_b32_e32 v0, 42
1002; CI-NEXT:    s_mov_b32 s32, 0
1003; CI-NEXT:    s_getpc_b64 s[4:5]
1004; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
1005; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
1006; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1007; CI-NEXT:    s_endpgm
1008;
1009; GFX9-LABEL: test_call_external_void_func_i32_imm:
1010; GFX9:       ; %bb.0:
1011; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1012; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1013; GFX9-NEXT:    s_mov_b32 s38, -1
1014; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1015; GFX9-NEXT:    s_add_u32 s36, s36, s5
1016; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1017; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1018; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1019; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1020; GFX9-NEXT:    v_mov_b32_e32 v0, 42
1021; GFX9-NEXT:    s_mov_b32 s32, 0
1022; GFX9-NEXT:    s_getpc_b64 s[4:5]
1023; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i32@rel32@lo+4
1024; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i32@rel32@hi+12
1025; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1026; GFX9-NEXT:    s_endpgm
1027;
1028; GFX11-LABEL: test_call_external_void_func_i32_imm:
1029; GFX11:       ; %bb.0:
1030; GFX11-NEXT:    v_mov_b32_e32 v0, 42
1031; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1032; GFX11-NEXT:    s_mov_b32 s32, 0
1033; GFX11-NEXT:    s_getpc_b64 s[2:3]
1034; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i32@rel32@lo+4
1035; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i32@rel32@hi+12
1036; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1037; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1038; GFX11-NEXT:    s_endpgm
1039;
1040; HSA-LABEL: test_call_external_void_func_i32_imm:
1041; HSA:       ; %bb.0:
1042; HSA-NEXT:    s_add_i32 s8, s8, s11
1043; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
1044; HSA-NEXT:    s_add_u32 s0, s0, s11
1045; HSA-NEXT:    s_addc_u32 s1, s1, 0
1046; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1047; HSA-NEXT:    v_mov_b32_e32 v0, 42
1048; HSA-NEXT:    s_mov_b32 s32, 0
1049; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
1050; HSA-NEXT:    s_getpc_b64 s[8:9]
1051; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i32@rel32@lo+4
1052; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i32@rel32@hi+12
1053; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1054; HSA-NEXT:    s_endpgm
1055  call void @external_void_func_i32(i32 42)
1056  ret void
1057}
1058
; Kernel that passes the i64 constant 123 to external_void_func_i64.
; The checks expect the value split across two 32-bit registers before the
; call, with v0 = 0x7b and v1 = 0 (a single v_dual_mov_b32 in the gfx1100
; run).
1059define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 {
1060; VI-LABEL: test_call_external_void_func_i64_imm:
1061; VI:       ; %bb.0:
1062; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1063; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1064; VI-NEXT:    s_mov_b32 s38, -1
1065; VI-NEXT:    s_mov_b32 s39, 0xe80000
1066; VI-NEXT:    s_add_u32 s36, s36, s3
1067; VI-NEXT:    s_addc_u32 s37, s37, 0
1068; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1069; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1070; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1071; VI-NEXT:    v_mov_b32_e32 v0, 0x7b
1072; VI-NEXT:    v_mov_b32_e32 v1, 0
1073; VI-NEXT:    s_mov_b32 s32, 0
1074; VI-NEXT:    s_getpc_b64 s[4:5]
1075; VI-NEXT:    s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1076; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1077; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1078; VI-NEXT:    s_endpgm
1079;
1080; CI-LABEL: test_call_external_void_func_i64_imm:
1081; CI:       ; %bb.0:
1082; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1083; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1084; CI-NEXT:    s_mov_b32 s38, -1
1085; CI-NEXT:    s_mov_b32 s39, 0xe8f000
1086; CI-NEXT:    s_add_u32 s36, s36, s3
1087; CI-NEXT:    s_addc_u32 s37, s37, 0
1088; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1089; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1090; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1091; CI-NEXT:    v_mov_b32_e32 v0, 0x7b
1092; CI-NEXT:    v_mov_b32_e32 v1, 0
1093; CI-NEXT:    s_mov_b32 s32, 0
1094; CI-NEXT:    s_getpc_b64 s[4:5]
1095; CI-NEXT:    s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1096; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1097; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1098; CI-NEXT:    s_endpgm
1099;
1100; GFX9-LABEL: test_call_external_void_func_i64_imm:
1101; GFX9:       ; %bb.0:
1102; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1103; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1104; GFX9-NEXT:    s_mov_b32 s38, -1
1105; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1106; GFX9-NEXT:    s_add_u32 s36, s36, s3
1107; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1108; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1109; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1110; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1111; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
1112; GFX9-NEXT:    v_mov_b32_e32 v1, 0
1113; GFX9-NEXT:    s_mov_b32 s32, 0
1114; GFX9-NEXT:    s_getpc_b64 s[4:5]
1115; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_i64@rel32@lo+4
1116; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_i64@rel32@hi+12
1117; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1118; GFX9-NEXT:    s_endpgm
1119;
1120; GFX11-LABEL: test_call_external_void_func_i64_imm:
1121; GFX11:       ; %bb.0:
1122; GFX11-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
1123; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1124; GFX11-NEXT:    s_mov_b32 s32, 0
1125; GFX11-NEXT:    s_getpc_b64 s[2:3]
1126; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_i64@rel32@lo+4
1127; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_i64@rel32@hi+12
1128; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1129; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1130; GFX11-NEXT:    s_endpgm
1131;
1132; HSA-LABEL: test_call_external_void_func_i64_imm:
1133; HSA:       ; %bb.0:
1134; HSA-NEXT:    s_add_i32 s6, s6, s9
1135; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
1136; HSA-NEXT:    s_add_u32 s0, s0, s9
1137; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
1138; HSA-NEXT:    s_addc_u32 s1, s1, 0
1139; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1140; HSA-NEXT:    v_mov_b32_e32 v0, 0x7b
1141; HSA-NEXT:    v_mov_b32_e32 v1, 0
1142; HSA-NEXT:    s_mov_b32 s32, 0
1143; HSA-NEXT:    s_getpc_b64 s[8:9]
1144; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_i64@rel32@lo+4
1145; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_i64@rel32@hi+12
1146; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1147; HSA-NEXT:    s_endpgm
1148  call void @external_void_func_i64(i64 123)
1149  ret void
1150}
1151
; Kernel that loads a <2 x i64> through a null ptr addrspace(1) and passes it
; to external_void_func_v2i64. The checks expect one 128-bit buffer load
; (buffer_load_dwordx4, or buffer_load_b128 in the gfx1100 run) that writes
; v[0:3], followed by the call.
1152define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 {
1153; VI-LABEL: test_call_external_void_func_v2i64:
1154; VI:       ; %bb.0:
1155; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1156; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1157; VI-NEXT:    s_mov_b32 s38, -1
1158; VI-NEXT:    s_mov_b32 s39, 0xe80000
1159; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1160; VI-NEXT:    s_mov_b32 s0, 0
1161; VI-NEXT:    s_add_u32 s36, s36, s3
1162; VI-NEXT:    s_mov_b32 s3, 0xf000
1163; VI-NEXT:    s_mov_b32 s2, -1
1164; VI-NEXT:    s_mov_b32 s1, s0
1165; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1166; VI-NEXT:    s_addc_u32 s37, s37, 0
1167; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1168; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1169; VI-NEXT:    s_mov_b32 s32, 0
1170; VI-NEXT:    s_getpc_b64 s[4:5]
1171; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1172; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1173; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1174; VI-NEXT:    s_endpgm
1175;
1176; CI-LABEL: test_call_external_void_func_v2i64:
1177; CI:       ; %bb.0:
1178; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1179; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1180; CI-NEXT:    s_mov_b32 s38, -1
1181; CI-NEXT:    s_mov_b32 s39, 0xe8f000
1182; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1183; CI-NEXT:    s_mov_b32 s0, 0
1184; CI-NEXT:    s_add_u32 s36, s36, s3
1185; CI-NEXT:    s_mov_b32 s3, 0xf000
1186; CI-NEXT:    s_mov_b32 s2, -1
1187; CI-NEXT:    s_mov_b32 s1, s0
1188; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1189; CI-NEXT:    s_addc_u32 s37, s37, 0
1190; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1191; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1192; CI-NEXT:    s_mov_b32 s32, 0
1193; CI-NEXT:    s_getpc_b64 s[4:5]
1194; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1195; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1196; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1197; CI-NEXT:    s_endpgm
1198;
1199; GFX9-LABEL: test_call_external_void_func_v2i64:
1200; GFX9:       ; %bb.0:
1201; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1202; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1203; GFX9-NEXT:    s_mov_b32 s38, -1
1204; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1205; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1206; GFX9-NEXT:    s_mov_b32 s0, 0
1207; GFX9-NEXT:    s_add_u32 s36, s36, s3
1208; GFX9-NEXT:    s_mov_b32 s3, 0xf000
1209; GFX9-NEXT:    s_mov_b32 s2, -1
1210; GFX9-NEXT:    s_mov_b32 s1, s0
1211; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1212; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1213; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1214; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1215; GFX9-NEXT:    s_mov_b32 s32, 0
1216; GFX9-NEXT:    s_getpc_b64 s[4:5]
1217; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1218; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1219; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1220; GFX9-NEXT:    s_endpgm
1221;
1222; GFX11-LABEL: test_call_external_void_func_v2i64:
1223; GFX11:       ; %bb.0:
1224; GFX11-NEXT:    s_mov_b32 s4, 0
1225; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
1226; GFX11-NEXT:    s_mov_b32 s6, -1
1227; GFX11-NEXT:    s_mov_b32 s5, s4
1228; GFX11-NEXT:    s_mov_b32 s32, 0
1229; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
1230; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1231; GFX11-NEXT:    s_getpc_b64 s[2:3]
1232; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4
1233; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12
1234; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1235; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1236; GFX11-NEXT:    s_endpgm
1237;
1238; HSA-LABEL: test_call_external_void_func_v2i64:
1239; HSA:       ; %bb.0:
1240; HSA-NEXT:    s_add_i32 s6, s6, s9
1241; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
1242; HSA-NEXT:    s_mov_b32 s8, 0
1243; HSA-NEXT:    s_add_u32 s0, s0, s9
1244; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
1245; HSA-NEXT:    s_mov_b32 s10, -1
1246; HSA-NEXT:    s_mov_b32 s9, s8
1247; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1248; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
1249; HSA-NEXT:    s_addc_u32 s1, s1, 0
1250; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1251; HSA-NEXT:    s_mov_b32 s32, 0
1252; HSA-NEXT:    s_getpc_b64 s[8:9]
1253; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4
1254; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12
1255; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1256; HSA-NEXT:    s_endpgm
1257  %val = load <2 x i64>, ptr addrspace(1) null
1258  call void @external_void_func_v2i64(<2 x i64> %val)
1259  ret void
1260}
1261
; Kernel that passes a constant <2 x i64> to external_void_func_v2i64.
; The elements are 8589934593 (0x0000000200000001) and 17179869187
; (0x0000000400000003), so the checks expect the four 32-bit halves to land
; in v0..v3 as 1, 2, 3, 4 (low half first) before the call.
1262define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 {
1263; VI-LABEL: test_call_external_void_func_v2i64_imm:
1264; VI:       ; %bb.0:
1265; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1266; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1267; VI-NEXT:    s_mov_b32 s38, -1
1268; VI-NEXT:    s_mov_b32 s39, 0xe80000
1269; VI-NEXT:    s_add_u32 s36, s36, s3
1270; VI-NEXT:    s_addc_u32 s37, s37, 0
1271; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1272; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1273; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1274; VI-NEXT:    v_mov_b32_e32 v0, 1
1275; VI-NEXT:    v_mov_b32_e32 v1, 2
1276; VI-NEXT:    v_mov_b32_e32 v2, 3
1277; VI-NEXT:    v_mov_b32_e32 v3, 4
1278; VI-NEXT:    s_mov_b32 s32, 0
1279; VI-NEXT:    s_getpc_b64 s[4:5]
1280; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1281; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1282; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1283; VI-NEXT:    s_endpgm
1284;
1285; CI-LABEL: test_call_external_void_func_v2i64_imm:
1286; CI:       ; %bb.0:
1287; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1288; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1289; CI-NEXT:    s_mov_b32 s38, -1
1290; CI-NEXT:    s_mov_b32 s39, 0xe8f000
1291; CI-NEXT:    s_add_u32 s36, s36, s3
1292; CI-NEXT:    s_addc_u32 s37, s37, 0
1293; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1294; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1295; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1296; CI-NEXT:    v_mov_b32_e32 v0, 1
1297; CI-NEXT:    v_mov_b32_e32 v1, 2
1298; CI-NEXT:    v_mov_b32_e32 v2, 3
1299; CI-NEXT:    v_mov_b32_e32 v3, 4
1300; CI-NEXT:    s_mov_b32 s32, 0
1301; CI-NEXT:    s_getpc_b64 s[4:5]
1302; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1303; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1304; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1305; CI-NEXT:    s_endpgm
1306;
1307; GFX9-LABEL: test_call_external_void_func_v2i64_imm:
1308; GFX9:       ; %bb.0:
1309; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1310; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1311; GFX9-NEXT:    s_mov_b32 s38, -1
1312; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1313; GFX9-NEXT:    s_add_u32 s36, s36, s3
1314; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1315; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1316; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1317; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1318; GFX9-NEXT:    v_mov_b32_e32 v0, 1
1319; GFX9-NEXT:    v_mov_b32_e32 v1, 2
1320; GFX9-NEXT:    v_mov_b32_e32 v2, 3
1321; GFX9-NEXT:    v_mov_b32_e32 v3, 4
1322; GFX9-NEXT:    s_mov_b32 s32, 0
1323; GFX9-NEXT:    s_getpc_b64 s[4:5]
1324; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i64@rel32@lo+4
1325; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i64@rel32@hi+12
1326; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1327; GFX9-NEXT:    s_endpgm
1328;
1329; GFX11-LABEL: test_call_external_void_func_v2i64_imm:
1330; GFX11:       ; %bb.0:
1331; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
1332; GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
1333; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1334; GFX11-NEXT:    s_mov_b32 s32, 0
1335; GFX11-NEXT:    s_getpc_b64 s[2:3]
1336; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2i64@rel32@lo+4
1337; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2i64@rel32@hi+12
1338; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1339; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1340; GFX11-NEXT:    s_endpgm
1341;
1342; HSA-LABEL: test_call_external_void_func_v2i64_imm:
1343; HSA:       ; %bb.0:
1344; HSA-NEXT:    s_add_i32 s6, s6, s9
1345; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
1346; HSA-NEXT:    s_add_u32 s0, s0, s9
1347; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
1348; HSA-NEXT:    s_addc_u32 s1, s1, 0
1349; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1350; HSA-NEXT:    v_mov_b32_e32 v0, 1
1351; HSA-NEXT:    v_mov_b32_e32 v1, 2
1352; HSA-NEXT:    v_mov_b32_e32 v2, 3
1353; HSA-NEXT:    v_mov_b32_e32 v3, 4
1354; HSA-NEXT:    s_mov_b32 s32, 0
1355; HSA-NEXT:    s_getpc_b64 s[8:9]
1356; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v2i64@rel32@lo+4
1357; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v2i64@rel32@hi+12
1358; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1359; HSA-NEXT:    s_endpgm
1360  call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>)
1361  ret void
1362}
1363
; Kernel that builds a <3 x i64> argument for external_void_func_v3i64 from a
; <2 x i64> loaded through a null ptr addrspace(1) plus one constant element.
; The checks expect the loaded elements in v[0:3] (one 128-bit buffer load)
; and the constant 8589934593 (0x0000000200000001) split into v4 = 1, v5 = 2.
1364define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
1365; VI-LABEL: test_call_external_void_func_v3i64:
1366; VI:       ; %bb.0:
1367; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1368; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1369; VI-NEXT:    s_mov_b32 s38, -1
1370; VI-NEXT:    s_mov_b32 s39, 0xe80000
1371; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1372; VI-NEXT:    s_mov_b32 s0, 0
1373; VI-NEXT:    s_add_u32 s36, s36, s3
1374; VI-NEXT:    s_mov_b32 s3, 0xf000
1375; VI-NEXT:    s_mov_b32 s2, -1
1376; VI-NEXT:    s_mov_b32 s1, s0
1377; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1378; VI-NEXT:    s_addc_u32 s37, s37, 0
1379; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1380; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1381; VI-NEXT:    v_mov_b32_e32 v4, 1
1382; VI-NEXT:    v_mov_b32_e32 v5, 2
1383; VI-NEXT:    s_mov_b32 s32, 0
1384; VI-NEXT:    s_getpc_b64 s[4:5]
1385; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1386; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1387; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1388; VI-NEXT:    s_endpgm
1389;
1390; CI-LABEL: test_call_external_void_func_v3i64:
1391; CI:       ; %bb.0:
1392; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1393; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1394; CI-NEXT:    s_mov_b32 s38, -1
1395; CI-NEXT:    s_mov_b32 s39, 0xe8f000
1396; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1397; CI-NEXT:    s_mov_b32 s0, 0
1398; CI-NEXT:    s_add_u32 s36, s36, s3
1399; CI-NEXT:    s_mov_b32 s3, 0xf000
1400; CI-NEXT:    s_mov_b32 s2, -1
1401; CI-NEXT:    s_mov_b32 s1, s0
1402; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1403; CI-NEXT:    s_addc_u32 s37, s37, 0
1404; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1405; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1406; CI-NEXT:    v_mov_b32_e32 v4, 1
1407; CI-NEXT:    v_mov_b32_e32 v5, 2
1408; CI-NEXT:    s_mov_b32 s32, 0
1409; CI-NEXT:    s_getpc_b64 s[4:5]
1410; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1411; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1412; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1413; CI-NEXT:    s_endpgm
1414;
1415; GFX9-LABEL: test_call_external_void_func_v3i64:
1416; GFX9:       ; %bb.0:
1417; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1418; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1419; GFX9-NEXT:    s_mov_b32 s38, -1
1420; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1421; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1422; GFX9-NEXT:    s_mov_b32 s0, 0
1423; GFX9-NEXT:    s_add_u32 s36, s36, s3
1424; GFX9-NEXT:    s_mov_b32 s3, 0xf000
1425; GFX9-NEXT:    s_mov_b32 s2, -1
1426; GFX9-NEXT:    s_mov_b32 s1, s0
1427; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1428; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1429; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1430; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1431; GFX9-NEXT:    v_mov_b32_e32 v4, 1
1432; GFX9-NEXT:    v_mov_b32_e32 v5, 2
1433; GFX9-NEXT:    s_mov_b32 s32, 0
1434; GFX9-NEXT:    s_getpc_b64 s[4:5]
1435; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i64@rel32@lo+4
1436; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i64@rel32@hi+12
1437; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1438; GFX9-NEXT:    s_endpgm
1439;
1440; GFX11-LABEL: test_call_external_void_func_v3i64:
1441; GFX11:       ; %bb.0:
1442; GFX11-NEXT:    s_mov_b32 s4, 0
1443; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
1444; GFX11-NEXT:    s_mov_b32 s6, -1
1445; GFX11-NEXT:    s_mov_b32 s5, s4
1446; GFX11-NEXT:    v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2
1447; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
1448; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1449; GFX11-NEXT:    s_mov_b32 s32, 0
1450; GFX11-NEXT:    s_getpc_b64 s[2:3]
1451; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v3i64@rel32@lo+4
1452; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v3i64@rel32@hi+12
1453; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1454; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1455; GFX11-NEXT:    s_endpgm
1456;
1457; HSA-LABEL: test_call_external_void_func_v3i64:
1458; HSA:       ; %bb.0:
1459; HSA-NEXT:    s_add_i32 s6, s6, s9
1460; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
1461; HSA-NEXT:    s_mov_b32 s8, 0
1462; HSA-NEXT:    s_add_u32 s0, s0, s9
1463; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
1464; HSA-NEXT:    s_mov_b32 s10, -1
1465; HSA-NEXT:    s_mov_b32 s9, s8
1466; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1467; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
1468; HSA-NEXT:    s_addc_u32 s1, s1, 0
1469; HSA-NEXT:    v_mov_b32_e32 v4, 1
1470; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1471; HSA-NEXT:    v_mov_b32_e32 v5, 2
1472; HSA-NEXT:    s_mov_b32 s32, 0
1473; HSA-NEXT:    s_getpc_b64 s[8:9]
1474; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v3i64@rel32@lo+4
1475; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v3i64@rel32@hi+12
1476; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1477; HSA-NEXT:    s_endpgm
1478  %load = load <2 x i64>, ptr addrspace(1) null
  ; Mask <0, 1, 2> keeps both loaded elements and appends element 0 of the
  ; constant operand (8589934593); the undef element is never selected.
1479  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
1480
1481  call void @external_void_func_v3i64(<3 x i64> %val)
1482  ret void
1483}
1484
; Builds a <4 x i64> call argument from a <2 x i64> loaded through the null
; addrspace(1) pointer plus two constant lanes, and passes it to
; @external_void_func_v4i64. The constants 8589934593 (0x200000001) and
; 17179869187 (0x400000003) split into the 32-bit immediates 1, 2, 3 and 4
; that the checks expect in v4-v7; the loaded lanes are expected in v[0:3].
; The assertions are autogenerated (see the note on the first line of this
; file), so regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
1485define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 {
1486; VI-LABEL: test_call_external_void_func_v4i64:
1487; VI:       ; %bb.0:
1488; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1489; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1490; VI-NEXT:    s_mov_b32 s38, -1
1491; VI-NEXT:    s_mov_b32 s39, 0xe80000
1492; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1493; VI-NEXT:    s_mov_b32 s0, 0
1494; VI-NEXT:    s_add_u32 s36, s36, s3
1495; VI-NEXT:    s_mov_b32 s3, 0xf000
1496; VI-NEXT:    s_mov_b32 s2, -1
1497; VI-NEXT:    s_mov_b32 s1, s0
1498; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1499; VI-NEXT:    s_addc_u32 s37, s37, 0
1500; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1501; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1502; VI-NEXT:    v_mov_b32_e32 v4, 1
1503; VI-NEXT:    v_mov_b32_e32 v5, 2
1504; VI-NEXT:    v_mov_b32_e32 v6, 3
1505; VI-NEXT:    v_mov_b32_e32 v7, 4
1506; VI-NEXT:    s_mov_b32 s32, 0
1507; VI-NEXT:    s_getpc_b64 s[4:5]
1508; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1509; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1510; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1511; VI-NEXT:    s_endpgm
1512;
1513; CI-LABEL: test_call_external_void_func_v4i64:
1514; CI:       ; %bb.0:
1515; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1516; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1517; CI-NEXT:    s_mov_b32 s38, -1
1518; CI-NEXT:    s_mov_b32 s39, 0xe8f000
1519; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1520; CI-NEXT:    s_mov_b32 s0, 0
1521; CI-NEXT:    s_add_u32 s36, s36, s3
1522; CI-NEXT:    s_mov_b32 s3, 0xf000
1523; CI-NEXT:    s_mov_b32 s2, -1
1524; CI-NEXT:    s_mov_b32 s1, s0
1525; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1526; CI-NEXT:    s_addc_u32 s37, s37, 0
1527; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1528; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1529; CI-NEXT:    v_mov_b32_e32 v4, 1
1530; CI-NEXT:    v_mov_b32_e32 v5, 2
1531; CI-NEXT:    v_mov_b32_e32 v6, 3
1532; CI-NEXT:    v_mov_b32_e32 v7, 4
1533; CI-NEXT:    s_mov_b32 s32, 0
1534; CI-NEXT:    s_getpc_b64 s[4:5]
1535; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1536; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1537; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1538; CI-NEXT:    s_endpgm
1539;
1540; GFX9-LABEL: test_call_external_void_func_v4i64:
1541; GFX9:       ; %bb.0:
1542; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1543; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1544; GFX9-NEXT:    s_mov_b32 s38, -1
1545; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1546; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1547; GFX9-NEXT:    s_mov_b32 s0, 0
1548; GFX9-NEXT:    s_add_u32 s36, s36, s3
1549; GFX9-NEXT:    s_mov_b32 s3, 0xf000
1550; GFX9-NEXT:    s_mov_b32 s2, -1
1551; GFX9-NEXT:    s_mov_b32 s1, s0
1552; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
1553; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1554; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1555; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1556; GFX9-NEXT:    v_mov_b32_e32 v4, 1
1557; GFX9-NEXT:    v_mov_b32_e32 v5, 2
1558; GFX9-NEXT:    v_mov_b32_e32 v6, 3
1559; GFX9-NEXT:    v_mov_b32_e32 v7, 4
1560; GFX9-NEXT:    s_mov_b32 s32, 0
1561; GFX9-NEXT:    s_getpc_b64 s[4:5]
1562; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i64@rel32@lo+4
1563; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i64@rel32@hi+12
1564; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1565; GFX9-NEXT:    s_endpgm
1566;
1567; GFX11-LABEL: test_call_external_void_func_v4i64:
1568; GFX11:       ; %bb.0:
1569; GFX11-NEXT:    s_mov_b32 s4, 0
1570; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
1571; GFX11-NEXT:    s_mov_b32 s6, -1
1572; GFX11-NEXT:    s_mov_b32 s5, s4
1573; GFX11-NEXT:    v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 2
1574; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
1575; GFX11-NEXT:    v_dual_mov_b32 v6, 3 :: v_dual_mov_b32 v7, 4
1576; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1577; GFX11-NEXT:    s_mov_b32 s32, 0
1578; GFX11-NEXT:    s_getpc_b64 s[2:3]
1579; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v4i64@rel32@lo+4
1580; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v4i64@rel32@hi+12
1581; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1582; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1583; GFX11-NEXT:    s_endpgm
1584;
1585; HSA-LABEL: test_call_external_void_func_v4i64:
1586; HSA:       ; %bb.0:
1587; HSA-NEXT:    s_add_i32 s6, s6, s9
1588; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
1589; HSA-NEXT:    s_mov_b32 s8, 0
1590; HSA-NEXT:    s_add_u32 s0, s0, s9
1591; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
1592; HSA-NEXT:    s_mov_b32 s10, -1
1593; HSA-NEXT:    s_mov_b32 s9, s8
1594; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
1595; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
1596; HSA-NEXT:    s_addc_u32 s1, s1, 0
1597; HSA-NEXT:    v_mov_b32_e32 v4, 1
1598; HSA-NEXT:    v_mov_b32_e32 v5, 2
1599; HSA-NEXT:    v_mov_b32_e32 v6, 3
1600; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1601; HSA-NEXT:    v_mov_b32_e32 v7, 4
1602; HSA-NEXT:    s_mov_b32 s32, 0
1603; HSA-NEXT:    s_getpc_b64 s[8:9]
1604; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v4i64@rel32@lo+4
1605; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v4i64@rel32@hi+12
1606; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1607; HSA-NEXT:    s_endpgm
1608  %load = load <2 x i64>, ptr addrspace(1) null
1609  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1610  call void @external_void_func_v4i64(<4 x i64> %val)
1611  ret void
1612}
1613
; Passes the constant half 4.0 to @external_void_func_f16. Every run line
; except the hawaii one (CI prefix) expects the raw f16 bit pattern 0x4400 in
; v0; the CI checks expect the inline constant 4.0 instead.
; NOTE(review): presumably the half argument is promoted to f32 on CI, which
; would explain the different immediate -- confirm against the backend.
1614define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 {
1615; VI-LABEL: test_call_external_void_func_f16_imm:
1616; VI:       ; %bb.0:
1617; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1618; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1619; VI-NEXT:    s_mov_b32 s38, -1
1620; VI-NEXT:    s_mov_b32 s39, 0xe80000
1621; VI-NEXT:    s_add_u32 s36, s36, s3
1622; VI-NEXT:    s_addc_u32 s37, s37, 0
1623; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1624; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1625; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1626; VI-NEXT:    v_mov_b32_e32 v0, 0x4400
1627; VI-NEXT:    s_mov_b32 s32, 0
1628; VI-NEXT:    s_getpc_b64 s[4:5]
1629; VI-NEXT:    s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1630; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1631; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1632; VI-NEXT:    s_endpgm
1633;
1634; CI-LABEL: test_call_external_void_func_f16_imm:
1635; CI:       ; %bb.0:
1636; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1637; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1638; CI-NEXT:    s_mov_b32 s38, -1
1639; CI-NEXT:    s_mov_b32 s39, 0xe8f000
1640; CI-NEXT:    s_add_u32 s36, s36, s3
1641; CI-NEXT:    s_addc_u32 s37, s37, 0
1642; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1643; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1644; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1645; CI-NEXT:    v_mov_b32_e32 v0, 4.0
1646; CI-NEXT:    s_mov_b32 s32, 0
1647; CI-NEXT:    s_getpc_b64 s[4:5]
1648; CI-NEXT:    s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1649; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1650; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1651; CI-NEXT:    s_endpgm
1652;
1653; GFX9-LABEL: test_call_external_void_func_f16_imm:
1654; GFX9:       ; %bb.0:
1655; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1656; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1657; GFX9-NEXT:    s_mov_b32 s38, -1
1658; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1659; GFX9-NEXT:    s_add_u32 s36, s36, s3
1660; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1661; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1662; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1663; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1664; GFX9-NEXT:    v_mov_b32_e32 v0, 0x4400
1665; GFX9-NEXT:    s_mov_b32 s32, 0
1666; GFX9-NEXT:    s_getpc_b64 s[4:5]
1667; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_f16@rel32@lo+4
1668; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_f16@rel32@hi+12
1669; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1670; GFX9-NEXT:    s_endpgm
1671;
1672; GFX11-LABEL: test_call_external_void_func_f16_imm:
1673; GFX11:       ; %bb.0:
1674; GFX11-NEXT:    v_mov_b32_e32 v0, 0x4400
1675; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1676; GFX11-NEXT:    s_mov_b32 s32, 0
1677; GFX11-NEXT:    s_getpc_b64 s[2:3]
1678; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_f16@rel32@lo+4
1679; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_f16@rel32@hi+12
1680; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1681; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1682; GFX11-NEXT:    s_endpgm
1683;
1684; HSA-LABEL: test_call_external_void_func_f16_imm:
1685; HSA:       ; %bb.0:
1686; HSA-NEXT:    s_add_i32 s6, s6, s9
1687; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
1688; HSA-NEXT:    s_add_u32 s0, s0, s9
1689; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
1690; HSA-NEXT:    s_addc_u32 s1, s1, 0
1691; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1692; HSA-NEXT:    v_mov_b32_e32 v0, 0x4400
1693; HSA-NEXT:    s_mov_b32 s32, 0
1694; HSA-NEXT:    s_getpc_b64 s[8:9]
1695; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_f16@rel32@lo+4
1696; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_f16@rel32@hi+12
1697; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1698; HSA-NEXT:    s_endpgm
1699  call void @external_void_func_f16(half 4.0)
1700  ret void
1701}
1702
; Passes the constant float 4.0 to @external_void_func_f32. All five check
; prefixes expect the value to be materialized in v0 as the inline constant
; 4.0 before the s_swappc_b64 call sequence.
1703define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 {
1704; VI-LABEL: test_call_external_void_func_f32_imm:
1705; VI:       ; %bb.0:
1706; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1707; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1708; VI-NEXT:    s_mov_b32 s38, -1
1709; VI-NEXT:    s_mov_b32 s39, 0xe80000
1710; VI-NEXT:    s_add_u32 s36, s36, s3
1711; VI-NEXT:    s_addc_u32 s37, s37, 0
1712; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1713; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1714; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1715; VI-NEXT:    v_mov_b32_e32 v0, 4.0
1716; VI-NEXT:    s_mov_b32 s32, 0
1717; VI-NEXT:    s_getpc_b64 s[4:5]
1718; VI-NEXT:    s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1719; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1720; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1721; VI-NEXT:    s_endpgm
1722;
1723; CI-LABEL: test_call_external_void_func_f32_imm:
1724; CI:       ; %bb.0:
1725; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1726; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1727; CI-NEXT:    s_mov_b32 s38, -1
1728; CI-NEXT:    s_mov_b32 s39, 0xe8f000
1729; CI-NEXT:    s_add_u32 s36, s36, s3
1730; CI-NEXT:    s_addc_u32 s37, s37, 0
1731; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1732; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1733; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1734; CI-NEXT:    v_mov_b32_e32 v0, 4.0
1735; CI-NEXT:    s_mov_b32 s32, 0
1736; CI-NEXT:    s_getpc_b64 s[4:5]
1737; CI-NEXT:    s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1738; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1739; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1740; CI-NEXT:    s_endpgm
1741;
1742; GFX9-LABEL: test_call_external_void_func_f32_imm:
1743; GFX9:       ; %bb.0:
1744; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1745; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1746; GFX9-NEXT:    s_mov_b32 s38, -1
1747; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1748; GFX9-NEXT:    s_add_u32 s36, s36, s3
1749; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1750; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1751; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1752; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1753; GFX9-NEXT:    v_mov_b32_e32 v0, 4.0
1754; GFX9-NEXT:    s_mov_b32 s32, 0
1755; GFX9-NEXT:    s_getpc_b64 s[4:5]
1756; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_f32@rel32@lo+4
1757; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_f32@rel32@hi+12
1758; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1759; GFX9-NEXT:    s_endpgm
1760;
1761; GFX11-LABEL: test_call_external_void_func_f32_imm:
1762; GFX11:       ; %bb.0:
1763; GFX11-NEXT:    v_mov_b32_e32 v0, 4.0
1764; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1765; GFX11-NEXT:    s_mov_b32 s32, 0
1766; GFX11-NEXT:    s_getpc_b64 s[2:3]
1767; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_f32@rel32@lo+4
1768; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_f32@rel32@hi+12
1769; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1770; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1771; GFX11-NEXT:    s_endpgm
1772;
1773; HSA-LABEL: test_call_external_void_func_f32_imm:
1774; HSA:       ; %bb.0:
1775; HSA-NEXT:    s_add_i32 s6, s6, s9
1776; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
1777; HSA-NEXT:    s_add_u32 s0, s0, s9
1778; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
1779; HSA-NEXT:    s_addc_u32 s1, s1, 0
1780; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1781; HSA-NEXT:    v_mov_b32_e32 v0, 4.0
1782; HSA-NEXT:    s_mov_b32 s32, 0
1783; HSA-NEXT:    s_getpc_b64 s[8:9]
1784; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_f32@rel32@lo+4
1785; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_f32@rel32@hi+12
1786; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1787; HSA-NEXT:    s_endpgm
1788  call void @external_void_func_f32(float 4.0)
1789  ret void
1790}
1791
; Passes the constant vector <2 x float> <1.0, 2.0> to
; @external_void_func_v2f32. The checks expect element 0 in v0 and element 1
; in v1, each as an inline constant; the GFX11 checks expect both moves fused
; into a single v_dual_mov_b32.
1792define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 {
1793; VI-LABEL: test_call_external_void_func_v2f32_imm:
1794; VI:       ; %bb.0:
1795; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1796; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1797; VI-NEXT:    s_mov_b32 s38, -1
1798; VI-NEXT:    s_mov_b32 s39, 0xe80000
1799; VI-NEXT:    s_add_u32 s36, s36, s3
1800; VI-NEXT:    s_addc_u32 s37, s37, 0
1801; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1802; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1803; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1804; VI-NEXT:    v_mov_b32_e32 v0, 1.0
1805; VI-NEXT:    v_mov_b32_e32 v1, 2.0
1806; VI-NEXT:    s_mov_b32 s32, 0
1807; VI-NEXT:    s_getpc_b64 s[4:5]
1808; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1809; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1810; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1811; VI-NEXT:    s_endpgm
1812;
1813; CI-LABEL: test_call_external_void_func_v2f32_imm:
1814; CI:       ; %bb.0:
1815; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1816; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1817; CI-NEXT:    s_mov_b32 s38, -1
1818; CI-NEXT:    s_mov_b32 s39, 0xe8f000
1819; CI-NEXT:    s_add_u32 s36, s36, s3
1820; CI-NEXT:    s_addc_u32 s37, s37, 0
1821; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1822; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1823; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1824; CI-NEXT:    v_mov_b32_e32 v0, 1.0
1825; CI-NEXT:    v_mov_b32_e32 v1, 2.0
1826; CI-NEXT:    s_mov_b32 s32, 0
1827; CI-NEXT:    s_getpc_b64 s[4:5]
1828; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1829; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1830; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1831; CI-NEXT:    s_endpgm
1832;
1833; GFX9-LABEL: test_call_external_void_func_v2f32_imm:
1834; GFX9:       ; %bb.0:
1835; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1836; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1837; GFX9-NEXT:    s_mov_b32 s38, -1
1838; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1839; GFX9-NEXT:    s_add_u32 s36, s36, s3
1840; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1841; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1842; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1843; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1844; GFX9-NEXT:    v_mov_b32_e32 v0, 1.0
1845; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
1846; GFX9-NEXT:    s_mov_b32 s32, 0
1847; GFX9-NEXT:    s_getpc_b64 s[4:5]
1848; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2f32@rel32@lo+4
1849; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f32@rel32@hi+12
1850; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1851; GFX9-NEXT:    s_endpgm
1852;
1853; GFX11-LABEL: test_call_external_void_func_v2f32_imm:
1854; GFX11:       ; %bb.0:
1855; GFX11-NEXT:    v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
1856; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1857; GFX11-NEXT:    s_mov_b32 s32, 0
1858; GFX11-NEXT:    s_getpc_b64 s[2:3]
1859; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2f32@rel32@lo+4
1860; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2f32@rel32@hi+12
1861; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1862; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1863; GFX11-NEXT:    s_endpgm
1864;
1865; HSA-LABEL: test_call_external_void_func_v2f32_imm:
1866; HSA:       ; %bb.0:
1867; HSA-NEXT:    s_add_i32 s6, s6, s9
1868; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
1869; HSA-NEXT:    s_add_u32 s0, s0, s9
1870; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
1871; HSA-NEXT:    s_addc_u32 s1, s1, 0
1872; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1873; HSA-NEXT:    v_mov_b32_e32 v0, 1.0
1874; HSA-NEXT:    v_mov_b32_e32 v1, 2.0
1875; HSA-NEXT:    s_mov_b32 s32, 0
1876; HSA-NEXT:    s_getpc_b64 s[8:9]
1877; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v2f32@rel32@lo+4
1878; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v2f32@rel32@hi+12
1879; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1880; HSA-NEXT:    s_endpgm
1881  call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>)
1882  ret void
1883}
1884
; Passes the constant vector <3 x float> <1.0, 2.0, 4.0> to
; @external_void_func_v3f32. The checks expect the three elements in v0, v1
; and v2 as inline constants; the GFX11 checks expect the first two moves
; fused into a v_dual_mov_b32 followed by a plain v_mov_b32 for v2.
1885define amdgpu_kernel void @test_call_external_void_func_v3f32_imm() #0 {
1886; VI-LABEL: test_call_external_void_func_v3f32_imm:
1887; VI:       ; %bb.0:
1888; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1889; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1890; VI-NEXT:    s_mov_b32 s38, -1
1891; VI-NEXT:    s_mov_b32 s39, 0xe80000
1892; VI-NEXT:    s_add_u32 s36, s36, s3
1893; VI-NEXT:    s_addc_u32 s37, s37, 0
1894; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1895; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1896; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1897; VI-NEXT:    v_mov_b32_e32 v0, 1.0
1898; VI-NEXT:    v_mov_b32_e32 v1, 2.0
1899; VI-NEXT:    v_mov_b32_e32 v2, 4.0
1900; VI-NEXT:    s_mov_b32 s32, 0
1901; VI-NEXT:    s_getpc_b64 s[4:5]
1902; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1903; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1904; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1905; VI-NEXT:    s_endpgm
1906;
1907; CI-LABEL: test_call_external_void_func_v3f32_imm:
1908; CI:       ; %bb.0:
1909; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1910; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1911; CI-NEXT:    s_mov_b32 s38, -1
1912; CI-NEXT:    s_mov_b32 s39, 0xe8f000
1913; CI-NEXT:    s_add_u32 s36, s36, s3
1914; CI-NEXT:    s_addc_u32 s37, s37, 0
1915; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1916; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1917; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1918; CI-NEXT:    v_mov_b32_e32 v0, 1.0
1919; CI-NEXT:    v_mov_b32_e32 v1, 2.0
1920; CI-NEXT:    v_mov_b32_e32 v2, 4.0
1921; CI-NEXT:    s_mov_b32 s32, 0
1922; CI-NEXT:    s_getpc_b64 s[4:5]
1923; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1924; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1925; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1926; CI-NEXT:    s_endpgm
1927;
1928; GFX9-LABEL: test_call_external_void_func_v3f32_imm:
1929; GFX9:       ; %bb.0:
1930; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1931; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1932; GFX9-NEXT:    s_mov_b32 s38, -1
1933; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
1934; GFX9-NEXT:    s_add_u32 s36, s36, s3
1935; GFX9-NEXT:    s_addc_u32 s37, s37, 0
1936; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
1937; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
1938; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
1939; GFX9-NEXT:    v_mov_b32_e32 v0, 1.0
1940; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
1941; GFX9-NEXT:    v_mov_b32_e32 v2, 4.0
1942; GFX9-NEXT:    s_mov_b32 s32, 0
1943; GFX9-NEXT:    s_getpc_b64 s[4:5]
1944; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3f32@rel32@lo+4
1945; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f32@rel32@hi+12
1946; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
1947; GFX9-NEXT:    s_endpgm
1948;
1949; GFX11-LABEL: test_call_external_void_func_v3f32_imm:
1950; GFX11:       ; %bb.0:
1951; GFX11-NEXT:    v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
1952; GFX11-NEXT:    v_mov_b32_e32 v2, 4.0
1953; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
1954; GFX11-NEXT:    s_mov_b32 s32, 0
1955; GFX11-NEXT:    s_getpc_b64 s[2:3]
1956; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v3f32@rel32@lo+4
1957; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v3f32@rel32@hi+12
1958; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1959; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
1960; GFX11-NEXT:    s_endpgm
1961;
1962; HSA-LABEL: test_call_external_void_func_v3f32_imm:
1963; HSA:       ; %bb.0:
1964; HSA-NEXT:    s_add_i32 s6, s6, s9
1965; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
1966; HSA-NEXT:    s_add_u32 s0, s0, s9
1967; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
1968; HSA-NEXT:    s_addc_u32 s1, s1, 0
1969; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
1970; HSA-NEXT:    v_mov_b32_e32 v0, 1.0
1971; HSA-NEXT:    v_mov_b32_e32 v1, 2.0
1972; HSA-NEXT:    v_mov_b32_e32 v2, 4.0
1973; HSA-NEXT:    s_mov_b32 s32, 0
1974; HSA-NEXT:    s_getpc_b64 s[8:9]
1975; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v3f32@rel32@lo+4
1976; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v3f32@rel32@hi+12
1977; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
1978; HSA-NEXT:    s_endpgm
1979  call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>)
1980  ret void
1981}
1982
; Passes the constant vector <5 x float> <1.0, 2.0, 4.0, -1.0, 0.5> to
; @external_void_func_v5f32. The checks expect the five elements, in order, in
; v0 through v4 as inline constants; the GFX11 checks expect two
; v_dual_mov_b32 pairs followed by a plain v_mov_b32 for v4.
1983define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 {
1984; VI-LABEL: test_call_external_void_func_v5f32_imm:
1985; VI:       ; %bb.0:
1986; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
1987; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
1988; VI-NEXT:    s_mov_b32 s38, -1
1989; VI-NEXT:    s_mov_b32 s39, 0xe80000
1990; VI-NEXT:    s_add_u32 s36, s36, s3
1991; VI-NEXT:    s_addc_u32 s37, s37, 0
1992; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
1993; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
1994; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
1995; VI-NEXT:    v_mov_b32_e32 v0, 1.0
1996; VI-NEXT:    v_mov_b32_e32 v1, 2.0
1997; VI-NEXT:    v_mov_b32_e32 v2, 4.0
1998; VI-NEXT:    v_mov_b32_e32 v3, -1.0
1999; VI-NEXT:    v_mov_b32_e32 v4, 0.5
2000; VI-NEXT:    s_mov_b32 s32, 0
2001; VI-NEXT:    s_getpc_b64 s[4:5]
2002; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
2003; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
2004; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2005; VI-NEXT:    s_endpgm
2006;
2007; CI-LABEL: test_call_external_void_func_v5f32_imm:
2008; CI:       ; %bb.0:
2009; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2010; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2011; CI-NEXT:    s_mov_b32 s38, -1
2012; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2013; CI-NEXT:    s_add_u32 s36, s36, s3
2014; CI-NEXT:    s_addc_u32 s37, s37, 0
2015; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2016; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2017; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2018; CI-NEXT:    v_mov_b32_e32 v0, 1.0
2019; CI-NEXT:    v_mov_b32_e32 v1, 2.0
2020; CI-NEXT:    v_mov_b32_e32 v2, 4.0
2021; CI-NEXT:    v_mov_b32_e32 v3, -1.0
2022; CI-NEXT:    v_mov_b32_e32 v4, 0.5
2023; CI-NEXT:    s_mov_b32 s32, 0
2024; CI-NEXT:    s_getpc_b64 s[4:5]
2025; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
2026; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
2027; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2028; CI-NEXT:    s_endpgm
2029;
2030; GFX9-LABEL: test_call_external_void_func_v5f32_imm:
2031; GFX9:       ; %bb.0:
2032; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2033; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2034; GFX9-NEXT:    s_mov_b32 s38, -1
2035; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2036; GFX9-NEXT:    s_add_u32 s36, s36, s3
2037; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2038; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2039; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2040; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2041; GFX9-NEXT:    v_mov_b32_e32 v0, 1.0
2042; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
2043; GFX9-NEXT:    v_mov_b32_e32 v2, 4.0
2044; GFX9-NEXT:    v_mov_b32_e32 v3, -1.0
2045; GFX9-NEXT:    v_mov_b32_e32 v4, 0.5
2046; GFX9-NEXT:    s_mov_b32 s32, 0
2047; GFX9-NEXT:    s_getpc_b64 s[4:5]
2048; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v5f32@rel32@lo+4
2049; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v5f32@rel32@hi+12
2050; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2051; GFX9-NEXT:    s_endpgm
2052;
2053; GFX11-LABEL: test_call_external_void_func_v5f32_imm:
2054; GFX11:       ; %bb.0:
2055; GFX11-NEXT:    v_dual_mov_b32 v0, 1.0 :: v_dual_mov_b32 v1, 2.0
2056; GFX11-NEXT:    v_dual_mov_b32 v2, 4.0 :: v_dual_mov_b32 v3, -1.0
2057; GFX11-NEXT:    v_mov_b32_e32 v4, 0.5
2058; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2059; GFX11-NEXT:    s_mov_b32 s32, 0
2060; GFX11-NEXT:    s_getpc_b64 s[2:3]
2061; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v5f32@rel32@lo+4
2062; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v5f32@rel32@hi+12
2063; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2064; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2065; GFX11-NEXT:    s_endpgm
2066;
2067; HSA-LABEL: test_call_external_void_func_v5f32_imm:
2068; HSA:       ; %bb.0:
2069; HSA-NEXT:    s_add_i32 s6, s6, s9
2070; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2071; HSA-NEXT:    s_add_u32 s0, s0, s9
2072; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2073; HSA-NEXT:    s_addc_u32 s1, s1, 0
2074; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2075; HSA-NEXT:    v_mov_b32_e32 v0, 1.0
2076; HSA-NEXT:    v_mov_b32_e32 v1, 2.0
2077; HSA-NEXT:    v_mov_b32_e32 v2, 4.0
2078; HSA-NEXT:    v_mov_b32_e32 v3, -1.0
2079; HSA-NEXT:    v_mov_b32_e32 v4, 0.5
2080; HSA-NEXT:    s_mov_b32 s32, 0
2081; HSA-NEXT:    s_getpc_b64 s[8:9]
2082; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v5f32@rel32@lo+4
2083; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v5f32@rel32@hi+12
2084; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2085; HSA-NEXT:    s_endpgm
2086  call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>)
2087  ret void
2088}
2089
; Passes the constant double 4.0 (bit pattern 0x4010000000000000) to
; @external_void_func_f64. The checks expect the value split across two
; 32-bit registers, with the low dword 0 in v0 and the high dword 0x40100000
; in v1; the GFX11 checks expect both moves fused into one v_dual_mov_b32.
2090define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 {
2091; VI-LABEL: test_call_external_void_func_f64_imm:
2092; VI:       ; %bb.0:
2093; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2094; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2095; VI-NEXT:    s_mov_b32 s38, -1
2096; VI-NEXT:    s_mov_b32 s39, 0xe80000
2097; VI-NEXT:    s_add_u32 s36, s36, s3
2098; VI-NEXT:    s_addc_u32 s37, s37, 0
2099; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2100; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2101; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2102; VI-NEXT:    v_mov_b32_e32 v0, 0
2103; VI-NEXT:    v_mov_b32_e32 v1, 0x40100000
2104; VI-NEXT:    s_mov_b32 s32, 0
2105; VI-NEXT:    s_getpc_b64 s[4:5]
2106; VI-NEXT:    s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2107; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2108; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2109; VI-NEXT:    s_endpgm
2110;
2111; CI-LABEL: test_call_external_void_func_f64_imm:
2112; CI:       ; %bb.0:
2113; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2114; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2115; CI-NEXT:    s_mov_b32 s38, -1
2116; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2117; CI-NEXT:    s_add_u32 s36, s36, s3
2118; CI-NEXT:    s_addc_u32 s37, s37, 0
2119; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2120; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2121; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2122; CI-NEXT:    v_mov_b32_e32 v0, 0
2123; CI-NEXT:    v_mov_b32_e32 v1, 0x40100000
2124; CI-NEXT:    s_mov_b32 s32, 0
2125; CI-NEXT:    s_getpc_b64 s[4:5]
2126; CI-NEXT:    s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2127; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2128; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2129; CI-NEXT:    s_endpgm
2130;
2131; GFX9-LABEL: test_call_external_void_func_f64_imm:
2132; GFX9:       ; %bb.0:
2133; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2134; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2135; GFX9-NEXT:    s_mov_b32 s38, -1
2136; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2137; GFX9-NEXT:    s_add_u32 s36, s36, s3
2138; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2139; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2140; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2141; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2142; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2143; GFX9-NEXT:    v_mov_b32_e32 v1, 0x40100000
2144; GFX9-NEXT:    s_mov_b32 s32, 0
2145; GFX9-NEXT:    s_getpc_b64 s[4:5]
2146; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_f64@rel32@lo+4
2147; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_f64@rel32@hi+12
2148; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2149; GFX9-NEXT:    s_endpgm
2150;
2151; GFX11-LABEL: test_call_external_void_func_f64_imm:
2152; GFX11:       ; %bb.0:
2153; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x40100000
2154; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2155; GFX11-NEXT:    s_mov_b32 s32, 0
2156; GFX11-NEXT:    s_getpc_b64 s[2:3]
2157; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_f64@rel32@lo+4
2158; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_f64@rel32@hi+12
2159; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2160; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2161; GFX11-NEXT:    s_endpgm
2162;
2163; HSA-LABEL: test_call_external_void_func_f64_imm:
2164; HSA:       ; %bb.0:
2165; HSA-NEXT:    s_add_i32 s6, s6, s9
2166; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2167; HSA-NEXT:    s_add_u32 s0, s0, s9
2168; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2169; HSA-NEXT:    s_addc_u32 s1, s1, 0
2170; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2171; HSA-NEXT:    v_mov_b32_e32 v0, 0
2172; HSA-NEXT:    v_mov_b32_e32 v1, 0x40100000
2173; HSA-NEXT:    s_mov_b32 s32, 0
2174; HSA-NEXT:    s_getpc_b64 s[8:9]
2175; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_f64@rel32@lo+4
2176; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_f64@rel32@hi+12
2177; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2178; HSA-NEXT:    s_endpgm
2179  call void @external_void_func_f64(double 4.0)
2180  ret void
2181}
2182
; Kernel that calls external_void_func_v2f64 with the constant vector
; <2.0, 4.0>. In the expected output the 128-bit argument occupies v0-v3 as
; two lo/hi dword pairs. Double 2.0 is 0x4000000000000000, so its high dword
; 0x40000000 is printed as the inline constant 2.0; double 4.0 is
; 0x4010000000000000, giving the literal 0x40100000. Both low dwords are 0.
; The assertions below are autogenerated (see the header of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2183define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 {
2184; VI-LABEL: test_call_external_void_func_v2f64_imm:
2185; VI:       ; %bb.0:
2186; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2187; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2188; VI-NEXT:    s_mov_b32 s38, -1
2189; VI-NEXT:    s_mov_b32 s39, 0xe80000
2190; VI-NEXT:    s_add_u32 s36, s36, s3
2191; VI-NEXT:    s_addc_u32 s37, s37, 0
2192; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2193; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2194; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2195; VI-NEXT:    v_mov_b32_e32 v0, 0
2196; VI-NEXT:    v_mov_b32_e32 v1, 2.0
2197; VI-NEXT:    v_mov_b32_e32 v2, 0
2198; VI-NEXT:    v_mov_b32_e32 v3, 0x40100000
2199; VI-NEXT:    s_mov_b32 s32, 0
2200; VI-NEXT:    s_getpc_b64 s[4:5]
2201; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2202; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2203; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2204; VI-NEXT:    s_endpgm
2205;
2206; CI-LABEL: test_call_external_void_func_v2f64_imm:
2207; CI:       ; %bb.0:
2208; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2209; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2210; CI-NEXT:    s_mov_b32 s38, -1
2211; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2212; CI-NEXT:    s_add_u32 s36, s36, s3
2213; CI-NEXT:    s_addc_u32 s37, s37, 0
2214; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2215; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2216; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2217; CI-NEXT:    v_mov_b32_e32 v0, 0
2218; CI-NEXT:    v_mov_b32_e32 v1, 2.0
2219; CI-NEXT:    v_mov_b32_e32 v2, 0
2220; CI-NEXT:    v_mov_b32_e32 v3, 0x40100000
2221; CI-NEXT:    s_mov_b32 s32, 0
2222; CI-NEXT:    s_getpc_b64 s[4:5]
2223; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2224; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2225; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2226; CI-NEXT:    s_endpgm
2227;
2228; GFX9-LABEL: test_call_external_void_func_v2f64_imm:
2229; GFX9:       ; %bb.0:
2230; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2231; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2232; GFX9-NEXT:    s_mov_b32 s38, -1
2233; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2234; GFX9-NEXT:    s_add_u32 s36, s36, s3
2235; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2236; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2237; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2238; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2239; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2240; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
2241; GFX9-NEXT:    v_mov_b32_e32 v2, 0
2242; GFX9-NEXT:    v_mov_b32_e32 v3, 0x40100000
2243; GFX9-NEXT:    s_mov_b32 s32, 0
2244; GFX9-NEXT:    s_getpc_b64 s[4:5]
2245; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2f64@rel32@lo+4
2246; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f64@rel32@hi+12
2247; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2248; GFX9-NEXT:    s_endpgm
2249;
2250; GFX11-LABEL: test_call_external_void_func_v2f64_imm:
2251; GFX11:       ; %bb.0:
2252; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2253; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2254; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2255; GFX11-NEXT:    s_mov_b32 s32, 0
2256; GFX11-NEXT:    s_getpc_b64 s[2:3]
2257; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2f64@rel32@lo+4
2258; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2f64@rel32@hi+12
2259; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2260; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2261; GFX11-NEXT:    s_endpgm
2262;
2263; HSA-LABEL: test_call_external_void_func_v2f64_imm:
2264; HSA:       ; %bb.0:
2265; HSA-NEXT:    s_add_i32 s6, s6, s9
2266; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2267; HSA-NEXT:    s_add_u32 s0, s0, s9
2268; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2269; HSA-NEXT:    s_addc_u32 s1, s1, 0
2270; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2271; HSA-NEXT:    v_mov_b32_e32 v0, 0
2272; HSA-NEXT:    v_mov_b32_e32 v1, 2.0
2273; HSA-NEXT:    v_mov_b32_e32 v2, 0
2274; HSA-NEXT:    v_mov_b32_e32 v3, 0x40100000
2275; HSA-NEXT:    s_mov_b32 s32, 0
2276; HSA-NEXT:    s_getpc_b64 s[8:9]
2277; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v2f64@rel32@lo+4
2278; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v2f64@rel32@hi+12
2279; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2280; HSA-NEXT:    s_endpgm
; Both elements are compile-time constants, so the expectations above contain
; only register moves (no memory load) ahead of the call.
2281  call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>)
2282  ret void
2283}
2284
; Kernel that calls external_void_func_v3f64 with the constant vector
; <2.0, 4.0, 8.0>. The expected output places the three doubles in v0-v5 as
; lo/hi dword pairs: every low dword is 0, and the high dwords are 0x40000000
; (printed as the inline constant 2.0), 0x40100000 and 0x40200000, i.e. the
; upper halves of the IEEE-754 encodings of 2.0, 4.0 and 8.0.
; The assertions below are autogenerated (see the header of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2285define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 {
2286; VI-LABEL: test_call_external_void_func_v3f64_imm:
2287; VI:       ; %bb.0:
2288; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2289; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2290; VI-NEXT:    s_mov_b32 s38, -1
2291; VI-NEXT:    s_mov_b32 s39, 0xe80000
2292; VI-NEXT:    s_add_u32 s36, s36, s3
2293; VI-NEXT:    s_addc_u32 s37, s37, 0
2294; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2295; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2296; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2297; VI-NEXT:    v_mov_b32_e32 v0, 0
2298; VI-NEXT:    v_mov_b32_e32 v1, 2.0
2299; VI-NEXT:    v_mov_b32_e32 v2, 0
2300; VI-NEXT:    v_mov_b32_e32 v3, 0x40100000
2301; VI-NEXT:    v_mov_b32_e32 v4, 0
2302; VI-NEXT:    v_mov_b32_e32 v5, 0x40200000
2303; VI-NEXT:    s_mov_b32 s32, 0
2304; VI-NEXT:    s_getpc_b64 s[4:5]
2305; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2306; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2307; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2308; VI-NEXT:    s_endpgm
2309;
2310; CI-LABEL: test_call_external_void_func_v3f64_imm:
2311; CI:       ; %bb.0:
2312; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2313; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2314; CI-NEXT:    s_mov_b32 s38, -1
2315; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2316; CI-NEXT:    s_add_u32 s36, s36, s3
2317; CI-NEXT:    s_addc_u32 s37, s37, 0
2318; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2319; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2320; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2321; CI-NEXT:    v_mov_b32_e32 v0, 0
2322; CI-NEXT:    v_mov_b32_e32 v1, 2.0
2323; CI-NEXT:    v_mov_b32_e32 v2, 0
2324; CI-NEXT:    v_mov_b32_e32 v3, 0x40100000
2325; CI-NEXT:    v_mov_b32_e32 v4, 0
2326; CI-NEXT:    v_mov_b32_e32 v5, 0x40200000
2327; CI-NEXT:    s_mov_b32 s32, 0
2328; CI-NEXT:    s_getpc_b64 s[4:5]
2329; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2330; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2331; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2332; CI-NEXT:    s_endpgm
2333;
2334; GFX9-LABEL: test_call_external_void_func_v3f64_imm:
2335; GFX9:       ; %bb.0:
2336; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2337; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2338; GFX9-NEXT:    s_mov_b32 s38, -1
2339; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2340; GFX9-NEXT:    s_add_u32 s36, s36, s3
2341; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2342; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2343; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2344; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2345; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2346; GFX9-NEXT:    v_mov_b32_e32 v1, 2.0
2347; GFX9-NEXT:    v_mov_b32_e32 v2, 0
2348; GFX9-NEXT:    v_mov_b32_e32 v3, 0x40100000
2349; GFX9-NEXT:    v_mov_b32_e32 v4, 0
2350; GFX9-NEXT:    v_mov_b32_e32 v5, 0x40200000
2351; GFX9-NEXT:    s_mov_b32 s32, 0
2352; GFX9-NEXT:    s_getpc_b64 s[4:5]
2353; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3f64@rel32@lo+4
2354; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f64@rel32@hi+12
2355; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2356; GFX9-NEXT:    s_endpgm
2357;
2358; GFX11-LABEL: test_call_external_void_func_v3f64_imm:
2359; GFX11:       ; %bb.0:
2360; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 2.0
2361; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0x40100000
2362; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40200000
2363; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2364; GFX11-NEXT:    s_mov_b32 s32, 0
2365; GFX11-NEXT:    s_getpc_b64 s[2:3]
2366; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v3f64@rel32@lo+4
2367; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v3f64@rel32@hi+12
2368; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2369; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2370; GFX11-NEXT:    s_endpgm
2371;
2372; HSA-LABEL: test_call_external_void_func_v3f64_imm:
2373; HSA:       ; %bb.0:
2374; HSA-NEXT:    s_add_i32 s6, s6, s9
2375; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2376; HSA-NEXT:    s_add_u32 s0, s0, s9
2377; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2378; HSA-NEXT:    s_addc_u32 s1, s1, 0
2379; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2380; HSA-NEXT:    v_mov_b32_e32 v0, 0
2381; HSA-NEXT:    v_mov_b32_e32 v1, 2.0
2382; HSA-NEXT:    v_mov_b32_e32 v2, 0
2383; HSA-NEXT:    v_mov_b32_e32 v3, 0x40100000
2384; HSA-NEXT:    v_mov_b32_e32 v4, 0
2385; HSA-NEXT:    v_mov_b32_e32 v5, 0x40200000
2386; HSA-NEXT:    s_mov_b32 s32, 0
2387; HSA-NEXT:    s_getpc_b64 s[8:9]
2388; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v3f64@rel32@lo+4
2389; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v3f64@rel32@hi+12
2390; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2391; HSA-NEXT:    s_endpgm
; All three elements are compile-time constants, so the expectations above
; contain only register moves (no memory load) ahead of the call.
2392  call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>)
2393  ret void
2394}
2395
; Kernel that loads a <2 x i16> through an undef addrspace(1) pointer and
; passes it to external_void_func_v2i16. Every configuration expects a single
; dword buffer load into v0. The hawaii expectations additionally wait for the
; load and shift the upper 16 bits into v1 (v_lshrrev_b32), so there the two
; elements are handed over in separate registers; the remaining configurations
; show no further VALU work and pass the loaded dword in v0 as-is.
; The assertions below are autogenerated (see the header of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2396define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 {
2397; VI-LABEL: test_call_external_void_func_v2i16:
2398; VI:       ; %bb.0:
2399; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2400; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2401; VI-NEXT:    s_mov_b32 s38, -1
2402; VI-NEXT:    s_mov_b32 s39, 0xe80000
2403; VI-NEXT:    s_add_u32 s36, s36, s3
2404; VI-NEXT:    s_mov_b32 s3, 0xf000
2405; VI-NEXT:    s_mov_b32 s2, -1
2406; VI-NEXT:    buffer_load_dword v0, off, s[0:3], 0
2407; VI-NEXT:    s_addc_u32 s37, s37, 0
2408; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2409; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2410; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2411; VI-NEXT:    s_mov_b32 s32, 0
2412; VI-NEXT:    s_getpc_b64 s[4:5]
2413; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2414; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2415; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2416; VI-NEXT:    s_endpgm
2417;
2418; CI-LABEL: test_call_external_void_func_v2i16:
2419; CI:       ; %bb.0:
2420; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2421; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2422; CI-NEXT:    s_mov_b32 s38, -1
2423; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2424; CI-NEXT:    s_add_u32 s36, s36, s3
2425; CI-NEXT:    s_mov_b32 s3, 0xf000
2426; CI-NEXT:    s_mov_b32 s2, -1
2427; CI-NEXT:    buffer_load_dword v0, off, s[0:3], 0
2428; CI-NEXT:    s_addc_u32 s37, s37, 0
2429; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2430; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2431; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2432; CI-NEXT:    s_mov_b32 s32, 0
2433; CI-NEXT:    s_getpc_b64 s[4:5]
2434; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2435; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2436; CI-NEXT:    s_waitcnt vmcnt(0)
2437; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
2438; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2439; CI-NEXT:    s_endpgm
2440;
2441; GFX9-LABEL: test_call_external_void_func_v2i16:
2442; GFX9:       ; %bb.0:
2443; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2444; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2445; GFX9-NEXT:    s_mov_b32 s38, -1
2446; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2447; GFX9-NEXT:    s_add_u32 s36, s36, s3
2448; GFX9-NEXT:    s_mov_b32 s3, 0xf000
2449; GFX9-NEXT:    s_mov_b32 s2, -1
2450; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], 0
2451; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2452; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2453; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2454; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2455; GFX9-NEXT:    s_mov_b32 s32, 0
2456; GFX9-NEXT:    s_getpc_b64 s[4:5]
2457; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i16@rel32@lo+4
2458; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i16@rel32@hi+12
2459; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2460; GFX9-NEXT:    s_endpgm
2461;
2462; GFX11-LABEL: test_call_external_void_func_v2i16:
2463; GFX11:       ; %bb.0:
2464; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2465; GFX11-NEXT:    s_mov_b32 s2, -1
2466; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2467; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
2468; GFX11-NEXT:    s_mov_b32 s32, 0
2469; GFX11-NEXT:    s_getpc_b64 s[2:3]
2470; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2i16@rel32@lo+4
2471; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2i16@rel32@hi+12
2472; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2473; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2474; GFX11-NEXT:    s_endpgm
2475;
2476; HSA-LABEL: test_call_external_void_func_v2i16:
2477; HSA:       ; %bb.0:
2478; HSA-NEXT:    s_add_i32 s6, s6, s9
2479; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2480; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2481; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
2482; HSA-NEXT:    s_mov_b32 s6, -1
2483; HSA-NEXT:    buffer_load_dword v0, off, s[4:7], 0
2484; HSA-NEXT:    s_add_u32 s0, s0, s9
2485; HSA-NEXT:    s_addc_u32 s1, s1, 0
2486; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2487; HSA-NEXT:    s_mov_b32 s32, 0
2488; HSA-NEXT:    s_getpc_b64 s[8:9]
2489; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v2i16@rel32@lo+4
2490; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v2i16@rel32@hi+12
2491; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2492; HSA-NEXT:    s_endpgm
; The pointer is undef; presumably only the non-constant origin of the value
; matters here, not the address it is read from (reviewer assumption).
2493  %val = load <2 x i16>, ptr addrspace(1) undef
2494  call void @external_void_func_v2i16(<2 x i16> %val)
2495  ret void
2496}
2497
; Kernel that loads a <3 x i16> through an undef addrspace(1) pointer and
; passes it to external_void_func_v3i16. Every configuration expects one
; 64-bit buffer load. The hawaii expectations load into v[2:3] and then
; rearrange the data into three registers: v0 is a copy of the first dword,
; v1 is produced by v_alignbit_b32 with a shift of 16 over the two dwords, and
; v2 is a copy of the second dword. The remaining configurations show no
; further VALU work and pass the loaded pair v[0:1] as-is.
; The assertions below are autogenerated (see the header of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2498define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 {
2499; VI-LABEL: test_call_external_void_func_v3i16:
2500; VI:       ; %bb.0:
2501; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2502; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2503; VI-NEXT:    s_mov_b32 s38, -1
2504; VI-NEXT:    s_mov_b32 s39, 0xe80000
2505; VI-NEXT:    s_add_u32 s36, s36, s3
2506; VI-NEXT:    s_mov_b32 s3, 0xf000
2507; VI-NEXT:    s_mov_b32 s2, -1
2508; VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2509; VI-NEXT:    s_addc_u32 s37, s37, 0
2510; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2511; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2512; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2513; VI-NEXT:    s_mov_b32 s32, 0
2514; VI-NEXT:    s_getpc_b64 s[4:5]
2515; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2516; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2517; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2518; VI-NEXT:    s_endpgm
2519;
2520; CI-LABEL: test_call_external_void_func_v3i16:
2521; CI:       ; %bb.0:
2522; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2523; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2524; CI-NEXT:    s_mov_b32 s38, -1
2525; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2526; CI-NEXT:    s_add_u32 s36, s36, s3
2527; CI-NEXT:    s_mov_b32 s3, 0xf000
2528; CI-NEXT:    s_mov_b32 s2, -1
2529; CI-NEXT:    buffer_load_dwordx2 v[2:3], off, s[0:3], 0
2530; CI-NEXT:    s_addc_u32 s37, s37, 0
2531; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2532; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2533; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2534; CI-NEXT:    s_mov_b32 s32, 0
2535; CI-NEXT:    s_getpc_b64 s[4:5]
2536; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2537; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2538; CI-NEXT:    s_waitcnt vmcnt(0)
2539; CI-NEXT:    v_alignbit_b32 v1, v3, v2, 16
2540; CI-NEXT:    v_mov_b32_e32 v0, v2
2541; CI-NEXT:    v_mov_b32_e32 v2, v3
2542; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2543; CI-NEXT:    s_endpgm
2544;
2545; GFX9-LABEL: test_call_external_void_func_v3i16:
2546; GFX9:       ; %bb.0:
2547; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2548; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2549; GFX9-NEXT:    s_mov_b32 s38, -1
2550; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2551; GFX9-NEXT:    s_add_u32 s36, s36, s3
2552; GFX9-NEXT:    s_mov_b32 s3, 0xf000
2553; GFX9-NEXT:    s_mov_b32 s2, -1
2554; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2555; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2556; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2557; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2558; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2559; GFX9-NEXT:    s_mov_b32 s32, 0
2560; GFX9-NEXT:    s_getpc_b64 s[4:5]
2561; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2562; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2563; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2564; GFX9-NEXT:    s_endpgm
2565;
2566; GFX11-LABEL: test_call_external_void_func_v3i16:
2567; GFX11:       ; %bb.0:
2568; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2569; GFX11-NEXT:    s_mov_b32 s2, -1
2570; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2571; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
2572; GFX11-NEXT:    s_mov_b32 s32, 0
2573; GFX11-NEXT:    s_getpc_b64 s[2:3]
2574; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v3i16@rel32@lo+4
2575; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12
2576; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2577; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2578; GFX11-NEXT:    s_endpgm
2579;
2580; HSA-LABEL: test_call_external_void_func_v3i16:
2581; HSA:       ; %bb.0:
2582; HSA-NEXT:    s_add_i32 s6, s6, s9
2583; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2584; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2585; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
2586; HSA-NEXT:    s_mov_b32 s6, -1
2587; HSA-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2588; HSA-NEXT:    s_add_u32 s0, s0, s9
2589; HSA-NEXT:    s_addc_u32 s1, s1, 0
2590; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2591; HSA-NEXT:    s_mov_b32 s32, 0
2592; HSA-NEXT:    s_getpc_b64 s[8:9]
2593; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4
2594; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12
2595; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2596; HSA-NEXT:    s_endpgm
; The pointer is undef; presumably only the non-constant origin of the value
; matters here, not the address it is read from (reviewer assumption).
2597  %val = load <3 x i16>, ptr addrspace(1) undef
2598  call void @external_void_func_v3i16(<3 x i16> %val)
2599  ret void
2600}
2601
; Kernel that loads a <3 x half> through an undef addrspace(1) pointer and
; passes it to external_void_func_v3f16. Every configuration expects one
; 64-bit buffer load. The hawaii expectations load into v[1:2] and then emit
; three v_cvt_f32_f16 conversions (plus a 16-bit right shift to reach the
; second element), leaving one converted value in each of v0, v1 and v2. The
; remaining configurations show no conversion and pass the loaded pair v[0:1]
; as-is.
; The assertions below are autogenerated (see the header of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2602define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 {
2603; VI-LABEL: test_call_external_void_func_v3f16:
2604; VI:       ; %bb.0:
2605; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2606; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2607; VI-NEXT:    s_mov_b32 s38, -1
2608; VI-NEXT:    s_mov_b32 s39, 0xe80000
2609; VI-NEXT:    s_add_u32 s36, s36, s3
2610; VI-NEXT:    s_mov_b32 s3, 0xf000
2611; VI-NEXT:    s_mov_b32 s2, -1
2612; VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2613; VI-NEXT:    s_addc_u32 s37, s37, 0
2614; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2615; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2616; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2617; VI-NEXT:    s_mov_b32 s32, 0
2618; VI-NEXT:    s_getpc_b64 s[4:5]
2619; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2620; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2621; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2622; VI-NEXT:    s_endpgm
2623;
2624; CI-LABEL: test_call_external_void_func_v3f16:
2625; CI:       ; %bb.0:
2626; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2627; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2628; CI-NEXT:    s_mov_b32 s38, -1
2629; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2630; CI-NEXT:    s_add_u32 s36, s36, s3
2631; CI-NEXT:    s_mov_b32 s3, 0xf000
2632; CI-NEXT:    s_mov_b32 s2, -1
2633; CI-NEXT:    buffer_load_dwordx2 v[1:2], off, s[0:3], 0
2634; CI-NEXT:    s_addc_u32 s37, s37, 0
2635; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2636; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2637; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2638; CI-NEXT:    s_mov_b32 s32, 0
2639; CI-NEXT:    s_getpc_b64 s[4:5]
2640; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2641; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2642; CI-NEXT:    s_waitcnt vmcnt(0)
2643; CI-NEXT:    v_cvt_f32_f16_e32 v0, v1
2644; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
2645; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
2646; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2647; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2648; CI-NEXT:    s_endpgm
2649;
2650; GFX9-LABEL: test_call_external_void_func_v3f16:
2651; GFX9:       ; %bb.0:
2652; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2653; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2654; GFX9-NEXT:    s_mov_b32 s38, -1
2655; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2656; GFX9-NEXT:    s_add_u32 s36, s36, s3
2657; GFX9-NEXT:    s_mov_b32 s3, 0xf000
2658; GFX9-NEXT:    s_mov_b32 s2, -1
2659; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2660; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2661; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2662; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2663; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2664; GFX9-NEXT:    s_mov_b32 s32, 0
2665; GFX9-NEXT:    s_getpc_b64 s[4:5]
2666; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2667; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2668; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2669; GFX9-NEXT:    s_endpgm
2670;
2671; GFX11-LABEL: test_call_external_void_func_v3f16:
2672; GFX11:       ; %bb.0:
2673; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2674; GFX11-NEXT:    s_mov_b32 s2, -1
2675; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2676; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
2677; GFX11-NEXT:    s_mov_b32 s32, 0
2678; GFX11-NEXT:    s_getpc_b64 s[2:3]
2679; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4
2680; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12
2681; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2682; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2683; GFX11-NEXT:    s_endpgm
2684;
2685; HSA-LABEL: test_call_external_void_func_v3f16:
2686; HSA:       ; %bb.0:
2687; HSA-NEXT:    s_add_i32 s6, s6, s9
2688; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2689; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2690; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
2691; HSA-NEXT:    s_mov_b32 s6, -1
2692; HSA-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2693; HSA-NEXT:    s_add_u32 s0, s0, s9
2694; HSA-NEXT:    s_addc_u32 s1, s1, 0
2695; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2696; HSA-NEXT:    s_mov_b32 s32, 0
2697; HSA-NEXT:    s_getpc_b64 s[8:9]
2698; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4
2699; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v3f16@rel32@hi+12
2700; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2701; HSA-NEXT:    s_endpgm
; The pointer is undef; presumably only the non-constant origin of the value
; matters here, not the address it is read from (reviewer assumption).
2702  %val = load <3 x half>, ptr addrspace(1) undef
2703  call void @external_void_func_v3f16(<3 x half> %val)
2704  ret void
2705}
2706
; Kernel that calls external_void_func_v3i16 with the constant vector
; <1, 2, 3>. Most configurations expect the first two elements packed into one
; dword, 0x20001 (element 1 in the upper 16 bits, element 0 in the lower), in
; v0 and the third element in v1. The hawaii expectations instead place each
; element in its own register (v0 = 1, v1 = 2, v2 = 3).
; The assertions below are autogenerated (see the header of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2707define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 {
2708; VI-LABEL: test_call_external_void_func_v3i16_imm:
2709; VI:       ; %bb.0:
2710; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2711; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2712; VI-NEXT:    s_mov_b32 s38, -1
2713; VI-NEXT:    s_mov_b32 s39, 0xe80000
2714; VI-NEXT:    s_add_u32 s36, s36, s3
2715; VI-NEXT:    s_addc_u32 s37, s37, 0
2716; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2717; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2718; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2719; VI-NEXT:    v_mov_b32_e32 v0, 0x20001
2720; VI-NEXT:    v_mov_b32_e32 v1, 3
2721; VI-NEXT:    s_mov_b32 s32, 0
2722; VI-NEXT:    s_getpc_b64 s[4:5]
2723; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2724; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2725; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2726; VI-NEXT:    s_endpgm
2727;
2728; CI-LABEL: test_call_external_void_func_v3i16_imm:
2729; CI:       ; %bb.0:
2730; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2731; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2732; CI-NEXT:    s_mov_b32 s38, -1
2733; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2734; CI-NEXT:    s_add_u32 s36, s36, s3
2735; CI-NEXT:    s_addc_u32 s37, s37, 0
2736; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2737; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2738; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2739; CI-NEXT:    v_mov_b32_e32 v0, 1
2740; CI-NEXT:    v_mov_b32_e32 v1, 2
2741; CI-NEXT:    v_mov_b32_e32 v2, 3
2742; CI-NEXT:    s_mov_b32 s32, 0
2743; CI-NEXT:    s_getpc_b64 s[4:5]
2744; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2745; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2746; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2747; CI-NEXT:    s_endpgm
2748;
2749; GFX9-LABEL: test_call_external_void_func_v3i16_imm:
2750; GFX9:       ; %bb.0:
2751; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2752; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2753; GFX9-NEXT:    s_mov_b32 s38, -1
2754; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2755; GFX9-NEXT:    s_add_u32 s36, s36, s3
2756; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2757; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2758; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2759; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2760; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
2761; GFX9-NEXT:    v_mov_b32_e32 v1, 3
2762; GFX9-NEXT:    s_mov_b32 s32, 0
2763; GFX9-NEXT:    s_getpc_b64 s[4:5]
2764; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i16@rel32@lo+4
2765; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i16@rel32@hi+12
2766; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2767; GFX9-NEXT:    s_endpgm
2768;
2769; GFX11-LABEL: test_call_external_void_func_v3i16_imm:
2770; GFX11:       ; %bb.0:
2771; GFX11-NEXT:    v_dual_mov_b32 v0, 0x20001 :: v_dual_mov_b32 v1, 3
2772; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2773; GFX11-NEXT:    s_mov_b32 s32, 0
2774; GFX11-NEXT:    s_getpc_b64 s[2:3]
2775; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v3i16@rel32@lo+4
2776; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v3i16@rel32@hi+12
2777; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2778; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2779; GFX11-NEXT:    s_endpgm
2780;
2781; HSA-LABEL: test_call_external_void_func_v3i16_imm:
2782; HSA:       ; %bb.0:
2783; HSA-NEXT:    s_add_i32 s6, s6, s9
2784; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2785; HSA-NEXT:    s_add_u32 s0, s0, s9
2786; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2787; HSA-NEXT:    s_addc_u32 s1, s1, 0
2788; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2789; HSA-NEXT:    v_mov_b32_e32 v0, 0x20001
2790; HSA-NEXT:    v_mov_b32_e32 v1, 3
2791; HSA-NEXT:    s_mov_b32 s32, 0
2792; HSA-NEXT:    s_getpc_b64 s[8:9]
2793; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v3i16@rel32@lo+4
2794; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v3i16@rel32@hi+12
2795; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2796; HSA-NEXT:    s_endpgm
; All three elements are compile-time constants, so the expectations above
; contain only register moves (no memory load) ahead of the call.
2797  call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>)
2798  ret void
2799}
2800
; Kernel that calls external_void_func_v3f16 with the constant vector
; <1.0, 2.0, 4.0>. Most configurations expect the half-precision bit patterns
; packed two per dword: 0x40003c00 in v0 (half 2.0 = 0x4000 in the upper 16
; bits, half 1.0 = 0x3c00 in the lower) and 0x4400 (half 4.0) in v1. The hawaii
; expectations instead move the inline constants 1.0, 2.0 and 4.0 into v0, v1
; and v2, one register per element. The gfx1100 expectations use two separate
; v_mov_b32 instructions here rather than a v_dual_mov_b32 pair.
; The assertions below are autogenerated (see the header of this file), so
; regenerate them with utils/update_llc_test_checks.py rather than hand-editing.
2801define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 {
2802; VI-LABEL: test_call_external_void_func_v3f16_imm:
2803; VI:       ; %bb.0:
2804; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2805; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2806; VI-NEXT:    s_mov_b32 s38, -1
2807; VI-NEXT:    s_mov_b32 s39, 0xe80000
2808; VI-NEXT:    s_add_u32 s36, s36, s3
2809; VI-NEXT:    s_addc_u32 s37, s37, 0
2810; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2811; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2812; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2813; VI-NEXT:    v_mov_b32_e32 v0, 0x40003c00
2814; VI-NEXT:    v_mov_b32_e32 v1, 0x4400
2815; VI-NEXT:    s_mov_b32 s32, 0
2816; VI-NEXT:    s_getpc_b64 s[4:5]
2817; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2818; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2819; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2820; VI-NEXT:    s_endpgm
2821;
2822; CI-LABEL: test_call_external_void_func_v3f16_imm:
2823; CI:       ; %bb.0:
2824; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2825; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2826; CI-NEXT:    s_mov_b32 s38, -1
2827; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2828; CI-NEXT:    s_add_u32 s36, s36, s3
2829; CI-NEXT:    s_addc_u32 s37, s37, 0
2830; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2831; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2832; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2833; CI-NEXT:    v_mov_b32_e32 v0, 1.0
2834; CI-NEXT:    v_mov_b32_e32 v1, 2.0
2835; CI-NEXT:    v_mov_b32_e32 v2, 4.0
2836; CI-NEXT:    s_mov_b32 s32, 0
2837; CI-NEXT:    s_getpc_b64 s[4:5]
2838; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2839; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2840; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2841; CI-NEXT:    s_endpgm
2842;
2843; GFX9-LABEL: test_call_external_void_func_v3f16_imm:
2844; GFX9:       ; %bb.0:
2845; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2846; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2847; GFX9-NEXT:    s_mov_b32 s38, -1
2848; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2849; GFX9-NEXT:    s_add_u32 s36, s36, s3
2850; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2851; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2852; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2853; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2854; GFX9-NEXT:    v_mov_b32_e32 v0, 0x40003c00
2855; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4400
2856; GFX9-NEXT:    s_mov_b32 s32, 0
2857; GFX9-NEXT:    s_getpc_b64 s[4:5]
2858; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3f16@rel32@lo+4
2859; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3f16@rel32@hi+12
2860; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2861; GFX9-NEXT:    s_endpgm
2862;
2863; GFX11-LABEL: test_call_external_void_func_v3f16_imm:
2864; GFX11:       ; %bb.0:
2865; GFX11-NEXT:    v_mov_b32_e32 v0, 0x40003c00
2866; GFX11-NEXT:    v_mov_b32_e32 v1, 0x4400
2867; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2868; GFX11-NEXT:    s_mov_b32 s32, 0
2869; GFX11-NEXT:    s_getpc_b64 s[2:3]
2870; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v3f16@rel32@lo+4
2871; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v3f16@rel32@hi+12
2872; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2873; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2874; GFX11-NEXT:    s_endpgm
2875;
2876; HSA-LABEL: test_call_external_void_func_v3f16_imm:
2877; HSA:       ; %bb.0:
2878; HSA-NEXT:    s_add_i32 s6, s6, s9
2879; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2880; HSA-NEXT:    s_add_u32 s0, s0, s9
2881; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2882; HSA-NEXT:    s_addc_u32 s1, s1, 0
2883; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2884; HSA-NEXT:    v_mov_b32_e32 v0, 0x40003c00
2885; HSA-NEXT:    v_mov_b32_e32 v1, 0x4400
2886; HSA-NEXT:    s_mov_b32 s32, 0
2887; HSA-NEXT:    s_getpc_b64 s[8:9]
2888; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v3f16@rel32@lo+4
2889; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v3f16@rel32@hi+12
2890; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2891; HSA-NEXT:    s_endpgm
; All three elements are compile-time constants, so the expectations above
; contain only register moves (no memory load) ahead of the call.
2892  call void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>)
2893  ret void
2894}
2895
; Kernel that loads a <4 x i16> from an undef addrspace(1) pointer and passes
; it by value to @external_void_func_v4i16. The autogenerated checks pin the
; emitted call sequence for every FileCheck prefix used by the RUN lines
; (VI, CI, GFX9, GFX11, HSA).
; In the checks below, only the CI sequence touches the loaded v[0:1] before
; the call: it waits on vmcnt, shifts each dword right by 16 and moves the
; pieces so that v1, v2 and v3 are rewritten. All other prefixes branch to the
; callee with the loaded v[0:1] unchanged.
; NOTE(review): presumably CI passes each i16 element in its own VGPR while the
; other targets keep the elements packed two per VGPR -- confirm.
2896define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 {
2897; VI-LABEL: test_call_external_void_func_v4i16:
2898; VI:       ; %bb.0:
2899; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2900; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2901; VI-NEXT:    s_mov_b32 s38, -1
2902; VI-NEXT:    s_mov_b32 s39, 0xe80000
2903; VI-NEXT:    s_add_u32 s36, s36, s3
2904; VI-NEXT:    s_mov_b32 s3, 0xf000
2905; VI-NEXT:    s_mov_b32 s2, -1
2906; VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2907; VI-NEXT:    s_addc_u32 s37, s37, 0
2908; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2909; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2910; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2911; VI-NEXT:    s_mov_b32 s32, 0
2912; VI-NEXT:    s_getpc_b64 s[4:5]
2913; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2914; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2915; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2916; VI-NEXT:    s_endpgm
2917;
2918; CI-LABEL: test_call_external_void_func_v4i16:
2919; CI:       ; %bb.0:
2920; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2921; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2922; CI-NEXT:    s_mov_b32 s38, -1
2923; CI-NEXT:    s_mov_b32 s39, 0xe8f000
2924; CI-NEXT:    s_add_u32 s36, s36, s3
2925; CI-NEXT:    s_mov_b32 s3, 0xf000
2926; CI-NEXT:    s_mov_b32 s2, -1
2927; CI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2928; CI-NEXT:    s_addc_u32 s37, s37, 0
2929; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
2930; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
2931; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
2932; CI-NEXT:    s_mov_b32 s32, 0
2933; CI-NEXT:    s_getpc_b64 s[4:5]
2934; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2935; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2936; CI-NEXT:    s_waitcnt vmcnt(0)
2937; CI-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
2938; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
2939; CI-NEXT:    v_mov_b32_e32 v2, v1
2940; CI-NEXT:    v_mov_b32_e32 v1, v4
2941; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2942; CI-NEXT:    s_endpgm
2943;
2944; GFX9-LABEL: test_call_external_void_func_v4i16:
2945; GFX9:       ; %bb.0:
2946; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
2947; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
2948; GFX9-NEXT:    s_mov_b32 s38, -1
2949; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
2950; GFX9-NEXT:    s_add_u32 s36, s36, s3
2951; GFX9-NEXT:    s_mov_b32 s3, 0xf000
2952; GFX9-NEXT:    s_mov_b32 s2, -1
2953; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
2954; GFX9-NEXT:    s_addc_u32 s37, s37, 0
2955; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
2956; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
2957; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
2958; GFX9-NEXT:    s_mov_b32 s32, 0
2959; GFX9-NEXT:    s_getpc_b64 s[4:5]
2960; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
2961; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
2962; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
2963; GFX9-NEXT:    s_endpgm
2964;
2965; GFX11-LABEL: test_call_external_void_func_v4i16:
2966; GFX11:       ; %bb.0:
2967; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2968; GFX11-NEXT:    s_mov_b32 s2, -1
2969; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
2970; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
2971; GFX11-NEXT:    s_mov_b32 s32, 0
2972; GFX11-NEXT:    s_getpc_b64 s[2:3]
2973; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v4i16@rel32@lo+4
2974; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12
2975; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2976; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
2977; GFX11-NEXT:    s_endpgm
2978;
2979; HSA-LABEL: test_call_external_void_func_v4i16:
2980; HSA:       ; %bb.0:
2981; HSA-NEXT:    s_add_i32 s6, s6, s9
2982; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
2983; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
2984; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
2985; HSA-NEXT:    s_mov_b32 s6, -1
2986; HSA-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2987; HSA-NEXT:    s_add_u32 s0, s0, s9
2988; HSA-NEXT:    s_addc_u32 s1, s1, 0
2989; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
2990; HSA-NEXT:    s_mov_b32 s32, 0
2991; HSA-NEXT:    s_getpc_b64 s[8:9]
2992; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4
2993; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12
2994; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
2995; HSA-NEXT:    s_endpgm
2996  %val = load <4 x i16>, ptr addrspace(1) undef
2997  call void @external_void_func_v4i16(<4 x i16> %val)
2998  ret void
2999}
3000
; Kernel that calls @external_void_func_v4i16 with the constant vector
; <i16 1, i16 2, i16 3, i16 4>. The autogenerated checks pin the emitted call
; sequence for every FileCheck prefix used by the RUN lines
; (VI, CI, GFX9, GFX11, HSA).
; In the checks below, the VI, GFX9, GFX11 and HSA sequences materialize the
; argument as two 32-bit immediates (0x20001 in v0, 0x40003 in v1), whereas the
; CI sequence writes the four values 1, 2, 3, 4 into v0-v3.
; NOTE(review): 0x20001 / 0x40003 look like the elements packed two per dword
; (low half first) -- confirm against the calling-convention documentation.
3001define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 {
3002; VI-LABEL: test_call_external_void_func_v4i16_imm:
3003; VI:       ; %bb.0:
3004; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3005; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3006; VI-NEXT:    s_mov_b32 s38, -1
3007; VI-NEXT:    s_mov_b32 s39, 0xe80000
3008; VI-NEXT:    s_add_u32 s36, s36, s3
3009; VI-NEXT:    s_addc_u32 s37, s37, 0
3010; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3011; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3012; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3013; VI-NEXT:    v_mov_b32_e32 v0, 0x20001
3014; VI-NEXT:    v_mov_b32_e32 v1, 0x40003
3015; VI-NEXT:    s_mov_b32 s32, 0
3016; VI-NEXT:    s_getpc_b64 s[4:5]
3017; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
3018; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
3019; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3020; VI-NEXT:    s_endpgm
3021;
3022; CI-LABEL: test_call_external_void_func_v4i16_imm:
3023; CI:       ; %bb.0:
3024; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3025; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3026; CI-NEXT:    s_mov_b32 s38, -1
3027; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3028; CI-NEXT:    s_add_u32 s36, s36, s3
3029; CI-NEXT:    s_addc_u32 s37, s37, 0
3030; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3031; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3032; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3033; CI-NEXT:    v_mov_b32_e32 v0, 1
3034; CI-NEXT:    v_mov_b32_e32 v1, 2
3035; CI-NEXT:    v_mov_b32_e32 v2, 3
3036; CI-NEXT:    v_mov_b32_e32 v3, 4
3037; CI-NEXT:    s_mov_b32 s32, 0
3038; CI-NEXT:    s_getpc_b64 s[4:5]
3039; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
3040; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
3041; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3042; CI-NEXT:    s_endpgm
3043;
3044; GFX9-LABEL: test_call_external_void_func_v4i16_imm:
3045; GFX9:       ; %bb.0:
3046; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3047; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3048; GFX9-NEXT:    s_mov_b32 s38, -1
3049; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3050; GFX9-NEXT:    s_add_u32 s36, s36, s3
3051; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3052; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3053; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3054; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3055; GFX9-NEXT:    v_mov_b32_e32 v0, 0x20001
3056; GFX9-NEXT:    v_mov_b32_e32 v1, 0x40003
3057; GFX9-NEXT:    s_mov_b32 s32, 0
3058; GFX9-NEXT:    s_getpc_b64 s[4:5]
3059; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i16@rel32@lo+4
3060; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i16@rel32@hi+12
3061; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3062; GFX9-NEXT:    s_endpgm
3063;
3064; GFX11-LABEL: test_call_external_void_func_v4i16_imm:
3065; GFX11:       ; %bb.0:
3066; GFX11-NEXT:    v_mov_b32_e32 v0, 0x20001
3067; GFX11-NEXT:    v_mov_b32_e32 v1, 0x40003
3068; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3069; GFX11-NEXT:    s_mov_b32 s32, 0
3070; GFX11-NEXT:    s_getpc_b64 s[2:3]
3071; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v4i16@rel32@lo+4
3072; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v4i16@rel32@hi+12
3073; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3074; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3075; GFX11-NEXT:    s_endpgm
3076;
3077; HSA-LABEL: test_call_external_void_func_v4i16_imm:
3078; HSA:       ; %bb.0:
3079; HSA-NEXT:    s_add_i32 s6, s6, s9
3080; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
3081; HSA-NEXT:    s_add_u32 s0, s0, s9
3082; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
3083; HSA-NEXT:    s_addc_u32 s1, s1, 0
3084; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
3085; HSA-NEXT:    v_mov_b32_e32 v0, 0x20001
3086; HSA-NEXT:    v_mov_b32_e32 v1, 0x40003
3087; HSA-NEXT:    s_mov_b32 s32, 0
3088; HSA-NEXT:    s_getpc_b64 s[8:9]
3089; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v4i16@rel32@lo+4
3090; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v4i16@rel32@hi+12
3091; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
3092; HSA-NEXT:    s_endpgm
3093  call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>)
3094  ret void
3095}
3096
; Kernel that loads a <2 x half> from an undef addrspace(1) pointer and passes
; it by value to @external_void_func_v2f16. The autogenerated checks pin the
; emitted call sequence for every FileCheck prefix used by the RUN lines
; (VI, CI, GFX9, GFX11, HSA).
; In the checks below, the VI, GFX9, GFX11 and HSA sequences load one dword
; into v0 and call without modifying it. The CI sequence loads into v1, waits
; on vmcnt, then produces v0 and v1 with v_cvt_f32_f16 (the second one after a
; 16-bit right shift) before the call.
; NOTE(review): presumably CI passes each half element extended to f32 in its
; own VGPR -- confirm.
3097define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 {
3098; VI-LABEL: test_call_external_void_func_v2f16:
3099; VI:       ; %bb.0:
3100; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3101; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3102; VI-NEXT:    s_mov_b32 s38, -1
3103; VI-NEXT:    s_mov_b32 s39, 0xe80000
3104; VI-NEXT:    s_add_u32 s36, s36, s3
3105; VI-NEXT:    s_mov_b32 s3, 0xf000
3106; VI-NEXT:    s_mov_b32 s2, -1
3107; VI-NEXT:    buffer_load_dword v0, off, s[0:3], 0
3108; VI-NEXT:    s_addc_u32 s37, s37, 0
3109; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3110; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3111; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3112; VI-NEXT:    s_mov_b32 s32, 0
3113; VI-NEXT:    s_getpc_b64 s[4:5]
3114; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
3115; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
3116; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3117; VI-NEXT:    s_endpgm
3118;
3119; CI-LABEL: test_call_external_void_func_v2f16:
3120; CI:       ; %bb.0:
3121; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3122; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3123; CI-NEXT:    s_mov_b32 s38, -1
3124; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3125; CI-NEXT:    s_add_u32 s36, s36, s3
3126; CI-NEXT:    s_mov_b32 s3, 0xf000
3127; CI-NEXT:    s_mov_b32 s2, -1
3128; CI-NEXT:    buffer_load_dword v1, off, s[0:3], 0
3129; CI-NEXT:    s_addc_u32 s37, s37, 0
3130; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3131; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3132; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3133; CI-NEXT:    s_mov_b32 s32, 0
3134; CI-NEXT:    s_getpc_b64 s[4:5]
3135; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
3136; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
3137; CI-NEXT:    s_waitcnt vmcnt(0)
3138; CI-NEXT:    v_cvt_f32_f16_e32 v0, v1
3139; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3140; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
3141; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3142; CI-NEXT:    s_endpgm
3143;
3144; GFX9-LABEL: test_call_external_void_func_v2f16:
3145; GFX9:       ; %bb.0:
3146; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3147; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3148; GFX9-NEXT:    s_mov_b32 s38, -1
3149; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3150; GFX9-NEXT:    s_add_u32 s36, s36, s3
3151; GFX9-NEXT:    s_mov_b32 s3, 0xf000
3152; GFX9-NEXT:    s_mov_b32 s2, -1
3153; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], 0
3154; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3155; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3156; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3157; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3158; GFX9-NEXT:    s_mov_b32 s32, 0
3159; GFX9-NEXT:    s_getpc_b64 s[4:5]
3160; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2f16@rel32@lo+4
3161; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2f16@rel32@hi+12
3162; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3163; GFX9-NEXT:    s_endpgm
3164;
3165; GFX11-LABEL: test_call_external_void_func_v2f16:
3166; GFX11:       ; %bb.0:
3167; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
3168; GFX11-NEXT:    s_mov_b32 s2, -1
3169; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3170; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
3171; GFX11-NEXT:    s_mov_b32 s32, 0
3172; GFX11-NEXT:    s_getpc_b64 s[2:3]
3173; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2f16@rel32@lo+4
3174; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2f16@rel32@hi+12
3175; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3176; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3177; GFX11-NEXT:    s_endpgm
3178;
3179; HSA-LABEL: test_call_external_void_func_v2f16:
3180; HSA:       ; %bb.0:
3181; HSA-NEXT:    s_add_i32 s6, s6, s9
3182; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
3183; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
3184; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
3185; HSA-NEXT:    s_mov_b32 s6, -1
3186; HSA-NEXT:    buffer_load_dword v0, off, s[4:7], 0
3187; HSA-NEXT:    s_add_u32 s0, s0, s9
3188; HSA-NEXT:    s_addc_u32 s1, s1, 0
3189; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
3190; HSA-NEXT:    s_mov_b32 s32, 0
3191; HSA-NEXT:    s_getpc_b64 s[8:9]
3192; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v2f16@rel32@lo+4
3193; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v2f16@rel32@hi+12
3194; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
3195; HSA-NEXT:    s_endpgm
3196  %val = load <2 x half>, ptr addrspace(1) undef
3197  call void @external_void_func_v2f16(<2 x half> %val)
3198  ret void
3199}
3200
; Kernel that loads a <2 x i32> from an undef addrspace(1) pointer and passes
; it by value to @external_void_func_v2i32. The autogenerated checks pin the
; emitted call sequence for every FileCheck prefix used by the RUN lines
; (VI, CI, GFX9, GFX11, HSA).
; In the checks below, every prefix loads the value straight into v[0:1] with a
; single 64-bit buffer load and branches to the callee without any further
; VALU instruction on those registers.
3201define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 {
3202; VI-LABEL: test_call_external_void_func_v2i32:
3203; VI:       ; %bb.0:
3204; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3205; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3206; VI-NEXT:    s_mov_b32 s38, -1
3207; VI-NEXT:    s_mov_b32 s39, 0xe80000
3208; VI-NEXT:    s_add_u32 s36, s36, s3
3209; VI-NEXT:    s_mov_b32 s3, 0xf000
3210; VI-NEXT:    s_mov_b32 s2, -1
3211; VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
3212; VI-NEXT:    s_addc_u32 s37, s37, 0
3213; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3214; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3215; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3216; VI-NEXT:    s_mov_b32 s32, 0
3217; VI-NEXT:    s_getpc_b64 s[4:5]
3218; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3219; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3220; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3221; VI-NEXT:    s_endpgm
3222;
3223; CI-LABEL: test_call_external_void_func_v2i32:
3224; CI:       ; %bb.0:
3225; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3226; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3227; CI-NEXT:    s_mov_b32 s38, -1
3228; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3229; CI-NEXT:    s_add_u32 s36, s36, s3
3230; CI-NEXT:    s_mov_b32 s3, 0xf000
3231; CI-NEXT:    s_mov_b32 s2, -1
3232; CI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
3233; CI-NEXT:    s_addc_u32 s37, s37, 0
3234; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3235; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3236; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3237; CI-NEXT:    s_mov_b32 s32, 0
3238; CI-NEXT:    s_getpc_b64 s[4:5]
3239; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3240; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3241; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3242; CI-NEXT:    s_endpgm
3243;
3244; GFX9-LABEL: test_call_external_void_func_v2i32:
3245; GFX9:       ; %bb.0:
3246; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3247; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3248; GFX9-NEXT:    s_mov_b32 s38, -1
3249; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3250; GFX9-NEXT:    s_add_u32 s36, s36, s3
3251; GFX9-NEXT:    s_mov_b32 s3, 0xf000
3252; GFX9-NEXT:    s_mov_b32 s2, -1
3253; GFX9-NEXT:    buffer_load_dwordx2 v[0:1], off, s[0:3], 0
3254; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3255; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3256; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3257; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3258; GFX9-NEXT:    s_mov_b32 s32, 0
3259; GFX9-NEXT:    s_getpc_b64 s[4:5]
3260; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3261; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3262; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3263; GFX9-NEXT:    s_endpgm
3264;
3265; GFX11-LABEL: test_call_external_void_func_v2i32:
3266; GFX11:       ; %bb.0:
3267; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
3268; GFX11-NEXT:    s_mov_b32 s2, -1
3269; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3270; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
3271; GFX11-NEXT:    s_mov_b32 s32, 0
3272; GFX11-NEXT:    s_getpc_b64 s[2:3]
3273; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4
3274; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12
3275; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3276; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3277; GFX11-NEXT:    s_endpgm
3278;
3279; HSA-LABEL: test_call_external_void_func_v2i32:
3280; HSA:       ; %bb.0:
3281; HSA-NEXT:    s_add_i32 s6, s6, s9
3282; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
3283; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
3284; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
3285; HSA-NEXT:    s_mov_b32 s6, -1
3286; HSA-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
3287; HSA-NEXT:    s_add_u32 s0, s0, s9
3288; HSA-NEXT:    s_addc_u32 s1, s1, 0
3289; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
3290; HSA-NEXT:    s_mov_b32 s32, 0
3291; HSA-NEXT:    s_getpc_b64 s[8:9]
3292; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4
3293; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12
3294; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
3295; HSA-NEXT:    s_endpgm
3296  %val = load <2 x i32>, ptr addrspace(1) undef
3297  call void @external_void_func_v2i32(<2 x i32> %val)
3298  ret void
3299}
3300
; Kernel that calls @external_void_func_v2i32 with the constant vector
; <i32 1, i32 2>. The autogenerated checks pin the emitted call sequence for
; every FileCheck prefix used by the RUN lines (VI, CI, GFX9, GFX11, HSA).
; In the checks below, every prefix materializes 1 in v0 and 2 in v1 before the
; call; the GFX11 sequence does so with one v_dual_mov_b32 instead of two
; separate v_mov_b32 instructions.
3301define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
3302; VI-LABEL: test_call_external_void_func_v2i32_imm:
3303; VI:       ; %bb.0:
3304; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3305; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3306; VI-NEXT:    s_mov_b32 s38, -1
3307; VI-NEXT:    s_mov_b32 s39, 0xe80000
3308; VI-NEXT:    s_add_u32 s36, s36, s3
3309; VI-NEXT:    s_addc_u32 s37, s37, 0
3310; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3311; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3312; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3313; VI-NEXT:    v_mov_b32_e32 v0, 1
3314; VI-NEXT:    v_mov_b32_e32 v1, 2
3315; VI-NEXT:    s_mov_b32 s32, 0
3316; VI-NEXT:    s_getpc_b64 s[4:5]
3317; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3318; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3319; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3320; VI-NEXT:    s_endpgm
3321;
3322; CI-LABEL: test_call_external_void_func_v2i32_imm:
3323; CI:       ; %bb.0:
3324; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3325; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3326; CI-NEXT:    s_mov_b32 s38, -1
3327; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3328; CI-NEXT:    s_add_u32 s36, s36, s3
3329; CI-NEXT:    s_addc_u32 s37, s37, 0
3330; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3331; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3332; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3333; CI-NEXT:    v_mov_b32_e32 v0, 1
3334; CI-NEXT:    v_mov_b32_e32 v1, 2
3335; CI-NEXT:    s_mov_b32 s32, 0
3336; CI-NEXT:    s_getpc_b64 s[4:5]
3337; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3338; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3339; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3340; CI-NEXT:    s_endpgm
3341;
3342; GFX9-LABEL: test_call_external_void_func_v2i32_imm:
3343; GFX9:       ; %bb.0:
3344; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3345; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3346; GFX9-NEXT:    s_mov_b32 s38, -1
3347; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3348; GFX9-NEXT:    s_add_u32 s36, s36, s3
3349; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3350; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3351; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3352; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3353; GFX9-NEXT:    v_mov_b32_e32 v0, 1
3354; GFX9-NEXT:    v_mov_b32_e32 v1, 2
3355; GFX9-NEXT:    s_mov_b32 s32, 0
3356; GFX9-NEXT:    s_getpc_b64 s[4:5]
3357; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v2i32@rel32@lo+4
3358; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v2i32@rel32@hi+12
3359; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3360; GFX9-NEXT:    s_endpgm
3361;
3362; GFX11-LABEL: test_call_external_void_func_v2i32_imm:
3363; GFX11:       ; %bb.0:
3364; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
3365; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3366; GFX11-NEXT:    s_mov_b32 s32, 0
3367; GFX11-NEXT:    s_getpc_b64 s[2:3]
3368; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v2i32@rel32@lo+4
3369; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v2i32@rel32@hi+12
3370; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3371; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3372; GFX11-NEXT:    s_endpgm
3373;
3374; HSA-LABEL: test_call_external_void_func_v2i32_imm:
3375; HSA:       ; %bb.0:
3376; HSA-NEXT:    s_add_i32 s6, s6, s9
3377; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
3378; HSA-NEXT:    s_add_u32 s0, s0, s9
3379; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
3380; HSA-NEXT:    s_addc_u32 s1, s1, 0
3381; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
3382; HSA-NEXT:    v_mov_b32_e32 v0, 1
3383; HSA-NEXT:    v_mov_b32_e32 v1, 2
3384; HSA-NEXT:    s_mov_b32 s32, 0
3385; HSA-NEXT:    s_getpc_b64 s[8:9]
3386; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v2i32@rel32@lo+4
3387; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v2i32@rel32@hi+12
3388; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
3389; HSA-NEXT:    s_endpgm
3390  call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>)
3391  ret void
3392}
3393
; Kernel that calls @external_void_func_v3i32 with the constant vector
; <i32 3, i32 4, i32 5>. The kernel declares one unnamed i32 parameter that the
; body never references. The autogenerated checks pin the emitted call sequence
; for every FileCheck prefix used by the RUN lines (VI, CI, GFX9, GFX11, HSA).
; In the checks below, every prefix materializes 3, 4 and 5 in v0, v1 and v2
; before the call (GFX11 pairs the first two in one v_dual_mov_b32).
; NOTE(review): the scratch setup here adds s5 (s11 for HSA) where the
; argument-less kernels in this file add s3 (s9 for HSA); presumably the extra
; kernel argument shifts the incoming SGPR layout -- confirm.
3394define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
3395; VI-LABEL: test_call_external_void_func_v3i32_imm:
3396; VI:       ; %bb.0:
3397; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3398; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3399; VI-NEXT:    s_mov_b32 s38, -1
3400; VI-NEXT:    s_mov_b32 s39, 0xe80000
3401; VI-NEXT:    s_add_u32 s36, s36, s5
3402; VI-NEXT:    s_addc_u32 s37, s37, 0
3403; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3404; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3405; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3406; VI-NEXT:    v_mov_b32_e32 v0, 3
3407; VI-NEXT:    v_mov_b32_e32 v1, 4
3408; VI-NEXT:    v_mov_b32_e32 v2, 5
3409; VI-NEXT:    s_mov_b32 s32, 0
3410; VI-NEXT:    s_getpc_b64 s[4:5]
3411; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3412; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3413; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3414; VI-NEXT:    s_endpgm
3415;
3416; CI-LABEL: test_call_external_void_func_v3i32_imm:
3417; CI:       ; %bb.0:
3418; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3419; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3420; CI-NEXT:    s_mov_b32 s38, -1
3421; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3422; CI-NEXT:    s_add_u32 s36, s36, s5
3423; CI-NEXT:    s_addc_u32 s37, s37, 0
3424; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3425; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3426; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3427; CI-NEXT:    v_mov_b32_e32 v0, 3
3428; CI-NEXT:    v_mov_b32_e32 v1, 4
3429; CI-NEXT:    v_mov_b32_e32 v2, 5
3430; CI-NEXT:    s_mov_b32 s32, 0
3431; CI-NEXT:    s_getpc_b64 s[4:5]
3432; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3433; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3434; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3435; CI-NEXT:    s_endpgm
3436;
3437; GFX9-LABEL: test_call_external_void_func_v3i32_imm:
3438; GFX9:       ; %bb.0:
3439; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3440; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3441; GFX9-NEXT:    s_mov_b32 s38, -1
3442; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3443; GFX9-NEXT:    s_add_u32 s36, s36, s5
3444; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3445; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3446; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3447; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3448; GFX9-NEXT:    v_mov_b32_e32 v0, 3
3449; GFX9-NEXT:    v_mov_b32_e32 v1, 4
3450; GFX9-NEXT:    v_mov_b32_e32 v2, 5
3451; GFX9-NEXT:    s_mov_b32 s32, 0
3452; GFX9-NEXT:    s_getpc_b64 s[4:5]
3453; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32@rel32@lo+4
3454; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32@rel32@hi+12
3455; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3456; GFX9-NEXT:    s_endpgm
3457;
3458; GFX11-LABEL: test_call_external_void_func_v3i32_imm:
3459; GFX11:       ; %bb.0:
3460; GFX11-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
3461; GFX11-NEXT:    v_mov_b32_e32 v2, 5
3462; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3463; GFX11-NEXT:    s_mov_b32 s32, 0
3464; GFX11-NEXT:    s_getpc_b64 s[2:3]
3465; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v3i32@rel32@lo+4
3466; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v3i32@rel32@hi+12
3467; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3468; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3469; GFX11-NEXT:    s_endpgm
3470;
3471; HSA-LABEL: test_call_external_void_func_v3i32_imm:
3472; HSA:       ; %bb.0:
3473; HSA-NEXT:    s_add_i32 s8, s8, s11
3474; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
3475; HSA-NEXT:    s_add_u32 s0, s0, s11
3476; HSA-NEXT:    s_addc_u32 s1, s1, 0
3477; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
3478; HSA-NEXT:    v_mov_b32_e32 v0, 3
3479; HSA-NEXT:    v_mov_b32_e32 v1, 4
3480; HSA-NEXT:    v_mov_b32_e32 v2, 5
3481; HSA-NEXT:    s_mov_b32 s32, 0
3482; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
3483; HSA-NEXT:    s_getpc_b64 s[8:9]
3484; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v3i32@rel32@lo+4
3485; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v3i32@rel32@hi+12
3486; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
3487; HSA-NEXT:    s_endpgm
3488  call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>)
3489  ret void
3490}
3491
; Kernel that calls @external_void_func_v3i32_i32 with the constant vector
; <i32 3, i32 4, i32 5> followed by the scalar i32 6. The kernel declares one
; unnamed i32 parameter that the body never references. The autogenerated
; checks pin the emitted call sequence for every FileCheck prefix used by the
; RUN lines (VI, CI, GFX9, GFX11, HSA).
; In the checks below, every prefix materializes 3, 4, 5 and 6 in v0-v3 before
; the call, i.e. the scalar lands in v3 directly after the three vector
; elements (GFX11 uses two v_dual_mov_b32 instructions for the four moves).
3492define amdgpu_kernel void @test_call_external_void_func_v3i32_i32(i32) #0 {
3493; VI-LABEL: test_call_external_void_func_v3i32_i32:
3494; VI:       ; %bb.0:
3495; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3496; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3497; VI-NEXT:    s_mov_b32 s38, -1
3498; VI-NEXT:    s_mov_b32 s39, 0xe80000
3499; VI-NEXT:    s_add_u32 s36, s36, s5
3500; VI-NEXT:    s_addc_u32 s37, s37, 0
3501; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3502; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3503; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3504; VI-NEXT:    v_mov_b32_e32 v0, 3
3505; VI-NEXT:    v_mov_b32_e32 v1, 4
3506; VI-NEXT:    v_mov_b32_e32 v2, 5
3507; VI-NEXT:    v_mov_b32_e32 v3, 6
3508; VI-NEXT:    s_mov_b32 s32, 0
3509; VI-NEXT:    s_getpc_b64 s[4:5]
3510; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3511; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3512; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3513; VI-NEXT:    s_endpgm
3514;
3515; CI-LABEL: test_call_external_void_func_v3i32_i32:
3516; CI:       ; %bb.0:
3517; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3518; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3519; CI-NEXT:    s_mov_b32 s38, -1
3520; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3521; CI-NEXT:    s_add_u32 s36, s36, s5
3522; CI-NEXT:    s_addc_u32 s37, s37, 0
3523; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3524; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3525; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3526; CI-NEXT:    v_mov_b32_e32 v0, 3
3527; CI-NEXT:    v_mov_b32_e32 v1, 4
3528; CI-NEXT:    v_mov_b32_e32 v2, 5
3529; CI-NEXT:    v_mov_b32_e32 v3, 6
3530; CI-NEXT:    s_mov_b32 s32, 0
3531; CI-NEXT:    s_getpc_b64 s[4:5]
3532; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3533; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3534; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3535; CI-NEXT:    s_endpgm
3536;
3537; GFX9-LABEL: test_call_external_void_func_v3i32_i32:
3538; GFX9:       ; %bb.0:
3539; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3540; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3541; GFX9-NEXT:    s_mov_b32 s38, -1
3542; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3543; GFX9-NEXT:    s_add_u32 s36, s36, s5
3544; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3545; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3546; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3547; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3548; GFX9-NEXT:    v_mov_b32_e32 v0, 3
3549; GFX9-NEXT:    v_mov_b32_e32 v1, 4
3550; GFX9-NEXT:    v_mov_b32_e32 v2, 5
3551; GFX9-NEXT:    v_mov_b32_e32 v3, 6
3552; GFX9-NEXT:    s_mov_b32 s32, 0
3553; GFX9-NEXT:    s_getpc_b64 s[4:5]
3554; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v3i32_i32@rel32@lo+4
3555; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v3i32_i32@rel32@hi+12
3556; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3557; GFX9-NEXT:    s_endpgm
3558;
3559; GFX11-LABEL: test_call_external_void_func_v3i32_i32:
3560; GFX11:       ; %bb.0:
3561; GFX11-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
3562; GFX11-NEXT:    v_dual_mov_b32 v2, 5 :: v_dual_mov_b32 v3, 6
3563; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3564; GFX11-NEXT:    s_mov_b32 s32, 0
3565; GFX11-NEXT:    s_getpc_b64 s[2:3]
3566; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v3i32_i32@rel32@lo+4
3567; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v3i32_i32@rel32@hi+12
3568; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3569; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3570; GFX11-NEXT:    s_endpgm
3571;
3572; HSA-LABEL: test_call_external_void_func_v3i32_i32:
3573; HSA:       ; %bb.0:
3574; HSA-NEXT:    s_add_i32 s8, s8, s11
3575; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
3576; HSA-NEXT:    s_add_u32 s0, s0, s11
3577; HSA-NEXT:    s_addc_u32 s1, s1, 0
3578; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
3579; HSA-NEXT:    v_mov_b32_e32 v0, 3
3580; HSA-NEXT:    v_mov_b32_e32 v1, 4
3581; HSA-NEXT:    v_mov_b32_e32 v2, 5
3582; HSA-NEXT:    v_mov_b32_e32 v3, 6
3583; HSA-NEXT:    s_mov_b32 s32, 0
3584; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
3585; HSA-NEXT:    s_getpc_b64 s[8:9]
3586; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v3i32_i32@rel32@lo+4
3587; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v3i32_i32@rel32@hi+12
3588; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
3589; HSA-NEXT:    s_endpgm
3590  call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6)
3591  ret void
3592}
3593
; Passes a <4 x i32> loaded from an undef ptr addrspace(1) by value to
; @external_void_func_v4i32. Every run line expects a single 128-bit buffer
; load into v[0:3], then an s_getpc_b64 / s_swappc_b64 call sequence.
; The assertions below are autogenerated (see the first line of this file);
; regenerate them with the update script instead of editing them by hand.
3594define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 {
3595; VI-LABEL: test_call_external_void_func_v4i32:
3596; VI:       ; %bb.0:
3597; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3598; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3599; VI-NEXT:    s_mov_b32 s38, -1
3600; VI-NEXT:    s_mov_b32 s39, 0xe80000
3601; VI-NEXT:    s_add_u32 s36, s36, s3
3602; VI-NEXT:    s_mov_b32 s3, 0xf000
3603; VI-NEXT:    s_mov_b32 s2, -1
3604; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3605; VI-NEXT:    s_addc_u32 s37, s37, 0
3606; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3607; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3608; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3609; VI-NEXT:    s_mov_b32 s32, 0
3610; VI-NEXT:    s_getpc_b64 s[4:5]
3611; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3612; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3613; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3614; VI-NEXT:    s_endpgm
3615;
3616; CI-LABEL: test_call_external_void_func_v4i32:
3617; CI:       ; %bb.0:
3618; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3619; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3620; CI-NEXT:    s_mov_b32 s38, -1
3621; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3622; CI-NEXT:    s_add_u32 s36, s36, s3
3623; CI-NEXT:    s_mov_b32 s3, 0xf000
3624; CI-NEXT:    s_mov_b32 s2, -1
3625; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3626; CI-NEXT:    s_addc_u32 s37, s37, 0
3627; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3628; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3629; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3630; CI-NEXT:    s_mov_b32 s32, 0
3631; CI-NEXT:    s_getpc_b64 s[4:5]
3632; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3633; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3634; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3635; CI-NEXT:    s_endpgm
3636;
3637; GFX9-LABEL: test_call_external_void_func_v4i32:
3638; GFX9:       ; %bb.0:
3639; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3640; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3641; GFX9-NEXT:    s_mov_b32 s38, -1
3642; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3643; GFX9-NEXT:    s_add_u32 s36, s36, s3
3644; GFX9-NEXT:    s_mov_b32 s3, 0xf000
3645; GFX9-NEXT:    s_mov_b32 s2, -1
3646; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3647; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3648; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3649; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3650; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3651; GFX9-NEXT:    s_mov_b32 s32, 0
3652; GFX9-NEXT:    s_getpc_b64 s[4:5]
3653; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3654; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3655; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3656; GFX9-NEXT:    s_endpgm
3657;
3658; GFX11-LABEL: test_call_external_void_func_v4i32:
3659; GFX11:       ; %bb.0:
3660; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
3661; GFX11-NEXT:    s_mov_b32 s2, -1
3662; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3663; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
3664; GFX11-NEXT:    s_mov_b32 s32, 0
3665; GFX11-NEXT:    s_getpc_b64 s[2:3]
3666; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4
3667; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12
3668; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3669; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3670; GFX11-NEXT:    s_endpgm
3671;
3672; HSA-LABEL: test_call_external_void_func_v4i32:
3673; HSA:       ; %bb.0:
3674; HSA-NEXT:    s_add_i32 s6, s6, s9
3675; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
3676; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
3677; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
3678; HSA-NEXT:    s_mov_b32 s6, -1
3679; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
3680; HSA-NEXT:    s_add_u32 s0, s0, s9
3681; HSA-NEXT:    s_addc_u32 s1, s1, 0
3682; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
3683; HSA-NEXT:    s_mov_b32 s32, 0
3684; HSA-NEXT:    s_getpc_b64 s[8:9]
3685; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4
3686; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12
3687; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
3688; HSA-NEXT:    s_endpgm
3689  %val = load <4 x i32>, ptr addrspace(1) undef
3690  call void @external_void_func_v4i32(<4 x i32> %val)
3691  ret void
3692}
3693
; Passes the constant vector <1, 2, 3, 4> by value to
; @external_void_func_v4i32. The expected code materializes the four elements
; as immediates in v0..v3 (paired into v_dual_mov_b32 in the GFX11 checks) and
; then calls through s_swappc_b64; no memory load is expected.
; The assertions below are autogenerated; regenerate rather than hand-edit.
3694define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 {
3695; VI-LABEL: test_call_external_void_func_v4i32_imm:
3696; VI:       ; %bb.0:
3697; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3698; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3699; VI-NEXT:    s_mov_b32 s38, -1
3700; VI-NEXT:    s_mov_b32 s39, 0xe80000
3701; VI-NEXT:    s_add_u32 s36, s36, s3
3702; VI-NEXT:    s_addc_u32 s37, s37, 0
3703; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3704; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3705; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3706; VI-NEXT:    v_mov_b32_e32 v0, 1
3707; VI-NEXT:    v_mov_b32_e32 v1, 2
3708; VI-NEXT:    v_mov_b32_e32 v2, 3
3709; VI-NEXT:    v_mov_b32_e32 v3, 4
3710; VI-NEXT:    s_mov_b32 s32, 0
3711; VI-NEXT:    s_getpc_b64 s[4:5]
3712; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3713; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3714; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3715; VI-NEXT:    s_endpgm
3716;
3717; CI-LABEL: test_call_external_void_func_v4i32_imm:
3718; CI:       ; %bb.0:
3719; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3720; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3721; CI-NEXT:    s_mov_b32 s38, -1
3722; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3723; CI-NEXT:    s_add_u32 s36, s36, s3
3724; CI-NEXT:    s_addc_u32 s37, s37, 0
3725; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3726; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3727; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3728; CI-NEXT:    v_mov_b32_e32 v0, 1
3729; CI-NEXT:    v_mov_b32_e32 v1, 2
3730; CI-NEXT:    v_mov_b32_e32 v2, 3
3731; CI-NEXT:    v_mov_b32_e32 v3, 4
3732; CI-NEXT:    s_mov_b32 s32, 0
3733; CI-NEXT:    s_getpc_b64 s[4:5]
3734; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3735; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3736; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3737; CI-NEXT:    s_endpgm
3738;
3739; GFX9-LABEL: test_call_external_void_func_v4i32_imm:
3740; GFX9:       ; %bb.0:
3741; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3742; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3743; GFX9-NEXT:    s_mov_b32 s38, -1
3744; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3745; GFX9-NEXT:    s_add_u32 s36, s36, s3
3746; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3747; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3748; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3749; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3750; GFX9-NEXT:    v_mov_b32_e32 v0, 1
3751; GFX9-NEXT:    v_mov_b32_e32 v1, 2
3752; GFX9-NEXT:    v_mov_b32_e32 v2, 3
3753; GFX9-NEXT:    v_mov_b32_e32 v3, 4
3754; GFX9-NEXT:    s_mov_b32 s32, 0
3755; GFX9-NEXT:    s_getpc_b64 s[4:5]
3756; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v4i32@rel32@lo+4
3757; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v4i32@rel32@hi+12
3758; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3759; GFX9-NEXT:    s_endpgm
3760;
3761; GFX11-LABEL: test_call_external_void_func_v4i32_imm:
3762; GFX11:       ; %bb.0:
3763; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
3764; GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
3765; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3766; GFX11-NEXT:    s_mov_b32 s32, 0
3767; GFX11-NEXT:    s_getpc_b64 s[2:3]
3768; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v4i32@rel32@lo+4
3769; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v4i32@rel32@hi+12
3770; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3771; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3772; GFX11-NEXT:    s_endpgm
3773;
3774; HSA-LABEL: test_call_external_void_func_v4i32_imm:
3775; HSA:       ; %bb.0:
3776; HSA-NEXT:    s_add_i32 s6, s6, s9
3777; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
3778; HSA-NEXT:    s_add_u32 s0, s0, s9
3779; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
3780; HSA-NEXT:    s_addc_u32 s1, s1, 0
3781; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
3782; HSA-NEXT:    v_mov_b32_e32 v0, 1
3783; HSA-NEXT:    v_mov_b32_e32 v1, 2
3784; HSA-NEXT:    v_mov_b32_e32 v2, 3
3785; HSA-NEXT:    v_mov_b32_e32 v3, 4
3786; HSA-NEXT:    s_mov_b32 s32, 0
3787; HSA-NEXT:    s_getpc_b64 s[8:9]
3788; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v4i32@rel32@lo+4
3789; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v4i32@rel32@hi+12
3790; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
3791; HSA-NEXT:    s_endpgm
3792  call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>)
3793  ret void
3794}
3795
; Passes the constant vector <1, 2, 3, 4, 5> (an odd-sized <5 x i32>) by value
; to @external_void_func_v5i32. The expected code places the five elements in
; v0..v4 as immediates; in the GFX11 checks the first four are paired into
; v_dual_mov_b32 and the fifth is a plain v_mov_b32_e32.
; The assertions below are autogenerated; regenerate rather than hand-edit.
3796define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 {
3797; VI-LABEL: test_call_external_void_func_v5i32_imm:
3798; VI:       ; %bb.0:
3799; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3800; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3801; VI-NEXT:    s_mov_b32 s38, -1
3802; VI-NEXT:    s_mov_b32 s39, 0xe80000
3803; VI-NEXT:    s_add_u32 s36, s36, s3
3804; VI-NEXT:    s_addc_u32 s37, s37, 0
3805; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3806; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3807; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3808; VI-NEXT:    v_mov_b32_e32 v0, 1
3809; VI-NEXT:    v_mov_b32_e32 v1, 2
3810; VI-NEXT:    v_mov_b32_e32 v2, 3
3811; VI-NEXT:    v_mov_b32_e32 v3, 4
3812; VI-NEXT:    v_mov_b32_e32 v4, 5
3813; VI-NEXT:    s_mov_b32 s32, 0
3814; VI-NEXT:    s_getpc_b64 s[4:5]
3815; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3816; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3817; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3818; VI-NEXT:    s_endpgm
3819;
3820; CI-LABEL: test_call_external_void_func_v5i32_imm:
3821; CI:       ; %bb.0:
3822; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3823; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3824; CI-NEXT:    s_mov_b32 s38, -1
3825; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3826; CI-NEXT:    s_add_u32 s36, s36, s3
3827; CI-NEXT:    s_addc_u32 s37, s37, 0
3828; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3829; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3830; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3831; CI-NEXT:    v_mov_b32_e32 v0, 1
3832; CI-NEXT:    v_mov_b32_e32 v1, 2
3833; CI-NEXT:    v_mov_b32_e32 v2, 3
3834; CI-NEXT:    v_mov_b32_e32 v3, 4
3835; CI-NEXT:    v_mov_b32_e32 v4, 5
3836; CI-NEXT:    s_mov_b32 s32, 0
3837; CI-NEXT:    s_getpc_b64 s[4:5]
3838; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3839; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3840; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3841; CI-NEXT:    s_endpgm
3842;
3843; GFX9-LABEL: test_call_external_void_func_v5i32_imm:
3844; GFX9:       ; %bb.0:
3845; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3846; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3847; GFX9-NEXT:    s_mov_b32 s38, -1
3848; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3849; GFX9-NEXT:    s_add_u32 s36, s36, s3
3850; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3851; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3852; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3853; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3854; GFX9-NEXT:    v_mov_b32_e32 v0, 1
3855; GFX9-NEXT:    v_mov_b32_e32 v1, 2
3856; GFX9-NEXT:    v_mov_b32_e32 v2, 3
3857; GFX9-NEXT:    v_mov_b32_e32 v3, 4
3858; GFX9-NEXT:    v_mov_b32_e32 v4, 5
3859; GFX9-NEXT:    s_mov_b32 s32, 0
3860; GFX9-NEXT:    s_getpc_b64 s[4:5]
3861; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v5i32@rel32@lo+4
3862; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v5i32@rel32@hi+12
3863; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3864; GFX9-NEXT:    s_endpgm
3865;
3866; GFX11-LABEL: test_call_external_void_func_v5i32_imm:
3867; GFX11:       ; %bb.0:
3868; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
3869; GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
3870; GFX11-NEXT:    v_mov_b32_e32 v4, 5
3871; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3872; GFX11-NEXT:    s_mov_b32 s32, 0
3873; GFX11-NEXT:    s_getpc_b64 s[2:3]
3874; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v5i32@rel32@lo+4
3875; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v5i32@rel32@hi+12
3876; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
3877; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3878; GFX11-NEXT:    s_endpgm
3879;
3880; HSA-LABEL: test_call_external_void_func_v5i32_imm:
3881; HSA:       ; %bb.0:
3882; HSA-NEXT:    s_add_i32 s6, s6, s9
3883; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
3884; HSA-NEXT:    s_add_u32 s0, s0, s9
3885; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
3886; HSA-NEXT:    s_addc_u32 s1, s1, 0
3887; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
3888; HSA-NEXT:    v_mov_b32_e32 v0, 1
3889; HSA-NEXT:    v_mov_b32_e32 v1, 2
3890; HSA-NEXT:    v_mov_b32_e32 v2, 3
3891; HSA-NEXT:    v_mov_b32_e32 v3, 4
3892; HSA-NEXT:    v_mov_b32_e32 v4, 5
3893; HSA-NEXT:    s_mov_b32 s32, 0
3894; HSA-NEXT:    s_getpc_b64 s[8:9]
3895; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v5i32@rel32@lo+4
3896; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v5i32@rel32@hi+12
3897; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
3898; HSA-NEXT:    s_endpgm
3899  call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>)
3900  ret void
3901}
3902
; Loads a ptr addrspace(1) from an undef ptr addrspace(4), loads a <8 x i32>
; through it, and passes the vector by value to @external_void_func_v8i32.
; The expected code does a scalar 64-bit load of the pointer, waits on
; lgkmcnt(0), then issues two 128-bit buffer loads (offsets 0 and 16) into
; v[0:3] and v[4:7] before the s_swappc_b64 call.
; The assertions below are autogenerated; regenerate rather than hand-edit.
3903define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 {
3904; VI-LABEL: test_call_external_void_func_v8i32:
3905; VI:       ; %bb.0:
3906; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3907; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3908; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
3909; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3910; VI-NEXT:    s_mov_b32 s38, -1
3911; VI-NEXT:    s_mov_b32 s39, 0xe80000
3912; VI-NEXT:    s_add_u32 s36, s36, s3
3913; VI-NEXT:    s_mov_b32 s3, 0xf000
3914; VI-NEXT:    s_mov_b32 s2, -1
3915; VI-NEXT:    s_waitcnt lgkmcnt(0)
3916; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3917; VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
3918; VI-NEXT:    s_addc_u32 s37, s37, 0
3919; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3920; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3921; VI-NEXT:    s_mov_b32 s32, 0
3922; VI-NEXT:    s_getpc_b64 s[4:5]
3923; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3924; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3925; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3926; VI-NEXT:    s_endpgm
3927;
3928; CI-LABEL: test_call_external_void_func_v8i32:
3929; CI:       ; %bb.0:
3930; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3931; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
3932; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
3933; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3934; CI-NEXT:    s_mov_b32 s38, -1
3935; CI-NEXT:    s_mov_b32 s39, 0xe8f000
3936; CI-NEXT:    s_add_u32 s36, s36, s3
3937; CI-NEXT:    s_mov_b32 s3, 0xf000
3938; CI-NEXT:    s_mov_b32 s2, -1
3939; CI-NEXT:    s_waitcnt lgkmcnt(0)
3940; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3941; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
3942; CI-NEXT:    s_addc_u32 s37, s37, 0
3943; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
3944; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
3945; CI-NEXT:    s_mov_b32 s32, 0
3946; CI-NEXT:    s_getpc_b64 s[4:5]
3947; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3948; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3949; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3950; CI-NEXT:    s_endpgm
3951;
3952; GFX9-LABEL: test_call_external_void_func_v8i32:
3953; GFX9:       ; %bb.0:
3954; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
3955; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
3956; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
3957; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
3958; GFX9-NEXT:    s_mov_b32 s38, -1
3959; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
3960; GFX9-NEXT:    s_add_u32 s36, s36, s3
3961; GFX9-NEXT:    s_mov_b32 s3, 0xf000
3962; GFX9-NEXT:    s_mov_b32 s2, -1
3963; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3964; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
3965; GFX9-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
3966; GFX9-NEXT:    s_addc_u32 s37, s37, 0
3967; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
3968; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
3969; GFX9-NEXT:    s_mov_b32 s32, 0
3970; GFX9-NEXT:    s_getpc_b64 s[4:5]
3971; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
3972; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
3973; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
3974; GFX9-NEXT:    s_endpgm
3975;
3976; GFX11-LABEL: test_call_external_void_func_v8i32:
3977; GFX11:       ; %bb.0:
3978; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
3979; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
3980; GFX11-NEXT:    s_mov_b32 s6, -1
3981; GFX11-NEXT:    s_mov_b32 s32, 0
3982; GFX11-NEXT:    s_getpc_b64 s[2:3]
3983; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4
3984; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12
3985; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3986; GFX11-NEXT:    s_clause 0x1
3987; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
3988; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
3989; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
3990; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
3991; GFX11-NEXT:    s_endpgm
3992;
3993; HSA-LABEL: test_call_external_void_func_v8i32:
3994; HSA:       ; %bb.0:
3995; HSA-NEXT:    s_add_i32 s6, s6, s9
3996; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
3997; HSA-NEXT:    s_add_u32 s0, s0, s9
3998; HSA-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
3999; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
4000; HSA-NEXT:    s_mov_b32 s10, -1
4001; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
4002; HSA-NEXT:    s_waitcnt lgkmcnt(0)
4003; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4004; HSA-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4005; HSA-NEXT:    s_addc_u32 s1, s1, 0
4006; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
4007; HSA-NEXT:    s_mov_b32 s32, 0
4008; HSA-NEXT:    s_getpc_b64 s[8:9]
4009; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4
4010; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v8i32@rel32@hi+12
4011; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4012; HSA-NEXT:    s_endpgm
4013  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
4014  %val = load <8 x i32>, ptr addrspace(1) %ptr
4015  call void @external_void_func_v8i32(<8 x i32> %val)
4016  ret void
4017}
4018
; Passes the constant vector <1, 2, ..., 8> by value to
; @external_void_func_v8i32. The expected code materializes the eight elements
; as immediates in v0..v7 (four v_dual_mov_b32 pairs in the GFX11 checks) and
; then calls through s_swappc_b64; no memory load is expected.
; The assertions below are autogenerated; regenerate rather than hand-edit.
4019define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 {
4020; VI-LABEL: test_call_external_void_func_v8i32_imm:
4021; VI:       ; %bb.0:
4022; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4023; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4024; VI-NEXT:    s_mov_b32 s38, -1
4025; VI-NEXT:    s_mov_b32 s39, 0xe80000
4026; VI-NEXT:    s_add_u32 s36, s36, s3
4027; VI-NEXT:    s_addc_u32 s37, s37, 0
4028; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4029; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4030; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4031; VI-NEXT:    v_mov_b32_e32 v0, 1
4032; VI-NEXT:    v_mov_b32_e32 v1, 2
4033; VI-NEXT:    v_mov_b32_e32 v2, 3
4034; VI-NEXT:    v_mov_b32_e32 v3, 4
4035; VI-NEXT:    v_mov_b32_e32 v4, 5
4036; VI-NEXT:    v_mov_b32_e32 v5, 6
4037; VI-NEXT:    v_mov_b32_e32 v6, 7
4038; VI-NEXT:    v_mov_b32_e32 v7, 8
4039; VI-NEXT:    s_mov_b32 s32, 0
4040; VI-NEXT:    s_getpc_b64 s[4:5]
4041; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
4042; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
4043; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4044; VI-NEXT:    s_endpgm
4045;
4046; CI-LABEL: test_call_external_void_func_v8i32_imm:
4047; CI:       ; %bb.0:
4048; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4049; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4050; CI-NEXT:    s_mov_b32 s38, -1
4051; CI-NEXT:    s_mov_b32 s39, 0xe8f000
4052; CI-NEXT:    s_add_u32 s36, s36, s3
4053; CI-NEXT:    s_addc_u32 s37, s37, 0
4054; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4055; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4056; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4057; CI-NEXT:    v_mov_b32_e32 v0, 1
4058; CI-NEXT:    v_mov_b32_e32 v1, 2
4059; CI-NEXT:    v_mov_b32_e32 v2, 3
4060; CI-NEXT:    v_mov_b32_e32 v3, 4
4061; CI-NEXT:    v_mov_b32_e32 v4, 5
4062; CI-NEXT:    v_mov_b32_e32 v5, 6
4063; CI-NEXT:    v_mov_b32_e32 v6, 7
4064; CI-NEXT:    v_mov_b32_e32 v7, 8
4065; CI-NEXT:    s_mov_b32 s32, 0
4066; CI-NEXT:    s_getpc_b64 s[4:5]
4067; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
4068; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
4069; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4070; CI-NEXT:    s_endpgm
4071;
4072; GFX9-LABEL: test_call_external_void_func_v8i32_imm:
4073; GFX9:       ; %bb.0:
4074; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4075; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4076; GFX9-NEXT:    s_mov_b32 s38, -1
4077; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
4078; GFX9-NEXT:    s_add_u32 s36, s36, s3
4079; GFX9-NEXT:    s_addc_u32 s37, s37, 0
4080; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
4081; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
4082; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
4083; GFX9-NEXT:    v_mov_b32_e32 v0, 1
4084; GFX9-NEXT:    v_mov_b32_e32 v1, 2
4085; GFX9-NEXT:    v_mov_b32_e32 v2, 3
4086; GFX9-NEXT:    v_mov_b32_e32 v3, 4
4087; GFX9-NEXT:    v_mov_b32_e32 v4, 5
4088; GFX9-NEXT:    v_mov_b32_e32 v5, 6
4089; GFX9-NEXT:    v_mov_b32_e32 v6, 7
4090; GFX9-NEXT:    v_mov_b32_e32 v7, 8
4091; GFX9-NEXT:    s_mov_b32 s32, 0
4092; GFX9-NEXT:    s_getpc_b64 s[4:5]
4093; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v8i32@rel32@lo+4
4094; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v8i32@rel32@hi+12
4095; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4096; GFX9-NEXT:    s_endpgm
4097;
4098; GFX11-LABEL: test_call_external_void_func_v8i32_imm:
4099; GFX11:       ; %bb.0:
4100; GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
4101; GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
4102; GFX11-NEXT:    v_dual_mov_b32 v4, 5 :: v_dual_mov_b32 v5, 6
4103; GFX11-NEXT:    v_dual_mov_b32 v6, 7 :: v_dual_mov_b32 v7, 8
4104; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
4105; GFX11-NEXT:    s_mov_b32 s32, 0
4106; GFX11-NEXT:    s_getpc_b64 s[2:3]
4107; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v8i32@rel32@lo+4
4108; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v8i32@rel32@hi+12
4109; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4110; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
4111; GFX11-NEXT:    s_endpgm
4112;
4113; HSA-LABEL: test_call_external_void_func_v8i32_imm:
4114; HSA:       ; %bb.0:
4115; HSA-NEXT:    s_add_i32 s6, s6, s9
4116; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
4117; HSA-NEXT:    s_add_u32 s0, s0, s9
4118; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
4119; HSA-NEXT:    s_addc_u32 s1, s1, 0
4120; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
4121; HSA-NEXT:    v_mov_b32_e32 v0, 1
4122; HSA-NEXT:    v_mov_b32_e32 v1, 2
4123; HSA-NEXT:    v_mov_b32_e32 v2, 3
4124; HSA-NEXT:    v_mov_b32_e32 v3, 4
4125; HSA-NEXT:    v_mov_b32_e32 v4, 5
4126; HSA-NEXT:    v_mov_b32_e32 v5, 6
4127; HSA-NEXT:    v_mov_b32_e32 v6, 7
4128; HSA-NEXT:    v_mov_b32_e32 v7, 8
4129; HSA-NEXT:    s_mov_b32 s32, 0
4130; HSA-NEXT:    s_getpc_b64 s[8:9]
4131; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v8i32@rel32@lo+4
4132; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v8i32@rel32@hi+12
4133; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4134; HSA-NEXT:    s_endpgm
4135  call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>)
4136  ret void
4137}
4138
; Loads a ptr addrspace(1) from an undef ptr addrspace(4), loads a <16 x i32>
; through it, and passes the vector by value to @external_void_func_v16i32.
; The expected code does a scalar 64-bit load of the pointer, waits on
; lgkmcnt(0), then issues four 128-bit buffer loads (offsets 0, 16, 32, 48)
; into v[0:15] before the s_swappc_b64 call.
; The assertions below are autogenerated; regenerate rather than hand-edit.
4139define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 {
4140; VI-LABEL: test_call_external_void_func_v16i32:
4141; VI:       ; %bb.0:
4142; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4143; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4144; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
4145; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4146; VI-NEXT:    s_mov_b32 s38, -1
4147; VI-NEXT:    s_mov_b32 s39, 0xe80000
4148; VI-NEXT:    s_add_u32 s36, s36, s3
4149; VI-NEXT:    s_mov_b32 s3, 0xf000
4150; VI-NEXT:    s_mov_b32 s2, -1
4151; VI-NEXT:    s_waitcnt lgkmcnt(0)
4152; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
4153; VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
4154; VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
4155; VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
4156; VI-NEXT:    s_addc_u32 s37, s37, 0
4157; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4158; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4159; VI-NEXT:    s_mov_b32 s32, 0
4160; VI-NEXT:    s_getpc_b64 s[4:5]
4161; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
4162; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
4163; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4164; VI-NEXT:    s_endpgm
4165;
4166; CI-LABEL: test_call_external_void_func_v16i32:
4167; CI:       ; %bb.0:
4168; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4169; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4170; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
4171; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4172; CI-NEXT:    s_mov_b32 s38, -1
4173; CI-NEXT:    s_mov_b32 s39, 0xe8f000
4174; CI-NEXT:    s_add_u32 s36, s36, s3
4175; CI-NEXT:    s_mov_b32 s3, 0xf000
4176; CI-NEXT:    s_mov_b32 s2, -1
4177; CI-NEXT:    s_waitcnt lgkmcnt(0)
4178; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
4179; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
4180; CI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
4181; CI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
4182; CI-NEXT:    s_addc_u32 s37, s37, 0
4183; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4184; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4185; CI-NEXT:    s_mov_b32 s32, 0
4186; CI-NEXT:    s_getpc_b64 s[4:5]
4187; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
4188; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
4189; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4190; CI-NEXT:    s_endpgm
4191;
4192; GFX9-LABEL: test_call_external_void_func_v16i32:
4193; GFX9:       ; %bb.0:
4194; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4195; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
4196; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
4197; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4198; GFX9-NEXT:    s_mov_b32 s38, -1
4199; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
4200; GFX9-NEXT:    s_add_u32 s36, s36, s3
4201; GFX9-NEXT:    s_mov_b32 s3, 0xf000
4202; GFX9-NEXT:    s_mov_b32 s2, -1
4203; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4204; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
4205; GFX9-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 offset:16
4206; GFX9-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 offset:32
4207; GFX9-NEXT:    buffer_load_dwordx4 v[12:15], off, s[0:3], 0 offset:48
4208; GFX9-NEXT:    s_addc_u32 s37, s37, 0
4209; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
4210; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
4211; GFX9-NEXT:    s_mov_b32 s32, 0
4212; GFX9-NEXT:    s_getpc_b64 s[4:5]
4213; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32@rel32@lo+4
4214; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32@rel32@hi+12
4215; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4216; GFX9-NEXT:    s_endpgm
4217;
4218; GFX11-LABEL: test_call_external_void_func_v16i32:
4219; GFX11:       ; %bb.0:
4220; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
4221; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
4222; GFX11-NEXT:    s_mov_b32 s6, -1
4223; GFX11-NEXT:    s_mov_b32 s32, 0
4224; GFX11-NEXT:    s_getpc_b64 s[2:3]
4225; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v16i32@rel32@lo+4
4226; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v16i32@rel32@hi+12
4227; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4228; GFX11-NEXT:    s_clause 0x3
4229; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
4230; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
4231; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
4232; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
4233; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
4234; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
4235; GFX11-NEXT:    s_endpgm
4236;
4237; HSA-LABEL: test_call_external_void_func_v16i32:
4238; HSA:       ; %bb.0:
4239; HSA-NEXT:    s_add_i32 s6, s6, s9
4240; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
4241; HSA-NEXT:    s_add_u32 s0, s0, s9
4242; HSA-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4243; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
4244; HSA-NEXT:    s_mov_b32 s10, -1
4245; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
4246; HSA-NEXT:    s_waitcnt lgkmcnt(0)
4247; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4248; HSA-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4249; HSA-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
4250; HSA-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
4251; HSA-NEXT:    s_addc_u32 s1, s1, 0
4252; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
4253; HSA-NEXT:    s_mov_b32 s32, 0
4254; HSA-NEXT:    s_getpc_b64 s[8:9]
4255; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v16i32@rel32@lo+4
4256; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v16i32@rel32@hi+12
4257; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4258; HSA-NEXT:    s_endpgm
4259  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
4260  %val = load <16 x i32>, ptr addrspace(1) %ptr
4261  call void @external_void_func_v16i32(<16 x i32> %val)
4262  ret void
4263}
4264
4265define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 {
4266; VI-LABEL: test_call_external_void_func_v32i32:
4267; VI:       ; %bb.0:
4268; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
4269; VI-NEXT:    s_mov_b32 s7, 0xf000
4270; VI-NEXT:    s_mov_b32 s6, -1
4271; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4272; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4273; VI-NEXT:    s_waitcnt lgkmcnt(0)
4274; VI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4275; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4276; VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4277; VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4278; VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4279; VI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4280; VI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4281; VI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4282; VI-NEXT:    s_mov_b32 s38, -1
4283; VI-NEXT:    s_mov_b32 s39, 0xe80000
4284; VI-NEXT:    s_add_u32 s36, s36, s3
4285; VI-NEXT:    s_addc_u32 s37, s37, 0
4286; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4287; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4288; VI-NEXT:    s_mov_b32 s32, 0
4289; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4290; VI-NEXT:    s_getpc_b64 s[8:9]
4291; VI-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
4292; VI-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
4293; VI-NEXT:    s_waitcnt vmcnt(7)
4294; VI-NEXT:    buffer_store_dword v31, off, s[36:39], s32
4295; VI-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4296; VI-NEXT:    s_endpgm
4297;
4298; CI-LABEL: test_call_external_void_func_v32i32:
4299; CI:       ; %bb.0:
4300; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
4301; CI-NEXT:    s_mov_b32 s7, 0xf000
4302; CI-NEXT:    s_mov_b32 s6, -1
4303; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4304; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4305; CI-NEXT:    s_waitcnt lgkmcnt(0)
4306; CI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4307; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4308; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4309; CI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4310; CI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4311; CI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4312; CI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4313; CI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4314; CI-NEXT:    s_mov_b32 s38, -1
4315; CI-NEXT:    s_mov_b32 s39, 0xe8f000
4316; CI-NEXT:    s_add_u32 s36, s36, s3
4317; CI-NEXT:    s_addc_u32 s37, s37, 0
4318; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4319; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4320; CI-NEXT:    s_mov_b32 s32, 0
4321; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4322; CI-NEXT:    s_getpc_b64 s[8:9]
4323; CI-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
4324; CI-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
4325; CI-NEXT:    s_waitcnt vmcnt(7)
4326; CI-NEXT:    buffer_store_dword v31, off, s[36:39], s32
4327; CI-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4328; CI-NEXT:    s_endpgm
4329;
4330; GFX9-LABEL: test_call_external_void_func_v32i32:
4331; GFX9:       ; %bb.0:
4332; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
4333; GFX9-NEXT:    s_mov_b32 s7, 0xf000
4334; GFX9-NEXT:    s_mov_b32 s6, -1
4335; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4336; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4337; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4338; GFX9-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4339; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4340; GFX9-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4341; GFX9-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4342; GFX9-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4343; GFX9-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4344; GFX9-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4345; GFX9-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4346; GFX9-NEXT:    s_mov_b32 s38, -1
4347; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
4348; GFX9-NEXT:    s_add_u32 s36, s36, s3
4349; GFX9-NEXT:    s_addc_u32 s37, s37, 0
4350; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
4351; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
4352; GFX9-NEXT:    s_mov_b32 s32, 0
4353; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
4354; GFX9-NEXT:    s_getpc_b64 s[8:9]
4355; GFX9-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32@rel32@lo+4
4356; GFX9-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32@rel32@hi+12
4357; GFX9-NEXT:    s_waitcnt vmcnt(7)
4358; GFX9-NEXT:    buffer_store_dword v31, off, s[36:39], s32
4359; GFX9-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4360; GFX9-NEXT:    s_endpgm
4361;
4362; GFX11-LABEL: test_call_external_void_func_v32i32:
4363; GFX11:       ; %bb.0:
4364; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
4365; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
4366; GFX11-NEXT:    s_mov_b32 s6, -1
4367; GFX11-NEXT:    s_mov_b32 s32, 0
4368; GFX11-NEXT:    s_getpc_b64 s[2:3]
4369; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v32i32@rel32@lo+4
4370; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v32i32@rel32@hi+12
4371; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4372; GFX11-NEXT:    s_clause 0x7
4373; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112
4374; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
4375; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
4376; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
4377; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
4378; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64
4379; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80
4380; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96
4381; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
4382; GFX11-NEXT:    s_waitcnt vmcnt(7)
4383; GFX11-NEXT:    scratch_store_b32 off, v31, s32
4384; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
4385; GFX11-NEXT:    s_endpgm
4386;
4387; HSA-LABEL: test_call_external_void_func_v32i32:
4388; HSA:       ; %bb.0:
4389; HSA-NEXT:    s_add_i32 s6, s6, s9
4390; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
4391; HSA-NEXT:    s_add_u32 s0, s0, s9
4392; HSA-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4393; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
4394; HSA-NEXT:    s_mov_b32 s10, -1
4395; HSA-NEXT:    s_mov_b32 s32, 0
4396; HSA-NEXT:    s_waitcnt lgkmcnt(0)
4397; HSA-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
4398; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4399; HSA-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4400; HSA-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
4401; HSA-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
4402; HSA-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
4403; HSA-NEXT:    buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
4404; HSA-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
4405; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
4406; HSA-NEXT:    s_addc_u32 s1, s1, 0
4407; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
4408; HSA-NEXT:    s_getpc_b64 s[12:13]
4409; HSA-NEXT:    s_add_u32 s12, s12, external_void_func_v32i32@rel32@lo+4
4410; HSA-NEXT:    s_addc_u32 s13, s13, external_void_func_v32i32@rel32@hi+12
4411; HSA-NEXT:    s_waitcnt vmcnt(7)
4412; HSA-NEXT:    buffer_store_dword v31, off, s[0:3], s32
4413; HSA-NEXT:    s_swappc_b64 s[30:31], s[12:13]
4414; HSA-NEXT:    s_endpgm
4415  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
4416  %val = load <32 x i32>, ptr addrspace(1) %ptr
4417  call void @external_void_func_v32i32(<32 x i32> %val)
4418  ret void
4419}
4420
; Kernel that calls external_void_func_v32i32_i32 with a <32 x i32> value
; loaded from memory plus one additional i32 loaded separately.
; In the expected output below the vector is loaded into v[0:31] and the extra
; i32 into v32; v31 is then stored at s32 and v32 at s32+4 before the
; s_swappc_b64, so the last vector element and the trailing i32 are the only
; values written to memory for the call.
; NOTE(review): s32 is presumably the stack pointer for outgoing arguments, and
; both source pointers are undef, so only the argument setup looks meaningful
; here -- confirm.
4421define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 {
4422; VI-LABEL: test_call_external_void_func_v32i32_i32:
4423; VI:       ; %bb.0:
4424; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4425; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4426; VI-NEXT:    s_mov_b32 s38, -1
4427; VI-NEXT:    s_mov_b32 s39, 0xe80000
4428; VI-NEXT:    s_add_u32 s36, s36, s5
4429; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
4430; VI-NEXT:    s_mov_b32 s7, 0xf000
4431; VI-NEXT:    s_mov_b32 s6, -1
4432; VI-NEXT:    s_addc_u32 s37, s37, 0
4433; VI-NEXT:    s_waitcnt lgkmcnt(0)
4434; VI-NEXT:    buffer_load_dword v32, off, s[4:7], 0
4435; VI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4436; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4437; VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4438; VI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4439; VI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4440; VI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4441; VI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4442; VI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4443; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4444; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4445; VI-NEXT:    s_mov_b32 s32, 0
4446; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4447; VI-NEXT:    s_getpc_b64 s[4:5]
4448; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4449; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4450; VI-NEXT:    s_waitcnt vmcnt(8)
4451; VI-NEXT:    buffer_store_dword v32, off, s[36:39], s32 offset:4
4452; VI-NEXT:    s_waitcnt vmcnt(8)
4453; VI-NEXT:    buffer_store_dword v31, off, s[36:39], s32
4454; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4455; VI-NEXT:    s_endpgm
4456;
4457; CI-LABEL: test_call_external_void_func_v32i32_i32:
4458; CI:       ; %bb.0:
4459; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4460; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4461; CI-NEXT:    s_mov_b32 s38, -1
4462; CI-NEXT:    s_mov_b32 s39, 0xe8f000
4463; CI-NEXT:    s_add_u32 s36, s36, s5
4464; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
4465; CI-NEXT:    s_mov_b32 s7, 0xf000
4466; CI-NEXT:    s_mov_b32 s6, -1
4467; CI-NEXT:    s_addc_u32 s37, s37, 0
4468; CI-NEXT:    s_waitcnt lgkmcnt(0)
4469; CI-NEXT:    buffer_load_dword v32, off, s[4:7], 0
4470; CI-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4471; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4472; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4473; CI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4474; CI-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4475; CI-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4476; CI-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4477; CI-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4478; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4479; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4480; CI-NEXT:    s_mov_b32 s32, 0
4481; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4482; CI-NEXT:    s_getpc_b64 s[4:5]
4483; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4484; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4485; CI-NEXT:    s_waitcnt vmcnt(8)
4486; CI-NEXT:    buffer_store_dword v32, off, s[36:39], s32 offset:4
4487; CI-NEXT:    s_waitcnt vmcnt(8)
4488; CI-NEXT:    buffer_store_dword v31, off, s[36:39], s32
4489; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4490; CI-NEXT:    s_endpgm
4491;
4492; GFX9-LABEL: test_call_external_void_func_v32i32_i32:
4493; GFX9:       ; %bb.0:
4494; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4495; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4496; GFX9-NEXT:    s_mov_b32 s38, -1
4497; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
4498; GFX9-NEXT:    s_add_u32 s36, s36, s5
4499; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
4500; GFX9-NEXT:    s_mov_b32 s7, 0xf000
4501; GFX9-NEXT:    s_mov_b32 s6, -1
4502; GFX9-NEXT:    s_addc_u32 s37, s37, 0
4503; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4504; GFX9-NEXT:    buffer_load_dword v32, off, s[4:7], 0
4505; GFX9-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
4506; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
4507; GFX9-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
4508; GFX9-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
4509; GFX9-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
4510; GFX9-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
4511; GFX9-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
4512; GFX9-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
4513; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
4514; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
4515; GFX9-NEXT:    s_mov_b32 s32, 0
4516; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
4517; GFX9-NEXT:    s_getpc_b64 s[4:5]
4518; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v32i32_i32@rel32@lo+4
4519; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v32i32_i32@rel32@hi+12
4520; GFX9-NEXT:    s_waitcnt vmcnt(8)
4521; GFX9-NEXT:    buffer_store_dword v32, off, s[36:39], s32 offset:4
4522; GFX9-NEXT:    s_waitcnt vmcnt(8)
4523; GFX9-NEXT:    buffer_store_dword v31, off, s[36:39], s32
4524; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4525; GFX9-NEXT:    s_endpgm
4526;
4527; GFX11-LABEL: test_call_external_void_func_v32i32_i32:
4528; GFX11:       ; %bb.0:
4529; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
4530; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
4531; GFX11-NEXT:    s_mov_b32 s6, -1
4532; GFX11-NEXT:    s_mov_b32 s32, 0
4533; GFX11-NEXT:    s_getpc_b64 s[2:3]
4534; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v32i32_i32@rel32@lo+4
4535; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v32i32_i32@rel32@hi+12
4536; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4537; GFX11-NEXT:    s_clause 0x8
4538; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[4:7], 0 offset:112
4539; GFX11-NEXT:    buffer_load_b32 v32, off, s[4:7], 0
4540; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
4541; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[4:7], 0 offset:16
4542; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[4:7], 0 offset:32
4543; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[4:7], 0 offset:48
4544; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[4:7], 0 offset:64
4545; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[4:7], 0 offset:80
4546; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[4:7], 0 offset:96
4547; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
4548; GFX11-NEXT:    s_add_i32 s4, s32, 4
4549; GFX11-NEXT:    s_waitcnt vmcnt(8)
4550; GFX11-NEXT:    scratch_store_b32 off, v31, s32
4551; GFX11-NEXT:    s_waitcnt vmcnt(7)
4552; GFX11-NEXT:    scratch_store_b32 off, v32, s4
4553; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
4554; GFX11-NEXT:    s_endpgm
4555;
4556; HSA-LABEL: test_call_external_void_func_v32i32_i32:
4557; HSA:       ; %bb.0:
4558; HSA-NEXT:    s_add_i32 s8, s8, s11
4559; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
4560; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
4561; HSA-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4562; HSA-NEXT:    s_add_u32 s0, s0, s11
4563; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
4564; HSA-NEXT:    s_mov_b32 s10, -1
4565; HSA-NEXT:    s_waitcnt lgkmcnt(0)
4566; HSA-NEXT:    buffer_load_dword v32, off, s[8:11], 0
4567; HSA-NEXT:    buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:112
4568; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
4569; HSA-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
4570; HSA-NEXT:    buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32
4571; HSA-NEXT:    buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48
4572; HSA-NEXT:    buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:64
4573; HSA-NEXT:    buffer_load_dwordx4 v[20:23], off, s[8:11], 0 offset:80
4574; HSA-NEXT:    buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:96
4575; HSA-NEXT:    s_mov_b32 s32, 0
4576; HSA-NEXT:    s_addc_u32 s1, s1, 0
4577; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
4578; HSA-NEXT:    s_getpc_b64 s[8:9]
4579; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v32i32_i32@rel32@lo+4
4580; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v32i32_i32@rel32@hi+12
4581; HSA-NEXT:    s_waitcnt vmcnt(8)
4582; HSA-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:4
4583; HSA-NEXT:    s_waitcnt vmcnt(8)
4584; HSA-NEXT:    buffer_store_dword v31, off, s[0:3], s32
4585; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4586; HSA-NEXT:    s_endpgm
4587  %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
4588  %val0 = load <32 x i32>, ptr addrspace(1) %ptr0
4589  %val1 = load i32, ptr addrspace(1) undef
4590  call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1)
4591  ret void
4592}
4593
; Kernel that calls external_i32_func_i32 with the immediate 42 and performs a
; volatile store of the returned i32 to %out.
; The expected output materializes 42 in v0 before the s_swappc_b64 and stores
; v0 after it returns. The %out pointer is loaded into s[36:37] before the
; call and used by the store afterwards.
; NOTE(review): s[36:39] is presumably chosen because it must survive the
; call -- confirm against the calling convention.
4594define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 {
4595; VI-LABEL: test_call_external_i32_func_i32_imm:
4596; VI:       ; %bb.0:
4597; VI-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
4598; VI-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
4599; VI-NEXT:    s_mov_b32 s42, -1
4600; VI-NEXT:    s_mov_b32 s43, 0xe80000
4601; VI-NEXT:    s_add_u32 s40, s40, s5
4602; VI-NEXT:    s_load_dwordx2 s[36:37], s[2:3], 0x24
4603; VI-NEXT:    s_addc_u32 s41, s41, 0
4604; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4605; VI-NEXT:    s_mov_b64 s[0:1], s[40:41]
4606; VI-NEXT:    s_mov_b64 s[2:3], s[42:43]
4607; VI-NEXT:    v_mov_b32_e32 v0, 42
4608; VI-NEXT:    s_mov_b32 s32, 0
4609; VI-NEXT:    s_mov_b32 s39, 0xf000
4610; VI-NEXT:    s_mov_b32 s38, -1
4611; VI-NEXT:    s_getpc_b64 s[4:5]
4612; VI-NEXT:    s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4613; VI-NEXT:    s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4614; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4615; VI-NEXT:    buffer_store_dword v0, off, s[36:39], 0
4616; VI-NEXT:    s_waitcnt vmcnt(0)
4617; VI-NEXT:    s_endpgm
4618;
4619; CI-LABEL: test_call_external_i32_func_i32_imm:
4620; CI:       ; %bb.0:
4621; CI-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
4622; CI-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
4623; CI-NEXT:    s_mov_b32 s42, -1
4624; CI-NEXT:    s_mov_b32 s43, 0xe8f000
4625; CI-NEXT:    s_add_u32 s40, s40, s5
4626; CI-NEXT:    s_load_dwordx2 s[36:37], s[2:3], 0x9
4627; CI-NEXT:    s_addc_u32 s41, s41, 0
4628; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4629; CI-NEXT:    s_mov_b64 s[0:1], s[40:41]
4630; CI-NEXT:    s_mov_b64 s[2:3], s[42:43]
4631; CI-NEXT:    v_mov_b32_e32 v0, 42
4632; CI-NEXT:    s_mov_b32 s32, 0
4633; CI-NEXT:    s_mov_b32 s39, 0xf000
4634; CI-NEXT:    s_mov_b32 s38, -1
4635; CI-NEXT:    s_getpc_b64 s[4:5]
4636; CI-NEXT:    s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4637; CI-NEXT:    s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4638; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4639; CI-NEXT:    buffer_store_dword v0, off, s[36:39], 0
4640; CI-NEXT:    s_waitcnt vmcnt(0)
4641; CI-NEXT:    s_endpgm
4642;
4643; GFX9-LABEL: test_call_external_i32_func_i32_imm:
4644; GFX9:       ; %bb.0:
4645; GFX9-NEXT:    s_mov_b32 s40, SCRATCH_RSRC_DWORD0
4646; GFX9-NEXT:    s_mov_b32 s41, SCRATCH_RSRC_DWORD1
4647; GFX9-NEXT:    s_mov_b32 s42, -1
4648; GFX9-NEXT:    s_mov_b32 s43, 0xe00000
4649; GFX9-NEXT:    s_add_u32 s40, s40, s5
4650; GFX9-NEXT:    s_load_dwordx2 s[36:37], s[2:3], 0x24
4651; GFX9-NEXT:    s_addc_u32 s41, s41, 0
4652; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
4653; GFX9-NEXT:    s_mov_b64 s[0:1], s[40:41]
4654; GFX9-NEXT:    s_mov_b64 s[2:3], s[42:43]
4655; GFX9-NEXT:    v_mov_b32_e32 v0, 42
4656; GFX9-NEXT:    s_mov_b32 s32, 0
4657; GFX9-NEXT:    s_mov_b32 s39, 0xf000
4658; GFX9-NEXT:    s_mov_b32 s38, -1
4659; GFX9-NEXT:    s_getpc_b64 s[4:5]
4660; GFX9-NEXT:    s_add_u32 s4, s4, external_i32_func_i32@rel32@lo+4
4661; GFX9-NEXT:    s_addc_u32 s5, s5, external_i32_func_i32@rel32@hi+12
4662; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4663; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], 0
4664; GFX9-NEXT:    s_waitcnt vmcnt(0)
4665; GFX9-NEXT:    s_endpgm
4666;
4667; GFX11-LABEL: test_call_external_i32_func_i32_imm:
4668; GFX11:       ; %bb.0:
4669; GFX11-NEXT:    s_load_b64 s[36:37], s[2:3], 0x24
4670; GFX11-NEXT:    v_mov_b32_e32 v0, 42
4671; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
4672; GFX11-NEXT:    s_mov_b32 s32, 0
4673; GFX11-NEXT:    s_mov_b32 s39, 0x31016000
4674; GFX11-NEXT:    s_mov_b32 s38, -1
4675; GFX11-NEXT:    s_getpc_b64 s[2:3]
4676; GFX11-NEXT:    s_add_u32 s2, s2, external_i32_func_i32@rel32@lo+4
4677; GFX11-NEXT:    s_addc_u32 s3, s3, external_i32_func_i32@rel32@hi+12
4678; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
4679; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
4680; GFX11-NEXT:    buffer_store_b32 v0, off, s[36:39], 0 dlc
4681; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4682; GFX11-NEXT:    s_endpgm
4683;
4684; HSA-LABEL: test_call_external_i32_func_i32_imm:
4685; HSA:       ; %bb.0:
4686; HSA-NEXT:    s_add_i32 s8, s8, s11
4687; HSA-NEXT:    s_load_dwordx2 s[36:37], s[6:7], 0x0
4688; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
4689; HSA-NEXT:    s_add_u32 s0, s0, s11
4690; HSA-NEXT:    s_addc_u32 s1, s1, 0
4691; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
4692; HSA-NEXT:    v_mov_b32_e32 v0, 42
4693; HSA-NEXT:    s_mov_b32 s32, 0
4694; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
4695; HSA-NEXT:    s_mov_b32 s39, 0x1100f000
4696; HSA-NEXT:    s_mov_b32 s38, -1
4697; HSA-NEXT:    s_getpc_b64 s[8:9]
4698; HSA-NEXT:    s_add_u32 s8, s8, external_i32_func_i32@rel32@lo+4
4699; HSA-NEXT:    s_addc_u32 s9, s9, external_i32_func_i32@rel32@hi+12
4700; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4701; HSA-NEXT:    buffer_store_dword v0, off, s[36:39], 0
4702; HSA-NEXT:    s_waitcnt vmcnt(0)
4703; HSA-NEXT:    s_endpgm
4704  %val = call i32 @external_i32_func_i32(i32 42)
4705  store volatile i32 %val, ptr addrspace(1) %out
4706  ret void
4707}
4708
; Kernel that loads a { i8, i32 } aggregate from memory and passes it by value
; to external_void_func_struct_i8_i32.
; The expected output loads the i8 field with an unsigned byte load into v0 and
; the i32 field (offset 4) into v1. No stores relative to s32 appear before the
; s_swappc_b64 on any of the checked targets, i.e. both fields are passed in
; vector registers.
4709define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 {
4710; VI-LABEL: test_call_external_void_func_struct_i8_i32:
4711; VI:       ; %bb.0:
4712; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4713; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4714; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
4715; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4716; VI-NEXT:    s_mov_b32 s38, -1
4717; VI-NEXT:    s_mov_b32 s39, 0xe80000
4718; VI-NEXT:    s_add_u32 s36, s36, s3
4719; VI-NEXT:    s_mov_b32 s3, 0xf000
4720; VI-NEXT:    s_mov_b32 s2, -1
4721; VI-NEXT:    s_waitcnt lgkmcnt(0)
4722; VI-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
4723; VI-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4
4724; VI-NEXT:    s_addc_u32 s37, s37, 0
4725; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4726; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4727; VI-NEXT:    s_mov_b32 s32, 0
4728; VI-NEXT:    s_getpc_b64 s[4:5]
4729; VI-NEXT:    s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4730; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4731; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4732; VI-NEXT:    s_endpgm
4733;
4734; CI-LABEL: test_call_external_void_func_struct_i8_i32:
4735; CI:       ; %bb.0:
4736; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4737; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4738; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
4739; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4740; CI-NEXT:    s_mov_b32 s38, -1
4741; CI-NEXT:    s_mov_b32 s39, 0xe8f000
4742; CI-NEXT:    s_add_u32 s36, s36, s3
4743; CI-NEXT:    s_mov_b32 s3, 0xf000
4744; CI-NEXT:    s_mov_b32 s2, -1
4745; CI-NEXT:    s_waitcnt lgkmcnt(0)
4746; CI-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
4747; CI-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4
4748; CI-NEXT:    s_addc_u32 s37, s37, 0
4749; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4750; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4751; CI-NEXT:    s_mov_b32 s32, 0
4752; CI-NEXT:    s_getpc_b64 s[4:5]
4753; CI-NEXT:    s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4754; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4755; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4756; CI-NEXT:    s_endpgm
4757;
4758; GFX9-LABEL: test_call_external_void_func_struct_i8_i32:
4759; GFX9:       ; %bb.0:
4760; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4761; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
4762; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
4763; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4764; GFX9-NEXT:    s_mov_b32 s38, -1
4765; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
4766; GFX9-NEXT:    s_add_u32 s36, s36, s3
4767; GFX9-NEXT:    s_mov_b32 s3, 0xf000
4768; GFX9-NEXT:    s_mov_b32 s2, -1
4769; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
4770; GFX9-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
4771; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:4
4772; GFX9-NEXT:    s_addc_u32 s37, s37, 0
4773; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
4774; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
4775; GFX9-NEXT:    s_mov_b32 s32, 0
4776; GFX9-NEXT:    s_getpc_b64 s[4:5]
4777; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_struct_i8_i32@rel32@lo+4
4778; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_struct_i8_i32@rel32@hi+12
4779; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4780; GFX9-NEXT:    s_endpgm
4781;
4782; GFX11-LABEL: test_call_external_void_func_struct_i8_i32:
4783; GFX11:       ; %bb.0:
4784; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
4785; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
4786; GFX11-NEXT:    s_mov_b32 s6, -1
4787; GFX11-NEXT:    s_mov_b32 s32, 0
4788; GFX11-NEXT:    s_getpc_b64 s[2:3]
4789; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_struct_i8_i32@rel32@lo+4
4790; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_struct_i8_i32@rel32@hi+12
4791; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
4792; GFX11-NEXT:    s_clause 0x1
4793; GFX11-NEXT:    buffer_load_u8 v0, off, s[4:7], 0
4794; GFX11-NEXT:    buffer_load_b32 v1, off, s[4:7], 0 offset:4
4795; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
4796; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
4797; GFX11-NEXT:    s_endpgm
4798;
4799; HSA-LABEL: test_call_external_void_func_struct_i8_i32:
4800; HSA:       ; %bb.0:
4801; HSA-NEXT:    s_add_i32 s6, s6, s9
4802; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
4803; HSA-NEXT:    s_add_u32 s0, s0, s9
4804; HSA-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
4805; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
4806; HSA-NEXT:    s_mov_b32 s10, -1
4807; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
4808; HSA-NEXT:    s_waitcnt lgkmcnt(0)
4809; HSA-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
4810; HSA-NEXT:    buffer_load_dword v1, off, s[8:11], 0 offset:4
4811; HSA-NEXT:    s_addc_u32 s1, s1, 0
4812; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
4813; HSA-NEXT:    s_mov_b32 s32, 0
4814; HSA-NEXT:    s_getpc_b64 s[8:9]
4815; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_struct_i8_i32@rel32@lo+4
4816; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_struct_i8_i32@rel32@hi+12
4817; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4818; HSA-NEXT:    s_endpgm
4819  %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef
4820  %val = load { i8, i32 }, ptr addrspace(1) %ptr0
4821  call void @external_void_func_struct_i8_i32({ i8, i32 } %val)
4822  ret void
4823}
4824
; Kernel that builds a { i8, i32 } in an addrspace(5) alloca (align 8), stores
; 3 into the i8 field and 8 into the i32 field, and passes the alloca as a
; byval argument to external_void_func_byval_struct_i8_i32.
; The expected output writes both fields to scratch at offsets 0 and 4, reads
; the 8 bytes back, and copies them to s32 and s32+4 before the s_swappc_b64
; (the gfx1100 run does the copy with one 64-bit load/store pair). Unlike the
; tests that have no alloca, s32 starts at 0x400 here (16 in the gfx1100 run).
; NOTE(review): the non-zero initial s32 presumably reserves room for the
; kernel's own alloca below the outgoing byval copy -- confirm.
4825define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
4826; VI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4827; VI:       ; %bb.0:
4828; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4829; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4830; VI-NEXT:    s_mov_b32 s38, -1
4831; VI-NEXT:    s_mov_b32 s39, 0xe80000
4832; VI-NEXT:    s_add_u32 s36, s36, s3
4833; VI-NEXT:    s_addc_u32 s37, s37, 0
4834; VI-NEXT:    v_mov_b32_e32 v0, 3
4835; VI-NEXT:    buffer_store_byte v0, off, s[36:39], 0
4836; VI-NEXT:    v_mov_b32_e32 v0, 8
4837; VI-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
4838; VI-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
4839; VI-NEXT:    buffer_load_dword v1, off, s[36:39], 0
4840; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4841; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4842; VI-NEXT:    s_movk_i32 s32, 0x400
4843; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4844; VI-NEXT:    s_getpc_b64 s[4:5]
4845; VI-NEXT:    s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4846; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4847; VI-NEXT:    s_waitcnt vmcnt(1)
4848; VI-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
4849; VI-NEXT:    s_waitcnt vmcnt(1)
4850; VI-NEXT:    buffer_store_dword v1, off, s[36:39], s32
4851; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4852; VI-NEXT:    s_endpgm
4853;
4854; CI-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4855; CI:       ; %bb.0:
4856; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4857; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4858; CI-NEXT:    s_mov_b32 s38, -1
4859; CI-NEXT:    s_mov_b32 s39, 0xe8f000
4860; CI-NEXT:    s_add_u32 s36, s36, s3
4861; CI-NEXT:    s_addc_u32 s37, s37, 0
4862; CI-NEXT:    v_mov_b32_e32 v0, 3
4863; CI-NEXT:    buffer_store_byte v0, off, s[36:39], 0
4864; CI-NEXT:    v_mov_b32_e32 v0, 8
4865; CI-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
4866; CI-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
4867; CI-NEXT:    buffer_load_dword v1, off, s[36:39], 0
4868; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4869; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4870; CI-NEXT:    s_movk_i32 s32, 0x400
4871; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4872; CI-NEXT:    s_getpc_b64 s[4:5]
4873; CI-NEXT:    s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4874; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4875; CI-NEXT:    s_waitcnt vmcnt(1)
4876; CI-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
4877; CI-NEXT:    s_waitcnt vmcnt(1)
4878; CI-NEXT:    buffer_store_dword v1, off, s[36:39], s32
4879; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4880; CI-NEXT:    s_endpgm
4881;
4882; GFX9-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4883; GFX9:       ; %bb.0:
4884; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4885; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4886; GFX9-NEXT:    s_mov_b32 s38, -1
4887; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
4888; GFX9-NEXT:    s_add_u32 s36, s36, s3
4889; GFX9-NEXT:    s_addc_u32 s37, s37, 0
4890; GFX9-NEXT:    v_mov_b32_e32 v0, 3
4891; GFX9-NEXT:    buffer_store_byte v0, off, s[36:39], 0
4892; GFX9-NEXT:    v_mov_b32_e32 v0, 8
4893; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
4894; GFX9-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
4895; GFX9-NEXT:    s_nop 0
4896; GFX9-NEXT:    buffer_load_dword v1, off, s[36:39], 0
4897; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
4898; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
4899; GFX9-NEXT:    s_movk_i32 s32, 0x400
4900; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
4901; GFX9-NEXT:    s_getpc_b64 s[4:5]
4902; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_byval_struct_i8_i32@rel32@lo+4
4903; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_byval_struct_i8_i32@rel32@hi+12
4904; GFX9-NEXT:    s_waitcnt vmcnt(1)
4905; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
4906; GFX9-NEXT:    s_waitcnt vmcnt(1)
4907; GFX9-NEXT:    buffer_store_dword v1, off, s[36:39], s32
4908; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4909; GFX9-NEXT:    s_endpgm
4910;
4911; GFX11-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4912; GFX11:       ; %bb.0:
4913; GFX11-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
4914; GFX11-NEXT:    s_mov_b32 s32, 16
4915; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
4916; GFX11-NEXT:    s_getpc_b64 s[2:3]
4917; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_byval_struct_i8_i32@rel32@lo+4
4918; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_byval_struct_i8_i32@rel32@hi+12
4919; GFX11-NEXT:    s_clause 0x1
4920; GFX11-NEXT:    scratch_store_b8 off, v0, off
4921; GFX11-NEXT:    scratch_store_b32 off, v1, off offset:4
4922; GFX11-NEXT:    scratch_load_b64 v[0:1], off, off
4923; GFX11-NEXT:    s_waitcnt vmcnt(0)
4924; GFX11-NEXT:    scratch_store_b64 off, v[0:1], s32
4925; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
4926; GFX11-NEXT:    s_endpgm
4927;
4928; HSA-LABEL: test_call_external_void_func_byval_struct_i8_i32:
4929; HSA:       ; %bb.0:
4930; HSA-NEXT:    s_add_i32 s6, s6, s9
4931; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
4932; HSA-NEXT:    s_add_u32 s0, s0, s9
4933; HSA-NEXT:    s_addc_u32 s1, s1, 0
4934; HSA-NEXT:    v_mov_b32_e32 v0, 3
4935; HSA-NEXT:    buffer_store_byte v0, off, s[0:3], 0
4936; HSA-NEXT:    v_mov_b32_e32 v0, 8
4937; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
4938; HSA-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4
4939; HSA-NEXT:    buffer_load_dword v1, off, s[0:3], 0
4940; HSA-NEXT:    s_movk_i32 s32, 0x400
4941; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
4942; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
4943; HSA-NEXT:    s_getpc_b64 s[8:9]
4944; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_byval_struct_i8_i32@rel32@lo+4
4945; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_byval_struct_i8_i32@rel32@hi+12
4946; HSA-NEXT:    s_waitcnt vmcnt(1)
4947; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
4948; HSA-NEXT:    s_waitcnt vmcnt(1)
4949; HSA-NEXT:    buffer_store_dword v1, off, s[0:3], s32
4950; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
4951; HSA-NEXT:    s_endpgm
4952  %val = alloca { i8, i32 }, align 8, addrspace(5)
4953  %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 0
4954  %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1
4955  store i8 3, ptr addrspace(5) %gep0
4956  store i32 8, ptr addrspace(5) %gep1
4957  call void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val)
4958  ret void
4959}
4960
; Kernel that calls external_void_func_sret_struct_i8_i32_byval_struct_i8_i32
; with two { i8, i32 } stack objects: %out.val as the first pointer argument
; and %in.val as a byval argument. The input is initialised to { 3, 8 }; after
; the call both fields of %out.val are loaded and stored (volatile) to an
; undef addrspace(1) pointer. The kernel's own unnamed i32 argument is unused.
;
; The assertions below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
; In the expected output the input struct is written at scratch offsets 0 and
; 4, reloaded, and copied to the area starting at s32 before the call; v0 is
; set to 8 just before the call and the result fields are read back from
; offsets 8 and 12 afterwards.
4961define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 {
4962; VI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
4963; VI:       ; %bb.0:
4964; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
4965; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
4966; VI-NEXT:    s_mov_b32 s38, -1
4967; VI-NEXT:    s_mov_b32 s39, 0xe80000
4968; VI-NEXT:    s_add_u32 s36, s36, s5
4969; VI-NEXT:    s_addc_u32 s37, s37, 0
4970; VI-NEXT:    v_mov_b32_e32 v0, 3
4971; VI-NEXT:    buffer_store_byte v0, off, s[36:39], 0
4972; VI-NEXT:    v_mov_b32_e32 v0, 8
4973; VI-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
4974; VI-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
4975; VI-NEXT:    buffer_load_dword v1, off, s[36:39], 0
4976; VI-NEXT:    s_movk_i32 s32, 0x800
4977; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
4978; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
4979; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
4980; VI-NEXT:    s_getpc_b64 s[4:5]
4981; VI-NEXT:    s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
4982; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
4983; VI-NEXT:    s_waitcnt vmcnt(1)
4984; VI-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
4985; VI-NEXT:    s_waitcnt vmcnt(1)
4986; VI-NEXT:    buffer_store_dword v1, off, s[36:39], s32
4987; VI-NEXT:    v_mov_b32_e32 v0, 8
4988; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
4989; VI-NEXT:    buffer_load_ubyte v0, off, s[36:39], 0 offset:8
4990; VI-NEXT:    buffer_load_dword v1, off, s[36:39], 0 offset:12
4991; VI-NEXT:    s_mov_b32 s3, 0xf000
4992; VI-NEXT:    s_mov_b32 s2, -1
4993; VI-NEXT:    s_waitcnt vmcnt(1)
4994; VI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
4995; VI-NEXT:    s_waitcnt vmcnt(0)
4996; VI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
4997; VI-NEXT:    s_waitcnt vmcnt(0)
4998; VI-NEXT:    s_endpgm
4999;
5000; CI-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5001; CI:       ; %bb.0:
5002; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5003; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5004; CI-NEXT:    s_mov_b32 s38, -1
5005; CI-NEXT:    s_mov_b32 s39, 0xe8f000
5006; CI-NEXT:    s_add_u32 s36, s36, s5
5007; CI-NEXT:    s_addc_u32 s37, s37, 0
5008; CI-NEXT:    v_mov_b32_e32 v0, 3
5009; CI-NEXT:    buffer_store_byte v0, off, s[36:39], 0
5010; CI-NEXT:    v_mov_b32_e32 v0, 8
5011; CI-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
5012; CI-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
5013; CI-NEXT:    buffer_load_dword v1, off, s[36:39], 0
5014; CI-NEXT:    s_movk_i32 s32, 0x800
5015; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
5016; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
5017; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
5018; CI-NEXT:    s_getpc_b64 s[4:5]
5019; CI-NEXT:    s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5020; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5021; CI-NEXT:    s_waitcnt vmcnt(1)
5022; CI-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
5023; CI-NEXT:    s_waitcnt vmcnt(1)
5024; CI-NEXT:    buffer_store_dword v1, off, s[36:39], s32
5025; CI-NEXT:    v_mov_b32_e32 v0, 8
5026; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5027; CI-NEXT:    buffer_load_ubyte v0, off, s[36:39], 0 offset:8
5028; CI-NEXT:    buffer_load_dword v1, off, s[36:39], 0 offset:12
5029; CI-NEXT:    s_mov_b32 s3, 0xf000
5030; CI-NEXT:    s_mov_b32 s2, -1
5031; CI-NEXT:    s_waitcnt vmcnt(1)
5032; CI-NEXT:    buffer_store_byte v0, off, s[0:3], 0
5033; CI-NEXT:    s_waitcnt vmcnt(0)
5034; CI-NEXT:    buffer_store_dword v1, off, s[0:3], 0
5035; CI-NEXT:    s_waitcnt vmcnt(0)
5036; CI-NEXT:    s_endpgm
5037;
5038; GFX9-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5039; GFX9:       ; %bb.0:
5040; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5041; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5042; GFX9-NEXT:    s_mov_b32 s38, -1
5043; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
5044; GFX9-NEXT:    s_add_u32 s36, s36, s5
5045; GFX9-NEXT:    s_addc_u32 s37, s37, 0
5046; GFX9-NEXT:    v_mov_b32_e32 v0, 3
5047; GFX9-NEXT:    buffer_store_byte v0, off, s[36:39], 0
5048; GFX9-NEXT:    v_mov_b32_e32 v0, 8
5049; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], 0 offset:4
5050; GFX9-NEXT:    buffer_load_dword v0, off, s[36:39], 0 offset:4
5051; GFX9-NEXT:    s_nop 0
5052; GFX9-NEXT:    buffer_load_dword v1, off, s[36:39], 0
5053; GFX9-NEXT:    s_movk_i32 s32, 0x800
5054; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
5055; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
5056; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
5057; GFX9-NEXT:    s_getpc_b64 s[4:5]
5058; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5059; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5060; GFX9-NEXT:    s_waitcnt vmcnt(1)
5061; GFX9-NEXT:    buffer_store_dword v0, off, s[36:39], s32 offset:4
5062; GFX9-NEXT:    s_waitcnt vmcnt(1)
5063; GFX9-NEXT:    buffer_store_dword v1, off, s[36:39], s32
5064; GFX9-NEXT:    v_mov_b32_e32 v0, 8
5065; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5066; GFX9-NEXT:    buffer_load_ubyte v0, off, s[36:39], 0 offset:8
5067; GFX9-NEXT:    buffer_load_dword v1, off, s[36:39], 0 offset:12
5068; GFX9-NEXT:    s_mov_b32 s3, 0xf000
5069; GFX9-NEXT:    s_mov_b32 s2, -1
5070; GFX9-NEXT:    s_waitcnt vmcnt(1)
5071; GFX9-NEXT:    buffer_store_byte v0, off, s[0:3], 0
5072; GFX9-NEXT:    s_waitcnt vmcnt(0)
5073; GFX9-NEXT:    buffer_store_dword v1, off, s[0:3], 0
5074; GFX9-NEXT:    s_waitcnt vmcnt(0)
5075; GFX9-NEXT:    s_endpgm
5076;
5077; GFX11-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5078; GFX11:       ; %bb.0:
5079; GFX11-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 8
5080; GFX11-NEXT:    s_mov_b32 s32, 32
5081; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
5082; GFX11-NEXT:    s_getpc_b64 s[2:3]
5083; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5084; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5085; GFX11-NEXT:    s_clause 0x1
5086; GFX11-NEXT:    scratch_store_b8 off, v0, off
5087; GFX11-NEXT:    scratch_store_b32 off, v1, off offset:4
5088; GFX11-NEXT:    scratch_load_b64 v[0:1], off, off
5089; GFX11-NEXT:    s_waitcnt vmcnt(0)
5090; GFX11-NEXT:    scratch_store_b64 off, v[0:1], s32
5091; GFX11-NEXT:    v_mov_b32_e32 v0, 8
5092; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
5093; GFX11-NEXT:    s_clause 0x1
5094; GFX11-NEXT:    scratch_load_u8 v0, off, off offset:8
5095; GFX11-NEXT:    scratch_load_b32 v1, off, off offset:12
5096; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
5097; GFX11-NEXT:    s_mov_b32 s2, -1
5098; GFX11-NEXT:    s_waitcnt vmcnt(1)
5099; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0 dlc
5100; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5101; GFX11-NEXT:    s_waitcnt vmcnt(0)
5102; GFX11-NEXT:    buffer_store_b32 v1, off, s[0:3], 0 dlc
5103; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
5104; GFX11-NEXT:    s_nop 0
5105; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
5106; GFX11-NEXT:    s_endpgm
5107;
5108; HSA-LABEL: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32:
5109; HSA:       ; %bb.0:
5110; HSA-NEXT:    s_add_i32 s8, s8, s11
5111; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
5112; HSA-NEXT:    s_add_u32 s0, s0, s11
5113; HSA-NEXT:    s_addc_u32 s1, s1, 0
5114; HSA-NEXT:    v_mov_b32_e32 v0, 3
5115; HSA-NEXT:    buffer_store_byte v0, off, s[0:3], 0
5116; HSA-NEXT:    v_mov_b32_e32 v0, 8
5117; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
5118; HSA-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:4
5119; HSA-NEXT:    buffer_load_dword v1, off, s[0:3], 0
5120; HSA-NEXT:    s_movk_i32 s32, 0x800
5121; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
5122; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
5123; HSA-NEXT:    s_getpc_b64 s[8:9]
5124; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@lo+4
5125; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_sret_struct_i8_i32_byval_struct_i8_i32@rel32@hi+12
5126; HSA-NEXT:    s_waitcnt vmcnt(1)
5127; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
5128; HSA-NEXT:    s_waitcnt vmcnt(1)
5129; HSA-NEXT:    buffer_store_dword v1, off, s[0:3], s32
5130; HSA-NEXT:    v_mov_b32_e32 v0, 8
5131; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
5132; HSA-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0 offset:8
5133; HSA-NEXT:    buffer_load_dword v1, off, s[0:3], 0 offset:12
5134; HSA-NEXT:    s_mov_b32 s7, 0x1100f000
5135; HSA-NEXT:    s_mov_b32 s6, -1
5136; HSA-NEXT:    s_waitcnt vmcnt(1)
5137; HSA-NEXT:    buffer_store_byte v0, off, s[4:7], 0
5138; HSA-NEXT:    s_waitcnt vmcnt(0)
5139; HSA-NEXT:    buffer_store_dword v1, off, s[4:7], 0
5140; HSA-NEXT:    s_waitcnt vmcnt(0)
5141; HSA-NEXT:    s_endpgm
  ; Two addrspace(5) stack objects: the by-value input and the result buffer.
5142  %in.val = alloca { i8, i32 }, align 8, addrspace(5)
5143  %out.val = alloca { i8, i32 }, align 8, addrspace(5)
  ; Initialise the input struct to { 3, 8 }.
5144  %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0
5145  %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1
5146  store i8 3, ptr addrspace(5) %in.gep0
5147  store i32 8, ptr addrspace(5) %in.gep1
  ; NOTE(review): %out.val is passed without an sret attribute at this call
  ; site, while %in.val carries byval. Presumably the callee's declaration
  ; supplies sret for the first parameter - confirm against the declaration.
5148  call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) byval({ i8, i32 }) %in.val)
  ; Read both fields of the result buffer back after the call.
5149  %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0
5150  %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1
5151  %out.val0 = load i8, ptr addrspace(5) %out.gep0
5152  %out.val1 = load i32, ptr addrspace(5) %out.gep1
5153
  ; Volatile stores to an undef addrspace(1) pointer; presumably present only
  ; to keep the loaded result values live.
5154  store volatile i8 %out.val0, ptr addrspace(1) undef
5155  store volatile i32 %out.val1, ptr addrspace(1) undef
5156  ret void
5157}
5158
; Kernel that loads a pointer from an undef addrspace(4) address, loads a
; <16 x i8> through that pointer and passes the vector to
; external_void_func_v16i8.
;
; The assertions below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
; In the expected output the vector is fetched with a single 128-bit load into
; v[0:3]. Before the call v0 is left as loaded, v4/v8/v12 receive copies of
; v1/v2/v3, and the other registers up to v15 receive the loaded dwords
; shifted right by 8, 16 or 24 bits (v16-v18 are temporaries for v1-v3).
5159define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 {
5160; VI-LABEL: test_call_external_void_func_v16i8:
5161; VI:       ; %bb.0:
5162; VI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5163; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
5164; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
5165; VI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5166; VI-NEXT:    s_mov_b32 s38, -1
5167; VI-NEXT:    s_mov_b32 s39, 0xe80000
5168; VI-NEXT:    s_add_u32 s36, s36, s3
5169; VI-NEXT:    s_mov_b32 s3, 0xf000
5170; VI-NEXT:    s_mov_b32 s2, -1
5171; VI-NEXT:    s_waitcnt lgkmcnt(0)
5172; VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
5173; VI-NEXT:    s_addc_u32 s37, s37, 0
5174; VI-NEXT:    s_mov_b64 s[0:1], s[36:37]
5175; VI-NEXT:    s_mov_b64 s[2:3], s[38:39]
5176; VI-NEXT:    s_mov_b32 s32, 0
5177; VI-NEXT:    s_getpc_b64 s[4:5]
5178; VI-NEXT:    s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
5179; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
5180; VI-NEXT:    s_waitcnt vmcnt(0)
5181; VI-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
5182; VI-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
5183; VI-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
5184; VI-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
5185; VI-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
5186; VI-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
5187; VI-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
5188; VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
5189; VI-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
5190; VI-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
5191; VI-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
5192; VI-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
5193; VI-NEXT:    v_mov_b32_e32 v4, v1
5194; VI-NEXT:    v_mov_b32_e32 v8, v2
5195; VI-NEXT:    v_mov_b32_e32 v12, v3
5196; VI-NEXT:    v_mov_b32_e32 v1, v16
5197; VI-NEXT:    v_mov_b32_e32 v2, v17
5198; VI-NEXT:    v_mov_b32_e32 v3, v18
5199; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5200; VI-NEXT:    s_endpgm
5201;
5202; CI-LABEL: test_call_external_void_func_v16i8:
5203; CI:       ; %bb.0:
5204; CI-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5205; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
5206; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
5207; CI-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5208; CI-NEXT:    s_mov_b32 s38, -1
5209; CI-NEXT:    s_mov_b32 s39, 0xe8f000
5210; CI-NEXT:    s_add_u32 s36, s36, s3
5211; CI-NEXT:    s_mov_b32 s3, 0xf000
5212; CI-NEXT:    s_mov_b32 s2, -1
5213; CI-NEXT:    s_waitcnt lgkmcnt(0)
5214; CI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
5215; CI-NEXT:    s_addc_u32 s37, s37, 0
5216; CI-NEXT:    s_mov_b64 s[0:1], s[36:37]
5217; CI-NEXT:    s_mov_b64 s[2:3], s[38:39]
5218; CI-NEXT:    s_mov_b32 s32, 0
5219; CI-NEXT:    s_getpc_b64 s[4:5]
5220; CI-NEXT:    s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
5221; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
5222; CI-NEXT:    s_waitcnt vmcnt(0)
5223; CI-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
5224; CI-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
5225; CI-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
5226; CI-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
5227; CI-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
5228; CI-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
5229; CI-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
5230; CI-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
5231; CI-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
5232; CI-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
5233; CI-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
5234; CI-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
5235; CI-NEXT:    v_mov_b32_e32 v4, v1
5236; CI-NEXT:    v_mov_b32_e32 v8, v2
5237; CI-NEXT:    v_mov_b32_e32 v12, v3
5238; CI-NEXT:    v_mov_b32_e32 v1, v16
5239; CI-NEXT:    v_mov_b32_e32 v2, v17
5240; CI-NEXT:    v_mov_b32_e32 v3, v18
5241; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5242; CI-NEXT:    s_endpgm
5243;
5244; GFX9-LABEL: test_call_external_void_func_v16i8:
5245; GFX9:       ; %bb.0:
5246; GFX9-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
5247; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
5248; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
5249; GFX9-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
5250; GFX9-NEXT:    s_mov_b32 s38, -1
5251; GFX9-NEXT:    s_mov_b32 s39, 0xe00000
5252; GFX9-NEXT:    s_add_u32 s36, s36, s3
5253; GFX9-NEXT:    s_mov_b32 s3, 0xf000
5254; GFX9-NEXT:    s_mov_b32 s2, -1
5255; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5256; GFX9-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
5257; GFX9-NEXT:    s_addc_u32 s37, s37, 0
5258; GFX9-NEXT:    s_mov_b64 s[0:1], s[36:37]
5259; GFX9-NEXT:    s_mov_b64 s[2:3], s[38:39]
5260; GFX9-NEXT:    s_mov_b32 s32, 0
5261; GFX9-NEXT:    s_getpc_b64 s[4:5]
5262; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_v16i8@rel32@lo+4
5263; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i8@rel32@hi+12
5264; GFX9-NEXT:    s_waitcnt vmcnt(0)
5265; GFX9-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
5266; GFX9-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
5267; GFX9-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
5268; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
5269; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
5270; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
5271; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
5272; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
5273; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
5274; GFX9-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
5275; GFX9-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
5276; GFX9-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
5277; GFX9-NEXT:    v_mov_b32_e32 v4, v1
5278; GFX9-NEXT:    v_mov_b32_e32 v8, v2
5279; GFX9-NEXT:    v_mov_b32_e32 v12, v3
5280; GFX9-NEXT:    v_mov_b32_e32 v1, v16
5281; GFX9-NEXT:    v_mov_b32_e32 v2, v17
5282; GFX9-NEXT:    v_mov_b32_e32 v3, v18
5283; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5284; GFX9-NEXT:    s_endpgm
5285;
5286; GFX11-LABEL: test_call_external_void_func_v16i8:
5287; GFX11:       ; %bb.0:
5288; GFX11-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
5289; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
5290; GFX11-NEXT:    s_mov_b32 s6, -1
5291; GFX11-NEXT:    s_mov_b32 s32, 0
5292; GFX11-NEXT:    s_getpc_b64 s[2:3]
5293; GFX11-NEXT:    s_add_u32 s2, s2, external_void_func_v16i8@rel32@lo+4
5294; GFX11-NEXT:    s_addc_u32 s3, s3, external_void_func_v16i8@rel32@hi+12
5295; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5296; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[4:7], 0
5297; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
5298; GFX11-NEXT:    s_waitcnt vmcnt(0)
5299; GFX11-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
5300; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
5301; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
5302; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
5303; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
5304; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
5305; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
5306; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
5307; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
5308; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
5309; GFX11-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
5310; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
5311; GFX11-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16
5312; GFX11-NEXT:    v_mov_b32_e32 v8, v2
5313; GFX11-NEXT:    v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18
5314; GFX11-NEXT:    v_mov_b32_e32 v2, v17
5315; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
5316; GFX11-NEXT:    s_endpgm
5317;
5318; HSA-LABEL: test_call_external_void_func_v16i8:
5319; HSA:       ; %bb.0:
5320; HSA-NEXT:    s_add_i32 s6, s6, s9
5321; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s6, 8
5322; HSA-NEXT:    s_add_u32 s0, s0, s9
5323; HSA-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x0
5324; HSA-NEXT:    s_mov_b32 s11, 0x1100f000
5325; HSA-NEXT:    s_mov_b32 s10, -1
5326; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s7
5327; HSA-NEXT:    s_addc_u32 s1, s1, 0
5328; HSA-NEXT:    s_waitcnt lgkmcnt(0)
5329; HSA-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
5330; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
5331; HSA-NEXT:    s_mov_b32 s32, 0
5332; HSA-NEXT:    s_getpc_b64 s[8:9]
5333; HSA-NEXT:    s_add_u32 s8, s8, external_void_func_v16i8@rel32@lo+4
5334; HSA-NEXT:    s_addc_u32 s9, s9, external_void_func_v16i8@rel32@hi+12
5335; HSA-NEXT:    s_waitcnt vmcnt(0)
5336; HSA-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
5337; HSA-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
5338; HSA-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
5339; HSA-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
5340; HSA-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
5341; HSA-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
5342; HSA-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
5343; HSA-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
5344; HSA-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
5345; HSA-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
5346; HSA-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
5347; HSA-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
5348; HSA-NEXT:    v_mov_b32_e32 v4, v1
5349; HSA-NEXT:    v_mov_b32_e32 v8, v2
5350; HSA-NEXT:    v_mov_b32_e32 v12, v3
5351; HSA-NEXT:    v_mov_b32_e32 v1, v16
5352; HSA-NEXT:    v_mov_b32_e32 v2, v17
5353; HSA-NEXT:    v_mov_b32_e32 v3, v18
5354; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
5355; HSA-NEXT:    s_endpgm
  ; The source pointer itself comes from an undef address, so only the shape
  ; of the generated code is meaningful here, not the loaded values.
5356  %ptr = load ptr addrspace(1), ptr addrspace(4) undef
5357  %val = load <16 x i8>, ptr addrspace(1) %ptr
5358  call void @external_void_func_v16i8(<16 x i8> %val)
5359  ret void
5360}
5361
; Kernel that forwards its <32 x i32> and double arguments unchanged to
; stack_passed_f64_arg.
;
; The assertions below are autogenerated (see the note on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand.
; In the expected output elements 0-30 of %val are moved into v0-v30, while
; the final element of %val and the two dwords of %tmp are stored at offsets
; 0, 4 and 8 from s32, i.e. the double is placed directly after the last
; vector element in the outgoing stack area.
5362define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
5363; VI-LABEL: stack_passed_arg_alignment_v32i32_f64:
5364; VI:       ; %bb.0: ; %entry
5365; VI-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5366; VI-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5367; VI-NEXT:    s_mov_b32 s54, -1
5368; VI-NEXT:    s_mov_b32 s55, 0xe80000
5369; VI-NEXT:    s_add_u32 s52, s52, s5
5370; VI-NEXT:    s_load_dwordx16 s[8:23], s[2:3], 0x64
5371; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0xa4
5372; VI-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x24
5373; VI-NEXT:    s_mov_b32 s32, 0
5374; VI-NEXT:    s_addc_u32 s53, s53, 0
5375; VI-NEXT:    s_waitcnt lgkmcnt(0)
5376; VI-NEXT:    v_mov_b32_e32 v0, s23
5377; VI-NEXT:    buffer_store_dword v0, off, s[52:55], s32
5378; VI-NEXT:    v_mov_b32_e32 v0, s4
5379; VI-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:4
5380; VI-NEXT:    v_mov_b32_e32 v0, s5
5381; VI-NEXT:    s_mov_b64 s[6:7], s[0:1]
5382; VI-NEXT:    s_mov_b64 s[0:1], s[52:53]
5383; VI-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:8
5384; VI-NEXT:    s_mov_b64 s[2:3], s[54:55]
5385; VI-NEXT:    v_mov_b32_e32 v0, s36
5386; VI-NEXT:    v_mov_b32_e32 v1, s37
5387; VI-NEXT:    v_mov_b32_e32 v2, s38
5388; VI-NEXT:    v_mov_b32_e32 v3, s39
5389; VI-NEXT:    v_mov_b32_e32 v4, s40
5390; VI-NEXT:    v_mov_b32_e32 v5, s41
5391; VI-NEXT:    v_mov_b32_e32 v6, s42
5392; VI-NEXT:    v_mov_b32_e32 v7, s43
5393; VI-NEXT:    v_mov_b32_e32 v8, s44
5394; VI-NEXT:    v_mov_b32_e32 v9, s45
5395; VI-NEXT:    v_mov_b32_e32 v10, s46
5396; VI-NEXT:    v_mov_b32_e32 v11, s47
5397; VI-NEXT:    v_mov_b32_e32 v12, s48
5398; VI-NEXT:    v_mov_b32_e32 v13, s49
5399; VI-NEXT:    v_mov_b32_e32 v14, s50
5400; VI-NEXT:    v_mov_b32_e32 v15, s51
5401; VI-NEXT:    v_mov_b32_e32 v16, s8
5402; VI-NEXT:    v_mov_b32_e32 v17, s9
5403; VI-NEXT:    v_mov_b32_e32 v18, s10
5404; VI-NEXT:    v_mov_b32_e32 v19, s11
5405; VI-NEXT:    v_mov_b32_e32 v20, s12
5406; VI-NEXT:    v_mov_b32_e32 v21, s13
5407; VI-NEXT:    v_mov_b32_e32 v22, s14
5408; VI-NEXT:    v_mov_b32_e32 v23, s15
5409; VI-NEXT:    v_mov_b32_e32 v24, s16
5410; VI-NEXT:    v_mov_b32_e32 v25, s17
5411; VI-NEXT:    v_mov_b32_e32 v26, s18
5412; VI-NEXT:    v_mov_b32_e32 v27, s19
5413; VI-NEXT:    v_mov_b32_e32 v28, s20
5414; VI-NEXT:    v_mov_b32_e32 v29, s21
5415; VI-NEXT:    v_mov_b32_e32 v30, s22
5416; VI-NEXT:    s_getpc_b64 s[4:5]
5417; VI-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5418; VI-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5419; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5420; VI-NEXT:    s_endpgm
5421;
5422; CI-LABEL: stack_passed_arg_alignment_v32i32_f64:
5423; CI:       ; %bb.0: ; %entry
5424; CI-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5425; CI-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5426; CI-NEXT:    s_mov_b32 s54, -1
5427; CI-NEXT:    s_mov_b32 s55, 0xe8f000
5428; CI-NEXT:    s_add_u32 s52, s52, s5
5429; CI-NEXT:    s_load_dwordx16 s[8:23], s[2:3], 0x19
5430; CI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x29
5431; CI-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x9
5432; CI-NEXT:    s_mov_b32 s32, 0
5433; CI-NEXT:    s_addc_u32 s53, s53, 0
5434; CI-NEXT:    s_waitcnt lgkmcnt(0)
5435; CI-NEXT:    v_mov_b32_e32 v0, s23
5436; CI-NEXT:    buffer_store_dword v0, off, s[52:55], s32
5437; CI-NEXT:    v_mov_b32_e32 v0, s4
5438; CI-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:4
5439; CI-NEXT:    v_mov_b32_e32 v0, s5
5440; CI-NEXT:    s_mov_b64 s[6:7], s[0:1]
5441; CI-NEXT:    s_mov_b64 s[0:1], s[52:53]
5442; CI-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:8
5443; CI-NEXT:    s_mov_b64 s[2:3], s[54:55]
5444; CI-NEXT:    v_mov_b32_e32 v0, s36
5445; CI-NEXT:    v_mov_b32_e32 v1, s37
5446; CI-NEXT:    v_mov_b32_e32 v2, s38
5447; CI-NEXT:    v_mov_b32_e32 v3, s39
5448; CI-NEXT:    v_mov_b32_e32 v4, s40
5449; CI-NEXT:    v_mov_b32_e32 v5, s41
5450; CI-NEXT:    v_mov_b32_e32 v6, s42
5451; CI-NEXT:    v_mov_b32_e32 v7, s43
5452; CI-NEXT:    v_mov_b32_e32 v8, s44
5453; CI-NEXT:    v_mov_b32_e32 v9, s45
5454; CI-NEXT:    v_mov_b32_e32 v10, s46
5455; CI-NEXT:    v_mov_b32_e32 v11, s47
5456; CI-NEXT:    v_mov_b32_e32 v12, s48
5457; CI-NEXT:    v_mov_b32_e32 v13, s49
5458; CI-NEXT:    v_mov_b32_e32 v14, s50
5459; CI-NEXT:    v_mov_b32_e32 v15, s51
5460; CI-NEXT:    v_mov_b32_e32 v16, s8
5461; CI-NEXT:    v_mov_b32_e32 v17, s9
5462; CI-NEXT:    v_mov_b32_e32 v18, s10
5463; CI-NEXT:    v_mov_b32_e32 v19, s11
5464; CI-NEXT:    v_mov_b32_e32 v20, s12
5465; CI-NEXT:    v_mov_b32_e32 v21, s13
5466; CI-NEXT:    v_mov_b32_e32 v22, s14
5467; CI-NEXT:    v_mov_b32_e32 v23, s15
5468; CI-NEXT:    v_mov_b32_e32 v24, s16
5469; CI-NEXT:    v_mov_b32_e32 v25, s17
5470; CI-NEXT:    v_mov_b32_e32 v26, s18
5471; CI-NEXT:    v_mov_b32_e32 v27, s19
5472; CI-NEXT:    v_mov_b32_e32 v28, s20
5473; CI-NEXT:    v_mov_b32_e32 v29, s21
5474; CI-NEXT:    v_mov_b32_e32 v30, s22
5475; CI-NEXT:    s_getpc_b64 s[4:5]
5476; CI-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5477; CI-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5478; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5479; CI-NEXT:    s_endpgm
5480;
5481; GFX9-LABEL: stack_passed_arg_alignment_v32i32_f64:
5482; GFX9:       ; %bb.0: ; %entry
5483; GFX9-NEXT:    s_mov_b32 s52, SCRATCH_RSRC_DWORD0
5484; GFX9-NEXT:    s_mov_b32 s53, SCRATCH_RSRC_DWORD1
5485; GFX9-NEXT:    s_mov_b32 s54, -1
5486; GFX9-NEXT:    s_mov_b32 s55, 0xe00000
5487; GFX9-NEXT:    s_add_u32 s52, s52, s5
5488; GFX9-NEXT:    s_load_dwordx16 s[8:23], s[2:3], 0x64
5489; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0xa4
5490; GFX9-NEXT:    s_load_dwordx16 s[36:51], s[2:3], 0x24
5491; GFX9-NEXT:    s_mov_b32 s32, 0
5492; GFX9-NEXT:    s_addc_u32 s53, s53, 0
5493; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
5494; GFX9-NEXT:    v_mov_b32_e32 v0, s23
5495; GFX9-NEXT:    buffer_store_dword v0, off, s[52:55], s32
5496; GFX9-NEXT:    v_mov_b32_e32 v0, s4
5497; GFX9-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:4
5498; GFX9-NEXT:    v_mov_b32_e32 v0, s5
5499; GFX9-NEXT:    s_mov_b64 s[6:7], s[0:1]
5500; GFX9-NEXT:    s_mov_b64 s[0:1], s[52:53]
5501; GFX9-NEXT:    buffer_store_dword v0, off, s[52:55], s32 offset:8
5502; GFX9-NEXT:    s_mov_b64 s[2:3], s[54:55]
5503; GFX9-NEXT:    v_mov_b32_e32 v0, s36
5504; GFX9-NEXT:    v_mov_b32_e32 v1, s37
5505; GFX9-NEXT:    v_mov_b32_e32 v2, s38
5506; GFX9-NEXT:    v_mov_b32_e32 v3, s39
5507; GFX9-NEXT:    v_mov_b32_e32 v4, s40
5508; GFX9-NEXT:    v_mov_b32_e32 v5, s41
5509; GFX9-NEXT:    v_mov_b32_e32 v6, s42
5510; GFX9-NEXT:    v_mov_b32_e32 v7, s43
5511; GFX9-NEXT:    v_mov_b32_e32 v8, s44
5512; GFX9-NEXT:    v_mov_b32_e32 v9, s45
5513; GFX9-NEXT:    v_mov_b32_e32 v10, s46
5514; GFX9-NEXT:    v_mov_b32_e32 v11, s47
5515; GFX9-NEXT:    v_mov_b32_e32 v12, s48
5516; GFX9-NEXT:    v_mov_b32_e32 v13, s49
5517; GFX9-NEXT:    v_mov_b32_e32 v14, s50
5518; GFX9-NEXT:    v_mov_b32_e32 v15, s51
5519; GFX9-NEXT:    v_mov_b32_e32 v16, s8
5520; GFX9-NEXT:    v_mov_b32_e32 v17, s9
5521; GFX9-NEXT:    v_mov_b32_e32 v18, s10
5522; GFX9-NEXT:    v_mov_b32_e32 v19, s11
5523; GFX9-NEXT:    v_mov_b32_e32 v20, s12
5524; GFX9-NEXT:    v_mov_b32_e32 v21, s13
5525; GFX9-NEXT:    v_mov_b32_e32 v22, s14
5526; GFX9-NEXT:    v_mov_b32_e32 v23, s15
5527; GFX9-NEXT:    v_mov_b32_e32 v24, s16
5528; GFX9-NEXT:    v_mov_b32_e32 v25, s17
5529; GFX9-NEXT:    v_mov_b32_e32 v26, s18
5530; GFX9-NEXT:    v_mov_b32_e32 v27, s19
5531; GFX9-NEXT:    v_mov_b32_e32 v28, s20
5532; GFX9-NEXT:    v_mov_b32_e32 v29, s21
5533; GFX9-NEXT:    v_mov_b32_e32 v30, s22
5534; GFX9-NEXT:    s_getpc_b64 s[4:5]
5535; GFX9-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5536; GFX9-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5537; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5538; GFX9-NEXT:    s_endpgm
5539;
5540; GFX11-LABEL: stack_passed_arg_alignment_v32i32_f64:
5541; GFX11:       ; %bb.0: ; %entry
5542; GFX11-NEXT:    s_clause 0x2
5543; GFX11-NEXT:    s_load_b64 s[20:21], s[2:3], 0xa4
5544; GFX11-NEXT:    s_load_b512 s[4:19], s[2:3], 0x64
5545; GFX11-NEXT:    s_load_b512 s[36:51], s[2:3], 0x24
5546; GFX11-NEXT:    s_mov_b32 s32, 0
5547; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5548; GFX11-NEXT:    s_add_i32 s22, s32, 8
5549; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
5550; GFX11-NEXT:    v_dual_mov_b32 v0, s21 :: v_dual_mov_b32 v1, s20
5551; GFX11-NEXT:    v_mov_b32_e32 v2, s19
5552; GFX11-NEXT:    s_add_i32 s19, s32, 4
5553; GFX11-NEXT:    v_dual_mov_b32 v4, s40 :: v_dual_mov_b32 v7, s43
5554; GFX11-NEXT:    scratch_store_b32 off, v0, s22
5555; GFX11-NEXT:    scratch_store_b32 off, v1, s19
5556; GFX11-NEXT:    scratch_store_b32 off, v2, s32
5557; GFX11-NEXT:    v_dual_mov_b32 v0, s36 :: v_dual_mov_b32 v3, s39
5558; GFX11-NEXT:    v_dual_mov_b32 v1, s37 :: v_dual_mov_b32 v2, s38
5559; GFX11-NEXT:    v_dual_mov_b32 v5, s41 :: v_dual_mov_b32 v6, s42
5560; GFX11-NEXT:    v_dual_mov_b32 v9, s45 :: v_dual_mov_b32 v8, s44
5561; GFX11-NEXT:    v_dual_mov_b32 v11, s47 :: v_dual_mov_b32 v10, s46
5562; GFX11-NEXT:    v_dual_mov_b32 v13, s49 :: v_dual_mov_b32 v12, s48
5563; GFX11-NEXT:    v_dual_mov_b32 v15, s51 :: v_dual_mov_b32 v14, s50
5564; GFX11-NEXT:    v_dual_mov_b32 v17, s5 :: v_dual_mov_b32 v16, s4
5565; GFX11-NEXT:    v_dual_mov_b32 v19, s7 :: v_dual_mov_b32 v18, s6
5566; GFX11-NEXT:    v_dual_mov_b32 v21, s9 :: v_dual_mov_b32 v20, s8
5567; GFX11-NEXT:    v_dual_mov_b32 v23, s11 :: v_dual_mov_b32 v22, s10
5568; GFX11-NEXT:    v_dual_mov_b32 v25, s13 :: v_dual_mov_b32 v24, s12
5569; GFX11-NEXT:    v_dual_mov_b32 v27, s15 :: v_dual_mov_b32 v26, s14
5570; GFX11-NEXT:    v_dual_mov_b32 v29, s17 :: v_dual_mov_b32 v28, s16
5571; GFX11-NEXT:    v_mov_b32_e32 v30, s18
5572; GFX11-NEXT:    s_mov_b64 s[6:7], s[0:1]
5573; GFX11-NEXT:    s_getpc_b64 s[2:3]
5574; GFX11-NEXT:    s_add_u32 s2, s2, stack_passed_f64_arg@rel32@lo+4
5575; GFX11-NEXT:    s_addc_u32 s3, s3, stack_passed_f64_arg@rel32@hi+12
5576; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
5577; GFX11-NEXT:    s_swappc_b64 s[30:31], s[2:3]
5578; GFX11-NEXT:    s_endpgm
5579;
5580; HSA-LABEL: stack_passed_arg_alignment_v32i32_f64:
5581; HSA:       ; %bb.0: ; %entry
5582; HSA-NEXT:    s_add_i32 s8, s8, s11
5583; HSA-NEXT:    s_lshr_b32 flat_scratch_hi, s8, 8
5584; HSA-NEXT:    s_mov_b32 flat_scratch_lo, s9
5585; HSA-NEXT:    s_add_u32 s0, s0, s11
5586; HSA-NEXT:    s_load_dwordx16 s[8:23], s[6:7], 0x40
5587; HSA-NEXT:    s_load_dwordx2 s[24:25], s[6:7], 0x80
5588; HSA-NEXT:    s_load_dwordx16 s[36:51], s[6:7], 0x0
5589; HSA-NEXT:    s_mov_b32 s32, 0
5590; HSA-NEXT:    s_addc_u32 s1, s1, 0
5591; HSA-NEXT:    s_waitcnt lgkmcnt(0)
5592; HSA-NEXT:    v_mov_b32_e32 v0, s23
5593; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32
5594; HSA-NEXT:    v_mov_b32_e32 v0, s24
5595; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
5596; HSA-NEXT:    v_mov_b32_e32 v0, s25
5597; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
5598; HSA-NEXT:    s_mov_b64 s[6:7], s[4:5]
5599; HSA-NEXT:    v_mov_b32_e32 v0, s36
5600; HSA-NEXT:    v_mov_b32_e32 v1, s37
5601; HSA-NEXT:    v_mov_b32_e32 v2, s38
5602; HSA-NEXT:    v_mov_b32_e32 v3, s39
5603; HSA-NEXT:    v_mov_b32_e32 v4, s40
5604; HSA-NEXT:    v_mov_b32_e32 v5, s41
5605; HSA-NEXT:    v_mov_b32_e32 v6, s42
5606; HSA-NEXT:    v_mov_b32_e32 v7, s43
5607; HSA-NEXT:    v_mov_b32_e32 v8, s44
5608; HSA-NEXT:    v_mov_b32_e32 v9, s45
5609; HSA-NEXT:    v_mov_b32_e32 v10, s46
5610; HSA-NEXT:    v_mov_b32_e32 v11, s47
5611; HSA-NEXT:    v_mov_b32_e32 v12, s48
5612; HSA-NEXT:    v_mov_b32_e32 v13, s49
5613; HSA-NEXT:    v_mov_b32_e32 v14, s50
5614; HSA-NEXT:    v_mov_b32_e32 v15, s51
5615; HSA-NEXT:    v_mov_b32_e32 v16, s8
5616; HSA-NEXT:    v_mov_b32_e32 v17, s9
5617; HSA-NEXT:    v_mov_b32_e32 v18, s10
5618; HSA-NEXT:    v_mov_b32_e32 v19, s11
5619; HSA-NEXT:    v_mov_b32_e32 v20, s12
5620; HSA-NEXT:    v_mov_b32_e32 v21, s13
5621; HSA-NEXT:    v_mov_b32_e32 v22, s14
5622; HSA-NEXT:    v_mov_b32_e32 v23, s15
5623; HSA-NEXT:    v_mov_b32_e32 v24, s16
5624; HSA-NEXT:    v_mov_b32_e32 v25, s17
5625; HSA-NEXT:    v_mov_b32_e32 v26, s18
5626; HSA-NEXT:    v_mov_b32_e32 v27, s19
5627; HSA-NEXT:    v_mov_b32_e32 v28, s20
5628; HSA-NEXT:    v_mov_b32_e32 v29, s21
5629; HSA-NEXT:    v_mov_b32_e32 v30, s22
5630; HSA-NEXT:    s_getpc_b64 s[24:25]
5631; HSA-NEXT:    s_add_u32 s24, s24, stack_passed_f64_arg@rel32@lo+4
5632; HSA-NEXT:    s_addc_u32 s25, s25, stack_passed_f64_arg@rel32@hi+12
5633; HSA-NEXT:    s_swappc_b64 s[30:31], s[24:25]
5634; HSA-NEXT:    s_endpgm
5635entry:
  ; Both kernel arguments are passed straight through to the callee.
5636  call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
5637  ret void
5638}
5639
; Tail call that passes a stack object as a byval(double) argument carrying
; align 16 at the call site, after a <32 x i32> argument. The %tmp parameter
; is not referenced by the body. Every run line expects the call to be emitted
; as a jump through s_setpc_b64 to byval_align16_f64_arg, preceded by dword
; copies within the s32-relative stack area (on the buffer targets, offsets
; 24/28 are stored to offsets 16/20 and offset 0 is rewritten in place).
5640define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
5641; VI-LABEL: tail_call_byval_align16:
5642; VI:       ; %bb.0: ; %entry
5643; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5644; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
5645; VI-NEXT:    buffer_load_dword v32, off, s[0:3], s32
5646; VI-NEXT:    s_getpc_b64 s[4:5]
5647; VI-NEXT:    s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5648; VI-NEXT:    s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5649; VI-NEXT:    s_waitcnt vmcnt(1)
5650; VI-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:20
5651; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:24
5652; VI-NEXT:    s_waitcnt vmcnt(2)
5653; VI-NEXT:    buffer_store_dword v32, off, s[0:3], s32
5654; VI-NEXT:    s_waitcnt vmcnt(1)
5655; VI-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:16
5656; VI-NEXT:    s_setpc_b64 s[4:5]
5657;
5658; CI-LABEL: tail_call_byval_align16:
5659; CI:       ; %bb.0: ; %entry
5660; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5661; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
5662; CI-NEXT:    buffer_load_dword v32, off, s[0:3], s32
5663; CI-NEXT:    s_getpc_b64 s[4:5]
5664; CI-NEXT:    s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5665; CI-NEXT:    s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5666; CI-NEXT:    s_waitcnt vmcnt(1)
5667; CI-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:20
5668; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:24
5669; CI-NEXT:    s_waitcnt vmcnt(2)
5670; CI-NEXT:    buffer_store_dword v32, off, s[0:3], s32
5671; CI-NEXT:    s_waitcnt vmcnt(1)
5672; CI-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:16
5673; CI-NEXT:    s_setpc_b64 s[4:5]
5674;
5675; GFX9-LABEL: tail_call_byval_align16:
5676; GFX9:       ; %bb.0: ; %entry
5677; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5678; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
5679; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32
5680; GFX9-NEXT:    s_getpc_b64 s[4:5]
5681; GFX9-NEXT:    s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5682; GFX9-NEXT:    s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5683; GFX9-NEXT:    s_waitcnt vmcnt(1)
5684; GFX9-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:20
5685; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:24
5686; GFX9-NEXT:    s_waitcnt vmcnt(2)
5687; GFX9-NEXT:    buffer_store_dword v32, off, s[0:3], s32
5688; GFX9-NEXT:    s_waitcnt vmcnt(1)
5689; GFX9-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:16
5690; GFX9-NEXT:    s_setpc_b64 s[4:5]
5691;
5692; GFX11-LABEL: tail_call_byval_align16:
5693; GFX11:       ; %bb.0: ; %entry
5694; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5695; GFX11-NEXT:    scratch_load_b32 v31, off, s32
5696; GFX11-NEXT:    s_getpc_b64 s[0:1]
5697; GFX11-NEXT:    s_add_u32 s0, s0, byval_align16_f64_arg@rel32@lo+4
5698; GFX11-NEXT:    s_addc_u32 s1, s1, byval_align16_f64_arg@rel32@hi+12
5699; GFX11-NEXT:    s_waitcnt vmcnt(0)
5700; GFX11-NEXT:    scratch_store_b32 off, v31, s32
5701; GFX11-NEXT:    scratch_load_b64 v[31:32], off, s32 offset:24
5702; GFX11-NEXT:    s_waitcnt vmcnt(0)
5703; GFX11-NEXT:    scratch_store_b64 off, v[31:32], s32 offset:16
5704; GFX11-NEXT:    s_setpc_b64 s[0:1]
5705;
5706; HSA-LABEL: tail_call_byval_align16:
5707; HSA:       ; %bb.0: ; %entry
5708; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5709; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:28
5710; HSA-NEXT:    buffer_load_dword v32, off, s[0:3], s32
5711; HSA-NEXT:    s_getpc_b64 s[4:5]
5712; HSA-NEXT:    s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
5713; HSA-NEXT:    s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
5714; HSA-NEXT:    s_waitcnt vmcnt(1)
5715; HSA-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:20
5716; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32 offset:24
5717; HSA-NEXT:    s_waitcnt vmcnt(2)
5718; HSA-NEXT:    buffer_store_dword v32, off, s[0:3], s32
5719; HSA-NEXT:    s_waitcnt vmcnt(1)
5720; HSA-NEXT:    buffer_store_dword v31, off, s[0:3], s32 offset:16
5721; HSA-NEXT:    s_setpc_b64 s[4:5]
5722entry:
  ; The alloca is only align 8 and is never written in this function; the
  ; align 16 requirement comes solely from the byval attribute on the call.
5723  %alloca = alloca double, align 8, addrspace(5)
5724  tail call void @byval_align16_f64_arg(<32 x i32> %val, ptr addrspace(5) byval(double) align 16 %alloca)
5725  ret void
5726}
5727
; Tail call that forwards both incoming arguments (<32 x i32> %val and
; double %tmp) unchanged to stack_passed_f64_arg. Every run line expects three
; dwords to be reloaded from s32 offsets 0, 4 and 8 and stored back to the same
; offsets (GFX11 uses one b32 access at offset 0 and one b64 access at offset 4),
; followed by a jump through s_setpc_b64 to the callee.
5728define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
5729; VI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5730; VI:       ; %bb.0: ; %entry
5731; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5732; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
5733; VI-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
5734; VI-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
5735; VI-NEXT:    s_getpc_b64 s[4:5]
5736; VI-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5737; VI-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5738; VI-NEXT:    s_waitcnt vmcnt(2)
5739; VI-NEXT:    buffer_store_dword v31, off, s[0:3], s32
5740; VI-NEXT:    s_waitcnt vmcnt(2)
5741; VI-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:4
5742; VI-NEXT:    s_waitcnt vmcnt(2)
5743; VI-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:8
5744; VI-NEXT:    s_setpc_b64 s[4:5]
5745;
5746; CI-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5747; CI:       ; %bb.0: ; %entry
5748; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5749; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
5750; CI-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
5751; CI-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
5752; CI-NEXT:    s_getpc_b64 s[4:5]
5753; CI-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5754; CI-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5755; CI-NEXT:    s_waitcnt vmcnt(2)
5756; CI-NEXT:    buffer_store_dword v31, off, s[0:3], s32
5757; CI-NEXT:    s_waitcnt vmcnt(2)
5758; CI-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:4
5759; CI-NEXT:    s_waitcnt vmcnt(2)
5760; CI-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:8
5761; CI-NEXT:    s_setpc_b64 s[4:5]
5762;
5763; GFX9-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5764; GFX9:       ; %bb.0: ; %entry
5765; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5766; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32
5767; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
5768; GFX9-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
5769; GFX9-NEXT:    s_getpc_b64 s[4:5]
5770; GFX9-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5771; GFX9-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5772; GFX9-NEXT:    s_waitcnt vmcnt(2)
5773; GFX9-NEXT:    buffer_store_dword v31, off, s[0:3], s32
5774; GFX9-NEXT:    s_waitcnt vmcnt(2)
5775; GFX9-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:4
5776; GFX9-NEXT:    s_waitcnt vmcnt(2)
5777; GFX9-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:8
5778; GFX9-NEXT:    s_setpc_b64 s[4:5]
5779;
5780; GFX11-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5781; GFX11:       ; %bb.0: ; %entry
5782; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5783; GFX11-NEXT:    s_clause 0x1
5784; GFX11-NEXT:    scratch_load_b32 v33, off, s32
5785; GFX11-NEXT:    scratch_load_b64 v[31:32], off, s32 offset:4
5786; GFX11-NEXT:    s_getpc_b64 s[0:1]
5787; GFX11-NEXT:    s_add_u32 s0, s0, stack_passed_f64_arg@rel32@lo+4
5788; GFX11-NEXT:    s_addc_u32 s1, s1, stack_passed_f64_arg@rel32@hi+12
5789; GFX11-NEXT:    s_waitcnt vmcnt(1)
5790; GFX11-NEXT:    scratch_store_b32 off, v33, s32
5791; GFX11-NEXT:    s_waitcnt vmcnt(0)
5792; GFX11-NEXT:    scratch_store_b64 off, v[31:32], s32 offset:4
5793; GFX11-NEXT:    s_setpc_b64 s[0:1]
5794;
5795; HSA-LABEL: tail_call_stack_passed_arg_alignment_v32i32_f64:
5796; HSA:       ; %bb.0: ; %entry
5797; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5798; HSA-NEXT:    buffer_load_dword v31, off, s[0:3], s32
5799; HSA-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
5800; HSA-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
5801; HSA-NEXT:    s_getpc_b64 s[4:5]
5802; HSA-NEXT:    s_add_u32 s4, s4, stack_passed_f64_arg@rel32@lo+4
5803; HSA-NEXT:    s_addc_u32 s5, s5, stack_passed_f64_arg@rel32@hi+12
5804; HSA-NEXT:    s_waitcnt vmcnt(2)
5805; HSA-NEXT:    buffer_store_dword v31, off, s[0:3], s32
5806; HSA-NEXT:    s_waitcnt vmcnt(2)
5807; HSA-NEXT:    buffer_store_dword v32, off, s[0:3], s32 offset:4
5808; HSA-NEXT:    s_waitcnt vmcnt(2)
5809; HSA-NEXT:    buffer_store_dword v33, off, s[0:3], s32 offset:8
5810; HSA-NEXT:    s_setpc_b64 s[4:5]
5811entry:
  ; Same argument list as this function's own signature, passed through as-is.
5812  tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
5813  ret void
5814}
5815
; Ordinary (non-tail) call to external_void_func_12xv3i32 with twelve constant
; <3 x i32> arguments, 36 scalar elements in total. The first ten vectors are
; splats of 0 through 9; the last two are <10, 11, 12> and <13, 14, 15>.
; Every run line expects elements 0 through 10 in v0-v30 and the remaining
; values 11 through 15 stored to the stack at s32 offsets 0 through 16
; (GFX11 uses one b128 store at s32 plus one b32 store at s32 + 16).
; The call is made with s_swappc_b64; s30, s31 and the previous s33 are kept in
; lanes 0, 1 and 2 of v40 across it, and v40 itself is spilled and reloaded.
5816define void @stack_12xv3i32() #0 {
5817; VI-LABEL: stack_12xv3i32:
5818; VI:       ; %bb.0: ; %entry
5819; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5820; VI-NEXT:    s_mov_b32 s4, s33
5821; VI-NEXT:    s_mov_b32 s33, s32
5822; VI-NEXT:    s_or_saveexec_b64 s[8:9], -1
5823; VI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5824; VI-NEXT:    s_mov_b64 exec, s[8:9]
5825; VI-NEXT:    s_addk_i32 s32, 0x400
5826; VI-NEXT:    v_mov_b32_e32 v0, 11
5827; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32
5828; VI-NEXT:    v_mov_b32_e32 v0, 12
5829; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
5830; VI-NEXT:    v_mov_b32_e32 v0, 13
5831; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
5832; VI-NEXT:    v_mov_b32_e32 v0, 14
5833; VI-NEXT:    v_writelane_b32 v40, s4, 2
5834; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
5835; VI-NEXT:    v_mov_b32_e32 v0, 15
5836; VI-NEXT:    v_writelane_b32 v40, s30, 0
5837; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
5838; VI-NEXT:    v_mov_b32_e32 v0, 0
5839; VI-NEXT:    v_mov_b32_e32 v1, 0
5840; VI-NEXT:    v_mov_b32_e32 v2, 0
5841; VI-NEXT:    v_mov_b32_e32 v3, 1
5842; VI-NEXT:    v_mov_b32_e32 v4, 1
5843; VI-NEXT:    v_mov_b32_e32 v5, 1
5844; VI-NEXT:    v_mov_b32_e32 v6, 2
5845; VI-NEXT:    v_mov_b32_e32 v7, 2
5846; VI-NEXT:    v_mov_b32_e32 v8, 2
5847; VI-NEXT:    v_mov_b32_e32 v9, 3
5848; VI-NEXT:    v_mov_b32_e32 v10, 3
5849; VI-NEXT:    v_mov_b32_e32 v11, 3
5850; VI-NEXT:    v_mov_b32_e32 v12, 4
5851; VI-NEXT:    v_mov_b32_e32 v13, 4
5852; VI-NEXT:    v_mov_b32_e32 v14, 4
5853; VI-NEXT:    v_mov_b32_e32 v15, 5
5854; VI-NEXT:    v_mov_b32_e32 v16, 5
5855; VI-NEXT:    v_mov_b32_e32 v17, 5
5856; VI-NEXT:    v_mov_b32_e32 v18, 6
5857; VI-NEXT:    v_mov_b32_e32 v19, 6
5858; VI-NEXT:    v_mov_b32_e32 v20, 6
5859; VI-NEXT:    v_mov_b32_e32 v21, 7
5860; VI-NEXT:    v_mov_b32_e32 v22, 7
5861; VI-NEXT:    v_mov_b32_e32 v23, 7
5862; VI-NEXT:    v_mov_b32_e32 v24, 8
5863; VI-NEXT:    v_mov_b32_e32 v25, 8
5864; VI-NEXT:    v_mov_b32_e32 v26, 8
5865; VI-NEXT:    v_mov_b32_e32 v27, 9
5866; VI-NEXT:    v_mov_b32_e32 v28, 9
5867; VI-NEXT:    v_mov_b32_e32 v29, 9
5868; VI-NEXT:    v_mov_b32_e32 v30, 10
5869; VI-NEXT:    v_writelane_b32 v40, s31, 1
5870; VI-NEXT:    s_getpc_b64 s[4:5]
5871; VI-NEXT:    s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
5872; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
5873; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5874; VI-NEXT:    v_readlane_b32 s31, v40, 1
5875; VI-NEXT:    v_readlane_b32 s30, v40, 0
5876; VI-NEXT:    s_mov_b32 s32, s33
5877; VI-NEXT:    v_readlane_b32 s4, v40, 2
5878; VI-NEXT:    s_or_saveexec_b64 s[6:7], -1
5879; VI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5880; VI-NEXT:    s_mov_b64 exec, s[6:7]
5881; VI-NEXT:    s_mov_b32 s33, s4
5882; VI-NEXT:    s_waitcnt vmcnt(0)
5883; VI-NEXT:    s_setpc_b64 s[30:31]
5884;
5885; CI-LABEL: stack_12xv3i32:
5886; CI:       ; %bb.0: ; %entry
5887; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5888; CI-NEXT:    s_mov_b32 s4, s33
5889; CI-NEXT:    s_mov_b32 s33, s32
5890; CI-NEXT:    s_or_saveexec_b64 s[8:9], -1
5891; CI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5892; CI-NEXT:    s_mov_b64 exec, s[8:9]
5893; CI-NEXT:    s_addk_i32 s32, 0x400
5894; CI-NEXT:    v_mov_b32_e32 v0, 11
5895; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32
5896; CI-NEXT:    v_mov_b32_e32 v0, 12
5897; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
5898; CI-NEXT:    v_mov_b32_e32 v0, 13
5899; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
5900; CI-NEXT:    v_mov_b32_e32 v0, 14
5901; CI-NEXT:    v_writelane_b32 v40, s4, 2
5902; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
5903; CI-NEXT:    v_mov_b32_e32 v0, 15
5904; CI-NEXT:    v_writelane_b32 v40, s30, 0
5905; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
5906; CI-NEXT:    v_mov_b32_e32 v0, 0
5907; CI-NEXT:    v_mov_b32_e32 v1, 0
5908; CI-NEXT:    v_mov_b32_e32 v2, 0
5909; CI-NEXT:    v_mov_b32_e32 v3, 1
5910; CI-NEXT:    v_mov_b32_e32 v4, 1
5911; CI-NEXT:    v_mov_b32_e32 v5, 1
5912; CI-NEXT:    v_mov_b32_e32 v6, 2
5913; CI-NEXT:    v_mov_b32_e32 v7, 2
5914; CI-NEXT:    v_mov_b32_e32 v8, 2
5915; CI-NEXT:    v_mov_b32_e32 v9, 3
5916; CI-NEXT:    v_mov_b32_e32 v10, 3
5917; CI-NEXT:    v_mov_b32_e32 v11, 3
5918; CI-NEXT:    v_mov_b32_e32 v12, 4
5919; CI-NEXT:    v_mov_b32_e32 v13, 4
5920; CI-NEXT:    v_mov_b32_e32 v14, 4
5921; CI-NEXT:    v_mov_b32_e32 v15, 5
5922; CI-NEXT:    v_mov_b32_e32 v16, 5
5923; CI-NEXT:    v_mov_b32_e32 v17, 5
5924; CI-NEXT:    v_mov_b32_e32 v18, 6
5925; CI-NEXT:    v_mov_b32_e32 v19, 6
5926; CI-NEXT:    v_mov_b32_e32 v20, 6
5927; CI-NEXT:    v_mov_b32_e32 v21, 7
5928; CI-NEXT:    v_mov_b32_e32 v22, 7
5929; CI-NEXT:    v_mov_b32_e32 v23, 7
5930; CI-NEXT:    v_mov_b32_e32 v24, 8
5931; CI-NEXT:    v_mov_b32_e32 v25, 8
5932; CI-NEXT:    v_mov_b32_e32 v26, 8
5933; CI-NEXT:    v_mov_b32_e32 v27, 9
5934; CI-NEXT:    v_mov_b32_e32 v28, 9
5935; CI-NEXT:    v_mov_b32_e32 v29, 9
5936; CI-NEXT:    v_mov_b32_e32 v30, 10
5937; CI-NEXT:    v_writelane_b32 v40, s31, 1
5938; CI-NEXT:    s_getpc_b64 s[4:5]
5939; CI-NEXT:    s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
5940; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
5941; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
5942; CI-NEXT:    v_readlane_b32 s31, v40, 1
5943; CI-NEXT:    v_readlane_b32 s30, v40, 0
5944; CI-NEXT:    s_mov_b32 s32, s33
5945; CI-NEXT:    v_readlane_b32 s4, v40, 2
5946; CI-NEXT:    s_or_saveexec_b64 s[6:7], -1
5947; CI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
5948; CI-NEXT:    s_mov_b64 exec, s[6:7]
5949; CI-NEXT:    s_mov_b32 s33, s4
5950; CI-NEXT:    s_waitcnt vmcnt(0)
5951; CI-NEXT:    s_setpc_b64 s[30:31]
5952;
5953; GFX9-LABEL: stack_12xv3i32:
5954; GFX9:       ; %bb.0: ; %entry
5955; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5956; GFX9-NEXT:    s_mov_b32 s4, s33
5957; GFX9-NEXT:    s_mov_b32 s33, s32
5958; GFX9-NEXT:    s_or_saveexec_b64 s[8:9], -1
5959; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
5960; GFX9-NEXT:    s_mov_b64 exec, s[8:9]
5961; GFX9-NEXT:    s_addk_i32 s32, 0x400
5962; GFX9-NEXT:    v_mov_b32_e32 v0, 11
5963; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
5964; GFX9-NEXT:    v_mov_b32_e32 v0, 12
5965; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
5966; GFX9-NEXT:    v_mov_b32_e32 v0, 13
5967; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
5968; GFX9-NEXT:    v_mov_b32_e32 v0, 14
5969; GFX9-NEXT:    v_writelane_b32 v40, s4, 2
5970; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
5971; GFX9-NEXT:    v_mov_b32_e32 v0, 15
5972; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
5973; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
5974; GFX9-NEXT:    v_mov_b32_e32 v0, 0
5975; GFX9-NEXT:    v_mov_b32_e32 v1, 0
5976; GFX9-NEXT:    v_mov_b32_e32 v2, 0
5977; GFX9-NEXT:    v_mov_b32_e32 v3, 1
5978; GFX9-NEXT:    v_mov_b32_e32 v4, 1
5979; GFX9-NEXT:    v_mov_b32_e32 v5, 1
5980; GFX9-NEXT:    v_mov_b32_e32 v6, 2
5981; GFX9-NEXT:    v_mov_b32_e32 v7, 2
5982; GFX9-NEXT:    v_mov_b32_e32 v8, 2
5983; GFX9-NEXT:    v_mov_b32_e32 v9, 3
5984; GFX9-NEXT:    v_mov_b32_e32 v10, 3
5985; GFX9-NEXT:    v_mov_b32_e32 v11, 3
5986; GFX9-NEXT:    v_mov_b32_e32 v12, 4
5987; GFX9-NEXT:    v_mov_b32_e32 v13, 4
5988; GFX9-NEXT:    v_mov_b32_e32 v14, 4
5989; GFX9-NEXT:    v_mov_b32_e32 v15, 5
5990; GFX9-NEXT:    v_mov_b32_e32 v16, 5
5991; GFX9-NEXT:    v_mov_b32_e32 v17, 5
5992; GFX9-NEXT:    v_mov_b32_e32 v18, 6
5993; GFX9-NEXT:    v_mov_b32_e32 v19, 6
5994; GFX9-NEXT:    v_mov_b32_e32 v20, 6
5995; GFX9-NEXT:    v_mov_b32_e32 v21, 7
5996; GFX9-NEXT:    v_mov_b32_e32 v22, 7
5997; GFX9-NEXT:    v_mov_b32_e32 v23, 7
5998; GFX9-NEXT:    v_mov_b32_e32 v24, 8
5999; GFX9-NEXT:    v_mov_b32_e32 v25, 8
6000; GFX9-NEXT:    v_mov_b32_e32 v26, 8
6001; GFX9-NEXT:    v_mov_b32_e32 v27, 9
6002; GFX9-NEXT:    v_mov_b32_e32 v28, 9
6003; GFX9-NEXT:    v_mov_b32_e32 v29, 9
6004; GFX9-NEXT:    v_mov_b32_e32 v30, 10
6005; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
6006; GFX9-NEXT:    s_getpc_b64 s[4:5]
6007; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
6008; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
6009; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6010; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
6011; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
6012; GFX9-NEXT:    s_mov_b32 s32, s33
6013; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
6014; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
6015; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6016; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
6017; GFX9-NEXT:    s_mov_b32 s33, s4
6018; GFX9-NEXT:    s_waitcnt vmcnt(0)
6019; GFX9-NEXT:    s_setpc_b64 s[30:31]
6020;
6021; GFX11-LABEL: stack_12xv3i32:
6022; GFX11:       ; %bb.0: ; %entry
6023; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6024; GFX11-NEXT:    s_mov_b32 s0, s33
6025; GFX11-NEXT:    s_mov_b32 s33, s32
6026; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
6027; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6028; GFX11-NEXT:    s_mov_b32 exec_lo, s1
6029; GFX11-NEXT:    v_writelane_b32 v40, s0, 2
6030; GFX11-NEXT:    v_dual_mov_b32 v0, 11 :: v_dual_mov_b32 v1, 12
6031; GFX11-NEXT:    v_dual_mov_b32 v2, 13 :: v_dual_mov_b32 v3, 14
6032; GFX11-NEXT:    v_mov_b32_e32 v4, 15
6033; GFX11-NEXT:    s_add_i32 s32, s32, 16
6034; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
6035; GFX11-NEXT:    s_add_i32 s0, s32, 16
6036; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32
6037; GFX11-NEXT:    scratch_store_b32 off, v4, s0
6038; GFX11-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0
6039; GFX11-NEXT:    v_dual_mov_b32 v3, 1 :: v_dual_mov_b32 v2, 0
6040; GFX11-NEXT:    v_dual_mov_b32 v5, 1 :: v_dual_mov_b32 v4, 1
6041; GFX11-NEXT:    v_dual_mov_b32 v7, 2 :: v_dual_mov_b32 v6, 2
6042; GFX11-NEXT:    v_dual_mov_b32 v9, 3 :: v_dual_mov_b32 v8, 2
6043; GFX11-NEXT:    v_dual_mov_b32 v11, 3 :: v_dual_mov_b32 v10, 3
6044; GFX11-NEXT:    v_dual_mov_b32 v13, 4 :: v_dual_mov_b32 v12, 4
6045; GFX11-NEXT:    v_dual_mov_b32 v15, 5 :: v_dual_mov_b32 v14, 4
6046; GFX11-NEXT:    v_dual_mov_b32 v17, 5 :: v_dual_mov_b32 v16, 5
6047; GFX11-NEXT:    v_dual_mov_b32 v19, 6 :: v_dual_mov_b32 v18, 6
6048; GFX11-NEXT:    v_dual_mov_b32 v21, 7 :: v_dual_mov_b32 v20, 6
6049; GFX11-NEXT:    v_dual_mov_b32 v23, 7 :: v_dual_mov_b32 v22, 7
6050; GFX11-NEXT:    v_dual_mov_b32 v25, 8 :: v_dual_mov_b32 v24, 8
6051; GFX11-NEXT:    v_dual_mov_b32 v27, 9 :: v_dual_mov_b32 v26, 8
6052; GFX11-NEXT:    v_dual_mov_b32 v29, 9 :: v_dual_mov_b32 v28, 9
6053; GFX11-NEXT:    v_mov_b32_e32 v30, 10
6054; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
6055; GFX11-NEXT:    s_getpc_b64 s[0:1]
6056; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_12xv3i32@rel32@lo+4
6057; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_12xv3i32@rel32@hi+12
6058; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6059; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6060; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
6061; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
6062; GFX11-NEXT:    s_mov_b32 s32, s33
6063; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
6064; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
6065; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6066; GFX11-NEXT:    s_mov_b32 exec_lo, s1
6067; GFX11-NEXT:    s_mov_b32 s33, s0
6068; GFX11-NEXT:    s_waitcnt vmcnt(0)
6069; GFX11-NEXT:    s_setpc_b64 s[30:31]
6070;
6071; HSA-LABEL: stack_12xv3i32:
6072; HSA:       ; %bb.0: ; %entry
6073; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6074; HSA-NEXT:    s_mov_b32 s4, s33
6075; HSA-NEXT:    s_mov_b32 s33, s32
6076; HSA-NEXT:    s_or_saveexec_b64 s[8:9], -1
6077; HSA-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6078; HSA-NEXT:    s_mov_b64 exec, s[8:9]
6079; HSA-NEXT:    s_addk_i32 s32, 0x400
6080; HSA-NEXT:    v_mov_b32_e32 v0, 11
6081; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6082; HSA-NEXT:    v_mov_b32_e32 v0, 12
6083; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6084; HSA-NEXT:    v_mov_b32_e32 v0, 13
6085; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6086; HSA-NEXT:    v_mov_b32_e32 v0, 14
6087; HSA-NEXT:    v_writelane_b32 v40, s4, 2
6088; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6089; HSA-NEXT:    v_mov_b32_e32 v0, 15
6090; HSA-NEXT:    v_writelane_b32 v40, s30, 0
6091; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6092; HSA-NEXT:    v_mov_b32_e32 v0, 0
6093; HSA-NEXT:    v_mov_b32_e32 v1, 0
6094; HSA-NEXT:    v_mov_b32_e32 v2, 0
6095; HSA-NEXT:    v_mov_b32_e32 v3, 1
6096; HSA-NEXT:    v_mov_b32_e32 v4, 1
6097; HSA-NEXT:    v_mov_b32_e32 v5, 1
6098; HSA-NEXT:    v_mov_b32_e32 v6, 2
6099; HSA-NEXT:    v_mov_b32_e32 v7, 2
6100; HSA-NEXT:    v_mov_b32_e32 v8, 2
6101; HSA-NEXT:    v_mov_b32_e32 v9, 3
6102; HSA-NEXT:    v_mov_b32_e32 v10, 3
6103; HSA-NEXT:    v_mov_b32_e32 v11, 3
6104; HSA-NEXT:    v_mov_b32_e32 v12, 4
6105; HSA-NEXT:    v_mov_b32_e32 v13, 4
6106; HSA-NEXT:    v_mov_b32_e32 v14, 4
6107; HSA-NEXT:    v_mov_b32_e32 v15, 5
6108; HSA-NEXT:    v_mov_b32_e32 v16, 5
6109; HSA-NEXT:    v_mov_b32_e32 v17, 5
6110; HSA-NEXT:    v_mov_b32_e32 v18, 6
6111; HSA-NEXT:    v_mov_b32_e32 v19, 6
6112; HSA-NEXT:    v_mov_b32_e32 v20, 6
6113; HSA-NEXT:    v_mov_b32_e32 v21, 7
6114; HSA-NEXT:    v_mov_b32_e32 v22, 7
6115; HSA-NEXT:    v_mov_b32_e32 v23, 7
6116; HSA-NEXT:    v_mov_b32_e32 v24, 8
6117; HSA-NEXT:    v_mov_b32_e32 v25, 8
6118; HSA-NEXT:    v_mov_b32_e32 v26, 8
6119; HSA-NEXT:    v_mov_b32_e32 v27, 9
6120; HSA-NEXT:    v_mov_b32_e32 v28, 9
6121; HSA-NEXT:    v_mov_b32_e32 v29, 9
6122; HSA-NEXT:    v_mov_b32_e32 v30, 10
6123; HSA-NEXT:    v_writelane_b32 v40, s31, 1
6124; HSA-NEXT:    s_getpc_b64 s[4:5]
6125; HSA-NEXT:    s_add_u32 s4, s4, external_void_func_12xv3i32@rel32@lo+4
6126; HSA-NEXT:    s_addc_u32 s5, s5, external_void_func_12xv3i32@rel32@hi+12
6127; HSA-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6128; HSA-NEXT:    v_readlane_b32 s31, v40, 1
6129; HSA-NEXT:    v_readlane_b32 s30, v40, 0
6130; HSA-NEXT:    s_mov_b32 s32, s33
6131; HSA-NEXT:    v_readlane_b32 s4, v40, 2
6132; HSA-NEXT:    s_or_saveexec_b64 s[6:7], -1
6133; HSA-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6134; HSA-NEXT:    s_mov_b64 exec, s[6:7]
6135; HSA-NEXT:    s_mov_b32 s33, s4
6136; HSA-NEXT:    s_waitcnt vmcnt(0)
6137; HSA-NEXT:    s_setpc_b64 s[30:31]
6138entry:
  ; Distinct element values make it visible in the checks which element lands
  ; in which register or stack slot.
6139  call void @external_void_func_12xv3i32(
6140      <3 x i32><i32 0, i32 0, i32 0>,
6141      <3 x i32><i32 1, i32 1, i32 1>,
6142      <3 x i32><i32 2, i32 2, i32 2>,
6143      <3 x i32><i32 3, i32 3, i32 3>,
6144      <3 x i32><i32 4, i32 4, i32 4>,
6145      <3 x i32><i32 5, i32 5, i32 5>,
6146      <3 x i32><i32 6, i32 6, i32 6>,
6147      <3 x i32><i32 7, i32 7, i32 7>,
6148      <3 x i32><i32 8, i32 8, i32 8>,
6149      <3 x i32><i32 9, i32 9, i32 9>,
6150      <3 x i32><i32 10, i32 11, i32 12>,
6151      <3 x i32><i32 13, i32 14, i32 15>)
6152  ret void
6153}
6154
6155define void @stack_12xv3f32() #0 {
6156; VI-LABEL: stack_12xv3f32:
6157; VI:       ; %bb.0: ; %entry
6158; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6159; VI-NEXT:    s_mov_b32 s4, s33
6160; VI-NEXT:    s_mov_b32 s33, s32
6161; VI-NEXT:    s_or_saveexec_b64 s[8:9], -1
6162; VI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6163; VI-NEXT:    s_mov_b64 exec, s[8:9]
6164; VI-NEXT:    s_addk_i32 s32, 0x400
6165; VI-NEXT:    v_mov_b32_e32 v0, 0x41300000
6166; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6167; VI-NEXT:    v_mov_b32_e32 v0, 0x41400000
6168; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6169; VI-NEXT:    v_mov_b32_e32 v0, 0x41500000
6170; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6171; VI-NEXT:    v_mov_b32_e32 v0, 0x41600000
6172; VI-NEXT:    v_writelane_b32 v40, s4, 2
6173; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6174; VI-NEXT:    v_mov_b32_e32 v0, 0x41700000
6175; VI-NEXT:    v_writelane_b32 v40, s30, 0
6176; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6177; VI-NEXT:    v_mov_b32_e32 v0, 0
6178; VI-NEXT:    v_mov_b32_e32 v1, 0
6179; VI-NEXT:    v_mov_b32_e32 v2, 0
6180; VI-NEXT:    v_mov_b32_e32 v3, 1.0
6181; VI-NEXT:    v_mov_b32_e32 v4, 1.0
6182; VI-NEXT:    v_mov_b32_e32 v5, 1.0
6183; VI-NEXT:    v_mov_b32_e32 v6, 2.0
6184; VI-NEXT:    v_mov_b32_e32 v7, 2.0
6185; VI-NEXT:    v_mov_b32_e32 v8, 2.0
6186; VI-NEXT:    v_mov_b32_e32 v9, 0x40400000
6187; VI-NEXT:    v_mov_b32_e32 v10, 0x40400000
6188; VI-NEXT:    v_mov_b32_e32 v11, 0x40400000
6189; VI-NEXT:    v_mov_b32_e32 v12, 4.0
6190; VI-NEXT:    v_mov_b32_e32 v13, 4.0
6191; VI-NEXT:    v_mov_b32_e32 v14, 4.0
6192; VI-NEXT:    v_mov_b32_e32 v15, 0x40a00000
6193; VI-NEXT:    v_mov_b32_e32 v16, 0x40a00000
6194; VI-NEXT:    v_mov_b32_e32 v17, 0x40a00000
6195; VI-NEXT:    v_mov_b32_e32 v18, 0x40c00000
6196; VI-NEXT:    v_mov_b32_e32 v19, 0x40c00000
6197; VI-NEXT:    v_mov_b32_e32 v20, 0x40c00000
6198; VI-NEXT:    v_mov_b32_e32 v21, 0x40e00000
6199; VI-NEXT:    v_mov_b32_e32 v22, 0x40e00000
6200; VI-NEXT:    v_mov_b32_e32 v23, 0x40e00000
6201; VI-NEXT:    v_mov_b32_e32 v24, 0x41000000
6202; VI-NEXT:    v_mov_b32_e32 v25, 0x41000000
6203; VI-NEXT:    v_mov_b32_e32 v26, 0x41000000
6204; VI-NEXT:    v_mov_b32_e32 v27, 0x41100000
6205; VI-NEXT:    v_mov_b32_e32 v28, 0x41100000
6206; VI-NEXT:    v_mov_b32_e32 v29, 0x41100000
6207; VI-NEXT:    v_mov_b32_e32 v30, 0x41200000
6208; VI-NEXT:    v_writelane_b32 v40, s31, 1
6209; VI-NEXT:    s_getpc_b64 s[4:5]
6210; VI-NEXT:    s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6211; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6212; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6213; VI-NEXT:    v_readlane_b32 s31, v40, 1
6214; VI-NEXT:    v_readlane_b32 s30, v40, 0
6215; VI-NEXT:    s_mov_b32 s32, s33
6216; VI-NEXT:    v_readlane_b32 s4, v40, 2
6217; VI-NEXT:    s_or_saveexec_b64 s[6:7], -1
6218; VI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6219; VI-NEXT:    s_mov_b64 exec, s[6:7]
6220; VI-NEXT:    s_mov_b32 s33, s4
6221; VI-NEXT:    s_waitcnt vmcnt(0)
6222; VI-NEXT:    s_setpc_b64 s[30:31]
6223;
6224; CI-LABEL: stack_12xv3f32:
6225; CI:       ; %bb.0: ; %entry
6226; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6227; CI-NEXT:    s_mov_b32 s4, s33
6228; CI-NEXT:    s_mov_b32 s33, s32
6229; CI-NEXT:    s_or_saveexec_b64 s[8:9], -1
6230; CI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6231; CI-NEXT:    s_mov_b64 exec, s[8:9]
6232; CI-NEXT:    s_addk_i32 s32, 0x400
6233; CI-NEXT:    v_mov_b32_e32 v0, 0x41300000
6234; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6235; CI-NEXT:    v_mov_b32_e32 v0, 0x41400000
6236; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6237; CI-NEXT:    v_mov_b32_e32 v0, 0x41500000
6238; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6239; CI-NEXT:    v_mov_b32_e32 v0, 0x41600000
6240; CI-NEXT:    v_writelane_b32 v40, s4, 2
6241; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6242; CI-NEXT:    v_mov_b32_e32 v0, 0x41700000
6243; CI-NEXT:    v_writelane_b32 v40, s30, 0
6244; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6245; CI-NEXT:    v_mov_b32_e32 v0, 0
6246; CI-NEXT:    v_mov_b32_e32 v1, 0
6247; CI-NEXT:    v_mov_b32_e32 v2, 0
6248; CI-NEXT:    v_mov_b32_e32 v3, 1.0
6249; CI-NEXT:    v_mov_b32_e32 v4, 1.0
6250; CI-NEXT:    v_mov_b32_e32 v5, 1.0
6251; CI-NEXT:    v_mov_b32_e32 v6, 2.0
6252; CI-NEXT:    v_mov_b32_e32 v7, 2.0
6253; CI-NEXT:    v_mov_b32_e32 v8, 2.0
6254; CI-NEXT:    v_mov_b32_e32 v9, 0x40400000
6255; CI-NEXT:    v_mov_b32_e32 v10, 0x40400000
6256; CI-NEXT:    v_mov_b32_e32 v11, 0x40400000
6257; CI-NEXT:    v_mov_b32_e32 v12, 4.0
6258; CI-NEXT:    v_mov_b32_e32 v13, 4.0
6259; CI-NEXT:    v_mov_b32_e32 v14, 4.0
6260; CI-NEXT:    v_mov_b32_e32 v15, 0x40a00000
6261; CI-NEXT:    v_mov_b32_e32 v16, 0x40a00000
6262; CI-NEXT:    v_mov_b32_e32 v17, 0x40a00000
6263; CI-NEXT:    v_mov_b32_e32 v18, 0x40c00000
6264; CI-NEXT:    v_mov_b32_e32 v19, 0x40c00000
6265; CI-NEXT:    v_mov_b32_e32 v20, 0x40c00000
6266; CI-NEXT:    v_mov_b32_e32 v21, 0x40e00000
6267; CI-NEXT:    v_mov_b32_e32 v22, 0x40e00000
6268; CI-NEXT:    v_mov_b32_e32 v23, 0x40e00000
6269; CI-NEXT:    v_mov_b32_e32 v24, 0x41000000
6270; CI-NEXT:    v_mov_b32_e32 v25, 0x41000000
6271; CI-NEXT:    v_mov_b32_e32 v26, 0x41000000
6272; CI-NEXT:    v_mov_b32_e32 v27, 0x41100000
6273; CI-NEXT:    v_mov_b32_e32 v28, 0x41100000
6274; CI-NEXT:    v_mov_b32_e32 v29, 0x41100000
6275; CI-NEXT:    v_mov_b32_e32 v30, 0x41200000
6276; CI-NEXT:    v_writelane_b32 v40, s31, 1
6277; CI-NEXT:    s_getpc_b64 s[4:5]
6278; CI-NEXT:    s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6279; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6280; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6281; CI-NEXT:    v_readlane_b32 s31, v40, 1
6282; CI-NEXT:    v_readlane_b32 s30, v40, 0
6283; CI-NEXT:    s_mov_b32 s32, s33
6284; CI-NEXT:    v_readlane_b32 s4, v40, 2
6285; CI-NEXT:    s_or_saveexec_b64 s[6:7], -1
6286; CI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6287; CI-NEXT:    s_mov_b64 exec, s[6:7]
6288; CI-NEXT:    s_mov_b32 s33, s4
6289; CI-NEXT:    s_waitcnt vmcnt(0)
6290; CI-NEXT:    s_setpc_b64 s[30:31]
6291;
6292; GFX9-LABEL: stack_12xv3f32:
6293; GFX9:       ; %bb.0: ; %entry
6294; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6295; GFX9-NEXT:    s_mov_b32 s4, s33
6296; GFX9-NEXT:    s_mov_b32 s33, s32
6297; GFX9-NEXT:    s_or_saveexec_b64 s[8:9], -1
6298; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6299; GFX9-NEXT:    s_mov_b64 exec, s[8:9]
6300; GFX9-NEXT:    s_addk_i32 s32, 0x400
6301; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41300000
6302; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6303; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41400000
6304; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6305; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41500000
6306; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6307; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41600000
6308; GFX9-NEXT:    v_writelane_b32 v40, s4, 2
6309; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6310; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41700000
6311; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
6312; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6313; GFX9-NEXT:    v_mov_b32_e32 v0, 0
6314; GFX9-NEXT:    v_mov_b32_e32 v1, 0
6315; GFX9-NEXT:    v_mov_b32_e32 v2, 0
6316; GFX9-NEXT:    v_mov_b32_e32 v3, 1.0
6317; GFX9-NEXT:    v_mov_b32_e32 v4, 1.0
6318; GFX9-NEXT:    v_mov_b32_e32 v5, 1.0
6319; GFX9-NEXT:    v_mov_b32_e32 v6, 2.0
6320; GFX9-NEXT:    v_mov_b32_e32 v7, 2.0
6321; GFX9-NEXT:    v_mov_b32_e32 v8, 2.0
6322; GFX9-NEXT:    v_mov_b32_e32 v9, 0x40400000
6323; GFX9-NEXT:    v_mov_b32_e32 v10, 0x40400000
6324; GFX9-NEXT:    v_mov_b32_e32 v11, 0x40400000
6325; GFX9-NEXT:    v_mov_b32_e32 v12, 4.0
6326; GFX9-NEXT:    v_mov_b32_e32 v13, 4.0
6327; GFX9-NEXT:    v_mov_b32_e32 v14, 4.0
6328; GFX9-NEXT:    v_mov_b32_e32 v15, 0x40a00000
6329; GFX9-NEXT:    v_mov_b32_e32 v16, 0x40a00000
6330; GFX9-NEXT:    v_mov_b32_e32 v17, 0x40a00000
6331; GFX9-NEXT:    v_mov_b32_e32 v18, 0x40c00000
6332; GFX9-NEXT:    v_mov_b32_e32 v19, 0x40c00000
6333; GFX9-NEXT:    v_mov_b32_e32 v20, 0x40c00000
6334; GFX9-NEXT:    v_mov_b32_e32 v21, 0x40e00000
6335; GFX9-NEXT:    v_mov_b32_e32 v22, 0x40e00000
6336; GFX9-NEXT:    v_mov_b32_e32 v23, 0x40e00000
6337; GFX9-NEXT:    v_mov_b32_e32 v24, 0x41000000
6338; GFX9-NEXT:    v_mov_b32_e32 v25, 0x41000000
6339; GFX9-NEXT:    v_mov_b32_e32 v26, 0x41000000
6340; GFX9-NEXT:    v_mov_b32_e32 v27, 0x41100000
6341; GFX9-NEXT:    v_mov_b32_e32 v28, 0x41100000
6342; GFX9-NEXT:    v_mov_b32_e32 v29, 0x41100000
6343; GFX9-NEXT:    v_mov_b32_e32 v30, 0x41200000
6344; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
6345; GFX9-NEXT:    s_getpc_b64 s[4:5]
6346; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6347; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6348; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6349; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
6350; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
6351; GFX9-NEXT:    s_mov_b32 s32, s33
6352; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
6353; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
6354; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6355; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
6356; GFX9-NEXT:    s_mov_b32 s33, s4
6357; GFX9-NEXT:    s_waitcnt vmcnt(0)
6358; GFX9-NEXT:    s_setpc_b64 s[30:31]
6359;
6360; GFX11-LABEL: stack_12xv3f32:
6361; GFX11:       ; %bb.0: ; %entry
6362; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6363; GFX11-NEXT:    s_mov_b32 s0, s33
6364; GFX11-NEXT:    s_mov_b32 s33, s32
6365; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
6366; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6367; GFX11-NEXT:    s_mov_b32 exec_lo, s1
6368; GFX11-NEXT:    v_writelane_b32 v40, s0, 2
6369; GFX11-NEXT:    v_mov_b32_e32 v0, 0x41300000
6370; GFX11-NEXT:    v_mov_b32_e32 v1, 0x41400000
6371; GFX11-NEXT:    v_mov_b32_e32 v2, 0x41500000
6372; GFX11-NEXT:    v_mov_b32_e32 v3, 0x41600000
6373; GFX11-NEXT:    v_dual_mov_b32 v4, 0x41700000 :: v_dual_mov_b32 v5, 1.0
6374; GFX11-NEXT:    s_add_i32 s32, s32, 16
6375; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
6376; GFX11-NEXT:    s_add_i32 s0, s32, 16
6377; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32
6378; GFX11-NEXT:    scratch_store_b32 off, v4, s0
6379; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
6380; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 1.0
6381; GFX11-NEXT:    v_dual_mov_b32 v4, 1.0 :: v_dual_mov_b32 v7, 2.0
6382; GFX11-NEXT:    v_dual_mov_b32 v6, 2.0 :: v_dual_mov_b32 v9, 0x40400000
6383; GFX11-NEXT:    v_dual_mov_b32 v8, 2.0 :: v_dual_mov_b32 v11, 0x40400000
6384; GFX11-NEXT:    v_dual_mov_b32 v10, 0x40400000 :: v_dual_mov_b32 v13, 4.0
6385; GFX11-NEXT:    v_dual_mov_b32 v12, 4.0 :: v_dual_mov_b32 v15, 0x40a00000
6386; GFX11-NEXT:    v_dual_mov_b32 v14, 4.0 :: v_dual_mov_b32 v17, 0x40a00000
6387; GFX11-NEXT:    v_mov_b32_e32 v16, 0x40a00000
6388; GFX11-NEXT:    v_dual_mov_b32 v18, 0x40c00000 :: v_dual_mov_b32 v19, 0x40c00000
6389; GFX11-NEXT:    v_mov_b32_e32 v20, 0x40c00000
6390; GFX11-NEXT:    v_dual_mov_b32 v21, 0x40e00000 :: v_dual_mov_b32 v22, 0x40e00000
6391; GFX11-NEXT:    v_mov_b32_e32 v23, 0x40e00000
6392; GFX11-NEXT:    v_dual_mov_b32 v24, 0x41000000 :: v_dual_mov_b32 v25, 0x41000000
6393; GFX11-NEXT:    v_mov_b32_e32 v26, 0x41000000
6394; GFX11-NEXT:    v_dual_mov_b32 v27, 0x41100000 :: v_dual_mov_b32 v28, 0x41100000
6395; GFX11-NEXT:    v_mov_b32_e32 v29, 0x41100000
6396; GFX11-NEXT:    v_mov_b32_e32 v30, 0x41200000
6397; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
6398; GFX11-NEXT:    s_getpc_b64 s[0:1]
6399; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_12xv3f32@rel32@lo+4
6400; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_12xv3f32@rel32@hi+12
6401; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6402; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6403; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
6404; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
6405; GFX11-NEXT:    s_mov_b32 s32, s33
6406; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
6407; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
6408; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6409; GFX11-NEXT:    s_mov_b32 exec_lo, s1
6410; GFX11-NEXT:    s_mov_b32 s33, s0
6411; GFX11-NEXT:    s_waitcnt vmcnt(0)
6412; GFX11-NEXT:    s_setpc_b64 s[30:31]
6413;
6414; HSA-LABEL: stack_12xv3f32:
6415; HSA:       ; %bb.0: ; %entry
6416; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6417; HSA-NEXT:    s_mov_b32 s4, s33
6418; HSA-NEXT:    s_mov_b32 s33, s32
6419; HSA-NEXT:    s_or_saveexec_b64 s[8:9], -1
6420; HSA-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6421; HSA-NEXT:    s_mov_b64 exec, s[8:9]
6422; HSA-NEXT:    s_addk_i32 s32, 0x400
6423; HSA-NEXT:    v_mov_b32_e32 v0, 0x41300000
6424; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6425; HSA-NEXT:    v_mov_b32_e32 v0, 0x41400000
6426; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6427; HSA-NEXT:    v_mov_b32_e32 v0, 0x41500000
6428; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6429; HSA-NEXT:    v_mov_b32_e32 v0, 0x41600000
6430; HSA-NEXT:    v_writelane_b32 v40, s4, 2
6431; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6432; HSA-NEXT:    v_mov_b32_e32 v0, 0x41700000
6433; HSA-NEXT:    v_writelane_b32 v40, s30, 0
6434; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6435; HSA-NEXT:    v_mov_b32_e32 v0, 0
6436; HSA-NEXT:    v_mov_b32_e32 v1, 0
6437; HSA-NEXT:    v_mov_b32_e32 v2, 0
6438; HSA-NEXT:    v_mov_b32_e32 v3, 1.0
6439; HSA-NEXT:    v_mov_b32_e32 v4, 1.0
6440; HSA-NEXT:    v_mov_b32_e32 v5, 1.0
6441; HSA-NEXT:    v_mov_b32_e32 v6, 2.0
6442; HSA-NEXT:    v_mov_b32_e32 v7, 2.0
6443; HSA-NEXT:    v_mov_b32_e32 v8, 2.0
6444; HSA-NEXT:    v_mov_b32_e32 v9, 0x40400000
6445; HSA-NEXT:    v_mov_b32_e32 v10, 0x40400000
6446; HSA-NEXT:    v_mov_b32_e32 v11, 0x40400000
6447; HSA-NEXT:    v_mov_b32_e32 v12, 4.0
6448; HSA-NEXT:    v_mov_b32_e32 v13, 4.0
6449; HSA-NEXT:    v_mov_b32_e32 v14, 4.0
6450; HSA-NEXT:    v_mov_b32_e32 v15, 0x40a00000
6451; HSA-NEXT:    v_mov_b32_e32 v16, 0x40a00000
6452; HSA-NEXT:    v_mov_b32_e32 v17, 0x40a00000
6453; HSA-NEXT:    v_mov_b32_e32 v18, 0x40c00000
6454; HSA-NEXT:    v_mov_b32_e32 v19, 0x40c00000
6455; HSA-NEXT:    v_mov_b32_e32 v20, 0x40c00000
6456; HSA-NEXT:    v_mov_b32_e32 v21, 0x40e00000
6457; HSA-NEXT:    v_mov_b32_e32 v22, 0x40e00000
6458; HSA-NEXT:    v_mov_b32_e32 v23, 0x40e00000
6459; HSA-NEXT:    v_mov_b32_e32 v24, 0x41000000
6460; HSA-NEXT:    v_mov_b32_e32 v25, 0x41000000
6461; HSA-NEXT:    v_mov_b32_e32 v26, 0x41000000
6462; HSA-NEXT:    v_mov_b32_e32 v27, 0x41100000
6463; HSA-NEXT:    v_mov_b32_e32 v28, 0x41100000
6464; HSA-NEXT:    v_mov_b32_e32 v29, 0x41100000
6465; HSA-NEXT:    v_mov_b32_e32 v30, 0x41200000
6466; HSA-NEXT:    v_writelane_b32 v40, s31, 1
6467; HSA-NEXT:    s_getpc_b64 s[4:5]
6468; HSA-NEXT:    s_add_u32 s4, s4, external_void_func_12xv3f32@rel32@lo+4
6469; HSA-NEXT:    s_addc_u32 s5, s5, external_void_func_12xv3f32@rel32@hi+12
6470; HSA-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6471; HSA-NEXT:    v_readlane_b32 s31, v40, 1
6472; HSA-NEXT:    v_readlane_b32 s30, v40, 0
6473; HSA-NEXT:    s_mov_b32 s32, s33
6474; HSA-NEXT:    v_readlane_b32 s4, v40, 2
6475; HSA-NEXT:    s_or_saveexec_b64 s[6:7], -1
6476; HSA-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6477; HSA-NEXT:    s_mov_b64 exec, s[6:7]
6478; HSA-NEXT:    s_mov_b32 s33, s4
6479; HSA-NEXT:    s_waitcnt vmcnt(0)
6480; HSA-NEXT:    s_setpc_b64 s[30:31]
6481entry:
6482  call void @external_void_func_12xv3f32(
6483      <3 x float><float 0.0, float 0.0, float 0.0>,
6484      <3 x float><float 1.0, float 1.0, float 1.0>,
6485      <3 x float><float 2.0, float 2.0, float 2.0>,
6486      <3 x float><float 3.0, float 3.0, float 3.0>,
6487      <3 x float><float 4.0, float 4.0, float 4.0>,
6488      <3 x float><float 5.0, float 5.0, float 5.0>,
6489      <3 x float><float 6.0, float 6.0, float 6.0>,
6490      <3 x float><float 7.0, float 7.0, float 7.0>,
6491      <3 x float><float 8.0, float 8.0, float 8.0>,
6492      <3 x float><float 9.0, float 9.0, float 9.0>,
6493      <3 x float><float 10.0, float 11.0, float 12.0>,
6494      <3 x float><float 13.0, float 14.0, float 15.0>)
6495  ret void
6496}
6497
; Test function stack_8xv5i32 - passes eight constant <5 x i32> vectors
; (40 i32 values in total) to @external_void_func_8xv5i32.
;
; What the autogenerated checks below pin down:
;   * v0..v30 receive the first 31 values in argument order (five 0s, five 1s,
;     and so on up to five 5s, then the single value 6 in v30).
;   * The remaining nine values (7..15) are written to memory addressed from
;     s32, at byte offsets 0 through 32.
;   * The VI, CI, GFX9 and HSA runs emit one buffer_store_dword per value; the
;     GFX11 run emits two scratch_store_b128 plus one scratch_store_b32.
;   * v40 is spilled before and reloaded after the call; its lanes 0, 1 and 2
;     hold s30, s31 and the incoming s33 respectively.
; NOTE(review): the callee declaration and attribute group #0 are outside this
; excerpt - confirm against the declarations at the top of the file.
6498define void @stack_8xv5i32() #0 {
6499; VI-LABEL: stack_8xv5i32:
6500; VI:       ; %bb.0: ; %entry
6501; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6502; VI-NEXT:    s_mov_b32 s4, s33
6503; VI-NEXT:    s_mov_b32 s33, s32
6504; VI-NEXT:    s_or_saveexec_b64 s[8:9], -1
6505; VI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6506; VI-NEXT:    s_mov_b64 exec, s[8:9]
6507; VI-NEXT:    s_addk_i32 s32, 0x400
6508; VI-NEXT:    v_mov_b32_e32 v0, 7
6509; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6510; VI-NEXT:    v_mov_b32_e32 v0, 8
6511; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6512; VI-NEXT:    v_mov_b32_e32 v0, 9
6513; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6514; VI-NEXT:    v_mov_b32_e32 v0, 10
6515; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6516; VI-NEXT:    v_mov_b32_e32 v0, 11
6517; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6518; VI-NEXT:    v_mov_b32_e32 v0, 12
6519; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
6520; VI-NEXT:    v_mov_b32_e32 v0, 13
6521; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
6522; VI-NEXT:    v_mov_b32_e32 v0, 14
6523; VI-NEXT:    v_writelane_b32 v40, s4, 2
6524; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
6525; VI-NEXT:    v_mov_b32_e32 v0, 15
6526; VI-NEXT:    v_writelane_b32 v40, s30, 0
6527; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
6528; VI-NEXT:    v_mov_b32_e32 v0, 0
6529; VI-NEXT:    v_mov_b32_e32 v1, 0
6530; VI-NEXT:    v_mov_b32_e32 v2, 0
6531; VI-NEXT:    v_mov_b32_e32 v3, 0
6532; VI-NEXT:    v_mov_b32_e32 v4, 0
6533; VI-NEXT:    v_mov_b32_e32 v5, 1
6534; VI-NEXT:    v_mov_b32_e32 v6, 1
6535; VI-NEXT:    v_mov_b32_e32 v7, 1
6536; VI-NEXT:    v_mov_b32_e32 v8, 1
6537; VI-NEXT:    v_mov_b32_e32 v9, 1
6538; VI-NEXT:    v_mov_b32_e32 v10, 2
6539; VI-NEXT:    v_mov_b32_e32 v11, 2
6540; VI-NEXT:    v_mov_b32_e32 v12, 2
6541; VI-NEXT:    v_mov_b32_e32 v13, 2
6542; VI-NEXT:    v_mov_b32_e32 v14, 2
6543; VI-NEXT:    v_mov_b32_e32 v15, 3
6544; VI-NEXT:    v_mov_b32_e32 v16, 3
6545; VI-NEXT:    v_mov_b32_e32 v17, 3
6546; VI-NEXT:    v_mov_b32_e32 v18, 3
6547; VI-NEXT:    v_mov_b32_e32 v19, 3
6548; VI-NEXT:    v_mov_b32_e32 v20, 4
6549; VI-NEXT:    v_mov_b32_e32 v21, 4
6550; VI-NEXT:    v_mov_b32_e32 v22, 4
6551; VI-NEXT:    v_mov_b32_e32 v23, 4
6552; VI-NEXT:    v_mov_b32_e32 v24, 4
6553; VI-NEXT:    v_mov_b32_e32 v25, 5
6554; VI-NEXT:    v_mov_b32_e32 v26, 5
6555; VI-NEXT:    v_mov_b32_e32 v27, 5
6556; VI-NEXT:    v_mov_b32_e32 v28, 5
6557; VI-NEXT:    v_mov_b32_e32 v29, 5
6558; VI-NEXT:    v_mov_b32_e32 v30, 6
6559; VI-NEXT:    v_writelane_b32 v40, s31, 1
6560; VI-NEXT:    s_getpc_b64 s[4:5]
6561; VI-NEXT:    s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6562; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6563; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6564; VI-NEXT:    v_readlane_b32 s31, v40, 1
6565; VI-NEXT:    v_readlane_b32 s30, v40, 0
6566; VI-NEXT:    s_mov_b32 s32, s33
6567; VI-NEXT:    v_readlane_b32 s4, v40, 2
6568; VI-NEXT:    s_or_saveexec_b64 s[6:7], -1
6569; VI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6570; VI-NEXT:    s_mov_b64 exec, s[6:7]
6571; VI-NEXT:    s_mov_b32 s33, s4
6572; VI-NEXT:    s_waitcnt vmcnt(0)
6573; VI-NEXT:    s_setpc_b64 s[30:31]
6574;
6575; CI-LABEL: stack_8xv5i32:
6576; CI:       ; %bb.0: ; %entry
6577; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6578; CI-NEXT:    s_mov_b32 s4, s33
6579; CI-NEXT:    s_mov_b32 s33, s32
6580; CI-NEXT:    s_or_saveexec_b64 s[8:9], -1
6581; CI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6582; CI-NEXT:    s_mov_b64 exec, s[8:9]
6583; CI-NEXT:    s_addk_i32 s32, 0x400
6584; CI-NEXT:    v_mov_b32_e32 v0, 7
6585; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6586; CI-NEXT:    v_mov_b32_e32 v0, 8
6587; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6588; CI-NEXT:    v_mov_b32_e32 v0, 9
6589; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6590; CI-NEXT:    v_mov_b32_e32 v0, 10
6591; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6592; CI-NEXT:    v_mov_b32_e32 v0, 11
6593; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6594; CI-NEXT:    v_mov_b32_e32 v0, 12
6595; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
6596; CI-NEXT:    v_mov_b32_e32 v0, 13
6597; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
6598; CI-NEXT:    v_mov_b32_e32 v0, 14
6599; CI-NEXT:    v_writelane_b32 v40, s4, 2
6600; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
6601; CI-NEXT:    v_mov_b32_e32 v0, 15
6602; CI-NEXT:    v_writelane_b32 v40, s30, 0
6603; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
6604; CI-NEXT:    v_mov_b32_e32 v0, 0
6605; CI-NEXT:    v_mov_b32_e32 v1, 0
6606; CI-NEXT:    v_mov_b32_e32 v2, 0
6607; CI-NEXT:    v_mov_b32_e32 v3, 0
6608; CI-NEXT:    v_mov_b32_e32 v4, 0
6609; CI-NEXT:    v_mov_b32_e32 v5, 1
6610; CI-NEXT:    v_mov_b32_e32 v6, 1
6611; CI-NEXT:    v_mov_b32_e32 v7, 1
6612; CI-NEXT:    v_mov_b32_e32 v8, 1
6613; CI-NEXT:    v_mov_b32_e32 v9, 1
6614; CI-NEXT:    v_mov_b32_e32 v10, 2
6615; CI-NEXT:    v_mov_b32_e32 v11, 2
6616; CI-NEXT:    v_mov_b32_e32 v12, 2
6617; CI-NEXT:    v_mov_b32_e32 v13, 2
6618; CI-NEXT:    v_mov_b32_e32 v14, 2
6619; CI-NEXT:    v_mov_b32_e32 v15, 3
6620; CI-NEXT:    v_mov_b32_e32 v16, 3
6621; CI-NEXT:    v_mov_b32_e32 v17, 3
6622; CI-NEXT:    v_mov_b32_e32 v18, 3
6623; CI-NEXT:    v_mov_b32_e32 v19, 3
6624; CI-NEXT:    v_mov_b32_e32 v20, 4
6625; CI-NEXT:    v_mov_b32_e32 v21, 4
6626; CI-NEXT:    v_mov_b32_e32 v22, 4
6627; CI-NEXT:    v_mov_b32_e32 v23, 4
6628; CI-NEXT:    v_mov_b32_e32 v24, 4
6629; CI-NEXT:    v_mov_b32_e32 v25, 5
6630; CI-NEXT:    v_mov_b32_e32 v26, 5
6631; CI-NEXT:    v_mov_b32_e32 v27, 5
6632; CI-NEXT:    v_mov_b32_e32 v28, 5
6633; CI-NEXT:    v_mov_b32_e32 v29, 5
6634; CI-NEXT:    v_mov_b32_e32 v30, 6
6635; CI-NEXT:    v_writelane_b32 v40, s31, 1
6636; CI-NEXT:    s_getpc_b64 s[4:5]
6637; CI-NEXT:    s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6638; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6639; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6640; CI-NEXT:    v_readlane_b32 s31, v40, 1
6641; CI-NEXT:    v_readlane_b32 s30, v40, 0
6642; CI-NEXT:    s_mov_b32 s32, s33
6643; CI-NEXT:    v_readlane_b32 s4, v40, 2
6644; CI-NEXT:    s_or_saveexec_b64 s[6:7], -1
6645; CI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6646; CI-NEXT:    s_mov_b64 exec, s[6:7]
6647; CI-NEXT:    s_mov_b32 s33, s4
6648; CI-NEXT:    s_waitcnt vmcnt(0)
6649; CI-NEXT:    s_setpc_b64 s[30:31]
6650;
6651; GFX9-LABEL: stack_8xv5i32:
6652; GFX9:       ; %bb.0: ; %entry
6653; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6654; GFX9-NEXT:    s_mov_b32 s4, s33
6655; GFX9-NEXT:    s_mov_b32 s33, s32
6656; GFX9-NEXT:    s_or_saveexec_b64 s[8:9], -1
6657; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6658; GFX9-NEXT:    s_mov_b64 exec, s[8:9]
6659; GFX9-NEXT:    s_addk_i32 s32, 0x400
6660; GFX9-NEXT:    v_mov_b32_e32 v0, 7
6661; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6662; GFX9-NEXT:    v_mov_b32_e32 v0, 8
6663; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6664; GFX9-NEXT:    v_mov_b32_e32 v0, 9
6665; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6666; GFX9-NEXT:    v_mov_b32_e32 v0, 10
6667; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6668; GFX9-NEXT:    v_mov_b32_e32 v0, 11
6669; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6670; GFX9-NEXT:    v_mov_b32_e32 v0, 12
6671; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
6672; GFX9-NEXT:    v_mov_b32_e32 v0, 13
6673; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
6674; GFX9-NEXT:    v_mov_b32_e32 v0, 14
6675; GFX9-NEXT:    v_writelane_b32 v40, s4, 2
6676; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
6677; GFX9-NEXT:    v_mov_b32_e32 v0, 15
6678; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
6679; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
6680; GFX9-NEXT:    v_mov_b32_e32 v0, 0
6681; GFX9-NEXT:    v_mov_b32_e32 v1, 0
6682; GFX9-NEXT:    v_mov_b32_e32 v2, 0
6683; GFX9-NEXT:    v_mov_b32_e32 v3, 0
6684; GFX9-NEXT:    v_mov_b32_e32 v4, 0
6685; GFX9-NEXT:    v_mov_b32_e32 v5, 1
6686; GFX9-NEXT:    v_mov_b32_e32 v6, 1
6687; GFX9-NEXT:    v_mov_b32_e32 v7, 1
6688; GFX9-NEXT:    v_mov_b32_e32 v8, 1
6689; GFX9-NEXT:    v_mov_b32_e32 v9, 1
6690; GFX9-NEXT:    v_mov_b32_e32 v10, 2
6691; GFX9-NEXT:    v_mov_b32_e32 v11, 2
6692; GFX9-NEXT:    v_mov_b32_e32 v12, 2
6693; GFX9-NEXT:    v_mov_b32_e32 v13, 2
6694; GFX9-NEXT:    v_mov_b32_e32 v14, 2
6695; GFX9-NEXT:    v_mov_b32_e32 v15, 3
6696; GFX9-NEXT:    v_mov_b32_e32 v16, 3
6697; GFX9-NEXT:    v_mov_b32_e32 v17, 3
6698; GFX9-NEXT:    v_mov_b32_e32 v18, 3
6699; GFX9-NEXT:    v_mov_b32_e32 v19, 3
6700; GFX9-NEXT:    v_mov_b32_e32 v20, 4
6701; GFX9-NEXT:    v_mov_b32_e32 v21, 4
6702; GFX9-NEXT:    v_mov_b32_e32 v22, 4
6703; GFX9-NEXT:    v_mov_b32_e32 v23, 4
6704; GFX9-NEXT:    v_mov_b32_e32 v24, 4
6705; GFX9-NEXT:    v_mov_b32_e32 v25, 5
6706; GFX9-NEXT:    v_mov_b32_e32 v26, 5
6707; GFX9-NEXT:    v_mov_b32_e32 v27, 5
6708; GFX9-NEXT:    v_mov_b32_e32 v28, 5
6709; GFX9-NEXT:    v_mov_b32_e32 v29, 5
6710; GFX9-NEXT:    v_mov_b32_e32 v30, 6
6711; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
6712; GFX9-NEXT:    s_getpc_b64 s[4:5]
6713; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6714; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6715; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6716; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
6717; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
6718; GFX9-NEXT:    s_mov_b32 s32, s33
6719; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
6720; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
6721; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6722; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
6723; GFX9-NEXT:    s_mov_b32 s33, s4
6724; GFX9-NEXT:    s_waitcnt vmcnt(0)
6725; GFX9-NEXT:    s_setpc_b64 s[30:31]
6726;
6727; GFX11-LABEL: stack_8xv5i32:
6728; GFX11:       ; %bb.0: ; %entry
6729; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6730; GFX11-NEXT:    s_mov_b32 s0, s33
6731; GFX11-NEXT:    s_mov_b32 s33, s32
6732; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
6733; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
6734; GFX11-NEXT:    s_mov_b32 exec_lo, s1
6735; GFX11-NEXT:    v_writelane_b32 v40, s0, 2
6736; GFX11-NEXT:    v_dual_mov_b32 v0, 7 :: v_dual_mov_b32 v1, 8
6737; GFX11-NEXT:    v_dual_mov_b32 v2, 9 :: v_dual_mov_b32 v3, 10
6738; GFX11-NEXT:    v_dual_mov_b32 v8, 15 :: v_dual_mov_b32 v5, 12
6739; GFX11-NEXT:    s_add_i32 s32, s32, 16
6740; GFX11-NEXT:    v_dual_mov_b32 v4, 11 :: v_dual_mov_b32 v7, 14
6741; GFX11-NEXT:    v_mov_b32_e32 v6, 13
6742; GFX11-NEXT:    s_add_i32 s0, s32, 32
6743; GFX11-NEXT:    s_add_i32 s1, s32, 16
6744; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
6745; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32
6746; GFX11-NEXT:    v_mov_b32_e32 v1, 0
6747; GFX11-NEXT:    scratch_store_b32 off, v8, s0
6748; GFX11-NEXT:    scratch_store_b128 off, v[4:7], s1
6749; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v3, 0
6750; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 1
6751; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 1
6752; GFX11-NEXT:    v_dual_mov_b32 v6, 1 :: v_dual_mov_b32 v9, 1
6753; GFX11-NEXT:    v_dual_mov_b32 v8, 1 :: v_dual_mov_b32 v11, 2
6754; GFX11-NEXT:    v_dual_mov_b32 v10, 2 :: v_dual_mov_b32 v13, 2
6755; GFX11-NEXT:    v_dual_mov_b32 v12, 2 :: v_dual_mov_b32 v15, 3
6756; GFX11-NEXT:    v_dual_mov_b32 v14, 2 :: v_dual_mov_b32 v17, 3
6757; GFX11-NEXT:    v_dual_mov_b32 v16, 3 :: v_dual_mov_b32 v19, 3
6758; GFX11-NEXT:    v_dual_mov_b32 v18, 3 :: v_dual_mov_b32 v21, 4
6759; GFX11-NEXT:    v_dual_mov_b32 v20, 4 :: v_dual_mov_b32 v23, 4
6760; GFX11-NEXT:    v_dual_mov_b32 v22, 4 :: v_dual_mov_b32 v25, 5
6761; GFX11-NEXT:    v_dual_mov_b32 v24, 4 :: v_dual_mov_b32 v27, 5
6762; GFX11-NEXT:    v_dual_mov_b32 v26, 5 :: v_dual_mov_b32 v29, 5
6763; GFX11-NEXT:    v_mov_b32_e32 v28, 5
6764; GFX11-NEXT:    v_mov_b32_e32 v30, 6
6765; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
6766; GFX11-NEXT:    s_getpc_b64 s[0:1]
6767; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_8xv5i32@rel32@lo+4
6768; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_8xv5i32@rel32@hi+12
6769; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
6770; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
6771; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
6772; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
6773; GFX11-NEXT:    s_mov_b32 s32, s33
6774; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
6775; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
6776; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
6777; GFX11-NEXT:    s_mov_b32 exec_lo, s1
6778; GFX11-NEXT:    s_mov_b32 s33, s0
6779; GFX11-NEXT:    s_waitcnt vmcnt(0)
6780; GFX11-NEXT:    s_setpc_b64 s[30:31]
6781;
6782; HSA-LABEL: stack_8xv5i32:
6783; HSA:       ; %bb.0: ; %entry
6784; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6785; HSA-NEXT:    s_mov_b32 s4, s33
6786; HSA-NEXT:    s_mov_b32 s33, s32
6787; HSA-NEXT:    s_or_saveexec_b64 s[8:9], -1
6788; HSA-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6789; HSA-NEXT:    s_mov_b64 exec, s[8:9]
6790; HSA-NEXT:    s_addk_i32 s32, 0x400
6791; HSA-NEXT:    v_mov_b32_e32 v0, 7
6792; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6793; HSA-NEXT:    v_mov_b32_e32 v0, 8
6794; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6795; HSA-NEXT:    v_mov_b32_e32 v0, 9
6796; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6797; HSA-NEXT:    v_mov_b32_e32 v0, 10
6798; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6799; HSA-NEXT:    v_mov_b32_e32 v0, 11
6800; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6801; HSA-NEXT:    v_mov_b32_e32 v0, 12
6802; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
6803; HSA-NEXT:    v_mov_b32_e32 v0, 13
6804; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
6805; HSA-NEXT:    v_mov_b32_e32 v0, 14
6806; HSA-NEXT:    v_writelane_b32 v40, s4, 2
6807; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
6808; HSA-NEXT:    v_mov_b32_e32 v0, 15
6809; HSA-NEXT:    v_writelane_b32 v40, s30, 0
6810; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
6811; HSA-NEXT:    v_mov_b32_e32 v0, 0
6812; HSA-NEXT:    v_mov_b32_e32 v1, 0
6813; HSA-NEXT:    v_mov_b32_e32 v2, 0
6814; HSA-NEXT:    v_mov_b32_e32 v3, 0
6815; HSA-NEXT:    v_mov_b32_e32 v4, 0
6816; HSA-NEXT:    v_mov_b32_e32 v5, 1
6817; HSA-NEXT:    v_mov_b32_e32 v6, 1
6818; HSA-NEXT:    v_mov_b32_e32 v7, 1
6819; HSA-NEXT:    v_mov_b32_e32 v8, 1
6820; HSA-NEXT:    v_mov_b32_e32 v9, 1
6821; HSA-NEXT:    v_mov_b32_e32 v10, 2
6822; HSA-NEXT:    v_mov_b32_e32 v11, 2
6823; HSA-NEXT:    v_mov_b32_e32 v12, 2
6824; HSA-NEXT:    v_mov_b32_e32 v13, 2
6825; HSA-NEXT:    v_mov_b32_e32 v14, 2
6826; HSA-NEXT:    v_mov_b32_e32 v15, 3
6827; HSA-NEXT:    v_mov_b32_e32 v16, 3
6828; HSA-NEXT:    v_mov_b32_e32 v17, 3
6829; HSA-NEXT:    v_mov_b32_e32 v18, 3
6830; HSA-NEXT:    v_mov_b32_e32 v19, 3
6831; HSA-NEXT:    v_mov_b32_e32 v20, 4
6832; HSA-NEXT:    v_mov_b32_e32 v21, 4
6833; HSA-NEXT:    v_mov_b32_e32 v22, 4
6834; HSA-NEXT:    v_mov_b32_e32 v23, 4
6835; HSA-NEXT:    v_mov_b32_e32 v24, 4
6836; HSA-NEXT:    v_mov_b32_e32 v25, 5
6837; HSA-NEXT:    v_mov_b32_e32 v26, 5
6838; HSA-NEXT:    v_mov_b32_e32 v27, 5
6839; HSA-NEXT:    v_mov_b32_e32 v28, 5
6840; HSA-NEXT:    v_mov_b32_e32 v29, 5
6841; HSA-NEXT:    v_mov_b32_e32 v30, 6
6842; HSA-NEXT:    v_writelane_b32 v40, s31, 1
6843; HSA-NEXT:    s_getpc_b64 s[4:5]
6844; HSA-NEXT:    s_add_u32 s4, s4, external_void_func_8xv5i32@rel32@lo+4
6845; HSA-NEXT:    s_addc_u32 s5, s5, external_void_func_8xv5i32@rel32@hi+12
6846; HSA-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6847; HSA-NEXT:    v_readlane_b32 s31, v40, 1
6848; HSA-NEXT:    v_readlane_b32 s30, v40, 0
6849; HSA-NEXT:    s_mov_b32 s32, s33
6850; HSA-NEXT:    v_readlane_b32 s4, v40, 2
6851; HSA-NEXT:    s_or_saveexec_b64 s[6:7], -1
6852; HSA-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6853; HSA-NEXT:    s_mov_b64 exec, s[6:7]
6854; HSA-NEXT:    s_mov_b32 s33, s4
6855; HSA-NEXT:    s_waitcnt vmcnt(0)
6856; HSA-NEXT:    s_setpc_b64 s[30:31]
6857entry:
  ; The first six vectors are splats of 0 through 5; the last two carry the
  ; distinct values 6 through 15.
6858  call void @external_void_func_8xv5i32(
6859      <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
6860      <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
6861      <5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
6862      <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
6863      <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
6864      <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
      ; 6 is the last value the checks expect in a VGPR (v30); 7 and above are
      ; expected in the stores addressed from s32.
6865      <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
6866      <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
6867  ret void
6868}
6869
6870define void @stack_8xv5f32() #0 {
6871; VI-LABEL: stack_8xv5f32:
6872; VI:       ; %bb.0: ; %entry
6873; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6874; VI-NEXT:    s_mov_b32 s4, s33
6875; VI-NEXT:    s_mov_b32 s33, s32
6876; VI-NEXT:    s_or_saveexec_b64 s[8:9], -1
6877; VI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6878; VI-NEXT:    s_mov_b64 exec, s[8:9]
6879; VI-NEXT:    s_addk_i32 s32, 0x400
6880; VI-NEXT:    v_mov_b32_e32 v0, 0x40e00000
6881; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6882; VI-NEXT:    v_mov_b32_e32 v0, 0x41000000
6883; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6884; VI-NEXT:    v_mov_b32_e32 v0, 0x41100000
6885; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6886; VI-NEXT:    v_mov_b32_e32 v0, 0x41200000
6887; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6888; VI-NEXT:    v_mov_b32_e32 v0, 0x41300000
6889; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6890; VI-NEXT:    v_mov_b32_e32 v0, 0x41400000
6891; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
6892; VI-NEXT:    v_mov_b32_e32 v0, 0x41500000
6893; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
6894; VI-NEXT:    v_mov_b32_e32 v0, 0x41600000
6895; VI-NEXT:    v_writelane_b32 v40, s4, 2
6896; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
6897; VI-NEXT:    v_mov_b32_e32 v0, 0x41700000
6898; VI-NEXT:    v_writelane_b32 v40, s30, 0
6899; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
6900; VI-NEXT:    v_mov_b32_e32 v0, 0
6901; VI-NEXT:    v_mov_b32_e32 v1, 0
6902; VI-NEXT:    v_mov_b32_e32 v2, 0
6903; VI-NEXT:    v_mov_b32_e32 v3, 0
6904; VI-NEXT:    v_mov_b32_e32 v4, 0
6905; VI-NEXT:    v_mov_b32_e32 v5, 1.0
6906; VI-NEXT:    v_mov_b32_e32 v6, 1.0
6907; VI-NEXT:    v_mov_b32_e32 v7, 1.0
6908; VI-NEXT:    v_mov_b32_e32 v8, 1.0
6909; VI-NEXT:    v_mov_b32_e32 v9, 1.0
6910; VI-NEXT:    v_mov_b32_e32 v10, 2.0
6911; VI-NEXT:    v_mov_b32_e32 v11, 2.0
6912; VI-NEXT:    v_mov_b32_e32 v12, 2.0
6913; VI-NEXT:    v_mov_b32_e32 v13, 2.0
6914; VI-NEXT:    v_mov_b32_e32 v14, 2.0
6915; VI-NEXT:    v_mov_b32_e32 v15, 0x40400000
6916; VI-NEXT:    v_mov_b32_e32 v16, 0x40400000
6917; VI-NEXT:    v_mov_b32_e32 v17, 0x40400000
6918; VI-NEXT:    v_mov_b32_e32 v18, 0x40400000
6919; VI-NEXT:    v_mov_b32_e32 v19, 0x40400000
6920; VI-NEXT:    v_mov_b32_e32 v20, 4.0
6921; VI-NEXT:    v_mov_b32_e32 v21, 4.0
6922; VI-NEXT:    v_mov_b32_e32 v22, 4.0
6923; VI-NEXT:    v_mov_b32_e32 v23, 4.0
6924; VI-NEXT:    v_mov_b32_e32 v24, 4.0
6925; VI-NEXT:    v_mov_b32_e32 v25, 0x40a00000
6926; VI-NEXT:    v_mov_b32_e32 v26, 0x40a00000
6927; VI-NEXT:    v_mov_b32_e32 v27, 0x40a00000
6928; VI-NEXT:    v_mov_b32_e32 v28, 0x40a00000
6929; VI-NEXT:    v_mov_b32_e32 v29, 0x40a00000
6930; VI-NEXT:    v_mov_b32_e32 v30, 0x40c00000
6931; VI-NEXT:    v_writelane_b32 v40, s31, 1
6932; VI-NEXT:    s_getpc_b64 s[4:5]
6933; VI-NEXT:    s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
6934; VI-NEXT:    s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
6935; VI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
6936; VI-NEXT:    v_readlane_b32 s31, v40, 1
6937; VI-NEXT:    v_readlane_b32 s30, v40, 0
6938; VI-NEXT:    s_mov_b32 s32, s33
6939; VI-NEXT:    v_readlane_b32 s4, v40, 2
6940; VI-NEXT:    s_or_saveexec_b64 s[6:7], -1
6941; VI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
6942; VI-NEXT:    s_mov_b64 exec, s[6:7]
6943; VI-NEXT:    s_mov_b32 s33, s4
6944; VI-NEXT:    s_waitcnt vmcnt(0)
6945; VI-NEXT:    s_setpc_b64 s[30:31]
6946;
6947; CI-LABEL: stack_8xv5f32:
6948; CI:       ; %bb.0: ; %entry
6949; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6950; CI-NEXT:    s_mov_b32 s4, s33
6951; CI-NEXT:    s_mov_b32 s33, s32
6952; CI-NEXT:    s_or_saveexec_b64 s[8:9], -1
6953; CI-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
6954; CI-NEXT:    s_mov_b64 exec, s[8:9]
6955; CI-NEXT:    s_addk_i32 s32, 0x400
6956; CI-NEXT:    v_mov_b32_e32 v0, 0x40e00000
6957; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32
6958; CI-NEXT:    v_mov_b32_e32 v0, 0x41000000
6959; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
6960; CI-NEXT:    v_mov_b32_e32 v0, 0x41100000
6961; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
6962; CI-NEXT:    v_mov_b32_e32 v0, 0x41200000
6963; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
6964; CI-NEXT:    v_mov_b32_e32 v0, 0x41300000
6965; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
6966; CI-NEXT:    v_mov_b32_e32 v0, 0x41400000
6967; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
6968; CI-NEXT:    v_mov_b32_e32 v0, 0x41500000
6969; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
6970; CI-NEXT:    v_mov_b32_e32 v0, 0x41600000
6971; CI-NEXT:    v_writelane_b32 v40, s4, 2
6972; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
6973; CI-NEXT:    v_mov_b32_e32 v0, 0x41700000
6974; CI-NEXT:    v_writelane_b32 v40, s30, 0
6975; CI-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
6976; CI-NEXT:    v_mov_b32_e32 v0, 0
6977; CI-NEXT:    v_mov_b32_e32 v1, 0
6978; CI-NEXT:    v_mov_b32_e32 v2, 0
6979; CI-NEXT:    v_mov_b32_e32 v3, 0
6980; CI-NEXT:    v_mov_b32_e32 v4, 0
6981; CI-NEXT:    v_mov_b32_e32 v5, 1.0
6982; CI-NEXT:    v_mov_b32_e32 v6, 1.0
6983; CI-NEXT:    v_mov_b32_e32 v7, 1.0
6984; CI-NEXT:    v_mov_b32_e32 v8, 1.0
6985; CI-NEXT:    v_mov_b32_e32 v9, 1.0
6986; CI-NEXT:    v_mov_b32_e32 v10, 2.0
6987; CI-NEXT:    v_mov_b32_e32 v11, 2.0
6988; CI-NEXT:    v_mov_b32_e32 v12, 2.0
6989; CI-NEXT:    v_mov_b32_e32 v13, 2.0
6990; CI-NEXT:    v_mov_b32_e32 v14, 2.0
6991; CI-NEXT:    v_mov_b32_e32 v15, 0x40400000
6992; CI-NEXT:    v_mov_b32_e32 v16, 0x40400000
6993; CI-NEXT:    v_mov_b32_e32 v17, 0x40400000
6994; CI-NEXT:    v_mov_b32_e32 v18, 0x40400000
6995; CI-NEXT:    v_mov_b32_e32 v19, 0x40400000
6996; CI-NEXT:    v_mov_b32_e32 v20, 4.0
6997; CI-NEXT:    v_mov_b32_e32 v21, 4.0
6998; CI-NEXT:    v_mov_b32_e32 v22, 4.0
6999; CI-NEXT:    v_mov_b32_e32 v23, 4.0
7000; CI-NEXT:    v_mov_b32_e32 v24, 4.0
7001; CI-NEXT:    v_mov_b32_e32 v25, 0x40a00000
7002; CI-NEXT:    v_mov_b32_e32 v26, 0x40a00000
7003; CI-NEXT:    v_mov_b32_e32 v27, 0x40a00000
7004; CI-NEXT:    v_mov_b32_e32 v28, 0x40a00000
7005; CI-NEXT:    v_mov_b32_e32 v29, 0x40a00000
7006; CI-NEXT:    v_mov_b32_e32 v30, 0x40c00000
7007; CI-NEXT:    v_writelane_b32 v40, s31, 1
7008; CI-NEXT:    s_getpc_b64 s[4:5]
7009; CI-NEXT:    s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
7010; CI-NEXT:    s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
7011; CI-NEXT:    s_swappc_b64 s[30:31], s[4:5]
7012; CI-NEXT:    v_readlane_b32 s31, v40, 1
7013; CI-NEXT:    v_readlane_b32 s30, v40, 0
7014; CI-NEXT:    s_mov_b32 s32, s33
7015; CI-NEXT:    v_readlane_b32 s4, v40, 2
7016; CI-NEXT:    s_or_saveexec_b64 s[6:7], -1
7017; CI-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7018; CI-NEXT:    s_mov_b64 exec, s[6:7]
7019; CI-NEXT:    s_mov_b32 s33, s4
7020; CI-NEXT:    s_waitcnt vmcnt(0)
7021; CI-NEXT:    s_setpc_b64 s[30:31]
7022;
7023; GFX9-LABEL: stack_8xv5f32:
7024; GFX9:       ; %bb.0: ; %entry
7025; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7026; GFX9-NEXT:    s_mov_b32 s4, s33
7027; GFX9-NEXT:    s_mov_b32 s33, s32
7028; GFX9-NEXT:    s_or_saveexec_b64 s[8:9], -1
7029; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7030; GFX9-NEXT:    s_mov_b64 exec, s[8:9]
7031; GFX9-NEXT:    s_addk_i32 s32, 0x400
7032; GFX9-NEXT:    v_mov_b32_e32 v0, 0x40e00000
7033; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32
7034; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41000000
7035; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
7036; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41100000
7037; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
7038; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41200000
7039; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
7040; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41300000
7041; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
7042; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41400000
7043; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
7044; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41500000
7045; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
7046; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41600000
7047; GFX9-NEXT:    v_writelane_b32 v40, s4, 2
7048; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
7049; GFX9-NEXT:    v_mov_b32_e32 v0, 0x41700000
7050; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
7051; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
7052; GFX9-NEXT:    v_mov_b32_e32 v0, 0
7053; GFX9-NEXT:    v_mov_b32_e32 v1, 0
7054; GFX9-NEXT:    v_mov_b32_e32 v2, 0
7055; GFX9-NEXT:    v_mov_b32_e32 v3, 0
7056; GFX9-NEXT:    v_mov_b32_e32 v4, 0
7057; GFX9-NEXT:    v_mov_b32_e32 v5, 1.0
7058; GFX9-NEXT:    v_mov_b32_e32 v6, 1.0
7059; GFX9-NEXT:    v_mov_b32_e32 v7, 1.0
7060; GFX9-NEXT:    v_mov_b32_e32 v8, 1.0
7061; GFX9-NEXT:    v_mov_b32_e32 v9, 1.0
7062; GFX9-NEXT:    v_mov_b32_e32 v10, 2.0
7063; GFX9-NEXT:    v_mov_b32_e32 v11, 2.0
7064; GFX9-NEXT:    v_mov_b32_e32 v12, 2.0
7065; GFX9-NEXT:    v_mov_b32_e32 v13, 2.0
7066; GFX9-NEXT:    v_mov_b32_e32 v14, 2.0
7067; GFX9-NEXT:    v_mov_b32_e32 v15, 0x40400000
7068; GFX9-NEXT:    v_mov_b32_e32 v16, 0x40400000
7069; GFX9-NEXT:    v_mov_b32_e32 v17, 0x40400000
7070; GFX9-NEXT:    v_mov_b32_e32 v18, 0x40400000
7071; GFX9-NEXT:    v_mov_b32_e32 v19, 0x40400000
7072; GFX9-NEXT:    v_mov_b32_e32 v20, 4.0
7073; GFX9-NEXT:    v_mov_b32_e32 v21, 4.0
7074; GFX9-NEXT:    v_mov_b32_e32 v22, 4.0
7075; GFX9-NEXT:    v_mov_b32_e32 v23, 4.0
7076; GFX9-NEXT:    v_mov_b32_e32 v24, 4.0
7077; GFX9-NEXT:    v_mov_b32_e32 v25, 0x40a00000
7078; GFX9-NEXT:    v_mov_b32_e32 v26, 0x40a00000
7079; GFX9-NEXT:    v_mov_b32_e32 v27, 0x40a00000
7080; GFX9-NEXT:    v_mov_b32_e32 v28, 0x40a00000
7081; GFX9-NEXT:    v_mov_b32_e32 v29, 0x40a00000
7082; GFX9-NEXT:    v_mov_b32_e32 v30, 0x40c00000
7083; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
7084; GFX9-NEXT:    s_getpc_b64 s[4:5]
7085; GFX9-NEXT:    s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
7086; GFX9-NEXT:    s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
7087; GFX9-NEXT:    s_swappc_b64 s[30:31], s[4:5]
7088; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
7089; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
7090; GFX9-NEXT:    s_mov_b32 s32, s33
7091; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
7092; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
7093; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7094; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
7095; GFX9-NEXT:    s_mov_b32 s33, s4
7096; GFX9-NEXT:    s_waitcnt vmcnt(0)
7097; GFX9-NEXT:    s_setpc_b64 s[30:31]
7098;
7099; GFX11-LABEL: stack_8xv5f32:
7100; GFX11:       ; %bb.0: ; %entry
7101; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7102; GFX11-NEXT:    s_mov_b32 s0, s33
7103; GFX11-NEXT:    s_mov_b32 s33, s32
7104; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
7105; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
7106; GFX11-NEXT:    s_mov_b32 exec_lo, s1
7107; GFX11-NEXT:    v_writelane_b32 v40, s0, 2
7108; GFX11-NEXT:    v_mov_b32_e32 v0, 0x40e00000
7109; GFX11-NEXT:    v_mov_b32_e32 v1, 0x41000000
7110; GFX11-NEXT:    v_mov_b32_e32 v2, 0x41100000
7111; GFX11-NEXT:    v_mov_b32_e32 v3, 0x41200000
7112; GFX11-NEXT:    v_mov_b32_e32 v8, 0x41700000
7113; GFX11-NEXT:    s_add_i32 s32, s32, 16
7114; GFX11-NEXT:    v_mov_b32_e32 v4, 0x41300000
7115; GFX11-NEXT:    v_mov_b32_e32 v5, 0x41400000
7116; GFX11-NEXT:    v_dual_mov_b32 v6, 0x41500000 :: v_dual_mov_b32 v9, 1.0
7117; GFX11-NEXT:    v_mov_b32_e32 v7, 0x41600000
7118; GFX11-NEXT:    s_add_i32 s0, s32, 32
7119; GFX11-NEXT:    s_add_i32 s1, s32, 16
7120; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
7121; GFX11-NEXT:    scratch_store_b128 off, v[0:3], s32
7122; GFX11-NEXT:    scratch_store_b32 off, v8, s0
7123; GFX11-NEXT:    scratch_store_b128 off, v[4:7], s1
7124; GFX11-NEXT:    v_mov_b32_e32 v6, 1.0
7125; GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
7126; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
7127; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 1.0
7128; GFX11-NEXT:    v_dual_mov_b32 v7, 1.0 :: v_dual_mov_b32 v8, 1.0
7129; GFX11-NEXT:    v_dual_mov_b32 v11, 2.0 :: v_dual_mov_b32 v10, 2.0
7130; GFX11-NEXT:    v_dual_mov_b32 v13, 2.0 :: v_dual_mov_b32 v12, 2.0
7131; GFX11-NEXT:    v_dual_mov_b32 v15, 0x40400000 :: v_dual_mov_b32 v14, 2.0
7132; GFX11-NEXT:    v_dual_mov_b32 v17, 0x40400000 :: v_dual_mov_b32 v16, 0x40400000
7133; GFX11-NEXT:    v_dual_mov_b32 v19, 0x40400000 :: v_dual_mov_b32 v18, 0x40400000
7134; GFX11-NEXT:    v_dual_mov_b32 v21, 4.0 :: v_dual_mov_b32 v20, 4.0
7135; GFX11-NEXT:    v_dual_mov_b32 v23, 4.0 :: v_dual_mov_b32 v22, 4.0
7136; GFX11-NEXT:    v_dual_mov_b32 v25, 0x40a00000 :: v_dual_mov_b32 v24, 4.0
7137; GFX11-NEXT:    v_dual_mov_b32 v27, 0x40a00000 :: v_dual_mov_b32 v26, 0x40a00000
7138; GFX11-NEXT:    v_dual_mov_b32 v29, 0x40a00000 :: v_dual_mov_b32 v28, 0x40a00000
7139; GFX11-NEXT:    v_mov_b32_e32 v30, 0x40c00000
7140; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
7141; GFX11-NEXT:    s_getpc_b64 s[0:1]
7142; GFX11-NEXT:    s_add_u32 s0, s0, external_void_func_8xv5f32@rel32@lo+4
7143; GFX11-NEXT:    s_addc_u32 s1, s1, external_void_func_8xv5f32@rel32@hi+12
7144; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
7145; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
7146; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
7147; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
7148; GFX11-NEXT:    s_mov_b32 s32, s33
7149; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
7150; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
7151; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
7152; GFX11-NEXT:    s_mov_b32 exec_lo, s1
7153; GFX11-NEXT:    s_mov_b32 s33, s0
7154; GFX11-NEXT:    s_waitcnt vmcnt(0)
7155; GFX11-NEXT:    s_setpc_b64 s[30:31]
7156;
7157; HSA-LABEL: stack_8xv5f32:
7158; HSA:       ; %bb.0: ; %entry
7159; HSA-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7160; HSA-NEXT:    s_mov_b32 s4, s33
7161; HSA-NEXT:    s_mov_b32 s33, s32
7162; HSA-NEXT:    s_or_saveexec_b64 s[8:9], -1
7163; HSA-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
7164; HSA-NEXT:    s_mov_b64 exec, s[8:9]
7165; HSA-NEXT:    s_addk_i32 s32, 0x400
7166; HSA-NEXT:    v_mov_b32_e32 v0, 0x40e00000
7167; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32
7168; HSA-NEXT:    v_mov_b32_e32 v0, 0x41000000
7169; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
7170; HSA-NEXT:    v_mov_b32_e32 v0, 0x41100000
7171; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
7172; HSA-NEXT:    v_mov_b32_e32 v0, 0x41200000
7173; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
7174; HSA-NEXT:    v_mov_b32_e32 v0, 0x41300000
7175; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
7176; HSA-NEXT:    v_mov_b32_e32 v0, 0x41400000
7177; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:20
7178; HSA-NEXT:    v_mov_b32_e32 v0, 0x41500000
7179; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:24
7180; HSA-NEXT:    v_mov_b32_e32 v0, 0x41600000
7181; HSA-NEXT:    v_writelane_b32 v40, s4, 2
7182; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:28
7183; HSA-NEXT:    v_mov_b32_e32 v0, 0x41700000
7184; HSA-NEXT:    v_writelane_b32 v40, s30, 0
7185; HSA-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:32
7186; HSA-NEXT:    v_mov_b32_e32 v0, 0
7187; HSA-NEXT:    v_mov_b32_e32 v1, 0
7188; HSA-NEXT:    v_mov_b32_e32 v2, 0
7189; HSA-NEXT:    v_mov_b32_e32 v3, 0
7190; HSA-NEXT:    v_mov_b32_e32 v4, 0
7191; HSA-NEXT:    v_mov_b32_e32 v5, 1.0
7192; HSA-NEXT:    v_mov_b32_e32 v6, 1.0
7193; HSA-NEXT:    v_mov_b32_e32 v7, 1.0
7194; HSA-NEXT:    v_mov_b32_e32 v8, 1.0
7195; HSA-NEXT:    v_mov_b32_e32 v9, 1.0
7196; HSA-NEXT:    v_mov_b32_e32 v10, 2.0
7197; HSA-NEXT:    v_mov_b32_e32 v11, 2.0
7198; HSA-NEXT:    v_mov_b32_e32 v12, 2.0
7199; HSA-NEXT:    v_mov_b32_e32 v13, 2.0
7200; HSA-NEXT:    v_mov_b32_e32 v14, 2.0
7201; HSA-NEXT:    v_mov_b32_e32 v15, 0x40400000
7202; HSA-NEXT:    v_mov_b32_e32 v16, 0x40400000
7203; HSA-NEXT:    v_mov_b32_e32 v17, 0x40400000
7204; HSA-NEXT:    v_mov_b32_e32 v18, 0x40400000
7205; HSA-NEXT:    v_mov_b32_e32 v19, 0x40400000
7206; HSA-NEXT:    v_mov_b32_e32 v20, 4.0
7207; HSA-NEXT:    v_mov_b32_e32 v21, 4.0
7208; HSA-NEXT:    v_mov_b32_e32 v22, 4.0
7209; HSA-NEXT:    v_mov_b32_e32 v23, 4.0
7210; HSA-NEXT:    v_mov_b32_e32 v24, 4.0
7211; HSA-NEXT:    v_mov_b32_e32 v25, 0x40a00000
7212; HSA-NEXT:    v_mov_b32_e32 v26, 0x40a00000
7213; HSA-NEXT:    v_mov_b32_e32 v27, 0x40a00000
7214; HSA-NEXT:    v_mov_b32_e32 v28, 0x40a00000
7215; HSA-NEXT:    v_mov_b32_e32 v29, 0x40a00000
7216; HSA-NEXT:    v_mov_b32_e32 v30, 0x40c00000
7217; HSA-NEXT:    v_writelane_b32 v40, s31, 1
7218; HSA-NEXT:    s_getpc_b64 s[4:5]
7219; HSA-NEXT:    s_add_u32 s4, s4, external_void_func_8xv5f32@rel32@lo+4
7220; HSA-NEXT:    s_addc_u32 s5, s5, external_void_func_8xv5f32@rel32@hi+12
7221; HSA-NEXT:    s_swappc_b64 s[30:31], s[4:5]
7222; HSA-NEXT:    v_readlane_b32 s31, v40, 1
7223; HSA-NEXT:    v_readlane_b32 s30, v40, 0
7224; HSA-NEXT:    s_mov_b32 s32, s33
7225; HSA-NEXT:    v_readlane_b32 s4, v40, 2
7226; HSA-NEXT:    s_or_saveexec_b64 s[6:7], -1
7227; HSA-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
7228; HSA-NEXT:    s_mov_b64 exec, s[6:7]
7229; HSA-NEXT:    s_mov_b32 s33, s4
7230; HSA-NEXT:    s_waitcnt vmcnt(0)
7231; HSA-NEXT:    s_setpc_b64 s[30:31]
7232entry:
7233  call void @external_void_func_8xv5f32(
7234      <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>,
7235      <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>,
7236      <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>,
7237      <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>,
7238      <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>,
7239      <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>,
7240      <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>,
7241      <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>)
7242  ret void
7243}
7244
; External callees used by the argument-passing tests. Each is declared only
; (hidden visibility, attribute group #0); no body is provided in this file.
;
; The first two take a <32 x i32> followed by one more argument: a
; byval(double) pointer in addrspace(5) with align 16, or a plain double.
; NOTE(review): presumably the <32 x i32> is there to occupy the argument
; registers so that the trailing argument is passed on the stack - confirm
; against the tests that call them.
;
; The remaining four take 12 x <3 x T> (36 elements) or 8 x <5 x T>
; (40 elements), with T = i32 or float. In the stack_8xv5f32 checks the first
; 31 elements are materialized in v0-v30 and the other nine are stored to
; memory addressed from s32 (byte offsets 0 through 32).
7245declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0
7246declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0
7247declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>,
7248    <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0
7249declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
7250    <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0
7251declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>,
7252    <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0
7253declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>,
7254    <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0
7255
; Attribute groups, referenced as #N from function declarations/definitions.
; #0 is the group attached to the external callee declarations in this file:
; nounwind plus a list of "amdgpu-no-*" string attributes.
; NOTE(review): going by their names, the "amdgpu-no-*" attributes presumably
; state that the function does not need the corresponding implicit input
; (dispatch id/ptr, implicitarg ptr, workgroup/workitem ids) - confirm against
; the AMDGPU backend documentation.
; NOTE(review): the users of #1 and #2 are outside this excerpt - confirm they
; are still referenced before touching them.
7256attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
7257attributes #1 = { nounwind readnone }
7258attributes #2 = { nounwind noinline }
7259
; Module flags: a single entry that sets "amdhsa_code_object_version" to 500.
; The leading i32 1 is the module-flag merge behavior ("Error" in the LLVM
; LangRef table: conflicting values are rejected when modules are linked).
; NOTE(review): 500 presumably selects AMDHSA code object V5 - confirm against
; the AMDGPU backend documentation.
7260!llvm.module.flags = !{!0}
7261!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
7262