xref: /llvm-project/llvm/test/CodeGen/AMDGPU/function-args-inreg.ll (revision 11b040192640ef3b1f481124c440f464ed6ec86a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s
4
; i1 inreg argument stored to an undef global pointer. The gfx900 checks read
; it from s16 and the gfx1100 checks from s0; both mask it down to bit 0
; (s_and_b32 ..., 1) before issuing a byte store.
5define void @void_func_i1_inreg(i1 inreg %arg0) #0 {
6; GFX9-LABEL: void_func_i1_inreg:
7; GFX9:       ; %bb.0:
8; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9; GFX9-NEXT:    s_and_b32 s4, s16, 1
10; GFX9-NEXT:    v_mov_b32_e32 v0, s4
11; GFX9-NEXT:    global_store_byte v[0:1], v0, off
12; GFX9-NEXT:    s_waitcnt vmcnt(0)
13; GFX9-NEXT:    s_setpc_b64 s[30:31]
14;
15; GFX11-LABEL: void_func_i1_inreg:
16; GFX11:       ; %bb.0:
17; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX11-NEXT:    s_and_b32 s0, s0, 1
19; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
20; GFX11-NEXT:    v_mov_b32_e32 v0, s0
21; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
22; GFX11-NEXT:    s_setpc_b64 s[30:31]
23  store i1 %arg0, ptr addrspace(1) undef
24  ret void
25}
26
; i8 inreg argument. The checks read it from s16 (gfx900) or s0 (gfx1100),
; copy it to v0 and issue a byte store with no extra masking.
27define void @void_func_i8_inreg(i8 inreg %arg0) #0 {
28; GFX9-LABEL: void_func_i8_inreg:
29; GFX9:       ; %bb.0:
30; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31; GFX9-NEXT:    v_mov_b32_e32 v0, s16
32; GFX9-NEXT:    global_store_byte v[0:1], v0, off
33; GFX9-NEXT:    s_waitcnt vmcnt(0)
34; GFX9-NEXT:    s_setpc_b64 s[30:31]
35;
36; GFX11-LABEL: void_func_i8_inreg:
37; GFX11:       ; %bb.0:
38; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39; GFX11-NEXT:    v_mov_b32_e32 v0, s0
40; GFX11-NEXT:    global_store_b8 v[0:1], v0, off
41; GFX11-NEXT:    s_setpc_b64 s[30:31]
42  store i8 %arg0, ptr addrspace(1) undef
43  ret void
44}
45
; i16 inreg argument. The checks read it from s16 (gfx900) or s0 (gfx1100),
; copy it to v0 and issue a 16-bit store.
46define void @void_func_i16_inreg(i16 inreg %arg0) #0 {
47; GFX9-LABEL: void_func_i16_inreg:
48; GFX9:       ; %bb.0:
49; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50; GFX9-NEXT:    v_mov_b32_e32 v0, s16
51; GFX9-NEXT:    global_store_short v[0:1], v0, off
52; GFX9-NEXT:    s_waitcnt vmcnt(0)
53; GFX9-NEXT:    s_setpc_b64 s[30:31]
54;
55; GFX11-LABEL: void_func_i16_inreg:
56; GFX11:       ; %bb.0:
57; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
58; GFX11-NEXT:    v_mov_b32_e32 v0, s0
59; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
60; GFX11-NEXT:    s_setpc_b64 s[30:31]
61  store i16 %arg0, ptr addrspace(1) undef
62  ret void
63}
64
; i32 inreg argument. The checks read it from s16 (gfx900) or s0 (gfx1100),
; copy it to v0 and issue a 32-bit store.
65define void @void_func_i32_inreg(i32 inreg %arg0) #0 {
66; GFX9-LABEL: void_func_i32_inreg:
67; GFX9:       ; %bb.0:
68; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; GFX9-NEXT:    v_mov_b32_e32 v0, s16
70; GFX9-NEXT:    global_store_dword v[0:1], v0, off
71; GFX9-NEXT:    s_waitcnt vmcnt(0)
72; GFX9-NEXT:    s_setpc_b64 s[30:31]
73;
74; GFX11-LABEL: void_func_i32_inreg:
75; GFX11:       ; %bb.0:
76; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77; GFX11-NEXT:    v_mov_b32_e32 v0, s0
78; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
79; GFX11-NEXT:    s_setpc_b64 s[30:31]
80  store i32 %arg0, ptr addrspace(1) undef
81  ret void
82}
83
; i64 inreg argument. The checks read the two halves from s16/s17 (gfx900) or
; s0/s1 (gfx1100), copy them to v[0:1] and issue a single 64-bit store.
84define void @void_func_i64_inreg(i64 inreg %arg0) #0 {
85; GFX9-LABEL: void_func_i64_inreg:
86; GFX9:       ; %bb.0:
87; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88; GFX9-NEXT:    v_mov_b32_e32 v0, s16
89; GFX9-NEXT:    v_mov_b32_e32 v1, s17
90; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
91; GFX9-NEXT:    s_waitcnt vmcnt(0)
92; GFX9-NEXT:    s_setpc_b64 s[30:31]
93;
94; GFX11-LABEL: void_func_i64_inreg:
95; GFX11:       ; %bb.0:
96; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
98; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
99; GFX11-NEXT:    s_setpc_b64 s[30:31]
100  store i64 %arg0, ptr addrspace(1) undef
101  ret void
102}
103
; half inreg argument. The expected code matches the i16 case: read from s16
; (gfx900) or s0 (gfx1100), copy to v0, 16-bit store.
104define void @void_func_f16_inreg(half inreg %arg0) #0 {
105; GFX9-LABEL: void_func_f16_inreg:
106; GFX9:       ; %bb.0:
107; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GFX9-NEXT:    v_mov_b32_e32 v0, s16
109; GFX9-NEXT:    global_store_short v[0:1], v0, off
110; GFX9-NEXT:    s_waitcnt vmcnt(0)
111; GFX9-NEXT:    s_setpc_b64 s[30:31]
112;
113; GFX11-LABEL: void_func_f16_inreg:
114; GFX11:       ; %bb.0:
115; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116; GFX11-NEXT:    v_mov_b32_e32 v0, s0
117; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
118; GFX11-NEXT:    s_setpc_b64 s[30:31]
119  store half %arg0, ptr addrspace(1) undef
120  ret void
121}
122
; float inreg argument. The expected code matches the i32 case: read from s16
; (gfx900) or s0 (gfx1100), copy to v0, 32-bit store.
123define void @void_func_f32_inreg(float inreg %arg0) #0 {
124; GFX9-LABEL: void_func_f32_inreg:
125; GFX9:       ; %bb.0:
126; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX9-NEXT:    v_mov_b32_e32 v0, s16
128; GFX9-NEXT:    global_store_dword v[0:1], v0, off
129; GFX9-NEXT:    s_waitcnt vmcnt(0)
130; GFX9-NEXT:    s_setpc_b64 s[30:31]
131;
132; GFX11-LABEL: void_func_f32_inreg:
133; GFX11:       ; %bb.0:
134; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135; GFX11-NEXT:    v_mov_b32_e32 v0, s0
136; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
137; GFX11-NEXT:    s_setpc_b64 s[30:31]
138  store float %arg0, ptr addrspace(1) undef
139  ret void
140}
141
; double inreg argument. The expected code matches the i64 case: halves read
; from s16/s17 (gfx900) or s0/s1 (gfx1100), then one 64-bit store.
142define void @void_func_f64_inreg(double inreg %arg0) #0 {
143; GFX9-LABEL: void_func_f64_inreg:
144; GFX9:       ; %bb.0:
145; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; GFX9-NEXT:    v_mov_b32_e32 v0, s16
147; GFX9-NEXT:    v_mov_b32_e32 v1, s17
148; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
149; GFX9-NEXT:    s_waitcnt vmcnt(0)
150; GFX9-NEXT:    s_setpc_b64 s[30:31]
151;
152; GFX11-LABEL: void_func_f64_inreg:
153; GFX11:       ; %bb.0:
154; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
156; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
157; GFX11-NEXT:    s_setpc_b64 s[30:31]
158  store double %arg0, ptr addrspace(1) undef
159  ret void
160}
161
; <2 x i16> inreg argument. The checks read the whole vector from a single
; SGPR, s16 (gfx900) or s0 (gfx1100), and store it with one 32-bit store.
162define void @void_func_v2i16_inreg(<2 x i16> inreg %arg0) #0 {
163; GFX9-LABEL: void_func_v2i16_inreg:
164; GFX9:       ; %bb.0:
165; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166; GFX9-NEXT:    v_mov_b32_e32 v0, s16
167; GFX9-NEXT:    global_store_dword v[0:1], v0, off
168; GFX9-NEXT:    s_waitcnt vmcnt(0)
169; GFX9-NEXT:    s_setpc_b64 s[30:31]
170;
171; GFX11-LABEL: void_func_v2i16_inreg:
172; GFX11:       ; %bb.0:
173; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
174; GFX11-NEXT:    v_mov_b32_e32 v0, s0
175; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
176; GFX11-NEXT:    s_setpc_b64 s[30:31]
177  store <2 x i16> %arg0, ptr addrspace(1) undef
178  ret void
179}
180
; <3 x i16> inreg argument. The checks read two SGPRs, s16/s17 (gfx900) or
; s0/s1 (gfx1100), and split the store: a 16-bit store sourced from the second
; register and a 32-bit store sourced from the first.
181define void @void_func_v3i16_inreg(<3 x i16> inreg %arg0) #0 {
182; GFX9-LABEL: void_func_v3i16_inreg:
183; GFX9:       ; %bb.0:
184; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185; GFX9-NEXT:    v_mov_b32_e32 v0, s17
186; GFX9-NEXT:    global_store_short v[0:1], v0, off
187; GFX9-NEXT:    v_mov_b32_e32 v0, s16
188; GFX9-NEXT:    global_store_dword v[0:1], v0, off
189; GFX9-NEXT:    s_waitcnt vmcnt(0)
190; GFX9-NEXT:    s_setpc_b64 s[30:31]
191;
192; GFX11-LABEL: void_func_v3i16_inreg:
193; GFX11:       ; %bb.0:
194; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195; GFX11-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
196; GFX11-NEXT:    s_clause 0x1
197; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
198; GFX11-NEXT:    global_store_b32 v[0:1], v1, off
199; GFX11-NEXT:    s_setpc_b64 s[30:31]
200  store <3 x i16> %arg0, ptr addrspace(1) undef
201  ret void
202}
203
; <4 x i16> inreg argument. The checks read two SGPRs, s16/s17 (gfx900) or
; s0/s1 (gfx1100), copy them to v[0:1] and issue one 64-bit store.
204define void @void_func_v4i16_inreg(<4 x i16> inreg %arg0) #0 {
214; GFX9-LABEL: void_func_v4i16_inreg:
215; GFX9:       ; %bb.0:
216; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217; GFX9-NEXT:    v_mov_b32_e32 v0, s16
218; GFX9-NEXT:    v_mov_b32_e32 v1, s17
219; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
220; GFX9-NEXT:    s_waitcnt vmcnt(0)
221; GFX9-NEXT:    s_setpc_b64 s[30:31]
222;
223; GFX11-LABEL: void_func_v4i16_inreg:
224; GFX11:       ; %bb.0:
225; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
226; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
227; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
228; GFX11-NEXT:    s_setpc_b64 s[30:31]
229  store <4 x i16> %arg0, ptr addrspace(1) undef
230  ret void
231}
232
; <5 x i16> inreg argument. The checks read three SGPRs, s16-s18 (gfx900) or
; s0-s2 (gfx1100): the third feeds a 16-bit store and the first two feed a
; 64-bit store.
233define void @void_func_v5i16_inreg(<5 x i16> inreg %arg0) #0 {
234; GFX9-LABEL: void_func_v5i16_inreg:
235; GFX9:       ; %bb.0:
236; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237; GFX9-NEXT:    v_mov_b32_e32 v0, s18
238; GFX9-NEXT:    global_store_short v[0:1], v0, off
239; GFX9-NEXT:    v_mov_b32_e32 v0, s16
240; GFX9-NEXT:    v_mov_b32_e32 v1, s17
241; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
242; GFX9-NEXT:    s_waitcnt vmcnt(0)
243; GFX9-NEXT:    s_setpc_b64 s[30:31]
244;
245; GFX11-LABEL: void_func_v5i16_inreg:
246; GFX11:       ; %bb.0:
247; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
249; GFX11-NEXT:    v_mov_b32_e32 v0, s0
250; GFX11-NEXT:    s_clause 0x1
251; GFX11-NEXT:    global_store_b16 v[0:1], v2, off
252; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
253; GFX11-NEXT:    s_setpc_b64 s[30:31]
254  store <5 x i16> %arg0, ptr addrspace(1) undef
255  ret void
256}
257
; <8 x i16> inreg argument. The checks read four SGPRs, s16-s19 (gfx900) or
; s0-s3 (gfx1100), copy them to v[0:3] and issue one 128-bit store.
258define void @void_func_v8i16_inreg(<8 x i16> inreg %arg0) #0 {
259; GFX9-LABEL: void_func_v8i16_inreg:
260; GFX9:       ; %bb.0:
261; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262; GFX9-NEXT:    v_mov_b32_e32 v0, s16
263; GFX9-NEXT:    v_mov_b32_e32 v1, s17
264; GFX9-NEXT:    v_mov_b32_e32 v2, s18
265; GFX9-NEXT:    v_mov_b32_e32 v3, s19
266; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
267; GFX9-NEXT:    s_waitcnt vmcnt(0)
268; GFX9-NEXT:    s_setpc_b64 s[30:31]
269;
270; GFX11-LABEL: void_func_v8i16_inreg:
271; GFX11:       ; %bb.0:
272; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
274; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
275; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
276; GFX11-NEXT:    s_setpc_b64 s[30:31]
277  store <8 x i16> %arg0, ptr addrspace(1) undef
278  ret void
279}
280
; <2 x i32> inreg argument. The checks read s16/s17 (gfx900) or s0/s1
; (gfx1100), copy them to v[0:1] and issue one 64-bit store.
281define void @void_func_v2i32_inreg(<2 x i32> inreg %arg0) #0 {
282; GFX9-LABEL: void_func_v2i32_inreg:
283; GFX9:       ; %bb.0:
284; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285; GFX9-NEXT:    v_mov_b32_e32 v0, s16
286; GFX9-NEXT:    v_mov_b32_e32 v1, s17
287; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
288; GFX9-NEXT:    s_waitcnt vmcnt(0)
289; GFX9-NEXT:    s_setpc_b64 s[30:31]
290;
291; GFX11-LABEL: void_func_v2i32_inreg:
292; GFX11:       ; %bb.0:
293; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
295; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
296; GFX11-NEXT:    s_setpc_b64 s[30:31]
297  store <2 x i32> %arg0, ptr addrspace(1) undef
298  ret void
299}
300
; <3 x i32> inreg argument. The checks read s16-s18 (gfx900) or s0-s2
; (gfx1100), copy them to v[0:2] and issue one 96-bit store.
301define void @void_func_v3i32_inreg(<3 x i32> inreg %arg0) #0 {
302; GFX9-LABEL: void_func_v3i32_inreg:
303; GFX9:       ; %bb.0:
304; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305; GFX9-NEXT:    v_mov_b32_e32 v0, s16
306; GFX9-NEXT:    v_mov_b32_e32 v1, s17
307; GFX9-NEXT:    v_mov_b32_e32 v2, s18
308; GFX9-NEXT:    global_store_dwordx3 v[0:1], v[0:2], off
309; GFX9-NEXT:    s_waitcnt vmcnt(0)
310; GFX9-NEXT:    s_setpc_b64 s[30:31]
311;
312; GFX11-LABEL: void_func_v3i32_inreg:
313; GFX11:       ; %bb.0:
314; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
316; GFX11-NEXT:    v_mov_b32_e32 v2, s2
317; GFX11-NEXT:    global_store_b96 v[0:1], v[0:2], off
318; GFX11-NEXT:    s_setpc_b64 s[30:31]
319  store <3 x i32> %arg0, ptr addrspace(1) undef
320  ret void
321}
322
; <4 x i32> inreg argument. The checks read s16-s19 (gfx900) or s0-s3
; (gfx1100), copy them to v[0:3] and issue one 128-bit store.
323define void @void_func_v4i32_inreg(<4 x i32> inreg %arg0) #0 {
324; GFX9-LABEL: void_func_v4i32_inreg:
325; GFX9:       ; %bb.0:
326; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327; GFX9-NEXT:    v_mov_b32_e32 v0, s16
328; GFX9-NEXT:    v_mov_b32_e32 v1, s17
329; GFX9-NEXT:    v_mov_b32_e32 v2, s18
330; GFX9-NEXT:    v_mov_b32_e32 v3, s19
331; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
332; GFX9-NEXT:    s_waitcnt vmcnt(0)
333; GFX9-NEXT:    s_setpc_b64 s[30:31]
334;
335; GFX11-LABEL: void_func_v4i32_inreg:
336; GFX11:       ; %bb.0:
337; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
338; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
339; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
340; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
341; GFX11-NEXT:    s_setpc_b64 s[30:31]
342  store <4 x i32> %arg0, ptr addrspace(1) undef
343  ret void
344}
345
; <5 x i32> inreg argument. The gfx900 checks read s16-s20; the gfx1100 checks
; read s0-s3 and then s16 for the fifth dword. Both split the store into a
; 32-bit store of the last dword and a 128-bit store of the first four.
346define void @void_func_v5i32_inreg(<5 x i32> inreg %arg0) #0 {
347; GFX9-LABEL: void_func_v5i32_inreg:
348; GFX9:       ; %bb.0:
349; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
350; GFX9-NEXT:    v_mov_b32_e32 v0, s20
351; GFX9-NEXT:    global_store_dword v[0:1], v0, off
352; GFX9-NEXT:    v_mov_b32_e32 v0, s16
353; GFX9-NEXT:    v_mov_b32_e32 v1, s17
354; GFX9-NEXT:    v_mov_b32_e32 v2, s18
355; GFX9-NEXT:    v_mov_b32_e32 v3, s19
356; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
357; GFX9-NEXT:    s_waitcnt vmcnt(0)
358; GFX9-NEXT:    s_setpc_b64 s[30:31]
359;
360; GFX11-LABEL: void_func_v5i32_inreg:
361; GFX11:       ; %bb.0:
362; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363; GFX11-NEXT:    v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v1, s1
364; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
365; GFX11-NEXT:    v_mov_b32_e32 v2, s2
366; GFX11-NEXT:    s_clause 0x1
367; GFX11-NEXT:    global_store_b32 v[0:1], v4, off
368; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
369; GFX11-NEXT:    s_setpc_b64 s[30:31]
370  store <5 x i32> %arg0, ptr addrspace(1) undef
371  ret void
372}
373
; <8 x i32> inreg argument. The gfx900 checks read s16-s23; the gfx1100 checks
; read s0-s3 and s16-s19. Both emit two 128-bit stores, upper half first.
374define void @void_func_v8i32_inreg(<8 x i32> inreg %arg0) #0 {
375; GFX9-LABEL: void_func_v8i32_inreg:
376; GFX9:       ; %bb.0:
377; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378; GFX9-NEXT:    v_mov_b32_e32 v0, s20
379; GFX9-NEXT:    v_mov_b32_e32 v1, s21
380; GFX9-NEXT:    v_mov_b32_e32 v2, s22
381; GFX9-NEXT:    v_mov_b32_e32 v3, s23
382; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
383; GFX9-NEXT:    s_nop 0
384; GFX9-NEXT:    v_mov_b32_e32 v0, s16
385; GFX9-NEXT:    v_mov_b32_e32 v1, s17
386; GFX9-NEXT:    v_mov_b32_e32 v2, s18
387; GFX9-NEXT:    v_mov_b32_e32 v3, s19
388; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
389; GFX9-NEXT:    s_waitcnt vmcnt(0)
390; GFX9-NEXT:    s_setpc_b64 s[30:31]
391;
392; GFX11-LABEL: void_func_v8i32_inreg:
393; GFX11:       ; %bb.0:
394; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
396; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
397; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
398; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
399; GFX11-NEXT:    s_clause 0x1
400; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
401; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
402; GFX11-NEXT:    s_setpc_b64 s[30:31]
403  store <8 x i32> %arg0, ptr addrspace(1) undef
404  ret void
405}
406
; <16 x i32> inreg argument. The gfx900 checks read s16-s29 and take the last
; two dwords from incoming v0/v1; the gfx1100 checks read s0-s3 and s16-s27
; with no VGPR inputs. Both emit four 128-bit stores.
407define void @void_func_v16i32_inreg(<16 x i32> inreg %arg0) #0 {
437; GFX9-LABEL: void_func_v16i32_inreg:
438; GFX9:       ; %bb.0:
439; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440; GFX9-NEXT:    v_mov_b32_e32 v3, v1
441; GFX9-NEXT:    v_mov_b32_e32 v2, v0
442; GFX9-NEXT:    v_mov_b32_e32 v0, s28
443; GFX9-NEXT:    v_mov_b32_e32 v1, s29
444; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
445; GFX9-NEXT:    s_nop 0
446; GFX9-NEXT:    v_mov_b32_e32 v0, s24
447; GFX9-NEXT:    v_mov_b32_e32 v1, s25
448; GFX9-NEXT:    v_mov_b32_e32 v2, s26
449; GFX9-NEXT:    v_mov_b32_e32 v3, s27
450; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
451; GFX9-NEXT:    s_nop 0
452; GFX9-NEXT:    v_mov_b32_e32 v0, s20
453; GFX9-NEXT:    v_mov_b32_e32 v1, s21
454; GFX9-NEXT:    v_mov_b32_e32 v2, s22
455; GFX9-NEXT:    v_mov_b32_e32 v3, s23
456; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
457; GFX9-NEXT:    s_nop 0
458; GFX9-NEXT:    v_mov_b32_e32 v0, s16
459; GFX9-NEXT:    v_mov_b32_e32 v1, s17
460; GFX9-NEXT:    v_mov_b32_e32 v2, s18
461; GFX9-NEXT:    v_mov_b32_e32 v3, s19
462; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
463; GFX9-NEXT:    s_waitcnt vmcnt(0)
464; GFX9-NEXT:    s_setpc_b64 s[30:31]
465;
466; GFX11-LABEL: void_func_v16i32_inreg:
467; GFX11:       ; %bb.0:
468; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
470; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
471; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
472; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
473; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
474; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
475; GFX11-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
476; GFX11-NEXT:    v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
477; GFX11-NEXT:    s_clause 0x3
478; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
479; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
480; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
481; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
482; GFX11-NEXT:    s_setpc_b64 s[30:31]
483  store <16 x i32> %arg0, ptr addrspace(1) undef
484  ret void
485}
486
; <32 x i32> inreg argument, larger than the SGPRs the checks draw from. The
; gfx900 checks read s16-s29 plus incoming v0-v17; the gfx1100 checks read
; s0-s3 and s16-s29 plus incoming v0-v13. Both emit eight 128-bit stores.
487define void @void_func_v32i32_inreg(<32 x i32> inreg %arg0) #0 {
488; GFX9-LABEL: void_func_v32i32_inreg:
489; GFX9:       ; %bb.0:
490; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
491; GFX9-NEXT:    v_mov_b32_e32 v19, v1
492; GFX9-NEXT:    v_mov_b32_e32 v18, v0
493; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
494; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
495; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
496; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
497; GFX9-NEXT:    v_mov_b32_e32 v16, s28
498; GFX9-NEXT:    v_mov_b32_e32 v17, s29
499; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[16:19], off
500; GFX9-NEXT:    v_mov_b32_e32 v0, s24
501; GFX9-NEXT:    v_mov_b32_e32 v1, s25
502; GFX9-NEXT:    v_mov_b32_e32 v2, s26
503; GFX9-NEXT:    v_mov_b32_e32 v3, s27
504; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
505; GFX9-NEXT:    s_nop 0
506; GFX9-NEXT:    v_mov_b32_e32 v0, s20
507; GFX9-NEXT:    v_mov_b32_e32 v1, s21
508; GFX9-NEXT:    v_mov_b32_e32 v2, s22
509; GFX9-NEXT:    v_mov_b32_e32 v3, s23
510; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
511; GFX9-NEXT:    s_nop 0
512; GFX9-NEXT:    v_mov_b32_e32 v0, s16
513; GFX9-NEXT:    v_mov_b32_e32 v1, s17
514; GFX9-NEXT:    v_mov_b32_e32 v2, s18
515; GFX9-NEXT:    v_mov_b32_e32 v3, s19
516; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
517; GFX9-NEXT:    s_waitcnt vmcnt(0)
518; GFX9-NEXT:    s_setpc_b64 s[30:31]
519;
520; GFX11-LABEL: void_func_v32i32_inreg:
521; GFX11:       ; %bb.0:
522; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
523; GFX11-NEXT:    v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
524; GFX11-NEXT:    s_clause 0x2
525; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off
526; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off
527; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
528; GFX11-NEXT:    v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
529; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
530; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
531; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
532; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
533; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
534; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
535; GFX11-NEXT:    v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
536; GFX11-NEXT:    v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
537; GFX11-NEXT:    s_clause 0x4
538; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
539; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
540; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
541; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
542; GFX11-NEXT:    global_store_b128 v[0:1], v[16:19], off
543; GFX11-NEXT:    s_setpc_b64 s[30:31]
544  store <32 x i32> %arg0, ptr addrspace(1) undef
545  ret void
546}
547
; <2 x i64> inreg argument. The checks read s16-s19 (gfx900) or s0-s3
; (gfx1100), copy them to v[0:3] and issue one 128-bit store.
548define void @void_func_v2i64_inreg(<2 x i64> inreg %arg0) #0 {
549; GFX9-LABEL: void_func_v2i64_inreg:
550; GFX9:       ; %bb.0:
551; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552; GFX9-NEXT:    v_mov_b32_e32 v0, s16
553; GFX9-NEXT:    v_mov_b32_e32 v1, s17
554; GFX9-NEXT:    v_mov_b32_e32 v2, s18
555; GFX9-NEXT:    v_mov_b32_e32 v3, s19
556; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
557; GFX9-NEXT:    s_waitcnt vmcnt(0)
558; GFX9-NEXT:    s_setpc_b64 s[30:31]
559;
560; GFX11-LABEL: void_func_v2i64_inreg:
561; GFX11:       ; %bb.0:
562; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
563; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
564; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
565; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
566; GFX11-NEXT:    s_setpc_b64 s[30:31]
567  store <2 x i64> %arg0, ptr addrspace(1) undef
568  ret void
569}
570
; <3 x i64> inreg argument. The gfx900 checks read s16-s21; the gfx1100 checks
; read s0-s3 and s16/s17. Both split the store into a 64-bit store of the last
; element and a 128-bit store of the first two.
571define void @void_func_v3i64_inreg(<3 x i64> inreg %arg0) #0 {
572; GFX9-LABEL: void_func_v3i64_inreg:
573; GFX9:       ; %bb.0:
574; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575; GFX9-NEXT:    v_mov_b32_e32 v0, s20
576; GFX9-NEXT:    v_mov_b32_e32 v1, s21
577; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
578; GFX9-NEXT:    v_mov_b32_e32 v0, s16
579; GFX9-NEXT:    v_mov_b32_e32 v1, s17
580; GFX9-NEXT:    v_mov_b32_e32 v2, s18
581; GFX9-NEXT:    v_mov_b32_e32 v3, s19
582; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
583; GFX9-NEXT:    s_waitcnt vmcnt(0)
584; GFX9-NEXT:    s_setpc_b64 s[30:31]
585;
586; GFX11-LABEL: void_func_v3i64_inreg:
587; GFX11:       ; %bb.0:
588; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
589; GFX11-NEXT:    v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17
590; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
591; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
592; GFX11-NEXT:    s_clause 0x1
593; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off
594; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
595; GFX11-NEXT:    s_setpc_b64 s[30:31]
596  store <3 x i64> %arg0, ptr addrspace(1) undef
597  ret void
598}
599
; <4 x i64> inreg argument. The gfx900 checks read s16-s23; the gfx1100 checks
; read s0-s3 and s16-s19. Both emit two 128-bit stores, upper half first.
600define void @void_func_v4i64_inreg(<4 x i64> inreg %arg0) #0 {
601; GFX9-LABEL: void_func_v4i64_inreg:
602; GFX9:       ; %bb.0:
603; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604; GFX9-NEXT:    v_mov_b32_e32 v0, s20
605; GFX9-NEXT:    v_mov_b32_e32 v1, s21
606; GFX9-NEXT:    v_mov_b32_e32 v2, s22
607; GFX9-NEXT:    v_mov_b32_e32 v3, s23
608; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
609; GFX9-NEXT:    s_nop 0
610; GFX9-NEXT:    v_mov_b32_e32 v0, s16
611; GFX9-NEXT:    v_mov_b32_e32 v1, s17
612; GFX9-NEXT:    v_mov_b32_e32 v2, s18
613; GFX9-NEXT:    v_mov_b32_e32 v3, s19
614; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
615; GFX9-NEXT:    s_waitcnt vmcnt(0)
616; GFX9-NEXT:    s_setpc_b64 s[30:31]
617;
618; GFX11-LABEL: void_func_v4i64_inreg:
619; GFX11:       ; %bb.0:
620; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
622; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
623; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
624; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
625; GFX11-NEXT:    s_clause 0x1
626; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
627; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
628; GFX11-NEXT:    s_setpc_b64 s[30:31]
629  store <4 x i64> %arg0, ptr addrspace(1) undef
630  ret void
631}
632
; <5 x i64> inreg argument. The gfx900 checks read s16-s25; the gfx1100 checks
; read s0-s3 and s16-s21. Both emit two 128-bit stores followed by a 64-bit
; store for the fifth element.
633define void @void_func_v5i64_inreg(<5 x i64> inreg %arg0) #0 {
634; GFX9-LABEL: void_func_v5i64_inreg:
635; GFX9:       ; %bb.0:
636; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637; GFX9-NEXT:    v_mov_b32_e32 v0, s20
638; GFX9-NEXT:    v_mov_b32_e32 v1, s21
639; GFX9-NEXT:    v_mov_b32_e32 v2, s22
640; GFX9-NEXT:    v_mov_b32_e32 v3, s23
641; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
642; GFX9-NEXT:    s_nop 0
643; GFX9-NEXT:    v_mov_b32_e32 v0, s16
644; GFX9-NEXT:    v_mov_b32_e32 v1, s17
645; GFX9-NEXT:    v_mov_b32_e32 v2, s18
646; GFX9-NEXT:    v_mov_b32_e32 v3, s19
647; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
648; GFX9-NEXT:    s_nop 0
649; GFX9-NEXT:    v_mov_b32_e32 v0, s24
650; GFX9-NEXT:    v_mov_b32_e32 v1, s25
651; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
652; GFX9-NEXT:    s_waitcnt vmcnt(0)
653; GFX9-NEXT:    s_setpc_b64 s[30:31]
654;
655; GFX11-LABEL: void_func_v5i64_inreg:
656; GFX11:       ; %bb.0:
657; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
659; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
660; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
661; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
662; GFX11-NEXT:    v_dual_mov_b32 v8, s20 :: v_dual_mov_b32 v9, s21
663; GFX11-NEXT:    s_clause 0x2
664; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
665; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
666; GFX11-NEXT:    global_store_b64 v[0:1], v[8:9], off
667; GFX11-NEXT:    s_setpc_b64 s[30:31]
668  store <5 x i64> %arg0, ptr addrspace(1) undef
669  ret void
670}
671
; <8 x i64> inreg argument (16 dwords). The gfx900 checks read s16-s29 and take
; the last two dwords from incoming v0/v1; the gfx1100 checks read s0-s3 and
; s16-s27 with no VGPR inputs. Both emit four 128-bit stores.
672define void @void_func_v8i64_inreg(<8 x i64> inreg %arg0) #0 {
673; GFX9-LABEL: void_func_v8i64_inreg:
674; GFX9:       ; %bb.0:
675; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
676; GFX9-NEXT:    v_mov_b32_e32 v3, v1
677; GFX9-NEXT:    v_mov_b32_e32 v2, v0
678; GFX9-NEXT:    v_mov_b32_e32 v0, s28
679; GFX9-NEXT:    v_mov_b32_e32 v1, s29
680; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
681; GFX9-NEXT:    s_nop 0
682; GFX9-NEXT:    v_mov_b32_e32 v0, s24
683; GFX9-NEXT:    v_mov_b32_e32 v1, s25
684; GFX9-NEXT:    v_mov_b32_e32 v2, s26
685; GFX9-NEXT:    v_mov_b32_e32 v3, s27
686; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
687; GFX9-NEXT:    s_nop 0
688; GFX9-NEXT:    v_mov_b32_e32 v0, s20
689; GFX9-NEXT:    v_mov_b32_e32 v1, s21
690; GFX9-NEXT:    v_mov_b32_e32 v2, s22
691; GFX9-NEXT:    v_mov_b32_e32 v3, s23
692; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
693; GFX9-NEXT:    s_nop 0
694; GFX9-NEXT:    v_mov_b32_e32 v0, s16
695; GFX9-NEXT:    v_mov_b32_e32 v1, s17
696; GFX9-NEXT:    v_mov_b32_e32 v2, s18
697; GFX9-NEXT:    v_mov_b32_e32 v3, s19
698; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
699; GFX9-NEXT:    s_waitcnt vmcnt(0)
700; GFX9-NEXT:    s_setpc_b64 s[30:31]
701;
702; GFX11-LABEL: void_func_v8i64_inreg:
703; GFX11:       ; %bb.0:
704; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
705; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
706; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
707; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
708; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
709; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
710; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
711; GFX11-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
712; GFX11-NEXT:    v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
713; GFX11-NEXT:    s_clause 0x3
714; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
715; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
716; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
717; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
718; GFX11-NEXT:    s_setpc_b64 s[30:31]
719  store <8 x i64> %arg0, ptr addrspace(1) undef
720  ret void
721}
722
; <16 x i64> inreg argument (32 dwords). The gfx900 checks read s16-s29 plus
; incoming v0-v17; the gfx1100 checks read s0-s3 and s16-s29 plus incoming
; v0-v13. Both emit eight 128-bit stores.
723define void @void_func_v16i64_inreg(<16 x i64> inreg %arg0) #0 {
724; GFX9-LABEL: void_func_v16i64_inreg:
725; GFX9:       ; %bb.0:
726; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
727; GFX9-NEXT:    v_mov_b32_e32 v19, v1
728; GFX9-NEXT:    v_mov_b32_e32 v18, v0
729; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
730; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
731; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
732; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
733; GFX9-NEXT:    v_mov_b32_e32 v16, s28
734; GFX9-NEXT:    v_mov_b32_e32 v17, s29
735; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[16:19], off
736; GFX9-NEXT:    v_mov_b32_e32 v0, s24
737; GFX9-NEXT:    v_mov_b32_e32 v1, s25
738; GFX9-NEXT:    v_mov_b32_e32 v2, s26
739; GFX9-NEXT:    v_mov_b32_e32 v3, s27
740; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
741; GFX9-NEXT:    s_nop 0
742; GFX9-NEXT:    v_mov_b32_e32 v0, s20
743; GFX9-NEXT:    v_mov_b32_e32 v1, s21
744; GFX9-NEXT:    v_mov_b32_e32 v2, s22
745; GFX9-NEXT:    v_mov_b32_e32 v3, s23
746; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
747; GFX9-NEXT:    s_nop 0
748; GFX9-NEXT:    v_mov_b32_e32 v0, s16
749; GFX9-NEXT:    v_mov_b32_e32 v1, s17
750; GFX9-NEXT:    v_mov_b32_e32 v2, s18
751; GFX9-NEXT:    v_mov_b32_e32 v3, s19
752; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
753; GFX9-NEXT:    s_waitcnt vmcnt(0)
754; GFX9-NEXT:    s_setpc_b64 s[30:31]
755;
756; GFX11-LABEL: void_func_v16i64_inreg:
757; GFX11:       ; %bb.0:
758; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759; GFX11-NEXT:    v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
760; GFX11-NEXT:    s_clause 0x2
761; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off
762; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off
763; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
764; GFX11-NEXT:    v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
765; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
766; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
767; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
768; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
769; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
770; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
771; GFX11-NEXT:    v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
772; GFX11-NEXT:    v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
773; GFX11-NEXT:    s_clause 0x4
774; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
775; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
776; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
777; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
778; GFX11-NEXT:    global_store_b128 v[0:1], v[16:19], off
779; GFX11-NEXT:    s_setpc_b64 s[30:31]
780  store <16 x i64> %arg0, ptr addrspace(1) undef
781  ret void
782}
783
; Test: a <2 x half> inreg argument stored to an undef global (addrspace 1)
; pointer.
; Both targets are expected to read the packed value from a single SGPR
; (s16 on gfx900, s0 on gfx1100), copy it to v0 and emit one 32-bit store.
784define void @void_func_v2f16_inreg(<2 x half> inreg %arg0) #0 {
785; GFX9-LABEL: void_func_v2f16_inreg:
786; GFX9:       ; %bb.0:
787; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
788; GFX9-NEXT:    v_mov_b32_e32 v0, s16
789; GFX9-NEXT:    global_store_dword v[0:1], v0, off
790; GFX9-NEXT:    s_waitcnt vmcnt(0)
791; GFX9-NEXT:    s_setpc_b64 s[30:31]
792;
793; GFX11-LABEL: void_func_v2f16_inreg:
794; GFX11:       ; %bb.0:
795; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
796; GFX11-NEXT:    v_mov_b32_e32 v0, s0
797; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
798; GFX11-NEXT:    s_setpc_b64 s[30:31]
799  store <2 x half> %arg0, ptr addrspace(1) undef
800  ret void
801}
802
; Test: a <3 x half> inreg argument stored to an undef global (addrspace 1)
; pointer.
; The checks expect two SGPRs (s16-s17 on gfx900, s0-s1 on gfx1100) and a
; split store: a 16-bit store sourced from the second SGPR and a 32-bit store
; sourced from the first.
803define void @void_func_v3f16_inreg(<3 x half> inreg %arg0) #0 {
804; GFX9-LABEL: void_func_v3f16_inreg:
805; GFX9:       ; %bb.0:
806; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
807; GFX9-NEXT:    v_mov_b32_e32 v0, s17
808; GFX9-NEXT:    global_store_short v[0:1], v0, off
809; GFX9-NEXT:    v_mov_b32_e32 v0, s16
810; GFX9-NEXT:    global_store_dword v[0:1], v0, off
811; GFX9-NEXT:    s_waitcnt vmcnt(0)
812; GFX9-NEXT:    s_setpc_b64 s[30:31]
813;
814; GFX11-LABEL: void_func_v3f16_inreg:
815; GFX11:       ; %bb.0:
816; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817; GFX11-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
818; GFX11-NEXT:    s_clause 0x1
819; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
820; GFX11-NEXT:    global_store_b32 v[0:1], v1, off
821; GFX11-NEXT:    s_setpc_b64 s[30:31]
822  store <3 x half> %arg0, ptr addrspace(1) undef
823  ret void
824}
825
; Test: a <4 x half> inreg argument stored to an undef global (addrspace 1)
; pointer.
; The checks expect the value in two SGPRs (s16-s17 on gfx900, s0-s1 on
; gfx1100), copied to v[0:1] and written with a single 64-bit store.
826define void @void_func_v4f16_inreg(<4 x half> inreg %arg0) #0 {
827; GFX9-LABEL: void_func_v4f16_inreg:
828; GFX9:       ; %bb.0:
829; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830; GFX9-NEXT:    v_mov_b32_e32 v0, s16
831; GFX9-NEXT:    v_mov_b32_e32 v1, s17
832; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
833; GFX9-NEXT:    s_waitcnt vmcnt(0)
834; GFX9-NEXT:    s_setpc_b64 s[30:31]
835;
836; GFX11-LABEL: void_func_v4f16_inreg:
837; GFX11:       ; %bb.0:
838; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
839; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
840; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
841; GFX11-NEXT:    s_setpc_b64 s[30:31]
842  store <4 x half> %arg0, ptr addrspace(1) undef
843  ret void
844}
845
; Test: an <8 x half> inreg argument stored to an undef global (addrspace 1)
; pointer.
; The checks expect the value in four SGPRs (s16-s19 on gfx900, s0-s3 on
; gfx1100), copied to v[0:3] and written with a single 128-bit store.
846define void @void_func_v8f16_inreg(<8 x half> inreg %arg0) #0 {
847; GFX9-LABEL: void_func_v8f16_inreg:
848; GFX9:       ; %bb.0:
849; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
850; GFX9-NEXT:    v_mov_b32_e32 v0, s16
851; GFX9-NEXT:    v_mov_b32_e32 v1, s17
852; GFX9-NEXT:    v_mov_b32_e32 v2, s18
853; GFX9-NEXT:    v_mov_b32_e32 v3, s19
854; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
855; GFX9-NEXT:    s_waitcnt vmcnt(0)
856; GFX9-NEXT:    s_setpc_b64 s[30:31]
857;
858; GFX11-LABEL: void_func_v8f16_inreg:
859; GFX11:       ; %bb.0:
860; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
861; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
862; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
863; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
864; GFX11-NEXT:    s_setpc_b64 s[30:31]
865  store <8 x half> %arg0, ptr addrspace(1) undef
866  ret void
867}
868
; Test: a <16 x half> inreg argument (8 dwords) stored to an undef global
; (addrspace 1) pointer.
; The gfx900 checks read the value from s16-s23; the gfx1100 checks read it
; from s0-s3 and s16-s19. Both expect two 128-bit stores.
869define void @void_func_v16f16_inreg(<16 x half> inreg %arg0) #0 {
870; GFX9-LABEL: void_func_v16f16_inreg:
871; GFX9:       ; %bb.0:
872; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
873; GFX9-NEXT:    v_mov_b32_e32 v0, s20
874; GFX9-NEXT:    v_mov_b32_e32 v1, s21
875; GFX9-NEXT:    v_mov_b32_e32 v2, s22
876; GFX9-NEXT:    v_mov_b32_e32 v3, s23
877; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
878; GFX9-NEXT:    s_nop 0
879; GFX9-NEXT:    v_mov_b32_e32 v0, s16
880; GFX9-NEXT:    v_mov_b32_e32 v1, s17
881; GFX9-NEXT:    v_mov_b32_e32 v2, s18
882; GFX9-NEXT:    v_mov_b32_e32 v3, s19
883; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
884; GFX9-NEXT:    s_waitcnt vmcnt(0)
885; GFX9-NEXT:    s_setpc_b64 s[30:31]
886;
887; GFX11-LABEL: void_func_v16f16_inreg:
888; GFX11:       ; %bb.0:
889; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
891; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
892; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
893; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
894; GFX11-NEXT:    s_clause 0x1
895; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
896; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
897; GFX11-NEXT:    s_setpc_b64 s[30:31]
898  store <16 x half> %arg0, ptr addrspace(1) undef
899  ret void
900}
901
; Test: a <2 x float> inreg argument stored to an undef global (addrspace 1)
; pointer.
; The checks expect the value in two SGPRs (s16-s17 on gfx900, s0-s1 on
; gfx1100), copied to v[0:1] and written with a single 64-bit store.
902define void @void_func_v2f32_inreg(<2 x float> inreg %arg0) #0 {
903; GFX9-LABEL: void_func_v2f32_inreg:
904; GFX9:       ; %bb.0:
905; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
906; GFX9-NEXT:    v_mov_b32_e32 v0, s16
907; GFX9-NEXT:    v_mov_b32_e32 v1, s17
908; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
909; GFX9-NEXT:    s_waitcnt vmcnt(0)
910; GFX9-NEXT:    s_setpc_b64 s[30:31]
911;
912; GFX11-LABEL: void_func_v2f32_inreg:
913; GFX11:       ; %bb.0:
914; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
915; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
916; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
917; GFX11-NEXT:    s_setpc_b64 s[30:31]
918  store <2 x float> %arg0, ptr addrspace(1) undef
919  ret void
920}
921
; Test: a <3 x float> inreg argument stored to an undef global (addrspace 1)
; pointer.
; The checks expect the value in three SGPRs (s16-s18 on gfx900, s0-s2 on
; gfx1100), copied to v[0:2] and written with a single 96-bit store.
922define void @void_func_v3f32_inreg(<3 x float> inreg %arg0) #0 {
923; GFX9-LABEL: void_func_v3f32_inreg:
924; GFX9:       ; %bb.0:
925; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
926; GFX9-NEXT:    v_mov_b32_e32 v0, s16
927; GFX9-NEXT:    v_mov_b32_e32 v1, s17
928; GFX9-NEXT:    v_mov_b32_e32 v2, s18
929; GFX9-NEXT:    global_store_dwordx3 v[0:1], v[0:2], off
930; GFX9-NEXT:    s_waitcnt vmcnt(0)
931; GFX9-NEXT:    s_setpc_b64 s[30:31]
932;
933; GFX11-LABEL: void_func_v3f32_inreg:
934; GFX11:       ; %bb.0:
935; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
937; GFX11-NEXT:    v_mov_b32_e32 v2, s2
938; GFX11-NEXT:    global_store_b96 v[0:1], v[0:2], off
939; GFX11-NEXT:    s_setpc_b64 s[30:31]
940  store <3 x float> %arg0, ptr addrspace(1) undef
941  ret void
942}
943
; Test: a <4 x float> inreg argument stored to an undef global (addrspace 1)
; pointer.
; The checks expect the value in four SGPRs (s16-s19 on gfx900, s0-s3 on
; gfx1100), copied to v[0:3] and written with a single 128-bit store.
944define void @void_func_v4f32_inreg(<4 x float> inreg %arg0) #0 {
945; GFX9-LABEL: void_func_v4f32_inreg:
946; GFX9:       ; %bb.0:
947; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
948; GFX9-NEXT:    v_mov_b32_e32 v0, s16
949; GFX9-NEXT:    v_mov_b32_e32 v1, s17
950; GFX9-NEXT:    v_mov_b32_e32 v2, s18
951; GFX9-NEXT:    v_mov_b32_e32 v3, s19
952; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
953; GFX9-NEXT:    s_waitcnt vmcnt(0)
954; GFX9-NEXT:    s_setpc_b64 s[30:31]
955;
956; GFX11-LABEL: void_func_v4f32_inreg:
957; GFX11:       ; %bb.0:
958; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
959; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
960; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
961; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
962; GFX11-NEXT:    s_setpc_b64 s[30:31]
963  store <4 x float> %arg0, ptr addrspace(1) undef
964  ret void
965}
966
; Test: an <8 x float> inreg argument (8 dwords) stored to an undef global
; (addrspace 1) pointer.
; The gfx900 checks read the value from s16-s23; the gfx1100 checks read it
; from s0-s3 and s16-s19. Both expect two 128-bit stores.
967define void @void_func_v8f32_inreg(<8 x float> inreg %arg0) #0 {
968; GFX9-LABEL: void_func_v8f32_inreg:
969; GFX9:       ; %bb.0:
970; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
971; GFX9-NEXT:    v_mov_b32_e32 v0, s20
972; GFX9-NEXT:    v_mov_b32_e32 v1, s21
973; GFX9-NEXT:    v_mov_b32_e32 v2, s22
974; GFX9-NEXT:    v_mov_b32_e32 v3, s23
975; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
976; GFX9-NEXT:    s_nop 0
977; GFX9-NEXT:    v_mov_b32_e32 v0, s16
978; GFX9-NEXT:    v_mov_b32_e32 v1, s17
979; GFX9-NEXT:    v_mov_b32_e32 v2, s18
980; GFX9-NEXT:    v_mov_b32_e32 v3, s19
981; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
982; GFX9-NEXT:    s_waitcnt vmcnt(0)
983; GFX9-NEXT:    s_setpc_b64 s[30:31]
984;
985; GFX11-LABEL: void_func_v8f32_inreg:
986; GFX11:       ; %bb.0:
987; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
989; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
990; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
991; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
992; GFX11-NEXT:    s_clause 0x1
993; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
994; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
995; GFX11-NEXT:    s_setpc_b64 s[30:31]
996  store <8 x float> %arg0, ptr addrspace(1) undef
997  ret void
998}
999
; Test: a <16 x float> inreg argument (16 dwords) stored to an undef global
; (addrspace 1) pointer.
; The gfx900 checks read the value from s16-s29 with the last two dwords
; arriving in v0-v1; the gfx1100 checks read all of it from SGPRs (s0-s3 and
; s16-s27). Both expect four 128-bit stores.
1000define void @void_func_v16f32_inreg(<16 x float> inreg %arg0) #0 {
1001; GFX9-LABEL: void_func_v16f32_inreg:
1002; GFX9:       ; %bb.0:
1003; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1004; GFX9-NEXT:    v_mov_b32_e32 v3, v1
1005; GFX9-NEXT:    v_mov_b32_e32 v2, v0
1006; GFX9-NEXT:    v_mov_b32_e32 v0, s28
1007; GFX9-NEXT:    v_mov_b32_e32 v1, s29
1008; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1009; GFX9-NEXT:    s_nop 0
1010; GFX9-NEXT:    v_mov_b32_e32 v0, s24
1011; GFX9-NEXT:    v_mov_b32_e32 v1, s25
1012; GFX9-NEXT:    v_mov_b32_e32 v2, s26
1013; GFX9-NEXT:    v_mov_b32_e32 v3, s27
1014; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1015; GFX9-NEXT:    s_nop 0
1016; GFX9-NEXT:    v_mov_b32_e32 v0, s20
1017; GFX9-NEXT:    v_mov_b32_e32 v1, s21
1018; GFX9-NEXT:    v_mov_b32_e32 v2, s22
1019; GFX9-NEXT:    v_mov_b32_e32 v3, s23
1020; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1021; GFX9-NEXT:    s_nop 0
1022; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1023; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1024; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1025; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1026; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1027; GFX9-NEXT:    s_waitcnt vmcnt(0)
1028; GFX9-NEXT:    s_setpc_b64 s[30:31]
1029;
1030; GFX11-LABEL: void_func_v16f32_inreg:
1031; GFX11:       ; %bb.0:
1032; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1033; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
1034; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
1035; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
1036; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
1037; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
1038; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
1039; GFX11-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
1040; GFX11-NEXT:    v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
1041; GFX11-NEXT:    s_clause 0x3
1042; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
1043; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
1044; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
1045; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
1046; GFX11-NEXT:    s_setpc_b64 s[30:31]
1047  store <16 x float> %arg0, ptr addrspace(1) undef
1048  ret void
1049}
1050
; Test: a <2 x double> inreg argument stored to an undef global (addrspace 1)
; pointer.
; The checks expect the value in four SGPRs (s16-s19 on gfx900, s0-s3 on
; gfx1100), copied to v[0:3] and written with a single 128-bit store.
1051define void @void_func_v2f64_inreg(<2 x double> inreg %arg0) #0 {
1052; GFX9-LABEL: void_func_v2f64_inreg:
1053; GFX9:       ; %bb.0:
1054; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1055; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1056; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1057; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1058; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1059; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1060; GFX9-NEXT:    s_waitcnt vmcnt(0)
1061; GFX9-NEXT:    s_setpc_b64 s[30:31]
1062;
1063; GFX11-LABEL: void_func_v2f64_inreg:
1064; GFX11:       ; %bb.0:
1065; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1066; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1067; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
1068; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
1069; GFX11-NEXT:    s_setpc_b64 s[30:31]
1070  store <2 x double> %arg0, ptr addrspace(1) undef
1071  ret void
1072}
1073
; Test: a <3 x double> inreg argument (6 dwords) stored to an undef global
; (addrspace 1) pointer.
; The gfx900 checks read the value from s16-s21; the gfx1100 checks read it
; from s0-s3 and s16-s17. Both expect a split store: one 64-bit store for the
; last element and one 128-bit store for the first two.
1074define void @void_func_v3f64_inreg(<3 x double> inreg %arg0) #0 {
1075; GFX9-LABEL: void_func_v3f64_inreg:
1076; GFX9:       ; %bb.0:
1077; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1078; GFX9-NEXT:    v_mov_b32_e32 v0, s20
1079; GFX9-NEXT:    v_mov_b32_e32 v1, s21
1080; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1081; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1082; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1083; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1084; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1085; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1086; GFX9-NEXT:    s_waitcnt vmcnt(0)
1087; GFX9-NEXT:    s_setpc_b64 s[30:31]
1088;
1089; GFX11-LABEL: void_func_v3f64_inreg:
1090; GFX11:       ; %bb.0:
1091; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1092; GFX11-NEXT:    v_dual_mov_b32 v4, s16 :: v_dual_mov_b32 v5, s17
1093; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1094; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
1095; GFX11-NEXT:    s_clause 0x1
1096; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off
1097; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
1098; GFX11-NEXT:    s_setpc_b64 s[30:31]
1099  store <3 x double> %arg0, ptr addrspace(1) undef
1100  ret void
1101}
1102
; Test: a <4 x double> inreg argument (8 dwords) stored to an undef global
; (addrspace 1) pointer.
; The gfx900 checks read the value from s16-s23; the gfx1100 checks read it
; from s0-s3 and s16-s19. Both expect two 128-bit stores.
1103define void @void_func_v4f64_inreg(<4 x double> inreg %arg0) #0 {
1104; GFX9-LABEL: void_func_v4f64_inreg:
1105; GFX9:       ; %bb.0:
1106; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1107; GFX9-NEXT:    v_mov_b32_e32 v0, s20
1108; GFX9-NEXT:    v_mov_b32_e32 v1, s21
1109; GFX9-NEXT:    v_mov_b32_e32 v2, s22
1110; GFX9-NEXT:    v_mov_b32_e32 v3, s23
1111; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1112; GFX9-NEXT:    s_nop 0
1113; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1114; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1115; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1116; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1117; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1118; GFX9-NEXT:    s_waitcnt vmcnt(0)
1119; GFX9-NEXT:    s_setpc_b64 s[30:31]
1120;
1121; GFX11-LABEL: void_func_v4f64_inreg:
1122; GFX11:       ; %bb.0:
1123; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
1125; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
1126; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
1127; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
1128; GFX11-NEXT:    s_clause 0x1
1129; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
1130; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
1131; GFX11-NEXT:    s_setpc_b64 s[30:31]
1132  store <4 x double> %arg0, ptr addrspace(1) undef
1133  ret void
1134}
1135
; Test: an <8 x double> inreg argument (16 dwords) stored to an undef global
; (addrspace 1) pointer.
; The gfx900 checks read the value from s16-s29 with the last two dwords
; arriving in v0-v1; the gfx1100 checks read all of it from SGPRs (s0-s3 and
; s16-s27). Both expect four 128-bit stores.
1136define void @void_func_v8f64_inreg(<8 x double> inreg %arg0) #0 {
1137; GFX9-LABEL: void_func_v8f64_inreg:
1138; GFX9:       ; %bb.0:
1139; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1140; GFX9-NEXT:    v_mov_b32_e32 v3, v1
1141; GFX9-NEXT:    v_mov_b32_e32 v2, v0
1142; GFX9-NEXT:    v_mov_b32_e32 v0, s28
1143; GFX9-NEXT:    v_mov_b32_e32 v1, s29
1144; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1145; GFX9-NEXT:    s_nop 0
1146; GFX9-NEXT:    v_mov_b32_e32 v0, s24
1147; GFX9-NEXT:    v_mov_b32_e32 v1, s25
1148; GFX9-NEXT:    v_mov_b32_e32 v2, s26
1149; GFX9-NEXT:    v_mov_b32_e32 v3, s27
1150; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1151; GFX9-NEXT:    s_nop 0
1152; GFX9-NEXT:    v_mov_b32_e32 v0, s20
1153; GFX9-NEXT:    v_mov_b32_e32 v1, s21
1154; GFX9-NEXT:    v_mov_b32_e32 v2, s22
1155; GFX9-NEXT:    v_mov_b32_e32 v3, s23
1156; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1157; GFX9-NEXT:    s_nop 0
1158; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1159; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1160; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1161; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1162; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1163; GFX9-NEXT:    s_waitcnt vmcnt(0)
1164; GFX9-NEXT:    s_setpc_b64 s[30:31]
1165;
1166; GFX11-LABEL: void_func_v8f64_inreg:
1167; GFX11:       ; %bb.0:
1168; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1169; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
1170; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
1171; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
1172; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
1173; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
1174; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
1175; GFX11-NEXT:    v_dual_mov_b32 v12, s0 :: v_dual_mov_b32 v13, s1
1176; GFX11-NEXT:    v_dual_mov_b32 v14, s2 :: v_dual_mov_b32 v15, s3
1177; GFX11-NEXT:    s_clause 0x3
1178; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
1179; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
1180; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
1181; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
1182; GFX11-NEXT:    s_setpc_b64 s[30:31]
1183  store <8 x double> %arg0, ptr addrspace(1) undef
1184  ret void
1185}
1186
; Test: a <16 x double> inreg argument (32 dwords) stored to an undef global
; (addrspace 1) pointer.
; The gfx900 checks read the value from s16-s29 followed by v0-v17; the
; gfx1100 checks read it from s0-s3, s16-s29 and then v0-v13. Both expect
; eight 128-bit stores.
1187define void @void_func_v16f64_inreg(<16 x double> inreg %arg0) #0 {
1188; GFX9-LABEL: void_func_v16f64_inreg:
1189; GFX9:       ; %bb.0:
1190; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1191; GFX9-NEXT:    v_mov_b32_e32 v19, v1
1192; GFX9-NEXT:    v_mov_b32_e32 v18, v0
1193; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
1194; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
1195; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
1196; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1197; GFX9-NEXT:    v_mov_b32_e32 v16, s28
1198; GFX9-NEXT:    v_mov_b32_e32 v17, s29
1199; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[16:19], off
1200; GFX9-NEXT:    v_mov_b32_e32 v0, s24
1201; GFX9-NEXT:    v_mov_b32_e32 v1, s25
1202; GFX9-NEXT:    v_mov_b32_e32 v2, s26
1203; GFX9-NEXT:    v_mov_b32_e32 v3, s27
1204; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1205; GFX9-NEXT:    s_nop 0
1206; GFX9-NEXT:    v_mov_b32_e32 v0, s20
1207; GFX9-NEXT:    v_mov_b32_e32 v1, s21
1208; GFX9-NEXT:    v_mov_b32_e32 v2, s22
1209; GFX9-NEXT:    v_mov_b32_e32 v3, s23
1210; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1211; GFX9-NEXT:    s_nop 0
1212; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1213; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1214; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1215; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1216; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1217; GFX9-NEXT:    s_waitcnt vmcnt(0)
1218; GFX9-NEXT:    s_setpc_b64 s[30:31]
1219;
1220; GFX11-LABEL: void_func_v16f64_inreg:
1221; GFX11:       ; %bb.0:
1222; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1223; GFX11-NEXT:    v_dual_mov_b32 v15, v1 :: v_dual_mov_b32 v14, v0
1224; GFX11-NEXT:    s_clause 0x2
1225; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off
1226; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off
1227; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
1228; GFX11-NEXT:    v_dual_mov_b32 v12, s28 :: v_dual_mov_b32 v13, s29
1229; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
1230; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
1231; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
1232; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
1233; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
1234; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
1235; GFX11-NEXT:    v_dual_mov_b32 v16, s0 :: v_dual_mov_b32 v17, s1
1236; GFX11-NEXT:    v_dual_mov_b32 v18, s2 :: v_dual_mov_b32 v19, s3
1237; GFX11-NEXT:    s_clause 0x4
1238; GFX11-NEXT:    global_store_b128 v[0:1], v[12:15], off
1239; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
1240; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
1241; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off
1242; GFX11-NEXT:    global_store_b128 v[0:1], v[16:19], off
1243; GFX11-NEXT:    s_setpc_b64 s[30:31]
1244  store <16 x double> %arg0, ptr addrspace(1) undef
1245  ret void
1246}
1247
; Test: a <32 x i32> inreg argument followed by four small inreg scalars
; (i1, i8, i16, half), each written with a volatile store to an undef global
; (addrspace 1) pointer.
; NOTE(review): the function name says "f32" but %arg4 is declared as half;
; the checks store it with a 16-bit store.
; On gfx900 the checks read the vector from s16-s29 and v0-v17 and the scalars
; from v18-v21; on gfx1100 the vector comes from s0-s3, s16-s29 and v0-v13 and
; the scalars from v14-v17. The i1 is masked with 1 before its byte store.
; Every store is followed by a wait (s_waitcnt vmcnt(0) on gfx900,
; s_waitcnt_vscnt on gfx1100).
1248define void @void_func_v32i32_i1_i8_i16_f32_inreg(<32 x i32> inreg %arg0, i1 inreg %arg1, i8 inreg %arg2, i16 inreg %arg3, half inreg %arg4) #0 {
1249; GFX9-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
1250; GFX9:       ; %bb.0:
1251; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1252; GFX9-NEXT:    v_mov_b32_e32 v25, v1
1253; GFX9-NEXT:    v_mov_b32_e32 v24, v0
1254; GFX9-NEXT:    v_mov_b32_e32 v22, s28
1255; GFX9-NEXT:    v_mov_b32_e32 v23, s29
1256; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
1257; GFX9-NEXT:    s_waitcnt vmcnt(0)
1258; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
1259; GFX9-NEXT:    s_waitcnt vmcnt(0)
1260; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
1261; GFX9-NEXT:    s_waitcnt vmcnt(0)
1262; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1263; GFX9-NEXT:    s_waitcnt vmcnt(0)
1264; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[22:25], off
1265; GFX9-NEXT:    s_waitcnt vmcnt(0)
1266; GFX9-NEXT:    v_mov_b32_e32 v0, s24
1267; GFX9-NEXT:    v_mov_b32_e32 v1, s25
1268; GFX9-NEXT:    v_mov_b32_e32 v2, s26
1269; GFX9-NEXT:    v_mov_b32_e32 v3, s27
1270; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1271; GFX9-NEXT:    s_waitcnt vmcnt(0)
1272; GFX9-NEXT:    v_mov_b32_e32 v0, s20
1273; GFX9-NEXT:    v_mov_b32_e32 v1, s21
1274; GFX9-NEXT:    v_mov_b32_e32 v2, s22
1275; GFX9-NEXT:    v_mov_b32_e32 v3, s23
1276; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1277; GFX9-NEXT:    s_waitcnt vmcnt(0)
1278; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1279; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1280; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1281; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1282; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1283; GFX9-NEXT:    s_waitcnt vmcnt(0)
1284; GFX9-NEXT:    v_and_b32_e32 v0, 1, v18
1285; GFX9-NEXT:    global_store_byte v[0:1], v0, off
1286; GFX9-NEXT:    s_waitcnt vmcnt(0)
1287; GFX9-NEXT:    global_store_byte v[0:1], v19, off
1288; GFX9-NEXT:    s_waitcnt vmcnt(0)
1289; GFX9-NEXT:    global_store_short v[0:1], v20, off
1290; GFX9-NEXT:    s_waitcnt vmcnt(0)
1291; GFX9-NEXT:    global_store_short v[0:1], v21, off
1292; GFX9-NEXT:    s_waitcnt vmcnt(0)
1293; GFX9-NEXT:    s_setpc_b64 s[30:31]
1294;
1295; GFX11-LABEL: void_func_v32i32_i1_i8_i16_f32_inreg:
1296; GFX11:       ; %bb.0:
1297; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298; GFX11-NEXT:    v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0
1299; GFX11-NEXT:    v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29
1300; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off dlc
1301; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1302; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off dlc
1303; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1304; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off dlc
1305; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1306; GFX11-NEXT:    global_store_b128 v[0:1], v[18:21], off dlc
1307; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1308; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
1309; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
1310; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
1311; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
1312; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
1313; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
1314; GFX11-NEXT:    v_dual_mov_b32 v18, s0 :: v_dual_mov_b32 v19, s1
1315; GFX11-NEXT:    v_dual_mov_b32 v20, s2 :: v_dual_mov_b32 v21, s3
1316; GFX11-NEXT:    v_and_b32_e32 v12, 1, v14
1317; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off dlc
1318; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1319; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off dlc
1320; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1321; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off dlc
1322; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1323; GFX11-NEXT:    global_store_b128 v[0:1], v[18:21], off dlc
1324; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1325; GFX11-NEXT:    global_store_b8 v[0:1], v12, off dlc
1326; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1327; GFX11-NEXT:    global_store_b8 v[0:1], v15, off dlc
1328; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1329; GFX11-NEXT:    global_store_b16 v[0:1], v16, off dlc
1330; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1331; GFX11-NEXT:    global_store_b16 v[0:1], v17, off dlc
1332; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1333; GFX11-NEXT:    s_setpc_b64 s[30:31]
1334  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
1335  store volatile i1 %arg1, ptr addrspace(1) undef
1336  store volatile i8 %arg2, ptr addrspace(1) undef
1337  store volatile i16 %arg3, ptr addrspace(1) undef
1338  store volatile half %arg4, ptr addrspace(1) undef
1339  ret void
1340}
1341
; Test: a <32 x i32> inreg argument followed by <2 x i32> and <2 x float>
; inreg arguments, each written with a volatile store to an undef global
; (addrspace 1) pointer.
; On gfx900 the checks read the large vector from s16-s29 and v0-v17, then
; %arg1 from v[18:19] and %arg2 from v[20:21]; on gfx1100 the large vector
; comes from s0-s3, s16-s29 and v0-v13, then %arg1 from v[14:15] and %arg2
; from v[16:17].
; Every store is followed by a wait (s_waitcnt vmcnt(0) on gfx900,
; s_waitcnt_vscnt on gfx1100).
1342define void @void_func_v32i32_v2i32_v2f32_inreg(<32 x i32> inreg %arg0, <2 x i32> inreg %arg1, <2 x float> inreg %arg2) #0 {
1343; GFX9-LABEL: void_func_v32i32_v2i32_v2f32_inreg:
1344; GFX9:       ; %bb.0:
1345; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1346; GFX9-NEXT:    v_mov_b32_e32 v25, v1
1347; GFX9-NEXT:    v_mov_b32_e32 v24, v0
1348; GFX9-NEXT:    v_mov_b32_e32 v22, s28
1349; GFX9-NEXT:    v_mov_b32_e32 v23, s29
1350; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[14:17], off
1351; GFX9-NEXT:    s_waitcnt vmcnt(0)
1352; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[10:13], off
1353; GFX9-NEXT:    s_waitcnt vmcnt(0)
1354; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[6:9], off
1355; GFX9-NEXT:    s_waitcnt vmcnt(0)
1356; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
1357; GFX9-NEXT:    s_waitcnt vmcnt(0)
1358; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[22:25], off
1359; GFX9-NEXT:    s_waitcnt vmcnt(0)
1360; GFX9-NEXT:    v_mov_b32_e32 v0, s24
1361; GFX9-NEXT:    v_mov_b32_e32 v1, s25
1362; GFX9-NEXT:    v_mov_b32_e32 v2, s26
1363; GFX9-NEXT:    v_mov_b32_e32 v3, s27
1364; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1365; GFX9-NEXT:    s_waitcnt vmcnt(0)
1366; GFX9-NEXT:    v_mov_b32_e32 v0, s20
1367; GFX9-NEXT:    v_mov_b32_e32 v1, s21
1368; GFX9-NEXT:    v_mov_b32_e32 v2, s22
1369; GFX9-NEXT:    v_mov_b32_e32 v3, s23
1370; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1371; GFX9-NEXT:    s_waitcnt vmcnt(0)
1372; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1373; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1374; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1375; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1376; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1377; GFX9-NEXT:    s_waitcnt vmcnt(0)
1378; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[18:19], off
1379; GFX9-NEXT:    s_waitcnt vmcnt(0)
1380; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[20:21], off
1381; GFX9-NEXT:    s_waitcnt vmcnt(0)
1382; GFX9-NEXT:    s_setpc_b64 s[30:31]
1383;
1384; GFX11-LABEL: void_func_v32i32_v2i32_v2f32_inreg:
1385; GFX11:       ; %bb.0:
1386; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1387; GFX11-NEXT:    v_dual_mov_b32 v21, v1 :: v_dual_mov_b32 v20, v0
1388; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off dlc
1389; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1390; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off dlc
1391; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1392; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off dlc
1393; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1394; GFX11-NEXT:    v_dual_mov_b32 v18, s28 :: v_dual_mov_b32 v19, s29
1395; GFX11-NEXT:    v_dual_mov_b32 v0, s24 :: v_dual_mov_b32 v1, s25
1396; GFX11-NEXT:    v_dual_mov_b32 v2, s26 :: v_dual_mov_b32 v3, s27
1397; GFX11-NEXT:    v_dual_mov_b32 v4, s20 :: v_dual_mov_b32 v5, s21
1398; GFX11-NEXT:    v_dual_mov_b32 v6, s22 :: v_dual_mov_b32 v7, s23
1399; GFX11-NEXT:    v_dual_mov_b32 v8, s16 :: v_dual_mov_b32 v9, s17
1400; GFX11-NEXT:    v_dual_mov_b32 v10, s18 :: v_dual_mov_b32 v11, s19
1401; GFX11-NEXT:    v_dual_mov_b32 v22, s0 :: v_dual_mov_b32 v23, s1
1402; GFX11-NEXT:    v_dual_mov_b32 v24, s2 :: v_dual_mov_b32 v25, s3
1403; GFX11-NEXT:    global_store_b128 v[0:1], v[18:21], off dlc
1404; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1405; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off dlc
1406; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1407; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off dlc
1408; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1409; GFX11-NEXT:    global_store_b128 v[0:1], v[8:11], off dlc
1410; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1411; GFX11-NEXT:    global_store_b128 v[0:1], v[22:25], off dlc
1412; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1413; GFX11-NEXT:    global_store_b64 v[0:1], v[14:15], off dlc
1414; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1415; GFX11-NEXT:    global_store_b64 v[0:1], v[16:17], off dlc
1416; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1417; GFX11-NEXT:    s_setpc_b64 s[30:31]
1418  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
1419  store volatile <2 x i32> %arg1, ptr addrspace(1) undef
1420  store volatile <2 x float> %arg2, ptr addrspace(1) undef
1421  ret void
1422}
1423
; Takes 32 i32 inreg arguments and volatile-stores each of them, in order, to
; an undef global pointer.
; Per the checks below, on gfx900 the first 14 arguments are read from s16-s29
; and the remaining 18 from v0-v17; on gfx1100 the first 18 are read from s0-s3
; and s16-s29 and the remaining 14 from v0-v13.
; NOTE(review): despite the function name, the workitem.id.x call and its store
; are commented out in the body, so no workitem id is read here -- confirm
; whether that is intentional.
1424define void @too_many_args_use_workitem_id_x_inreg(
1425; GFX9-LABEL: too_many_args_use_workitem_id_x_inreg:
1426; GFX9:       ; %bb.0:
1427; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1428; GFX9-NEXT:    v_mov_b32_e32 v18, s16
1429; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1430; GFX9-NEXT:    s_waitcnt vmcnt(0)
1431; GFX9-NEXT:    v_mov_b32_e32 v18, s17
1432; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1433; GFX9-NEXT:    s_waitcnt vmcnt(0)
1434; GFX9-NEXT:    v_mov_b32_e32 v18, s18
1435; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1436; GFX9-NEXT:    s_waitcnt vmcnt(0)
1437; GFX9-NEXT:    v_mov_b32_e32 v18, s19
1438; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1439; GFX9-NEXT:    s_waitcnt vmcnt(0)
1440; GFX9-NEXT:    v_mov_b32_e32 v18, s20
1441; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1442; GFX9-NEXT:    s_waitcnt vmcnt(0)
1443; GFX9-NEXT:    v_mov_b32_e32 v18, s21
1444; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1445; GFX9-NEXT:    s_waitcnt vmcnt(0)
1446; GFX9-NEXT:    v_mov_b32_e32 v18, s22
1447; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1448; GFX9-NEXT:    s_waitcnt vmcnt(0)
1449; GFX9-NEXT:    v_mov_b32_e32 v18, s23
1450; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1451; GFX9-NEXT:    s_waitcnt vmcnt(0)
1452; GFX9-NEXT:    v_mov_b32_e32 v18, s24
1453; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1454; GFX9-NEXT:    s_waitcnt vmcnt(0)
1455; GFX9-NEXT:    v_mov_b32_e32 v18, s25
1456; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1457; GFX9-NEXT:    s_waitcnt vmcnt(0)
1458; GFX9-NEXT:    v_mov_b32_e32 v18, s26
1459; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1460; GFX9-NEXT:    s_waitcnt vmcnt(0)
1461; GFX9-NEXT:    v_mov_b32_e32 v18, s27
1462; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1463; GFX9-NEXT:    s_waitcnt vmcnt(0)
1464; GFX9-NEXT:    v_mov_b32_e32 v18, s28
1465; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1466; GFX9-NEXT:    s_waitcnt vmcnt(0)
1467; GFX9-NEXT:    v_mov_b32_e32 v18, s29
1468; GFX9-NEXT:    global_store_dword v[0:1], v18, off
1469; GFX9-NEXT:    s_waitcnt vmcnt(0)
1470; GFX9-NEXT:    global_store_dword v[0:1], v0, off
1471; GFX9-NEXT:    s_waitcnt vmcnt(0)
1472; GFX9-NEXT:    global_store_dword v[0:1], v1, off
1473; GFX9-NEXT:    s_waitcnt vmcnt(0)
1474; GFX9-NEXT:    global_store_dword v[0:1], v2, off
1475; GFX9-NEXT:    s_waitcnt vmcnt(0)
1476; GFX9-NEXT:    global_store_dword v[0:1], v3, off
1477; GFX9-NEXT:    s_waitcnt vmcnt(0)
1478; GFX9-NEXT:    global_store_dword v[0:1], v4, off
1479; GFX9-NEXT:    s_waitcnt vmcnt(0)
1480; GFX9-NEXT:    global_store_dword v[0:1], v5, off
1481; GFX9-NEXT:    s_waitcnt vmcnt(0)
1482; GFX9-NEXT:    global_store_dword v[0:1], v6, off
1483; GFX9-NEXT:    s_waitcnt vmcnt(0)
1484; GFX9-NEXT:    global_store_dword v[0:1], v7, off
1485; GFX9-NEXT:    s_waitcnt vmcnt(0)
1486; GFX9-NEXT:    global_store_dword v[0:1], v8, off
1487; GFX9-NEXT:    s_waitcnt vmcnt(0)
1488; GFX9-NEXT:    global_store_dword v[0:1], v9, off
1489; GFX9-NEXT:    s_waitcnt vmcnt(0)
1490; GFX9-NEXT:    global_store_dword v[0:1], v10, off
1491; GFX9-NEXT:    s_waitcnt vmcnt(0)
1492; GFX9-NEXT:    global_store_dword v[0:1], v11, off
1493; GFX9-NEXT:    s_waitcnt vmcnt(0)
1494; GFX9-NEXT:    global_store_dword v[0:1], v12, off
1495; GFX9-NEXT:    s_waitcnt vmcnt(0)
1496; GFX9-NEXT:    global_store_dword v[0:1], v13, off
1497; GFX9-NEXT:    s_waitcnt vmcnt(0)
1498; GFX9-NEXT:    global_store_dword v[0:1], v14, off
1499; GFX9-NEXT:    s_waitcnt vmcnt(0)
1500; GFX9-NEXT:    global_store_dword v[0:1], v15, off
1501; GFX9-NEXT:    s_waitcnt vmcnt(0)
1502; GFX9-NEXT:    global_store_dword v[0:1], v16, off
1503; GFX9-NEXT:    s_waitcnt vmcnt(0)
1504; GFX9-NEXT:    global_store_dword v[0:1], v17, off
1505; GFX9-NEXT:    s_waitcnt vmcnt(0)
1506; GFX9-NEXT:    s_setpc_b64 s[30:31]
1507;
1508; GFX11-LABEL: too_many_args_use_workitem_id_x_inreg:
1509; GFX11:       ; %bb.0:
1510; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1511; GFX11-NEXT:    v_dual_mov_b32 v14, s0 :: v_dual_mov_b32 v15, s1
1512; GFX11-NEXT:    v_mov_b32_e32 v16, s2
1513; GFX11-NEXT:    v_mov_b32_e32 v18, s19
1514; GFX11-NEXT:    global_store_b32 v[0:1], v14, off dlc
1515; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1516; GFX11-NEXT:    global_store_b32 v[0:1], v15, off dlc
1517; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1518; GFX11-NEXT:    global_store_b32 v[0:1], v16, off dlc
1519; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1520; GFX11-NEXT:    v_dual_mov_b32 v14, s3 :: v_dual_mov_b32 v17, s18
1521; GFX11-NEXT:    v_dual_mov_b32 v15, s16 :: v_dual_mov_b32 v16, s17
1522; GFX11-NEXT:    global_store_b32 v[0:1], v14, off dlc
1523; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1524; GFX11-NEXT:    global_store_b32 v[0:1], v15, off dlc
1525; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1526; GFX11-NEXT:    global_store_b32 v[0:1], v16, off dlc
1527; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1528; GFX11-NEXT:    global_store_b32 v[0:1], v17, off dlc
1529; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1530; GFX11-NEXT:    global_store_b32 v[0:1], v18, off dlc
1531; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1532; GFX11-NEXT:    v_dual_mov_b32 v15, s21 :: v_dual_mov_b32 v14, s20
1533; GFX11-NEXT:    v_dual_mov_b32 v16, s22 :: v_dual_mov_b32 v17, s23
1534; GFX11-NEXT:    v_mov_b32_e32 v18, s24
1535; GFX11-NEXT:    global_store_b32 v[0:1], v14, off dlc
1536; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1537; GFX11-NEXT:    global_store_b32 v[0:1], v15, off dlc
1538; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1539; GFX11-NEXT:    global_store_b32 v[0:1], v16, off dlc
1540; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1541; GFX11-NEXT:    global_store_b32 v[0:1], v17, off dlc
1542; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1543; GFX11-NEXT:    global_store_b32 v[0:1], v18, off dlc
1544; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1545; GFX11-NEXT:    v_dual_mov_b32 v17, s28 :: v_dual_mov_b32 v14, s25
1546; GFX11-NEXT:    v_dual_mov_b32 v15, s26 :: v_dual_mov_b32 v16, s27
1547; GFX11-NEXT:    v_mov_b32_e32 v18, s29
1548; GFX11-NEXT:    global_store_b32 v[0:1], v14, off dlc
1549; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1550; GFX11-NEXT:    global_store_b32 v[0:1], v15, off dlc
1551; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1552; GFX11-NEXT:    global_store_b32 v[0:1], v16, off dlc
1553; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1554; GFX11-NEXT:    global_store_b32 v[0:1], v17, off dlc
1555; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1556; GFX11-NEXT:    global_store_b32 v[0:1], v18, off dlc
1557; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1558; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1559; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1560; GFX11-NEXT:    global_store_b32 v[0:1], v1, off dlc
1561; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1562; GFX11-NEXT:    global_store_b32 v[0:1], v2, off dlc
1563; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1564; GFX11-NEXT:    global_store_b32 v[0:1], v3, off dlc
1565; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1566; GFX11-NEXT:    global_store_b32 v[0:1], v4, off dlc
1567; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1568; GFX11-NEXT:    global_store_b32 v[0:1], v5, off dlc
1569; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1570; GFX11-NEXT:    global_store_b32 v[0:1], v6, off dlc
1571; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1572; GFX11-NEXT:    global_store_b32 v[0:1], v7, off dlc
1573; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1574; GFX11-NEXT:    global_store_b32 v[0:1], v8, off dlc
1575; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1576; GFX11-NEXT:    global_store_b32 v[0:1], v9, off dlc
1577; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1578; GFX11-NEXT:    global_store_b32 v[0:1], v10, off dlc
1579; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1580; GFX11-NEXT:    global_store_b32 v[0:1], v11, off dlc
1581; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1582; GFX11-NEXT:    global_store_b32 v[0:1], v12, off dlc
1583; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1584; GFX11-NEXT:    global_store_b32 v[0:1], v13, off dlc
1585; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1586; GFX11-NEXT:    s_setpc_b64 s[30:31]
1587  i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, i32 inreg %arg5, i32 inreg %arg6, i32 inreg %arg7,
1588  i32 inreg %arg8, i32 inreg %arg9, i32 inreg %arg10, i32 inreg %arg11, i32 inreg %arg12, i32 inreg %arg13, i32 inreg %arg14, i32 inreg %arg15,
1589  i32 inreg %arg16, i32 inreg %arg17, i32 inreg %arg18, i32 inreg %arg19, i32 inreg %arg20, i32 inreg %arg21, i32 inreg %arg22, i32 inreg %arg23,
1590  i32 inreg %arg24, i32 inreg %arg25, i32 inreg %arg26, i32 inreg %arg27, i32 inreg %arg28, i32 inreg %arg29, i32 inreg %arg30, i32 inreg %arg31) {
1591  ;%val = call i32 @llvm.amdgcn.workitem.id.x()
1592  ;store volatile i32 %val, ptr addrspace(1) undef
1593
1594  store volatile i32 %arg0, ptr addrspace(1) undef
1595  store volatile i32 %arg1, ptr addrspace(1) undef
1596  store volatile i32 %arg2, ptr addrspace(1) undef
1597  store volatile i32 %arg3, ptr addrspace(1) undef
1598  store volatile i32 %arg4, ptr addrspace(1) undef
1599  store volatile i32 %arg5, ptr addrspace(1) undef
1600  store volatile i32 %arg6, ptr addrspace(1) undef
1601  store volatile i32 %arg7, ptr addrspace(1) undef
1602
1603  store volatile i32 %arg8, ptr addrspace(1) undef
1604  store volatile i32 %arg9, ptr addrspace(1) undef
1605  store volatile i32 %arg10, ptr addrspace(1) undef
1606  store volatile i32 %arg11, ptr addrspace(1) undef
1607  store volatile i32 %arg12, ptr addrspace(1) undef
1608  store volatile i32 %arg13, ptr addrspace(1) undef
1609  store volatile i32 %arg14, ptr addrspace(1) undef
1610  store volatile i32 %arg15, ptr addrspace(1) undef
1611
1612  store volatile i32 %arg16, ptr addrspace(1) undef
1613  store volatile i32 %arg17, ptr addrspace(1) undef
1614  store volatile i32 %arg18, ptr addrspace(1) undef
1615  store volatile i32 %arg19, ptr addrspace(1) undef
1616  store volatile i32 %arg20, ptr addrspace(1) undef
1617  store volatile i32 %arg21, ptr addrspace(1) undef
1618  store volatile i32 %arg22, ptr addrspace(1) undef
1619  store volatile i32 %arg23, ptr addrspace(1) undef
1620
1621  store volatile i32 %arg24, ptr addrspace(1) undef
1622  store volatile i32 %arg25, ptr addrspace(1) undef
1623  store volatile i32 %arg26, ptr addrspace(1) undef
1624  store volatile i32 %arg27, ptr addrspace(1) undef
1625  store volatile i32 %arg28, ptr addrspace(1) undef
1626  store volatile i32 %arg29, ptr addrspace(1) undef
1627  store volatile i32 %arg30, ptr addrspace(1) undef
1628  store volatile i32 %arg31, ptr addrspace(1) undef
1629
1630  ret void
1631}
1632
; Stores an i32 inreg argument followed by a <2 x float> inreg argument to an
; undef global pointer. The checks show the i32 read from s16 (gfx900) / s0
; (gfx1100) and the two vector elements from the next two SGPRs, s17-s18
; (gfx900) / s1-s2 (gfx1100).
1633define void @void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
1634; GFX9-LABEL: void_func_i32_v2float_inreg:
1635; GFX9:       ; %bb.0:
1636; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1637; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1638; GFX9-NEXT:    global_store_dword v[0:1], v0, off
1639; GFX9-NEXT:    v_mov_b32_e32 v0, s17
1640; GFX9-NEXT:    v_mov_b32_e32 v1, s18
1641; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1642; GFX9-NEXT:    s_waitcnt vmcnt(0)
1643; GFX9-NEXT:    s_setpc_b64 s[30:31]
1644;
1645; GFX11-LABEL: void_func_i32_v2float_inreg:
1646; GFX11:       ; %bb.0:
1647; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1648; GFX11-NEXT:    v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v1, s2
1649; GFX11-NEXT:    v_mov_b32_e32 v0, s1
1650; GFX11-NEXT:    s_clause 0x1
1651; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
1652; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
1653; GFX11-NEXT:    s_setpc_b64 s[30:31]
1654  store i32 %arg0, ptr addrspace(1) undef
1655  store <2 x float> %arg1, ptr addrspace(1) undef
1656  ret void
1657}
1658
; Forwards its i32 inreg and <2 x float> inreg arguments to a call through the
; GOT. Because the function makes a call, the checks show s33 and the return
; address s[30:31] being saved into lanes of v40, which is itself spilled to
; scratch around the call. On gfx900 the incoming s16-s18 are copied to s0-s2
; before the call; the gfx1100 checks show no argument copies.
; NOTE(review): the call below targets this function itself (the relocation in
; the checks is against caller_void_func_i32_v2float_inreg), not
; void_func_i32_v2float_inreg -- confirm which callee was intended. Changing it
; requires regenerating the assertions.
1659define void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1) #0 {
1660; GFX9-LABEL: caller_void_func_i32_v2float_inreg:
1661; GFX9:       ; %bb.0:
1662; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1663; GFX9-NEXT:    s_mov_b32 s19, s33
1664; GFX9-NEXT:    s_mov_b32 s33, s32
1665; GFX9-NEXT:    s_or_saveexec_b64 s[20:21], -1
1666; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
1667; GFX9-NEXT:    s_mov_b64 exec, s[20:21]
1668; GFX9-NEXT:    s_addk_i32 s32, 0x400
1669; GFX9-NEXT:    s_getpc_b64 s[20:21]
1670; GFX9-NEXT:    s_add_u32 s20, s20, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
1671; GFX9-NEXT:    s_addc_u32 s21, s21, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
1672; GFX9-NEXT:    s_load_dwordx2 s[20:21], s[20:21], 0x0
1673; GFX9-NEXT:    v_writelane_b32 v40, s19, 2
1674; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
1675; GFX9-NEXT:    s_mov_b32 s2, s18
1676; GFX9-NEXT:    s_mov_b32 s1, s17
1677; GFX9-NEXT:    s_mov_b32 s0, s16
1678; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
1679; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1680; GFX9-NEXT:    s_swappc_b64 s[30:31], s[20:21]
1681; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
1682; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
1683; GFX9-NEXT:    s_mov_b32 s32, s33
1684; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
1685; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
1686; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
1687; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
1688; GFX9-NEXT:    s_mov_b32 s33, s4
1689; GFX9-NEXT:    s_waitcnt vmcnt(0)
1690; GFX9-NEXT:    s_setpc_b64 s[30:31]
1691;
1692; GFX11-LABEL: caller_void_func_i32_v2float_inreg:
1693; GFX11:       ; %bb.0:
1694; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1695; GFX11-NEXT:    s_mov_b32 s3, s33
1696; GFX11-NEXT:    s_mov_b32 s33, s32
1697; GFX11-NEXT:    s_or_saveexec_b32 s16, -1
1698; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
1699; GFX11-NEXT:    s_mov_b32 exec_lo, s16
1700; GFX11-NEXT:    s_add_i32 s32, s32, 16
1701; GFX11-NEXT:    s_getpc_b64 s[16:17]
1702; GFX11-NEXT:    s_add_u32 s16, s16, caller_void_func_i32_v2float_inreg@gotpcrel32@lo+4
1703; GFX11-NEXT:    s_addc_u32 s17, s17, caller_void_func_i32_v2float_inreg@gotpcrel32@hi+12
1704; GFX11-NEXT:    v_writelane_b32 v40, s3, 2
1705; GFX11-NEXT:    s_load_b64 s[16:17], s[16:17], 0x0
1706; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
1707; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
1708; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1709; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
1710; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1711; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
1712; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
1713; GFX11-NEXT:    s_mov_b32 s32, s33
1714; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
1715; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
1716; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
1717; GFX11-NEXT:    s_mov_b32 exec_lo, s1
1718; GFX11-NEXT:    s_mov_b32 s33, s0
1719; GFX11-NEXT:    s_waitcnt vmcnt(0)
1720; GFX11-NEXT:    s_setpc_b64 s[30:31]
1748  call void @caller_void_func_i32_v2float_inreg(i32 inreg %arg0, <2 x float> inreg %arg1)
1749  ret void
1750}
1751
; Stores a single bfloat inreg argument to an undef global pointer. The checks
; show it read from s16 (gfx900) / s0 (gfx1100) and written with a 16-bit store.
1752define void @void_func_bf16_inreg(bfloat inreg %arg0) #0 {
1753; GFX9-LABEL: void_func_bf16_inreg:
1754; GFX9:       ; %bb.0:
1755; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1756; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1757; GFX9-NEXT:    global_store_short v[0:1], v0, off
1758; GFX9-NEXT:    s_waitcnt vmcnt(0)
1759; GFX9-NEXT:    s_setpc_b64 s[30:31]
1760;
1761; GFX11-LABEL: void_func_bf16_inreg:
1762; GFX11:       ; %bb.0:
1763; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1764; GFX11-NEXT:    v_mov_b32_e32 v0, s0
1765; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
1766; GFX11-NEXT:    s_setpc_b64 s[30:31]
1767  store bfloat %arg0, ptr addrspace(1) undef
1768  ret void
1769}
1770
; Stores a <2 x bfloat> inreg argument to an undef global pointer. The checks
; show both elements arriving in a single SGPR, s16 (gfx900) / s0 (gfx1100),
; and written with one 32-bit store.
1771define void @void_func_v2bf16_inreg(<2 x bfloat> inreg %arg0) #0 {
1772; GFX9-LABEL: void_func_v2bf16_inreg:
1773; GFX9:       ; %bb.0:
1774; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1775; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1776; GFX9-NEXT:    global_store_dword v[0:1], v0, off
1777; GFX9-NEXT:    s_waitcnt vmcnt(0)
1778; GFX9-NEXT:    s_setpc_b64 s[30:31]
1779;
1780; GFX11-LABEL: void_func_v2bf16_inreg:
1781; GFX11:       ; %bb.0:
1782; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1783; GFX11-NEXT:    v_mov_b32_e32 v0, s0
1784; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
1785; GFX11-NEXT:    s_setpc_b64 s[30:31]
1786  store <2 x bfloat> %arg0, ptr addrspace(1) undef
1787  ret void
1788}
1789
; Stores a <3 x bfloat> inreg argument to an undef global pointer. The checks
; show it occupying two SGPRs, s16-s17 (gfx900) / s0-s1 (gfx1100): the second
; register is written with a 16-bit store and the first with a 32-bit store.
1790define void @void_func_v3bf16_inreg(<3 x bfloat> inreg %arg0) #0 {
1791; GFX9-LABEL: void_func_v3bf16_inreg:
1792; GFX9:       ; %bb.0:
1793; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1794; GFX9-NEXT:    v_mov_b32_e32 v0, s17
1795; GFX9-NEXT:    global_store_short v[0:1], v0, off
1796; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1797; GFX9-NEXT:    global_store_dword v[0:1], v0, off
1798; GFX9-NEXT:    s_waitcnt vmcnt(0)
1799; GFX9-NEXT:    s_setpc_b64 s[30:31]
1800;
1801; GFX11-LABEL: void_func_v3bf16_inreg:
1802; GFX11:       ; %bb.0:
1803; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804; GFX11-NEXT:    v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
1805; GFX11-NEXT:    s_clause 0x1
1806; GFX11-NEXT:    global_store_b16 v[0:1], v0, off
1807; GFX11-NEXT:    global_store_b32 v[0:1], v1, off
1808; GFX11-NEXT:    s_setpc_b64 s[30:31]
1809  store <3 x bfloat> %arg0, ptr addrspace(1) undef
1810  ret void
1811}
1812
; Stores a <4 x bfloat> inreg argument to an undef global pointer. The checks
; show it read from s16-s17 (gfx900) / s0-s1 (gfx1100) and written with one
; 64-bit store.
1813define void @void_func_v4bf16_inreg(<4 x bfloat> inreg %arg0) #0 {
1814; GFX9-LABEL: void_func_v4bf16_inreg:
1815; GFX9:       ; %bb.0:
1816; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1817; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1818; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1819; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
1820; GFX9-NEXT:    s_waitcnt vmcnt(0)
1821; GFX9-NEXT:    s_setpc_b64 s[30:31]
1822;
1823; GFX11-LABEL: void_func_v4bf16_inreg:
1824; GFX11:       ; %bb.0:
1825; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1827; GFX11-NEXT:    global_store_b64 v[0:1], v[0:1], off
1828; GFX11-NEXT:    s_setpc_b64 s[30:31]
1829  store <4 x bfloat> %arg0, ptr addrspace(1) undef
1830  ret void
1831}
1832
; Stores an <8 x bfloat> inreg argument to an undef global pointer. The checks
; show it read from s16-s19 (gfx900) / s0-s3 (gfx1100) and written with one
; 128-bit store.
1833define void @void_func_v8bf16_inreg(<8 x bfloat> inreg %arg0) #0 {
1834; GFX9-LABEL: void_func_v8bf16_inreg:
1835; GFX9:       ; %bb.0:
1836; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1837; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1838; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1839; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1840; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1841; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1842; GFX9-NEXT:    s_waitcnt vmcnt(0)
1843; GFX9-NEXT:    s_setpc_b64 s[30:31]
1844;
1845; GFX11-LABEL: void_func_v8bf16_inreg:
1846; GFX11:       ; %bb.0:
1847; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1848; GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1849; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
1850; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
1851; GFX11-NEXT:    s_setpc_b64 s[30:31]
1852  store <8 x bfloat> %arg0, ptr addrspace(1) undef
1853  ret void
1854}
1855
; Stores a <16 x bfloat> inreg argument to an undef global pointer. The checks
; show it read from s16-s23 on gfx900 and from s0-s3 plus s16-s19 on gfx1100,
; and written as two 128-bit stores on both targets.
1856define void @void_func_v16bf16_inreg(<16 x bfloat> inreg %arg0) #0 {
1857; GFX9-LABEL: void_func_v16bf16_inreg:
1858; GFX9:       ; %bb.0:
1859; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1860; GFX9-NEXT:    v_mov_b32_e32 v0, s20
1861; GFX9-NEXT:    v_mov_b32_e32 v1, s21
1862; GFX9-NEXT:    v_mov_b32_e32 v2, s22
1863; GFX9-NEXT:    v_mov_b32_e32 v3, s23
1864; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1865; GFX9-NEXT:    s_nop 0
1866; GFX9-NEXT:    v_mov_b32_e32 v0, s16
1867; GFX9-NEXT:    v_mov_b32_e32 v1, s17
1868; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1869; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1870; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[0:3], off
1871; GFX9-NEXT:    s_waitcnt vmcnt(0)
1872; GFX9-NEXT:    s_setpc_b64 s[30:31]
1873;
1874; GFX11-LABEL: void_func_v16bf16_inreg:
1875; GFX11:       ; %bb.0:
1876; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1877; GFX11-NEXT:    v_dual_mov_b32 v0, s16 :: v_dual_mov_b32 v1, s17
1878; GFX11-NEXT:    v_dual_mov_b32 v2, s18 :: v_dual_mov_b32 v3, s19
1879; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v5, s1
1880; GFX11-NEXT:    v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3
1881; GFX11-NEXT:    s_clause 0x1
1882; GFX11-NEXT:    global_store_b128 v[0:1], v[0:3], off
1883; GFX11-NEXT:    global_store_b128 v[0:1], v[4:7], off
1884; GFX11-NEXT:    s_setpc_b64 s[30:31]
1885  store <16 x bfloat> %arg0, ptr addrspace(1) undef
1886  ret void
1887}
1888
; Volatile-stores two i32 inreg arguments to %ptr, which is not marked inreg.
; The checks show the scalars read from s16-s17 (gfx900) / s0-s1 (gfx1100) and
; the pointer used directly as v[0:1], with a wait after each volatile store.
1889define void @void_func_2_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, ptr addrspace(1) %ptr) {
1890; GFX9-LABEL: void_func_2_i32_inreg:
1891; GFX9:       ; %bb.0:
1892; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1893; GFX9-NEXT:    v_mov_b32_e32 v2, s16
1894; GFX9-NEXT:    global_store_dword v[0:1], v2, off
1895; GFX9-NEXT:    s_waitcnt vmcnt(0)
1896; GFX9-NEXT:    v_mov_b32_e32 v2, s17
1897; GFX9-NEXT:    global_store_dword v[0:1], v2, off
1898; GFX9-NEXT:    s_waitcnt vmcnt(0)
1899; GFX9-NEXT:    s_setpc_b64 s[30:31]
1900;
1901; GFX11-LABEL: void_func_2_i32_inreg:
1902; GFX11:       ; %bb.0:
1903; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1904; GFX11-NEXT:    v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
1905; GFX11-NEXT:    global_store_b32 v[0:1], v2, off dlc
1906; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1907; GFX11-NEXT:    global_store_b32 v[0:1], v3, off dlc
1908; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1909; GFX11-NEXT:    s_setpc_b64 s[30:31]
1910  store volatile i32 %arg0, ptr addrspace(1) %ptr
1911  store volatile i32 %arg1, ptr addrspace(1) %ptr
1912  ret void
1913}
1914
; Volatile-stores two i64 inreg arguments to %ptr. The checks show each i64
; occupying an SGPR pair: s16-s17 and s18-s19 on gfx900, s0-s1 and s2-s3 on
; gfx1100, with the pointer in v[0:1].
1915define void @void_func_2_i64_inreg(i64 inreg %arg0, i64 inreg %arg1, ptr addrspace(1) %ptr) {
1916; GFX9-LABEL: void_func_2_i64_inreg:
1917; GFX9:       ; %bb.0:
1918; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1919; GFX9-NEXT:    v_mov_b32_e32 v4, s16
1920; GFX9-NEXT:    v_mov_b32_e32 v5, s17
1921; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1922; GFX9-NEXT:    v_mov_b32_e32 v3, s19
1923; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
1924; GFX9-NEXT:    s_waitcnt vmcnt(0)
1925; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
1926; GFX9-NEXT:    s_waitcnt vmcnt(0)
1927; GFX9-NEXT:    s_setpc_b64 s[30:31]
1928;
1929; GFX11-LABEL: void_func_2_i64_inreg:
1930; GFX11:       ; %bb.0:
1931; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1932; GFX11-NEXT:    v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v5, s1
1933; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s3
1934; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off dlc
1935; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1936; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off dlc
1937; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1938; GFX11-NEXT:    s_setpc_b64 s[30:31]
1939  store volatile i64 %arg0, ptr addrspace(1) %ptr
1940  store volatile i64 %arg1, ptr addrspace(1) %ptr
1941  ret void
1942}
1943
; Volatile-stores an i64, an i32 and another i64 (all inreg) to %ptr. The
; checks show the arguments packed into consecutive SGPRs with no padding after
; the i32: s16-s17, s18, s19-s20 on gfx900. On gfx1100 they are s0-s1, s2, and
; then s3 plus s16, so the second i64 is split across the s3/s16 boundary.
1944define void @void_func_i64_inreg_i32_inreg_i64_inreg(i64 inreg %arg0, i32 inreg %arg1, i64 inreg %arg2, ptr addrspace(1) %ptr) {
1945; GFX9-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg:
1946; GFX9:       ; %bb.0:
1947; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1948; GFX9-NEXT:    v_mov_b32_e32 v4, s16
1949; GFX9-NEXT:    v_mov_b32_e32 v5, s17
1950; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[4:5], off
1951; GFX9-NEXT:    s_waitcnt vmcnt(0)
1952; GFX9-NEXT:    v_mov_b32_e32 v4, s18
1953; GFX9-NEXT:    v_mov_b32_e32 v2, s19
1954; GFX9-NEXT:    v_mov_b32_e32 v3, s20
1955; GFX9-NEXT:    global_store_dword v[0:1], v4, off
1956; GFX9-NEXT:    s_waitcnt vmcnt(0)
1957; GFX9-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
1958; GFX9-NEXT:    s_waitcnt vmcnt(0)
1959; GFX9-NEXT:    s_setpc_b64 s[30:31]
1960;
1961; GFX11-LABEL: void_func_i64_inreg_i32_inreg_i64_inreg:
1962; GFX11:       ; %bb.0:
1963; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1964; GFX11-NEXT:    v_dual_mov_b32 v2, s3 :: v_dual_mov_b32 v5, s1
1965; GFX11-NEXT:    v_dual_mov_b32 v4, s0 :: v_dual_mov_b32 v3, s16
1966; GFX11-NEXT:    v_mov_b32_e32 v6, s2
1967; GFX11-NEXT:    global_store_b64 v[0:1], v[4:5], off dlc
1968; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1969; GFX11-NEXT:    global_store_b32 v[0:1], v6, off dlc
1970; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1971; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off dlc
1972; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1973; GFX11-NEXT:    s_setpc_b64 s[30:31]
1974  store volatile i64 %arg0, ptr addrspace(1) %ptr
1975  store volatile i32 %arg1, ptr addrspace(1) %ptr
1976  store volatile i64 %arg2, ptr addrspace(1) %ptr
1977  ret void
1978}
1979
; Volatile-stores five i32 inreg arguments to %ptr. The checks show them read
; from s16-s20 on gfx900 and from s0-s3 followed by s16 on gfx1100, with the
; pointer in v[0:1].
1980define void @void_func_5_i32_inreg(i32 inreg %arg0, i32 inreg %arg1, i32 inreg %arg2, i32 inreg %arg3, i32 inreg %arg4, ptr addrspace(1) %ptr) {
1981; GFX9-LABEL: void_func_5_i32_inreg:
1982; GFX9:       ; %bb.0:
1983; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1984; GFX9-NEXT:    v_mov_b32_e32 v2, s16
1985; GFX9-NEXT:    global_store_dword v[0:1], v2, off
1986; GFX9-NEXT:    s_waitcnt vmcnt(0)
1987; GFX9-NEXT:    v_mov_b32_e32 v2, s17
1988; GFX9-NEXT:    global_store_dword v[0:1], v2, off
1989; GFX9-NEXT:    s_waitcnt vmcnt(0)
1990; GFX9-NEXT:    v_mov_b32_e32 v2, s18
1991; GFX9-NEXT:    global_store_dword v[0:1], v2, off
1992; GFX9-NEXT:    s_waitcnt vmcnt(0)
1993; GFX9-NEXT:    v_mov_b32_e32 v2, s19
1994; GFX9-NEXT:    global_store_dword v[0:1], v2, off
1995; GFX9-NEXT:    s_waitcnt vmcnt(0)
1996; GFX9-NEXT:    v_mov_b32_e32 v2, s20
1997; GFX9-NEXT:    global_store_dword v[0:1], v2, off
1998; GFX9-NEXT:    s_waitcnt vmcnt(0)
1999; GFX9-NEXT:    s_setpc_b64 s[30:31]
2000;
2001; GFX11-LABEL: void_func_5_i32_inreg:
2002; GFX11:       ; %bb.0:
2003; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2004; GFX11-NEXT:    v_dual_mov_b32 v2, s0 :: v_dual_mov_b32 v3, s1
2005; GFX11-NEXT:    v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3
2006; GFX11-NEXT:    v_mov_b32_e32 v6, s16
2007; GFX11-NEXT:    global_store_b32 v[0:1], v2, off dlc
2008; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2009; GFX11-NEXT:    global_store_b32 v[0:1], v3, off dlc
2010; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2011; GFX11-NEXT:    global_store_b32 v[0:1], v4, off dlc
2012; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2013; GFX11-NEXT:    global_store_b32 v[0:1], v5, off dlc
2014; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2015; GFX11-NEXT:    global_store_b32 v[0:1], v6, off dlc
2016; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2017; GFX11-NEXT:    s_setpc_b64 s[30:31]
2018  store volatile i32 %arg0, ptr addrspace(1) %ptr
2019  store volatile i32 %arg1, ptr addrspace(1) %ptr
2020  store volatile i32 %arg2, ptr addrspace(1) %ptr
2021  store volatile i32 %arg3, ptr addrspace(1) %ptr
2022  store volatile i32 %arg4, ptr addrspace(1) %ptr
2023  ret void
2024}
2025
; Stores a [5 x i32] inreg array argument to %ptr. The checks show the elements
; read from the same registers as five separate i32 inreg arguments would use
; (s16-s20 on gfx900; s0-s3 and s16 on gfx1100), written as one 128-bit store
; plus one 32-bit store at offset 16.
2026define void @void_func_a5i32_inreg([5 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
2027; GFX9-LABEL: void_func_a5i32_inreg:
2028; GFX9:       ; %bb.0:
2029; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2030; GFX9-NEXT:    v_mov_b32_e32 v2, s20
2031; GFX9-NEXT:    global_store_dword v[0:1], v2, off offset:16
2032; GFX9-NEXT:    v_mov_b32_e32 v5, s19
2033; GFX9-NEXT:    v_mov_b32_e32 v4, s18
2034; GFX9-NEXT:    v_mov_b32_e32 v3, s17
2035; GFX9-NEXT:    v_mov_b32_e32 v2, s16
2036; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
2037; GFX9-NEXT:    s_waitcnt vmcnt(0)
2038; GFX9-NEXT:    s_setpc_b64 s[30:31]
2039;
2040; GFX11-LABEL: void_func_a5i32_inreg:
2041; GFX11:       ; %bb.0:
2042; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2043; GFX11-NEXT:    v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v5, s3
2044; GFX11-NEXT:    v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v3, s1
2045; GFX11-NEXT:    v_mov_b32_e32 v2, s0
2046; GFX11-NEXT:    s_clause 0x1
2047; GFX11-NEXT:    global_store_b32 v[0:1], v6, off offset:16
2048; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
2049; GFX11-NEXT:    s_setpc_b64 s[30:31]
2050  store [5 x i32] %arg0, ptr addrspace(1) %ptr
2051  ret void
2052}
2053
2054; External function with no visible body. The tests below call it to force
2054; all implicit inputs to be required.
2055declare void @extern()
2056
; Stores a [13 x i32] inreg array argument to %ptr and then calls @extern.
; The checks show the elements read from s16-s28 on gfx900 and from s0-s3 plus
; s16-s24 on gfx1100. Because of the call, the checks also show s33 and the
; return address s[30:31] being saved into lanes of v40, which is spilled to
; scratch and reloaded around the call.
2057define void @void_func_a13i32_inreg([13  x i32] inreg %arg0, ptr addrspace(1) %ptr) {
2058; GFX9-LABEL: void_func_a13i32_inreg:
2059; GFX9:       ; %bb.0:
2060; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2061; GFX9-NEXT:    s_mov_b32 s29, s33
2062; GFX9-NEXT:    s_mov_b32 s33, s32
2063; GFX9-NEXT:    s_or_saveexec_b64 vcc, -1
2064; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
2065; GFX9-NEXT:    s_mov_b64 exec, vcc
2066; GFX9-NEXT:    v_mov_b32_e32 v2, s28
2067; GFX9-NEXT:    global_store_dword v[0:1], v2, off offset:48
2068; GFX9-NEXT:    v_mov_b32_e32 v5, s27
2069; GFX9-NEXT:    v_mov_b32_e32 v4, s26
2070; GFX9-NEXT:    v_mov_b32_e32 v3, s25
2071; GFX9-NEXT:    v_mov_b32_e32 v2, s24
2072; GFX9-NEXT:    s_addk_i32 s32, 0x400
2073; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:32
2074; GFX9-NEXT:    v_writelane_b32 v40, s29, 2
2075; GFX9-NEXT:    v_mov_b32_e32 v5, s23
2076; GFX9-NEXT:    v_mov_b32_e32 v4, s22
2077; GFX9-NEXT:    v_mov_b32_e32 v3, s21
2078; GFX9-NEXT:    v_mov_b32_e32 v2, s20
2079; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off offset:16
2080; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
2081; GFX9-NEXT:    v_mov_b32_e32 v3, s17
2082; GFX9-NEXT:    v_mov_b32_e32 v2, s16
2083; GFX9-NEXT:    s_getpc_b64 s[16:17]
2084; GFX9-NEXT:    s_add_u32 s16, s16, extern@gotpcrel32@lo+4
2085; GFX9-NEXT:    s_addc_u32 s17, s17, extern@gotpcrel32@hi+12
2086; GFX9-NEXT:    s_load_dwordx2 s[16:17], s[16:17], 0x0
2087; GFX9-NEXT:    v_mov_b32_e32 v5, s19
2088; GFX9-NEXT:    v_mov_b32_e32 v4, s18
2089; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
2090; GFX9-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
2091; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2092; GFX9-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2093; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
2094; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
2095; GFX9-NEXT:    s_mov_b32 s32, s33
2096; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
2097; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
2098; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
2099; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
2100; GFX9-NEXT:    s_mov_b32 s33, s4
2101; GFX9-NEXT:    s_waitcnt vmcnt(0)
2102; GFX9-NEXT:    s_setpc_b64 s[30:31]
2103;
2104; GFX11-LABEL: void_func_a13i32_inreg:
2105; GFX11:       ; %bb.0:
2106; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2107; GFX11-NEXT:    s_mov_b32 s25, s33
2108; GFX11-NEXT:    s_mov_b32 s33, s32
2109; GFX11-NEXT:    s_or_saveexec_b32 s26, -1
2110; GFX11-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
2111; GFX11-NEXT:    s_mov_b32 exec_lo, s26
2112; GFX11-NEXT:    s_add_i32 s32, s32, 16
2113; GFX11-NEXT:    v_dual_mov_b32 v4, s22 :: v_dual_mov_b32 v3, s21
2114; GFX11-NEXT:    v_dual_mov_b32 v2, s20 :: v_dual_mov_b32 v9, s19
2115; GFX11-NEXT:    s_getpc_b64 s[20:21]
2116; GFX11-NEXT:    s_add_u32 s20, s20, extern@gotpcrel32@lo+4
2117; GFX11-NEXT:    s_addc_u32 s21, s21, extern@gotpcrel32@hi+12
2118; GFX11-NEXT:    v_dual_mov_b32 v8, s18 :: v_dual_mov_b32 v7, s17
2119; GFX11-NEXT:    v_dual_mov_b32 v6, s16 :: v_dual_mov_b32 v13, s3
2120; GFX11-NEXT:    s_load_b64 s[16:17], s[20:21], 0x0
2121; GFX11-NEXT:    v_writelane_b32 v40, s25, 2
2122; GFX11-NEXT:    v_dual_mov_b32 v14, s24 :: v_dual_mov_b32 v5, s23
2123; GFX11-NEXT:    v_dual_mov_b32 v12, s2 :: v_dual_mov_b32 v11, s1
2124; GFX11-NEXT:    v_writelane_b32 v40, s30, 0
2125; GFX11-NEXT:    v_mov_b32_e32 v10, s0
2126; GFX11-NEXT:    s_clause 0x3
2127; GFX11-NEXT:    global_store_b32 v[0:1], v14, off offset:48
2128; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off offset:32
2129; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:16
2130; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off
2131; GFX11-NEXT:    v_writelane_b32 v40, s31, 1
2132; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2133; GFX11-NEXT:    s_swappc_b64 s[30:31], s[16:17]
2134; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2135; GFX11-NEXT:    v_readlane_b32 s31, v40, 1
2136; GFX11-NEXT:    v_readlane_b32 s30, v40, 0
2137; GFX11-NEXT:    s_mov_b32 s32, s33
2138; GFX11-NEXT:    v_readlane_b32 s0, v40, 2
2139; GFX11-NEXT:    s_or_saveexec_b32 s1, -1
2140; GFX11-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
2141; GFX11-NEXT:    s_mov_b32 exec_lo, s1
2142; GFX11-NEXT:    s_mov_b32 s33, s0
2143; GFX11-NEXT:    s_waitcnt vmcnt(0)
2144; GFX11-NEXT:    s_setpc_b64 s[30:31]
2145  store [13 x i32] %arg0, ptr addrspace(1) %ptr
2146  call void @extern()
2147  ret void
2148}
2149
2150; define void @void_func_a14i32_inreg([14 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
2151;   store [14 x i32] %arg0, ptr addrspace(1) %ptr
2152;   call void @extern()
2153;   ret void
2154; }
2155
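2156; FIXME: Test disabled (commented out). The reason is not recorded here; presumably this case does not compile yet - confirm before re-enabling.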
2157; define void @void_func_a15i32_inreg([15 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
2158;   store [15 x i32] %arg0, ptr addrspace(1) %ptr
2159;   call void @extern()
2160;   ret void
2161; }
2162
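2163; FIXME: Test disabled (commented out). The reason is not recorded here; presumably this case does not compile yet - confirm before re-enabling.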
2164; define void @void_func_a16i32_inreg([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
2165;   store [16 x i32] %arg0, ptr addrspace(1) %ptr
2166;   call void @extern()
2167;   ret void
2168; }
2169
2170; FIXME: Should still fail
; Stores the [16 x i32] inreg array argument %arg0 to the global
; (addrspace(1)) pointer %ptr and returns. Unlike the commented-out
; @void_func_a16i32_inreg variant, this function makes no call to @extern.
;
; Argument locations as shown by the expected output below:
;   - gfx900 run:  elements 0-13 are read from s16-s29, elements 14-15 from
;                  v0/v1, and %ptr is in v[2:3].
;   - gfx1100 run: elements 0-3 are read from s0-s3, elements 4-15 from
;                  s16-s27, and %ptr is in v[0:1].
;
; NOTE(review): the FIXME on this test presumably refers to the array not
; fitting entirely in SGPRs on gfx900 (the last two elements arrive in VGPRs)
; - confirm the intended failure condition.
2171define void @void_func_a16i32_inreg__noimplicit([16 x i32] inreg %arg0, ptr addrspace(1) %ptr) {
2172; GFX9-LABEL: void_func_a16i32_inreg__noimplicit:
2173; GFX9:       ; %bb.0:
2174; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2175; GFX9-NEXT:    v_mov_b32_e32 v7, v1
2176; GFX9-NEXT:    v_mov_b32_e32 v6, v0
2177; GFX9-NEXT:    v_mov_b32_e32 v5, s29
2178; GFX9-NEXT:    v_mov_b32_e32 v4, s28
2179; GFX9-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off offset:48
2180; GFX9-NEXT:    s_nop 0
2181; GFX9-NEXT:    v_mov_b32_e32 v7, s27
2182; GFX9-NEXT:    v_mov_b32_e32 v6, s26
2183; GFX9-NEXT:    v_mov_b32_e32 v5, s25
2184; GFX9-NEXT:    v_mov_b32_e32 v4, s24
2185; GFX9-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off offset:32
2186; GFX9-NEXT:    s_nop 0
2187; GFX9-NEXT:    v_mov_b32_e32 v7, s23
2188; GFX9-NEXT:    v_mov_b32_e32 v6, s22
2189; GFX9-NEXT:    v_mov_b32_e32 v5, s21
2190; GFX9-NEXT:    v_mov_b32_e32 v4, s20
2191; GFX9-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off offset:16
2192; GFX9-NEXT:    s_nop 0
2193; GFX9-NEXT:    v_mov_b32_e32 v7, s19
2194; GFX9-NEXT:    v_mov_b32_e32 v6, s18
2195; GFX9-NEXT:    v_mov_b32_e32 v5, s17
2196; GFX9-NEXT:    v_mov_b32_e32 v4, s16
2197; GFX9-NEXT:    global_store_dwordx4 v[2:3], v[4:7], off
2198; GFX9-NEXT:    s_waitcnt vmcnt(0)
2199; GFX9-NEXT:    s_setpc_b64 s[30:31]
2200;
2201; GFX11-LABEL: void_func_a16i32_inreg__noimplicit:
2202; GFX11:       ; %bb.0:
2203; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2204; GFX11-NEXT:    v_dual_mov_b32 v5, s27 :: v_dual_mov_b32 v4, s26
2205; GFX11-NEXT:    v_dual_mov_b32 v3, s25 :: v_dual_mov_b32 v2, s24
2206; GFX11-NEXT:    v_dual_mov_b32 v9, s23 :: v_dual_mov_b32 v8, s22
2207; GFX11-NEXT:    v_dual_mov_b32 v7, s21 :: v_dual_mov_b32 v6, s20
2208; GFX11-NEXT:    v_dual_mov_b32 v13, s19 :: v_dual_mov_b32 v12, s18
2209; GFX11-NEXT:    v_dual_mov_b32 v11, s17 :: v_dual_mov_b32 v10, s16
2210; GFX11-NEXT:    v_dual_mov_b32 v17, s3 :: v_dual_mov_b32 v16, s2
2211; GFX11-NEXT:    v_dual_mov_b32 v15, s1 :: v_dual_mov_b32 v14, s0
2212; GFX11-NEXT:    s_clause 0x3
2213; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off offset:48
2214; GFX11-NEXT:    global_store_b128 v[0:1], v[6:9], off offset:32
2215; GFX11-NEXT:    global_store_b128 v[0:1], v[10:13], off offset:16
2216; GFX11-NEXT:    global_store_b128 v[0:1], v[14:17], off
2217; GFX11-NEXT:    s_setpc_b64 s[30:31]
2218  store [16 x i32] %arg0, ptr addrspace(1) %ptr
2219  ret void
2220}
2221
; Attribute groups referenced by the test functions in this file.
; #0 (nounwind) is used by e.g. @void_func_i1_inreg; uses of #1
; (nounwind noinline) are not visible near these definitions - TODO confirm
; it is still referenced.
2222attributes #0 = { nounwind }
2223attributes #1 = { nounwind noinline }
2224
2225
2226
2227
2228
2229
2230
2231