xref: /llvm-project/llvm/test/CodeGen/AMDGPU/function-args.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,CI %s
3; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,VI %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CIGFX89,GFX89,GFX9 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
6
; Passes an i1 argument with no extension attribute and stores it through an
; undef addrspace(1) pointer. The checks expect v0 to be masked with 1 and
; then written with a byte-sized buffer store.
7define void @void_func_i1(i1 %arg0) #0 {
8; CIGFX89-LABEL: void_func_i1:
9; CIGFX89:       ; %bb.0:
10; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; CIGFX89-NEXT:    v_and_b32_e32 v0, 1, v0
12; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
13; CIGFX89-NEXT:    s_mov_b32 s6, -1
14; CIGFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
15; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
16; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX11-LABEL: void_func_i1:
19; GFX11:       ; %bb.0:
20; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
22; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
23; GFX11-NEXT:    s_mov_b32 s2, -1
24; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0
25; GFX11-NEXT:    s_setpc_b64 s[30:31]
26  store i1 %arg0, ptr addrspace(1) undef
27  ret void
28}
29
; Passes an i1 argument marked zeroext, widens it with zext, adds 12 and
; stores the i32 result. The checks expect a single v_or_b32 of 12 into v0
; with no separate masking instruction before the dword store.
30define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 {
31; CIGFX89-LABEL: void_func_i1_zeroext:
32; CIGFX89:       ; %bb.0:
33; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; CIGFX89-NEXT:    v_or_b32_e32 v0, 12, v0
35; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
36; CIGFX89-NEXT:    s_mov_b32 s6, -1
37; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
38; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
39; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
40;
41; GFX11-LABEL: void_func_i1_zeroext:
42; GFX11:       ; %bb.0:
43; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GFX11-NEXT:    v_or_b32_e32 v0, 12, v0
45; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
46; GFX11-NEXT:    s_mov_b32 s2, -1
47; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
48; GFX11-NEXT:    s_setpc_b64 s[30:31]
49  %ext = zext i1 %arg0 to i32
50  %add = add i32 %ext, 12
51  store i32 %add, ptr addrspace(1) undef
52  ret void
53}
54
; Passes an i1 argument marked signext, widens it with sext, adds 12 and
; stores the i32 result. The checks expect a plain 32-bit add of 12 to v0
; with no extension instruction; only the add mnemonic and its carry operand
; differ between the four targets.
55define void @void_func_i1_signext(i1 signext %arg0) #0 {
56; CI-LABEL: void_func_i1_signext:
57; CI:       ; %bb.0:
58; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
60; CI-NEXT:    s_mov_b32 s7, 0xf000
61; CI-NEXT:    s_mov_b32 s6, -1
62; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
63; CI-NEXT:    s_waitcnt vmcnt(0)
64; CI-NEXT:    s_setpc_b64 s[30:31]
65;
66; VI-LABEL: void_func_i1_signext:
67; VI:       ; %bb.0:
68; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
70; VI-NEXT:    s_mov_b32 s7, 0xf000
71; VI-NEXT:    s_mov_b32 s6, -1
72; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
73; VI-NEXT:    s_waitcnt vmcnt(0)
74; VI-NEXT:    s_setpc_b64 s[30:31]
75;
76; GFX9-LABEL: void_func_i1_signext:
77; GFX9:       ; %bb.0:
78; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
80; GFX9-NEXT:    s_mov_b32 s7, 0xf000
81; GFX9-NEXT:    s_mov_b32 s6, -1
82; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
83; GFX9-NEXT:    s_waitcnt vmcnt(0)
84; GFX9-NEXT:    s_setpc_b64 s[30:31]
85;
86; GFX11-LABEL: void_func_i1_signext:
87; GFX11:       ; %bb.0:
88; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
90; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
91; GFX11-NEXT:    s_mov_b32 s2, -1
92; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
93; GFX11-NEXT:    s_setpc_b64 s[30:31]
94  %ext = sext i1 %arg0 to i32
95  %add = add i32 %ext, 12
96  store i32 %add, ptr addrspace(1) undef
97  ret void
98}
99
; Uses an i1 argument directly as a branch condition: a true %arg jumps
; straight to bb2, a false %arg runs bb1 (a volatile i32 store of 0) first.
; The checks expect v0 to be masked with 1, compared against 1, and the
; inverted compare result to be used as the exec mask that guards bb1.
100define void @i1_arg_i1_use(i1 %arg) #0 {
101; CIGFX89-LABEL: i1_arg_i1_use:
102; CIGFX89:       ; %bb.0: ; %bb
103; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104; CIGFX89-NEXT:    v_and_b32_e32 v0, 1, v0
105; CIGFX89-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
106; CIGFX89-NEXT:    s_xor_b64 s[6:7], vcc, -1
107; CIGFX89-NEXT:    s_and_saveexec_b64 s[4:5], s[6:7]
108; CIGFX89-NEXT:    s_cbranch_execz .LBB3_2
109; CIGFX89-NEXT:  ; %bb.1: ; %bb1
110; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
111; CIGFX89-NEXT:    s_mov_b32 s6, -1
112; CIGFX89-NEXT:    v_mov_b32_e32 v0, 0
113; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
114; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
115; CIGFX89-NEXT:  .LBB3_2: ; %bb2
116; CIGFX89-NEXT:    s_or_b64 exec, exec, s[4:5]
117; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
118;
119; GFX11-LABEL: i1_arg_i1_use:
120; GFX11:       ; %bb.0: ; %bb
121; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122; GFX11-NEXT:    v_and_b32_e32 v0, 1, v0
123; GFX11-NEXT:    s_mov_b32 s2, -1
124; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
125; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
126; GFX11-NEXT:    s_xor_b32 s1, vcc_lo, -1
127; GFX11-NEXT:    s_and_saveexec_b32 s0, s1
128; GFX11-NEXT:    s_cbranch_execz .LBB3_2
129; GFX11-NEXT:  ; %bb.1: ; %bb1
130; GFX11-NEXT:    v_mov_b32_e32 v0, 0
131; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
132; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0 dlc
133; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
134; GFX11-NEXT:  .LBB3_2: ; %bb2
135; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
136; GFX11-NEXT:    s_setpc_b64 s[30:31]
137bb:
138  br i1 %arg, label %bb2, label %bb1
139
140bb1:
141  store volatile i32 0, ptr addrspace(1) undef
142  br label %bb2
143
144bb2:
145  ret void
146}
147
; Passes an i8 argument with no extension attribute and stores it unchanged.
; The checks expect a byte-sized buffer store of v0 with no masking or
; extension instruction in front of it.
148define void @void_func_i8(i8 %arg0) #0 {
149; CIGFX89-LABEL: void_func_i8:
150; CIGFX89:       ; %bb.0:
151; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
153; CIGFX89-NEXT:    s_mov_b32 s6, -1
154; CIGFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
155; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
156; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
157;
158; GFX11-LABEL: void_func_i8:
159; GFX11:       ; %bb.0:
160; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
162; GFX11-NEXT:    s_mov_b32 s2, -1
163; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0
164; GFX11-NEXT:    s_setpc_b64 s[30:31]
165  store i8 %arg0, ptr addrspace(1) undef
166  ret void
167}
168
; Passes an i8 argument marked zeroext, widens it with zext, adds 12 and
; stores the i32 result. The checks expect a 32-bit add of 12 applied
; directly to v0, with no extension or masking instruction in the callee.
169define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 {
170; CI-LABEL: void_func_i8_zeroext:
171; CI:       ; %bb.0:
172; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
174; CI-NEXT:    s_mov_b32 s7, 0xf000
175; CI-NEXT:    s_mov_b32 s6, -1
176; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
177; CI-NEXT:    s_waitcnt vmcnt(0)
178; CI-NEXT:    s_setpc_b64 s[30:31]
179;
180; VI-LABEL: void_func_i8_zeroext:
181; VI:       ; %bb.0:
182; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
183; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
184; VI-NEXT:    s_mov_b32 s7, 0xf000
185; VI-NEXT:    s_mov_b32 s6, -1
186; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
187; VI-NEXT:    s_waitcnt vmcnt(0)
188; VI-NEXT:    s_setpc_b64 s[30:31]
189;
190; GFX9-LABEL: void_func_i8_zeroext:
191; GFX9:       ; %bb.0:
192; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
194; GFX9-NEXT:    s_mov_b32 s7, 0xf000
195; GFX9-NEXT:    s_mov_b32 s6, -1
196; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
197; GFX9-NEXT:    s_waitcnt vmcnt(0)
198; GFX9-NEXT:    s_setpc_b64 s[30:31]
199;
200; GFX11-LABEL: void_func_i8_zeroext:
201; GFX11:       ; %bb.0:
202; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
204; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
205; GFX11-NEXT:    s_mov_b32 s2, -1
206; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
207; GFX11-NEXT:    s_setpc_b64 s[30:31]
208  %ext = zext i8 %arg0 to i32
209  %add = add i32 %ext, 12
210  store i32 %add, ptr addrspace(1) undef
211  ret void
212}
213
; Passes an i8 argument marked signext, widens it with sext, adds 12 and
; stores the i32 result. The checks expect a 32-bit add of 12 applied
; directly to v0, with no sign-extension instruction in the callee.
214define void @void_func_i8_signext(i8 signext %arg0) #0 {
215; CI-LABEL: void_func_i8_signext:
216; CI:       ; %bb.0:
217; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
219; CI-NEXT:    s_mov_b32 s7, 0xf000
220; CI-NEXT:    s_mov_b32 s6, -1
221; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
222; CI-NEXT:    s_waitcnt vmcnt(0)
223; CI-NEXT:    s_setpc_b64 s[30:31]
224;
225; VI-LABEL: void_func_i8_signext:
226; VI:       ; %bb.0:
227; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
229; VI-NEXT:    s_mov_b32 s7, 0xf000
230; VI-NEXT:    s_mov_b32 s6, -1
231; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
232; VI-NEXT:    s_waitcnt vmcnt(0)
233; VI-NEXT:    s_setpc_b64 s[30:31]
234;
235; GFX9-LABEL: void_func_i8_signext:
236; GFX9:       ; %bb.0:
237; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
239; GFX9-NEXT:    s_mov_b32 s7, 0xf000
240; GFX9-NEXT:    s_mov_b32 s6, -1
241; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
242; GFX9-NEXT:    s_waitcnt vmcnt(0)
243; GFX9-NEXT:    s_setpc_b64 s[30:31]
244;
245; GFX11-LABEL: void_func_i8_signext:
246; GFX11:       ; %bb.0:
247; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
249; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
250; GFX11-NEXT:    s_mov_b32 s2, -1
251; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
252; GFX11-NEXT:    s_setpc_b64 s[30:31]
253  %ext = sext i8 %arg0 to i32
254  %add = add i32 %ext, 12
255  store i32 %add, ptr addrspace(1) undef
256  ret void
257}
258
; Passes an i16 argument with no extension attribute and stores it unchanged.
; The checks expect a 16-bit buffer store of v0 with nothing in front of it
; besides the buffer descriptor setup.
259define void @void_func_i16(i16 %arg0) #0 {
260; CIGFX89-LABEL: void_func_i16:
261; CIGFX89:       ; %bb.0:
262; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
264; CIGFX89-NEXT:    s_mov_b32 s6, -1
265; CIGFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
266; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
267; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
268;
269; GFX11-LABEL: void_func_i16:
270; GFX11:       ; %bb.0:
271; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
273; GFX11-NEXT:    s_mov_b32 s2, -1
274; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
275; GFX11-NEXT:    s_setpc_b64 s[30:31]
276  store i16 %arg0, ptr addrspace(1) undef
277  ret void
278}
279
; Passes an i16 argument marked zeroext, widens it with zext, adds 12 and
; stores the i32 result. The checks expect a 32-bit add of 12 applied
; directly to v0, with no extension or masking instruction in the callee.
280define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 {
281; CI-LABEL: void_func_i16_zeroext:
282; CI:       ; %bb.0:
283; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
285; CI-NEXT:    s_mov_b32 s7, 0xf000
286; CI-NEXT:    s_mov_b32 s6, -1
287; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
288; CI-NEXT:    s_waitcnt vmcnt(0)
289; CI-NEXT:    s_setpc_b64 s[30:31]
290;
291; VI-LABEL: void_func_i16_zeroext:
292; VI:       ; %bb.0:
293; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
295; VI-NEXT:    s_mov_b32 s7, 0xf000
296; VI-NEXT:    s_mov_b32 s6, -1
297; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
298; VI-NEXT:    s_waitcnt vmcnt(0)
299; VI-NEXT:    s_setpc_b64 s[30:31]
300;
301; GFX9-LABEL: void_func_i16_zeroext:
302; GFX9:       ; %bb.0:
303; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
305; GFX9-NEXT:    s_mov_b32 s7, 0xf000
306; GFX9-NEXT:    s_mov_b32 s6, -1
307; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
308; GFX9-NEXT:    s_waitcnt vmcnt(0)
309; GFX9-NEXT:    s_setpc_b64 s[30:31]
310;
311; GFX11-LABEL: void_func_i16_zeroext:
312; GFX11:       ; %bb.0:
313; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
315; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
316; GFX11-NEXT:    s_mov_b32 s2, -1
317; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
318; GFX11-NEXT:    s_setpc_b64 s[30:31]
319  %ext = zext i16 %arg0 to i32
320  %add = add i32 %ext, 12
321  store i32 %add, ptr addrspace(1) undef
322  ret void
323}
324
; Passes an i16 argument marked signext, widens it with sext, adds 12 and
; stores the i32 result. The checks expect a 32-bit add of 12 applied
; directly to v0, with no sign-extension instruction in the callee.
325define void @void_func_i16_signext(i16 signext %arg0) #0 {
326; CI-LABEL: void_func_i16_signext:
327; CI:       ; %bb.0:
328; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329; CI-NEXT:    v_add_i32_e32 v0, vcc, 12, v0
330; CI-NEXT:    s_mov_b32 s7, 0xf000
331; CI-NEXT:    s_mov_b32 s6, -1
332; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
333; CI-NEXT:    s_waitcnt vmcnt(0)
334; CI-NEXT:    s_setpc_b64 s[30:31]
335;
336; VI-LABEL: void_func_i16_signext:
337; VI:       ; %bb.0:
338; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339; VI-NEXT:    v_add_u32_e32 v0, vcc, 12, v0
340; VI-NEXT:    s_mov_b32 s7, 0xf000
341; VI-NEXT:    s_mov_b32 s6, -1
342; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
343; VI-NEXT:    s_waitcnt vmcnt(0)
344; VI-NEXT:    s_setpc_b64 s[30:31]
345;
346; GFX9-LABEL: void_func_i16_signext:
347; GFX9:       ; %bb.0:
348; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349; GFX9-NEXT:    v_add_u32_e32 v0, 12, v0
350; GFX9-NEXT:    s_mov_b32 s7, 0xf000
351; GFX9-NEXT:    s_mov_b32 s6, -1
352; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
353; GFX9-NEXT:    s_waitcnt vmcnt(0)
354; GFX9-NEXT:    s_setpc_b64 s[30:31]
355;
356; GFX11-LABEL: void_func_i16_signext:
357; GFX11:       ; %bb.0:
358; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359; GFX11-NEXT:    v_add_nc_u32_e32 v0, 12, v0
360; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
361; GFX11-NEXT:    s_mov_b32 s2, -1
362; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
363; GFX11-NEXT:    s_setpc_b64 s[30:31]
364  %ext = sext i16 %arg0 to i32
365  %add = add i32 %ext, 12
366  store i32 %add, ptr addrspace(1) undef
367  ret void
368}
369
; Passes an i32 argument and stores it unchanged. The checks expect a single
; dword-sized buffer store of v0.
370define void @void_func_i32(i32 %arg0) #0 {
371; CIGFX89-LABEL: void_func_i32:
372; CIGFX89:       ; %bb.0:
373; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
375; CIGFX89-NEXT:    s_mov_b32 s6, -1
376; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
377; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
378; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
379;
380; GFX11-LABEL: void_func_i32:
381; GFX11:       ; %bb.0:
382; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
384; GFX11-NEXT:    s_mov_b32 s2, -1
385; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
386; GFX11-NEXT:    s_setpc_b64 s[30:31]
387  store i32 %arg0, ptr addrspace(1) undef
388  ret void
389}
390
; Passes an i64 argument and stores it unchanged. The checks expect the value
; in the register pair v[0:1], written with one 64-bit buffer store.
391define void @void_func_i64(i64 %arg0) #0 {
392; CIGFX89-LABEL: void_func_i64:
393; CIGFX89:       ; %bb.0:
394; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
396; CIGFX89-NEXT:    s_mov_b32 s6, -1
397; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
398; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
399; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
400;
401; GFX11-LABEL: void_func_i64:
402; GFX11:       ; %bb.0:
403; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
405; GFX11-NEXT:    s_mov_b32 s2, -1
406; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
407; GFX11-NEXT:    s_setpc_b64 s[30:31]
408  store i64 %arg0, ptr addrspace(1) undef
409  ret void
410}
411
; Passes a half argument and stores it unchanged. The hawaii (CI) checks
; expect a v_cvt_f16_f32 on v0 before the 16-bit store; the GFX89 and GFX11
; checks expect v0 to be stored directly with no conversion.
412define void @void_func_f16(half %arg0) #0 {
413; CI-LABEL: void_func_f16:
414; CI:       ; %bb.0:
415; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
417; CI-NEXT:    s_mov_b32 s7, 0xf000
418; CI-NEXT:    s_mov_b32 s6, -1
419; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
420; CI-NEXT:    s_waitcnt vmcnt(0)
421; CI-NEXT:    s_setpc_b64 s[30:31]
422;
423; GFX89-LABEL: void_func_f16:
424; GFX89:       ; %bb.0:
425; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
426; GFX89-NEXT:    s_mov_b32 s7, 0xf000
427; GFX89-NEXT:    s_mov_b32 s6, -1
428; GFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
429; GFX89-NEXT:    s_waitcnt vmcnt(0)
430; GFX89-NEXT:    s_setpc_b64 s[30:31]
431;
432; GFX11-LABEL: void_func_f16:
433; GFX11:       ; %bb.0:
434; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
436; GFX11-NEXT:    s_mov_b32 s2, -1
437; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
438; GFX11-NEXT:    s_setpc_b64 s[30:31]
439  store half %arg0, ptr addrspace(1) undef
440  ret void
441}
442
; Passes a float argument and stores it unchanged. The checks expect a single
; dword-sized buffer store of v0.
443define void @void_func_f32(float %arg0) #0 {
444; CIGFX89-LABEL: void_func_f32:
445; CIGFX89:       ; %bb.0:
446; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
448; CIGFX89-NEXT:    s_mov_b32 s6, -1
449; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
450; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
451; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
452;
453; GFX11-LABEL: void_func_f32:
454; GFX11:       ; %bb.0:
455; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
457; GFX11-NEXT:    s_mov_b32 s2, -1
458; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
459; GFX11-NEXT:    s_setpc_b64 s[30:31]
460  store float %arg0, ptr addrspace(1) undef
461  ret void
462}
463
; Passes a double argument and stores it unchanged. The checks expect the
; value in the register pair v[0:1], written with one 64-bit buffer store.
464define void @void_func_f64(double %arg0) #0 {
465; CIGFX89-LABEL: void_func_f64:
466; CIGFX89:       ; %bb.0:
467; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
469; CIGFX89-NEXT:    s_mov_b32 s6, -1
470; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
471; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
472; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
473;
474; GFX11-LABEL: void_func_f64:
475; GFX11:       ; %bb.0:
476; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
478; GFX11-NEXT:    s_mov_b32 s2, -1
479; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
480; GFX11-NEXT:    s_setpc_b64 s[30:31]
481  store double %arg0, ptr addrspace(1) undef
482  ret void
483}
484
; Passes a <2 x i32> argument and stores it unchanged. The checks expect both
; elements in v[0:1], written with one 64-bit buffer store.
485define void @void_func_v2i32(<2 x i32> %arg0) #0 {
486; CIGFX89-LABEL: void_func_v2i32:
487; CIGFX89:       ; %bb.0:
488; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
490; CIGFX89-NEXT:    s_mov_b32 s6, -1
491; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
492; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
493; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
494;
495; GFX11-LABEL: void_func_v2i32:
496; GFX11:       ; %bb.0:
497; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
499; GFX11-NEXT:    s_mov_b32 s2, -1
500; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
501; GFX11-NEXT:    s_setpc_b64 s[30:31]
502  store <2 x i32> %arg0, ptr addrspace(1) undef
503  ret void
504}
505
; Passes a <3 x i32> argument and stores it unchanged. The checks expect the
; three elements in v[0:2], written with one 96-bit buffer store.
506define void @void_func_v3i32(<3 x i32> %arg0) #0 {
507; CIGFX89-LABEL: void_func_v3i32:
508; CIGFX89:       ; %bb.0:
509; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
511; CIGFX89-NEXT:    s_mov_b32 s6, -1
512; CIGFX89-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
513; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
514; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
515;
516; GFX11-LABEL: void_func_v3i32:
517; GFX11:       ; %bb.0:
518; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
520; GFX11-NEXT:    s_mov_b32 s2, -1
521; GFX11-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
522; GFX11-NEXT:    s_setpc_b64 s[30:31]
523  store <3 x i32> %arg0, ptr addrspace(1) undef
524  ret void
525}
526
; Passes a <4 x i32> argument and stores it unchanged. The checks expect the
; four elements in v[0:3], written with one 128-bit buffer store.
527define void @void_func_v4i32(<4 x i32> %arg0) #0 {
528; CIGFX89-LABEL: void_func_v4i32:
529; CIGFX89:       ; %bb.0:
530; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
532; CIGFX89-NEXT:    s_mov_b32 s6, -1
533; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
534; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
535; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
536;
537; GFX11-LABEL: void_func_v4i32:
538; GFX11:       ; %bb.0:
539; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
540; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
541; GFX11-NEXT:    s_mov_b32 s2, -1
542; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
543; GFX11-NEXT:    s_setpc_b64 s[30:31]
544  store <4 x i32> %arg0, ptr addrspace(1) undef
545  ret void
546}
547
; Passes a <5 x i32> argument and stores it unchanged. The checks expect the
; store to be split in two: a dword store of v4 followed by a 128-bit store
; of v[0:3].
548define void @void_func_v5i32(<5 x i32> %arg0) #0 {
549; CIGFX89-LABEL: void_func_v5i32:
550; CIGFX89:       ; %bb.0:
551; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
553; CIGFX89-NEXT:    s_mov_b32 s6, -1
554; CIGFX89-NEXT:    buffer_store_dword v4, off, s[4:7], 0
555; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
556; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
557; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
558;
559; GFX11-LABEL: void_func_v5i32:
560; GFX11:       ; %bb.0:
561; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
563; GFX11-NEXT:    s_mov_b32 s2, -1
564; GFX11-NEXT:    s_clause 0x1
565; GFX11-NEXT:    buffer_store_b32 v4, off, s[0:3], 0
566; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
567; GFX11-NEXT:    s_setpc_b64 s[30:31]
568  store <5 x i32> %arg0, ptr addrspace(1) undef
569  ret void
570}
571
; Passes an <8 x i32> argument and stores it unchanged. The checks expect the
; elements in v[0:7], written as two 128-bit stores (v[4:7] then v[0:3]).
572define void @void_func_v8i32(<8 x i32> %arg0) #0 {
573; CIGFX89-LABEL: void_func_v8i32:
574; CIGFX89:       ; %bb.0:
575; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
576; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
577; CIGFX89-NEXT:    s_mov_b32 s6, -1
578; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
579; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
580; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
581; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
582;
583; GFX11-LABEL: void_func_v8i32:
584; GFX11:       ; %bb.0:
585; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
587; GFX11-NEXT:    s_mov_b32 s2, -1
588; GFX11-NEXT:    s_clause 0x1
589; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
590; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
591; GFX11-NEXT:    s_setpc_b64 s[30:31]
592  store <8 x i32> %arg0, ptr addrspace(1) undef
593  ret void
594}
595
; Passes a <16 x i32> argument and stores it unchanged. The checks expect the
; elements in v[0:15], written as four 128-bit stores from the highest
; register quad down to v[0:3].
596define void @void_func_v16i32(<16 x i32> %arg0) #0 {
597; CIGFX89-LABEL: void_func_v16i32:
598; CIGFX89:       ; %bb.0:
599; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
600; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
601; CIGFX89-NEXT:    s_mov_b32 s6, -1
602; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
603; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
604; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
605; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
606; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
607; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
608;
609; GFX11-LABEL: void_func_v16i32:
610; GFX11:       ; %bb.0:
611; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
613; GFX11-NEXT:    s_mov_b32 s2, -1
614; GFX11-NEXT:    s_clause 0x3
615; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
616; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
617; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
618; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
619; GFX11-NEXT:    s_setpc_b64 s[30:31]
620  store <16 x i32> %arg0, ptr addrspace(1) undef
621  ret void
622}
623
; Passes a <32 x i32> argument and stores it unchanged. The checks expect v31
; to be loaded from memory addressed by s32 (buffer_load_dword or
; scratch_load_b32) before the v[28:31] quad is stored, guarded by an
; s_waitcnt on vmcnt; the other quads are stored straight from registers.
624define void @void_func_v32i32(<32 x i32> %arg0) #0 {
625; CIGFX89-LABEL: void_func_v32i32:
626; CIGFX89:       ; %bb.0:
627; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
629; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
630; CIGFX89-NEXT:    s_mov_b32 s6, -1
631; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
632; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
633; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
634; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
635; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
636; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
637; CIGFX89-NEXT:    s_waitcnt vmcnt(6)
638; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
639; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
640; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
641; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
642;
643; GFX11-LABEL: void_func_v32i32:
644; GFX11:       ; %bb.0:
645; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; GFX11-NEXT:    scratch_load_b32 v31, off, s32
647; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
648; GFX11-NEXT:    s_mov_b32 s2, -1
649; GFX11-NEXT:    s_clause 0x3
650; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
651; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
652; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
653; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
654; GFX11-NEXT:    s_waitcnt vmcnt(0)
655; GFX11-NEXT:    s_clause 0x3
656; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
657; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
658; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
659; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
660; GFX11-NEXT:    s_setpc_b64 s[30:31]
661  store <32 x i32> %arg0, ptr addrspace(1) undef
662  ret void
663}
664
665; 1 over register limit
; Passes a <33 x i32> argument and stores it unchanged. The checks expect two
; dwords to be loaded via s32 (at offset 0 into v31 and at offset 4 into a
; second register) and the final store to be a lone dword after the eight
; 128-bit stores. The GFX9 checks additionally expect an s_nop 0 after each
; of the two loads.
666define void @void_func_v33i32(<33 x i32> %arg0) #0 {
667; CI-LABEL: void_func_v33i32:
668; CI:       ; %bb.0:
669; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670; CI-NEXT:    s_mov_b32 s7, 0xf000
671; CI-NEXT:    s_mov_b32 s6, -1
672; CI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
673; CI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
674; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
675; CI-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:4
676; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
677; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
678; CI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
679; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
680; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
681; CI-NEXT:    s_waitcnt vmcnt(6)
682; CI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
683; CI-NEXT:    s_waitcnt vmcnt(6)
684; CI-NEXT:    buffer_store_dword v20, off, s[4:7], 0
685; CI-NEXT:    s_waitcnt vmcnt(0)
686; CI-NEXT:    s_setpc_b64 s[30:31]
687;
688; VI-LABEL: void_func_v33i32:
689; VI:       ; %bb.0:
690; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691; VI-NEXT:    s_mov_b32 s7, 0xf000
692; VI-NEXT:    s_mov_b32 s6, -1
693; VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
694; VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
695; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
696; VI-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:4
697; VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
698; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
699; VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
700; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
701; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
702; VI-NEXT:    s_waitcnt vmcnt(6)
703; VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
704; VI-NEXT:    s_waitcnt vmcnt(6)
705; VI-NEXT:    buffer_store_dword v20, off, s[4:7], 0
706; VI-NEXT:    s_waitcnt vmcnt(0)
707; VI-NEXT:    s_setpc_b64 s[30:31]
708;
709; GFX9-LABEL: void_func_v33i32:
710; GFX9:       ; %bb.0:
711; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
712; GFX9-NEXT:    s_mov_b32 s7, 0xf000
713; GFX9-NEXT:    s_mov_b32 s6, -1
714; GFX9-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
715; GFX9-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
716; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32
717; GFX9-NEXT:    s_nop 0
718; GFX9-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:4
719; GFX9-NEXT:    s_nop 0
720; GFX9-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
721; GFX9-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
722; GFX9-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
723; GFX9-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
724; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
725; GFX9-NEXT:    s_waitcnt vmcnt(6)
726; GFX9-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
727; GFX9-NEXT:    s_waitcnt vmcnt(6)
728; GFX9-NEXT:    buffer_store_dword v20, off, s[4:7], 0
729; GFX9-NEXT:    s_waitcnt vmcnt(0)
730; GFX9-NEXT:    s_setpc_b64 s[30:31]
731;
732; GFX11-LABEL: void_func_v33i32:
733; GFX11:       ; %bb.0:
734; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
735; GFX11-NEXT:    s_clause 0x1
736; GFX11-NEXT:    scratch_load_b32 v31, off, s32
737; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:4
738; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
739; GFX11-NEXT:    s_mov_b32 s2, -1
740; GFX11-NEXT:    s_clause 0x5
741; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
742; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
743; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
744; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
745; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
746; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
747; GFX11-NEXT:    s_waitcnt vmcnt(1)
748; GFX11-NEXT:    s_clause 0x1
749; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
750; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
751; GFX11-NEXT:    s_waitcnt vmcnt(0)
752; GFX11-NEXT:    buffer_store_b32 v32, off, s[0:3], 0
753; GFX11-NEXT:    s_setpc_b64 s[30:31]
754  store <33 x i32> %arg0, ptr addrspace(1) undef
755  ret void
756}
757
; Passes a <2 x i64> argument and stores it unchanged. The checks expect the
; value in v[0:3], written with one 128-bit buffer store.
758define void @void_func_v2i64(<2 x i64> %arg0) #0 {
759; CIGFX89-LABEL: void_func_v2i64:
760; CIGFX89:       ; %bb.0:
761; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
762; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
763; CIGFX89-NEXT:    s_mov_b32 s6, -1
764; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
765; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
766; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
767;
768; GFX11-LABEL: void_func_v2i64:
769; GFX11:       ; %bb.0:
770; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
772; GFX11-NEXT:    s_mov_b32 s2, -1
773; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
774; GFX11-NEXT:    s_setpc_b64 s[30:31]
775  store <2 x i64> %arg0, ptr addrspace(1) undef
776  ret void
777}
778
; Passes a <3 x i64> argument and stores it unchanged. The checks expect the
; store to be split in two: a 64-bit store of v[4:5] followed by a 128-bit
; store of v[0:3].
779define void @void_func_v3i64(<3 x i64> %arg0) #0 {
780; CIGFX89-LABEL: void_func_v3i64:
781; CIGFX89:       ; %bb.0:
782; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
783; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
784; CIGFX89-NEXT:    s_mov_b32 s6, -1
785; CIGFX89-NEXT:    buffer_store_dwordx2 v[4:5], off, s[4:7], 0
786; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
787; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
788; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
789;
790; GFX11-LABEL: void_func_v3i64:
791; GFX11:       ; %bb.0:
792; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
793; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
794; GFX11-NEXT:    s_mov_b32 s2, -1
795; GFX11-NEXT:    s_clause 0x1
796; GFX11-NEXT:    buffer_store_b64 v[4:5], off, s[0:3], 0
797; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
798; GFX11-NEXT:    s_setpc_b64 s[30:31]
799  store <3 x i64> %arg0, ptr addrspace(1) undef
800  ret void
801}
802
; Checks lowering of a <4 x i64> function argument.
; The expected output takes the eight argument dwords from v[0:7] and emits
; two 128-bit buffer stores, upper half (v[4:7]) first. On gfx1100 the two
; stores are preceded by s_clause 0x1.
803define void @void_func_v4i64(<4 x i64> %arg0) #0 {
804; CIGFX89-LABEL: void_func_v4i64:
805; CIGFX89:       ; %bb.0:
806; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
807; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
808; CIGFX89-NEXT:    s_mov_b32 s6, -1
809; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
810; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
811; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
812; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
813;
814; GFX11-LABEL: void_func_v4i64:
815; GFX11:       ; %bb.0:
816; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
818; GFX11-NEXT:    s_mov_b32 s2, -1
819; GFX11-NEXT:    s_clause 0x1
820; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
821; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
822; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Whole-vector store to an undef addrspace(1) pointer; both expected stores
; use the same descriptor and a zero immediate offset.
823  store <4 x i64> %arg0, ptr addrspace(1) undef
824  ret void
825}
826
; Checks lowering of a <5 x i64> function argument.
; The expected output takes the ten argument dwords from v[0:9] and emits
; two 128-bit stores (v[4:7], v[0:3]) plus one 64-bit store for the odd
; element (v[8:9]). On gfx1100 the three stores are preceded by s_clause 0x2.
827define void @void_func_v5i64(<5 x i64> %arg0) #0 {
828; CIGFX89-LABEL: void_func_v5i64:
829; CIGFX89:       ; %bb.0:
830; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
831; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
832; CIGFX89-NEXT:    s_mov_b32 s6, -1
833; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
834; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
835; CIGFX89-NEXT:    buffer_store_dwordx2 v[8:9], off, s[4:7], 0
836; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
837; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
838;
839; GFX11-LABEL: void_func_v5i64:
840; GFX11:       ; %bb.0:
841; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
843; GFX11-NEXT:    s_mov_b32 s2, -1
844; GFX11-NEXT:    s_clause 0x2
845; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
846; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
847; GFX11-NEXT:    buffer_store_b64 v[8:9], off, s[0:3], 0
848; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Whole-vector store to an undef addrspace(1) pointer; all expected stores
; use the same descriptor and a zero immediate offset.
849  store <5 x i64> %arg0, ptr addrspace(1) undef
850  ret void
851}
852
; Checks lowering of a <8 x i64> function argument.
; The expected output takes the sixteen argument dwords from v[0:15] and
; emits four 128-bit buffer stores, highest register group first. On gfx1100
; the four stores are preceded by s_clause 0x3.
853define void @void_func_v8i64(<8 x i64> %arg0) #0 {
854; CIGFX89-LABEL: void_func_v8i64:
855; CIGFX89:       ; %bb.0:
856; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
857; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
858; CIGFX89-NEXT:    s_mov_b32 s6, -1
859; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
860; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
861; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
862; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
863; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
864; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
865;
866; GFX11-LABEL: void_func_v8i64:
867; GFX11:       ; %bb.0:
868; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
869; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
870; GFX11-NEXT:    s_mov_b32 s2, -1
871; GFX11-NEXT:    s_clause 0x3
872; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
873; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
874; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
875; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
876; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Whole-vector store to an undef addrspace(1) pointer; all expected stores
; use the same descriptor and a zero immediate offset.
877  store <8 x i64> %arg0, ptr addrspace(1) undef
878  ret void
879}
880
; Checks lowering of a <16 x i64> function argument (32 dwords).
; The expected output uses v0 through v30 directly and reloads v31 from
; memory addressed through s32 before storing (presumably the stack slot of
; the last argument dword, TODO confirm against the calling convention).
; Because of that reload, a s_waitcnt precedes the store of v[28:31]. The
; vector is written with eight 128-bit buffer stores.
881define void @void_func_v16i64(<16 x i64> %arg0) #0 {
882; CIGFX89-LABEL: void_func_v16i64:
883; CIGFX89:       ; %bb.0:
884; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
885; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
886; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
887; CIGFX89-NEXT:    s_mov_b32 s6, -1
888; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
889; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
890; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
891; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
892; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
893; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
894; CIGFX89-NEXT:    s_waitcnt vmcnt(6)
895; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
896; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
897; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
898; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
899;
900; GFX11-LABEL: void_func_v16i64:
901; GFX11:       ; %bb.0:
902; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903; GFX11-NEXT:    scratch_load_b32 v31, off, s32
904; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
905; GFX11-NEXT:    s_mov_b32 s2, -1
906; GFX11-NEXT:    s_clause 0x3
907; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
908; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
909; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
910; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
911; GFX11-NEXT:    s_waitcnt vmcnt(0)
912; GFX11-NEXT:    s_clause 0x3
913; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
914; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
915; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
916; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
917; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Whole-vector store to an undef addrspace(1) pointer; all expected stores
; use the same descriptor and a zero immediate offset.
918  store <16 x i64> %arg0, ptr addrspace(1) undef
919  ret void
920}
921
; Checks lowering of a <2 x i8> function argument.
; The expected output receives one element per register (v0, v1), packs
; them into 16 bits as v0 | (v1 << 8), and writes the result with a single
; 16-bit buffer store. The packing sequence differs per target, which is why
; hawaii, fiji/gfx900 and gfx1100 each have their own check block here.
922define void @void_func_v2i8(<2 x i8> %arg0) #0 {
923; CI-LABEL: void_func_v2i8:
924; CI:       ; %bb.0:
925; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
926; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
927; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
928; CI-NEXT:    s_mov_b32 s4, 0
929; CI-NEXT:    v_or_b32_e32 v0, v0, v1
930; CI-NEXT:    s_mov_b32 s7, 0xf000
931; CI-NEXT:    s_mov_b32 s6, -1
932; CI-NEXT:    s_mov_b32 s5, s4
933; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
934; CI-NEXT:    s_waitcnt vmcnt(0)
935; CI-NEXT:    s_setpc_b64 s[30:31]
936;
937; GFX89-LABEL: void_func_v2i8:
938; GFX89:       ; %bb.0:
939; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
940; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
941; GFX89-NEXT:    s_mov_b32 s4, 0
942; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
943; GFX89-NEXT:    s_mov_b32 s7, 0xf000
944; GFX89-NEXT:    s_mov_b32 s6, -1
945; GFX89-NEXT:    s_mov_b32 s5, s4
946; GFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
947; GFX89-NEXT:    s_waitcnt vmcnt(0)
948; GFX89-NEXT:    s_setpc_b64 s[30:31]
949;
950; GFX11-LABEL: void_func_v2i8:
951; GFX11:       ; %bb.0:
952; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
953; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
954; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
955; GFX11-NEXT:    s_mov_b32 s0, 0
956; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
957; GFX11-NEXT:    s_mov_b32 s2, -1
958; GFX11-NEXT:    s_mov_b32 s1, s0
959; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
960; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
961; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The destination is a null addrspace(1) pointer, so the expected code also
; zeroes the two low descriptor words before the store.
962  store <2 x i8> %arg0, ptr addrspace(1) null
963  ret void
964}
965
; Checks lowering of a <2 x i16> function argument.
; On hawaii the expected output receives one element per register (v0, v1)
; and packs them as (v0 & 0xffff) | (v1 << 16) before a dword store. On
; fiji/gfx900 and gfx1100 the expected output stores v0 unchanged, so the
; vector presumably already arrives packed in one register on those targets
; (TODO confirm against the calling convention).
966define void @void_func_v2i16(<2 x i16> %arg0) #0 {
967; CI-LABEL: void_func_v2i16:
968; CI:       ; %bb.0:
969; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
971; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
972; CI-NEXT:    v_or_b32_e32 v0, v0, v1
973; CI-NEXT:    s_mov_b32 s7, 0xf000
974; CI-NEXT:    s_mov_b32 s6, -1
975; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
976; CI-NEXT:    s_waitcnt vmcnt(0)
977; CI-NEXT:    s_setpc_b64 s[30:31]
978;
979; GFX89-LABEL: void_func_v2i16:
980; GFX89:       ; %bb.0:
981; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982; GFX89-NEXT:    s_mov_b32 s7, 0xf000
983; GFX89-NEXT:    s_mov_b32 s6, -1
984; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
985; GFX89-NEXT:    s_waitcnt vmcnt(0)
986; GFX89-NEXT:    s_setpc_b64 s[30:31]
987;
988; GFX11-LABEL: void_func_v2i16:
989; GFX11:       ; %bb.0:
990; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
991; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
992; GFX11-NEXT:    s_mov_b32 s2, -1
993; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
994; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The destination is an undef addrspace(1) pointer, so the expected code only
; sets the upper two descriptor words and never materializes a base address.
995  store <2 x i16> %arg0, ptr addrspace(1) undef
996  ret void
997}
998
; Checks lowering of a <3 x i8> function argument.
; The expected output receives one element per register (v0, v1, v2) and
; splits the store in two. Element 2 (v2) is written with a byte store while
; the low descriptor word holds 2, then that word is reset to 0 and the
; packed pair v0 | (v1 << 8) is written with a 16-bit store. The low
; descriptor word presumably acts as the byte address of each piece
; (TODO confirm).
999define void @void_func_v3i8(<3 x i8> %arg0) #0 {
1000; CI-LABEL: void_func_v3i8:
1001; CI:       ; %bb.0:
1002; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1003; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1004; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
1005; CI-NEXT:    s_mov_b32 s5, 0
1006; CI-NEXT:    s_mov_b32 s4, 2
1007; CI-NEXT:    s_mov_b32 s7, 0xf000
1008; CI-NEXT:    s_mov_b32 s6, -1
1009; CI-NEXT:    v_or_b32_e32 v0, v0, v1
1010; CI-NEXT:    buffer_store_byte v2, off, s[4:7], 0
1011; CI-NEXT:    s_mov_b32 s4, s5
1012; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1013; CI-NEXT:    s_waitcnt vmcnt(0)
1014; CI-NEXT:    s_setpc_b64 s[30:31]
1015;
1016; GFX89-LABEL: void_func_v3i8:
1017; GFX89:       ; %bb.0:
1018; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1019; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
1020; GFX89-NEXT:    s_mov_b32 s5, 0
1021; GFX89-NEXT:    s_mov_b32 s4, 2
1022; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1023; GFX89-NEXT:    s_mov_b32 s6, -1
1024; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1025; GFX89-NEXT:    buffer_store_byte v2, off, s[4:7], 0
1026; GFX89-NEXT:    s_mov_b32 s4, s5
1027; GFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
1028; GFX89-NEXT:    s_waitcnt vmcnt(0)
1029; GFX89-NEXT:    s_setpc_b64 s[30:31]
1030;
1031; GFX11-LABEL: void_func_v3i8:
1032; GFX11:       ; %bb.0:
1033; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
1035; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
1036; GFX11-NEXT:    s_mov_b32 s1, 0
1037; GFX11-NEXT:    s_mov_b32 s0, 2
1038; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1039; GFX11-NEXT:    s_mov_b32 s2, -1
1040; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1041; GFX11-NEXT:    buffer_store_b8 v2, off, s[0:3], 0
1042; GFX11-NEXT:    s_mov_b32 s0, s1
1043; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
1044; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The destination is a null addrspace(1) pointer, so the expected code fills
; in the two low descriptor words explicitly for each store.
1045  store <3 x i8> %arg0, ptr addrspace(1) null
1046  ret void
1047}
1048
; Checks lowering of a <4 x i8> function argument.
; The expected output receives one element per register (v0 through v3),
; packs all four bytes into a single dword in v0, and writes it with one
; 32-bit buffer store. hawaii uses 32-bit shift/and/or, fiji/gfx900 use
; 16-bit shifts with SDWA ors, and gfx1100 uses 16-bit shifts with plain
; and/or plus s_delay_alu hints.
1049define void @void_func_v4i8(<4 x i8> %arg0) #0 {
1050; CI-LABEL: void_func_v4i8:
1051; CI:       ; %bb.0:
1052; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1053; CI-NEXT:    v_and_b32_e32 v2, 0xff, v2
1054; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1055; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
1056; CI-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1057; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1058; CI-NEXT:    v_or_b32_e32 v0, v0, v1
1059; CI-NEXT:    v_or_b32_e32 v2, v3, v2
1060; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1061; CI-NEXT:    s_mov_b32 s4, 0
1062; CI-NEXT:    v_or_b32_e32 v0, v0, v2
1063; CI-NEXT:    s_mov_b32 s7, 0xf000
1064; CI-NEXT:    s_mov_b32 s6, -1
1065; CI-NEXT:    s_mov_b32 s5, s4
1066; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1067; CI-NEXT:    s_waitcnt vmcnt(0)
1068; CI-NEXT:    s_setpc_b64 s[30:31]
1069;
1070; GFX89-LABEL: void_func_v4i8:
1071; GFX89:       ; %bb.0:
1072; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1073; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
1074; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1075; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
1076; GFX89-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1077; GFX89-NEXT:    s_mov_b32 s4, 0
1078; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1079; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1080; GFX89-NEXT:    s_mov_b32 s6, -1
1081; GFX89-NEXT:    s_mov_b32 s5, s4
1082; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1083; GFX89-NEXT:    s_waitcnt vmcnt(0)
1084; GFX89-NEXT:    s_setpc_b64 s[30:31]
1085;
1086; GFX11-LABEL: void_func_v4i8:
1087; GFX11:       ; %bb.0:
1088; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1089; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
1090; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
1091; GFX11-NEXT:    v_lshlrev_b16 v3, 8, v3
1092; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v2
1093; GFX11-NEXT:    s_mov_b32 s0, 0
1094; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1095; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1096; GFX11-NEXT:    s_mov_b32 s2, -1
1097; GFX11-NEXT:    v_or_b32_e32 v1, v2, v3
1098; GFX11-NEXT:    s_mov_b32 s1, s0
1099; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1100; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1101; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1102; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1103; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1104; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
1105; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The destination is a null addrspace(1) pointer, so the expected code also
; zeroes the two low descriptor words before the store.
1106  store <4 x i8> %arg0, ptr addrspace(1) null
1107  ret void
1108}
1109
; Checks lowering of a <5 x i8> function argument.
; The expected output receives one element per register (v0 through v4) and
; splits the store in two. Element 4 (v4) is written with a byte store while
; the low descriptor word holds 4, then that word is reset to 0 and elements
; 0 through 3, packed into one dword in v0, are written with a 32-bit store.
; The low descriptor word presumably acts as the byte address of each piece
; (TODO confirm).
1110define void @void_func_v5i8(<5 x i8> %arg0) #0 {
1111; CI-LABEL: void_func_v5i8:
1112; CI:       ; %bb.0:
1113; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1114; CI-NEXT:    v_and_b32_e32 v2, 0xff, v2
1115; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1116; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
1117; CI-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1118; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1119; CI-NEXT:    v_or_b32_e32 v0, v0, v1
1120; CI-NEXT:    v_or_b32_e32 v2, v3, v2
1121; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1122; CI-NEXT:    s_mov_b32 s5, 0
1123; CI-NEXT:    s_mov_b32 s4, 4
1124; CI-NEXT:    s_mov_b32 s7, 0xf000
1125; CI-NEXT:    s_mov_b32 s6, -1
1126; CI-NEXT:    v_or_b32_e32 v0, v0, v2
1127; CI-NEXT:    buffer_store_byte v4, off, s[4:7], 0
1128; CI-NEXT:    s_mov_b32 s4, s5
1129; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1130; CI-NEXT:    s_waitcnt vmcnt(0)
1131; CI-NEXT:    s_setpc_b64 s[30:31]
1132;
1133; GFX89-LABEL: void_func_v5i8:
1134; GFX89:       ; %bb.0:
1135; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1136; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
1137; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1138; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
1139; GFX89-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1140; GFX89-NEXT:    s_mov_b32 s5, 0
1141; GFX89-NEXT:    s_mov_b32 s4, 4
1142; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1143; GFX89-NEXT:    s_mov_b32 s6, -1
1144; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1145; GFX89-NEXT:    buffer_store_byte v4, off, s[4:7], 0
1146; GFX89-NEXT:    s_mov_b32 s4, s5
1147; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1148; GFX89-NEXT:    s_waitcnt vmcnt(0)
1149; GFX89-NEXT:    s_setpc_b64 s[30:31]
1150;
1151; GFX11-LABEL: void_func_v5i8:
1152; GFX11:       ; %bb.0:
1153; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1154; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
1155; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
1156; GFX11-NEXT:    v_lshlrev_b16 v3, 8, v3
1157; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v2
1158; GFX11-NEXT:    s_mov_b32 s1, 0
1159; GFX11-NEXT:    s_mov_b32 s0, 4
1160; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1161; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1162; GFX11-NEXT:    v_or_b32_e32 v1, v2, v3
1163; GFX11-NEXT:    s_mov_b32 s2, -1
1164; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
1165; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1166; GFX11-NEXT:    buffer_store_b8 v4, off, s[0:3], 0
1167; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1168; GFX11-NEXT:    s_mov_b32 s0, s1
1169; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1170; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
1171; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The destination is a null addrspace(1) pointer, so the expected code fills
; in the two low descriptor words explicitly for each store.
1172  store <5 x i8> %arg0, ptr addrspace(1) null
1173  ret void
1174}
1175
; Checks lowering of a <8 x i8> function argument.
; The expected output receives one element per register (v0 through v7),
; packs elements 0-3 and 4-7 into two dwords, and writes them with a single
; 64-bit buffer store. hawaii and fiji/gfx900 assemble the pair in v[3:4],
; gfx1100 assembles it in v[0:1].
1176define void @void_func_v8i8(<8 x i8> %arg0) #0 {
1177; CI-LABEL: void_func_v8i8:
1178; CI:       ; %bb.0:
1179; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1180; CI-NEXT:    v_and_b32_e32 v6, 0xff, v6
1181; CI-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
1182; CI-NEXT:    v_and_b32_e32 v4, 0xff, v4
1183; CI-NEXT:    v_and_b32_e32 v2, 0xff, v2
1184; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1185; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
1186; CI-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
1187; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
1188; CI-NEXT:    v_or_b32_e32 v4, v4, v5
1189; CI-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1190; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1191; CI-NEXT:    v_or_b32_e32 v0, v0, v1
1192; CI-NEXT:    v_or_b32_e32 v6, v7, v6
1193; CI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1194; CI-NEXT:    v_or_b32_e32 v2, v3, v2
1195; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1196; CI-NEXT:    s_mov_b32 s4, 0
1197; CI-NEXT:    v_or_b32_e32 v4, v4, v6
1198; CI-NEXT:    v_or_b32_e32 v3, v0, v2
1199; CI-NEXT:    s_mov_b32 s7, 0xf000
1200; CI-NEXT:    s_mov_b32 s6, -1
1201; CI-NEXT:    s_mov_b32 s5, s4
1202; CI-NEXT:    buffer_store_dwordx2 v[3:4], off, s[4:7], 0
1203; CI-NEXT:    s_waitcnt vmcnt(0)
1204; CI-NEXT:    s_setpc_b64 s[30:31]
1205;
1206; GFX89-LABEL: void_func_v8i8:
1207; GFX89:       ; %bb.0:
1208; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1209; GFX89-NEXT:    v_lshlrev_b16_e32 v5, 8, v5
1210; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
1211; GFX89-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1212; GFX89-NEXT:    v_lshlrev_b16_e32 v5, 8, v7
1213; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1214; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
1215; GFX89-NEXT:    v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1216; GFX89-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1217; GFX89-NEXT:    s_mov_b32 s4, 0
1218; GFX89-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1219; GFX89-NEXT:    v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1220; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1221; GFX89-NEXT:    s_mov_b32 s6, -1
1222; GFX89-NEXT:    s_mov_b32 s5, s4
1223; GFX89-NEXT:    buffer_store_dwordx2 v[3:4], off, s[4:7], 0
1224; GFX89-NEXT:    s_waitcnt vmcnt(0)
1225; GFX89-NEXT:    s_setpc_b64 s[30:31]
1226;
1227; GFX11-LABEL: void_func_v8i8:
1228; GFX11:       ; %bb.0:
1229; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230; GFX11-NEXT:    v_lshlrev_b16 v5, 8, v5
1231; GFX11-NEXT:    v_and_b32_e32 v4, 0xff, v4
1232; GFX11-NEXT:    v_lshlrev_b16 v7, 8, v7
1233; GFX11-NEXT:    v_and_b32_e32 v6, 0xff, v6
1234; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
1235; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
1236; GFX11-NEXT:    v_lshlrev_b16 v3, 8, v3
1237; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v2
1238; GFX11-NEXT:    v_or_b32_e32 v4, v4, v5
1239; GFX11-NEXT:    v_or_b32_e32 v5, v6, v7
1240; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1241; GFX11-NEXT:    s_mov_b32 s0, 0
1242; GFX11-NEXT:    v_or_b32_e32 v1, v2, v3
1243; GFX11-NEXT:    v_and_b32_e32 v2, 0xffff, v4
1244; GFX11-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
1245; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1246; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1247; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 16, v1
1248; GFX11-NEXT:    s_mov_b32 s2, -1
1249; GFX11-NEXT:    v_or_b32_e32 v1, v2, v3
1250; GFX11-NEXT:    s_mov_b32 s1, s0
1251; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1252; GFX11-NEXT:    v_or_b32_e32 v0, v0, v4
1253; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
1254; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The destination is a null addrspace(1) pointer, so the expected code also
; zeroes the two low descriptor words before the store.
1255  store <8 x i8> %arg0, ptr addrspace(1) null
1256  ret void
1257}
1258
; Checks lowering of a <16 x i8> function argument.
; The expected output receives one element per register (v0 through v15),
; packs each group of four bytes into a dword, and writes the four dwords
; with a single 128-bit buffer store. hawaii and fiji/gfx900 assemble the
; result in v[9:12], gfx1100 assembles it in v[0:3].
1259define void @void_func_v16i8(<16 x i8> %arg0) #0 {
1260; CI-LABEL: void_func_v16i8:
1261; CI:       ; %bb.0:
1262; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1263; CI-NEXT:    v_and_b32_e32 v14, 0xff, v14
1264; CI-NEXT:    v_lshlrev_b32_e32 v13, 8, v13
1265; CI-NEXT:    v_and_b32_e32 v12, 0xff, v12
1266; CI-NEXT:    v_and_b32_e32 v10, 0xff, v10
1267; CI-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
1268; CI-NEXT:    v_and_b32_e32 v8, 0xff, v8
1269; CI-NEXT:    v_and_b32_e32 v6, 0xff, v6
1270; CI-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
1271; CI-NEXT:    v_and_b32_e32 v4, 0xff, v4
1272; CI-NEXT:    v_and_b32_e32 v2, 0xff, v2
1273; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1274; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
1275; CI-NEXT:    v_lshlrev_b32_e32 v15, 24, v15
1276; CI-NEXT:    v_lshlrev_b32_e32 v14, 16, v14
1277; CI-NEXT:    v_or_b32_e32 v12, v12, v13
1278; CI-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
1279; CI-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
1280; CI-NEXT:    v_or_b32_e32 v8, v8, v9
1281; CI-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
1282; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
1283; CI-NEXT:    v_or_b32_e32 v4, v4, v5
1284; CI-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1285; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1286; CI-NEXT:    v_or_b32_e32 v0, v0, v1
1287; CI-NEXT:    v_or_b32_e32 v14, v15, v14
1288; CI-NEXT:    v_and_b32_e32 v12, 0xffff, v12
1289; CI-NEXT:    v_or_b32_e32 v10, v11, v10
1290; CI-NEXT:    v_and_b32_e32 v8, 0xffff, v8
1291; CI-NEXT:    v_or_b32_e32 v6, v7, v6
1292; CI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1293; CI-NEXT:    v_or_b32_e32 v2, v3, v2
1294; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1295; CI-NEXT:    s_mov_b32 s4, 0
1296; CI-NEXT:    v_or_b32_e32 v12, v12, v14
1297; CI-NEXT:    v_or_b32_e32 v11, v8, v10
1298; CI-NEXT:    v_or_b32_e32 v10, v4, v6
1299; CI-NEXT:    v_or_b32_e32 v9, v0, v2
1300; CI-NEXT:    s_mov_b32 s7, 0xf000
1301; CI-NEXT:    s_mov_b32 s6, -1
1302; CI-NEXT:    s_mov_b32 s5, s4
1303; CI-NEXT:    buffer_store_dwordx4 v[9:12], off, s[4:7], 0
1304; CI-NEXT:    s_waitcnt vmcnt(0)
1305; CI-NEXT:    s_setpc_b64 s[30:31]
1306;
1307; GFX89-LABEL: void_func_v16i8:
1308; GFX89:       ; %bb.0:
1309; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1310; GFX89-NEXT:    v_lshlrev_b16_e32 v13, 8, v13
1311; GFX89-NEXT:    v_lshlrev_b16_e32 v9, 8, v9
1312; GFX89-NEXT:    v_lshlrev_b16_e32 v5, 8, v5
1313; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
1314; GFX89-NEXT:    v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1315; GFX89-NEXT:    v_lshlrev_b16_e32 v13, 8, v15
1316; GFX89-NEXT:    v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1317; GFX89-NEXT:    v_lshlrev_b16_e32 v9, 8, v11
1318; GFX89-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1319; GFX89-NEXT:    v_lshlrev_b16_e32 v5, 8, v7
1320; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1321; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v3
1322; GFX89-NEXT:    v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1323; GFX89-NEXT:    v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1324; GFX89-NEXT:    v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1325; GFX89-NEXT:    v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1326; GFX89-NEXT:    s_mov_b32 s4, 0
1327; GFX89-NEXT:    v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1328; GFX89-NEXT:    v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1329; GFX89-NEXT:    v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1330; GFX89-NEXT:    v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1331; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1332; GFX89-NEXT:    s_mov_b32 s6, -1
1333; GFX89-NEXT:    s_mov_b32 s5, s4
1334; GFX89-NEXT:    buffer_store_dwordx4 v[9:12], off, s[4:7], 0
1335; GFX89-NEXT:    s_waitcnt vmcnt(0)
1336; GFX89-NEXT:    s_setpc_b64 s[30:31]
1337;
1338; GFX11-LABEL: void_func_v16i8:
1339; GFX11:       ; %bb.0:
1340; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1341; GFX11-NEXT:    v_lshlrev_b16 v13, 8, v13
1342; GFX11-NEXT:    v_and_b32_e32 v12, 0xff, v12
1343; GFX11-NEXT:    v_lshlrev_b16 v15, 8, v15
1344; GFX11-NEXT:    v_and_b32_e32 v14, 0xff, v14
1345; GFX11-NEXT:    v_lshlrev_b16 v9, 8, v9
1346; GFX11-NEXT:    v_and_b32_e32 v8, 0xff, v8
1347; GFX11-NEXT:    v_lshlrev_b16 v11, 8, v11
1348; GFX11-NEXT:    v_and_b32_e32 v10, 0xff, v10
1349; GFX11-NEXT:    v_lshlrev_b16 v5, 8, v5
1350; GFX11-NEXT:    v_and_b32_e32 v4, 0xff, v4
1351; GFX11-NEXT:    v_lshlrev_b16 v7, 8, v7
1352; GFX11-NEXT:    v_and_b32_e32 v6, 0xff, v6
1353; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
1354; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
1355; GFX11-NEXT:    v_lshlrev_b16 v3, 8, v3
1356; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v2
1357; GFX11-NEXT:    v_or_b32_e32 v12, v12, v13
1358; GFX11-NEXT:    v_or_b32_e32 v13, v14, v15
1359; GFX11-NEXT:    v_or_b32_e32 v8, v8, v9
1360; GFX11-NEXT:    v_or_b32_e32 v10, v10, v11
1361; GFX11-NEXT:    v_or_b32_e32 v4, v4, v5
1362; GFX11-NEXT:    v_or_b32_e32 v5, v6, v7
1363; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1364; GFX11-NEXT:    v_or_b32_e32 v1, v2, v3
1365; GFX11-NEXT:    v_and_b32_e32 v9, 0xffff, v12
1366; GFX11-NEXT:    v_lshlrev_b32_e32 v12, 16, v13
1367; GFX11-NEXT:    v_and_b32_e32 v8, 0xffff, v8
1368; GFX11-NEXT:    v_lshlrev_b32_e32 v2, 16, v10
1369; GFX11-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1370; GFX11-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1371; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1372; GFX11-NEXT:    v_lshlrev_b32_e32 v6, 16, v1
1373; GFX11-NEXT:    v_or_b32_e32 v3, v9, v12
1374; GFX11-NEXT:    v_or_b32_e32 v2, v8, v2
1375; GFX11-NEXT:    v_or_b32_e32 v1, v4, v5
1376; GFX11-NEXT:    s_mov_b32 s0, 0
1377; GFX11-NEXT:    v_or_b32_e32 v0, v0, v6
1378; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1379; GFX11-NEXT:    s_mov_b32 s2, -1
1380; GFX11-NEXT:    s_mov_b32 s1, s0
1381; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1382; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The destination is a null addrspace(1) pointer, so the expected code also
; zeroes the two low descriptor words before the store.
1383  store <16 x i8> %arg0, ptr addrspace(1) null
1384  ret void
1385}
1386
1387define void @void_func_v32i8(<32 x i8> %arg0) #0 {
1388; CI-LABEL: void_func_v32i8:
1389; CI:       ; %bb.0:
1390; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1391; CI-NEXT:    v_lshlrev_b32_e32 v5, 8, v5
1392; CI-NEXT:    v_and_b32_e32 v4, 0xff, v4
1393; CI-NEXT:    v_or_b32_e32 v4, v4, v5
1394; CI-NEXT:    buffer_load_dword v5, off, s[0:3], s32
1395; CI-NEXT:    v_lshlrev_b32_e32 v9, 8, v9
1396; CI-NEXT:    v_and_b32_e32 v8, 0xff, v8
1397; CI-NEXT:    v_lshlrev_b32_e32 v13, 8, v13
1398; CI-NEXT:    v_and_b32_e32 v12, 0xff, v12
1399; CI-NEXT:    v_or_b32_e32 v8, v8, v9
1400; CI-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
1401; CI-NEXT:    v_and_b32_e32 v0, 0xff, v0
1402; CI-NEXT:    v_and_b32_e32 v9, 0xff, v14
1403; CI-NEXT:    v_and_b32_e32 v10, 0xff, v10
1404; CI-NEXT:    v_and_b32_e32 v6, 0xff, v6
1405; CI-NEXT:    v_or_b32_e32 v12, v12, v13
1406; CI-NEXT:    v_or_b32_e32 v0, v0, v1
1407; CI-NEXT:    v_lshlrev_b32_e32 v1, 24, v15
1408; CI-NEXT:    v_lshlrev_b32_e32 v11, 24, v11
1409; CI-NEXT:    v_lshlrev_b32_e32 v7, 24, v7
1410; CI-NEXT:    v_and_b32_e32 v2, 0xff, v2
1411; CI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
1412; CI-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
1413; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
1414; CI-NEXT:    v_lshlrev_b32_e32 v3, 24, v3
1415; CI-NEXT:    v_lshlrev_b32_e32 v13, 8, v29
1416; CI-NEXT:    v_and_b32_e32 v14, 0xff, v28
1417; CI-NEXT:    v_and_b32_e32 v26, 0xff, v26
1418; CI-NEXT:    v_lshlrev_b32_e32 v25, 8, v25
1419; CI-NEXT:    v_and_b32_e32 v24, 0xff, v24
1420; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
1421; CI-NEXT:    v_or_b32_e32 v1, v1, v9
1422; CI-NEXT:    v_or_b32_e32 v9, v11, v10
1423; CI-NEXT:    v_and_b32_e32 v10, 0xffff, v12
1424; CI-NEXT:    v_or_b32_e32 v6, v7, v6
1425; CI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1426; CI-NEXT:    v_lshlrev_b32_e32 v15, 24, v27
1427; CI-NEXT:    v_and_b32_e32 v27, 0xff, v30
1428; CI-NEXT:    v_or_b32_e32 v13, v14, v13
1429; CI-NEXT:    v_lshlrev_b32_e32 v14, 16, v26
1430; CI-NEXT:    v_or_b32_e32 v7, v3, v2
1431; CI-NEXT:    v_or_b32_e32 v3, v10, v1
1432; CI-NEXT:    v_or_b32_e32 v1, v4, v6
1433; CI-NEXT:    v_lshlrev_b32_e32 v26, 16, v27
1434; CI-NEXT:    v_or_b32_e32 v11, v15, v14
1435; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1436; CI-NEXT:    v_and_b32_e32 v12, 0xffff, v13
1437; CI-NEXT:    v_and_b32_e32 v8, 0xffff, v8
1438; CI-NEXT:    v_or_b32_e32 v0, v0, v7
1439; CI-NEXT:    v_or_b32_e32 v2, v8, v9
1440; CI-NEXT:    v_and_b32_e32 v8, 0xff, v20
1441; CI-NEXT:    v_and_b32_e32 v9, 0xff, v16
1442; CI-NEXT:    s_mov_b32 s5, 0
1443; CI-NEXT:    s_mov_b32 s4, 16
1444; CI-NEXT:    s_mov_b32 s7, 0xf000
1445; CI-NEXT:    s_mov_b32 s6, -1
1446; CI-NEXT:    s_waitcnt vmcnt(0)
1447; CI-NEXT:    v_lshlrev_b32_e32 v4, 24, v5
1448; CI-NEXT:    v_or_b32_e32 v5, v24, v25
1449; CI-NEXT:    v_and_b32_e32 v5, 0xffff, v5
1450; CI-NEXT:    v_or_b32_e32 v4, v4, v26
1451; CI-NEXT:    v_or_b32_e32 v6, v5, v11
1452; CI-NEXT:    v_and_b32_e32 v5, 0xff, v22
1453; CI-NEXT:    v_or_b32_e32 v7, v12, v4
1454; CI-NEXT:    v_lshlrev_b32_e32 v4, 24, v23
1455; CI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1456; CI-NEXT:    v_or_b32_e32 v4, v4, v5
1457; CI-NEXT:    v_lshlrev_b32_e32 v5, 8, v21
1458; CI-NEXT:    v_or_b32_e32 v5, v8, v5
1459; CI-NEXT:    v_and_b32_e32 v5, 0xffff, v5
1460; CI-NEXT:    v_and_b32_e32 v8, 0xff, v18
1461; CI-NEXT:    v_or_b32_e32 v5, v5, v4
1462; CI-NEXT:    v_lshlrev_b32_e32 v4, 24, v19
1463; CI-NEXT:    v_lshlrev_b32_e32 v8, 16, v8
1464; CI-NEXT:    v_or_b32_e32 v4, v4, v8
1465; CI-NEXT:    v_lshlrev_b32_e32 v8, 8, v17
1466; CI-NEXT:    v_or_b32_e32 v8, v9, v8
1467; CI-NEXT:    v_and_b32_e32 v8, 0xffff, v8
1468; CI-NEXT:    v_or_b32_e32 v4, v8, v4
1469; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1470; CI-NEXT:    s_mov_b32 s4, s5
1471; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1472; CI-NEXT:    s_waitcnt vmcnt(0)
1473; CI-NEXT:    s_setpc_b64 s[30:31]
1474;
1475; GFX89-LABEL: void_func_v32i8:
1476; GFX89:       ; %bb.0:
1477; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1478; GFX89-NEXT:    v_lshlrev_b16_e32 v9, 8, v9
1479; GFX89-NEXT:    v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1480; GFX89-NEXT:    v_lshlrev_b16_e32 v9, 8, v11
1481; GFX89-NEXT:    v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1482; GFX89-NEXT:    buffer_load_ubyte v10, off, s[0:3], s32
1483; GFX89-NEXT:    v_lshlrev_b16_e32 v13, 8, v13
1484; GFX89-NEXT:    v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1485; GFX89-NEXT:    v_lshlrev_b16_e32 v13, 8, v15
1486; GFX89-NEXT:    v_lshlrev_b16_e32 v5, 8, v5
1487; GFX89-NEXT:    v_lshlrev_b16_e32 v7, 8, v7
1488; GFX89-NEXT:    v_lshlrev_b16_e32 v3, 8, v3
1489; GFX89-NEXT:    v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1490; GFX89-NEXT:    v_lshlrev_b16_e32 v1, 8, v1
1491; GFX89-NEXT:    v_lshlrev_b16_e32 v11, 8, v29
1492; GFX89-NEXT:    v_lshlrev_b16_e32 v14, 8, v25
1493; GFX89-NEXT:    v_lshlrev_b16_e32 v15, 8, v27
1494; GFX89-NEXT:    v_lshlrev_b16_e32 v21, 8, v21
1495; GFX89-NEXT:    v_lshlrev_b16_e32 v23, 8, v23
1496; GFX89-NEXT:    v_lshlrev_b16_e32 v17, 8, v17
1497; GFX89-NEXT:    v_lshlrev_b16_e32 v19, 8, v19
1498; GFX89-NEXT:    v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1499; GFX89-NEXT:    v_or_b32_sdwa v5, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1500; GFX89-NEXT:    v_or_b32_sdwa v6, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1501; GFX89-NEXT:    v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1502; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1503; GFX89-NEXT:    v_or_b32_sdwa v7, v28, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1504; GFX89-NEXT:    v_or_b32_sdwa v11, v24, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1505; GFX89-NEXT:    v_or_b32_sdwa v14, v26, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1506; GFX89-NEXT:    v_or_b32_sdwa v15, v20, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1507; GFX89-NEXT:    v_or_b32_sdwa v20, v22, v23 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1508; GFX89-NEXT:    v_or_b32_sdwa v16, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1509; GFX89-NEXT:    v_or_b32_sdwa v17, v18, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1510; GFX89-NEXT:    s_mov_b32 s5, 0
1511; GFX89-NEXT:    s_mov_b32 s4, 16
1512; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1513; GFX89-NEXT:    s_mov_b32 s6, -1
1514; GFX89-NEXT:    v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1515; GFX89-NEXT:    v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1516; GFX89-NEXT:    v_or_b32_sdwa v6, v11, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1517; GFX89-NEXT:    v_or_b32_sdwa v5, v15, v20 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1518; GFX89-NEXT:    v_or_b32_sdwa v4, v16, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1519; GFX89-NEXT:    v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1520; GFX89-NEXT:    s_waitcnt vmcnt(0)
1521; GFX89-NEXT:    v_lshlrev_b16_e32 v8, 8, v10
1522; GFX89-NEXT:    v_or_b32_sdwa v8, v30, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
1523; GFX89-NEXT:    v_or_b32_sdwa v7, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1524; GFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1525; GFX89-NEXT:    s_mov_b32 s4, s5
1526; GFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1527; GFX89-NEXT:    s_waitcnt vmcnt(0)
1528; GFX89-NEXT:    s_setpc_b64 s[30:31]
1529;
1530; GFX11-LABEL: void_func_v32i8:
1531; GFX11:       ; %bb.0:
1532; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1533; GFX11-NEXT:    scratch_load_u8 v31, off, s32
1534; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v1
1535; GFX11-NEXT:    v_and_b32_e32 v0, 0xff, v0
1536; GFX11-NEXT:    v_lshlrev_b16 v3, 8, v3
1537; GFX11-NEXT:    v_and_b32_e32 v2, 0xff, v2
1538; GFX11-NEXT:    v_lshlrev_b16 v9, 8, v9
1539; GFX11-NEXT:    v_and_b32_e32 v8, 0xff, v8
1540; GFX11-NEXT:    v_lshlrev_b16 v11, 8, v11
1541; GFX11-NEXT:    v_and_b32_e32 v10, 0xff, v10
1542; GFX11-NEXT:    v_lshlrev_b16 v17, 8, v17
1543; GFX11-NEXT:    v_and_b32_e32 v16, 0xff, v16
1544; GFX11-NEXT:    v_or_b32_e32 v0, v0, v1
1545; GFX11-NEXT:    v_or_b32_e32 v1, v2, v3
1546; GFX11-NEXT:    v_lshlrev_b16 v13, 8, v13
1547; GFX11-NEXT:    v_and_b32_e32 v12, 0xff, v12
1548; GFX11-NEXT:    v_lshlrev_b16 v15, 8, v15
1549; GFX11-NEXT:    v_and_b32_e32 v14, 0xff, v14
1550; GFX11-NEXT:    v_lshlrev_b16 v5, 8, v5
1551; GFX11-NEXT:    v_and_b32_e32 v4, 0xff, v4
1552; GFX11-NEXT:    v_lshlrev_b16 v7, 8, v7
1553; GFX11-NEXT:    v_and_b32_e32 v6, 0xff, v6
1554; GFX11-NEXT:    v_lshlrev_b16 v29, 8, v29
1555; GFX11-NEXT:    v_and_b32_e32 v28, 0xff, v28
1556; GFX11-NEXT:    v_and_b32_e32 v30, 0xff, v30
1557; GFX11-NEXT:    v_lshlrev_b16 v25, 8, v25
1558; GFX11-NEXT:    v_and_b32_e32 v24, 0xff, v24
1559; GFX11-NEXT:    v_lshlrev_b16 v27, 8, v27
1560; GFX11-NEXT:    v_and_b32_e32 v26, 0xff, v26
1561; GFX11-NEXT:    v_lshlrev_b16 v21, 8, v21
1562; GFX11-NEXT:    v_and_b32_e32 v20, 0xff, v20
1563; GFX11-NEXT:    v_lshlrev_b16 v23, 8, v23
1564; GFX11-NEXT:    v_and_b32_e32 v22, 0xff, v22
1565; GFX11-NEXT:    v_lshlrev_b16 v19, 8, v19
1566; GFX11-NEXT:    v_and_b32_e32 v18, 0xff, v18
1567; GFX11-NEXT:    v_or_b32_e32 v8, v8, v9
1568; GFX11-NEXT:    v_or_b32_e32 v9, v10, v11
1569; GFX11-NEXT:    v_or_b32_e32 v11, v16, v17
1570; GFX11-NEXT:    v_lshlrev_b32_e32 v17, 16, v1
1571; GFX11-NEXT:    v_or_b32_e32 v12, v12, v13
1572; GFX11-NEXT:    v_or_b32_e32 v13, v14, v15
1573; GFX11-NEXT:    v_or_b32_e32 v4, v4, v5
1574; GFX11-NEXT:    v_or_b32_e32 v5, v6, v7
1575; GFX11-NEXT:    v_or_b32_e32 v2, v28, v29
1576; GFX11-NEXT:    v_or_b32_e32 v3, v24, v25
1577; GFX11-NEXT:    v_or_b32_e32 v6, v26, v27
1578; GFX11-NEXT:    v_or_b32_e32 v7, v20, v21
1579; GFX11-NEXT:    v_or_b32_e32 v10, v22, v23
1580; GFX11-NEXT:    v_or_b32_e32 v14, v18, v19
1581; GFX11-NEXT:    v_and_b32_e32 v15, 0xffff, v4
1582; GFX11-NEXT:    v_lshlrev_b32_e32 v16, 16, v5
1583; GFX11-NEXT:    v_and_b32_e32 v18, 0xffff, v2
1584; GFX11-NEXT:    v_and_b32_e32 v4, 0xffff, v3
1585; GFX11-NEXT:    v_lshlrev_b32_e32 v5, 16, v6
1586; GFX11-NEXT:    v_and_b32_e32 v7, 0xffff, v7
1587; GFX11-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
1588; GFX11-NEXT:    v_and_b32_e32 v11, 0xffff, v11
1589; GFX11-NEXT:    v_lshlrev_b32_e32 v14, 16, v14
1590; GFX11-NEXT:    v_and_b32_e32 v12, 0xffff, v12
1591; GFX11-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
1592; GFX11-NEXT:    v_and_b32_e32 v8, 0xffff, v8
1593; GFX11-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
1594; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1595; GFX11-NEXT:    v_or_b32_e32 v6, v4, v5
1596; GFX11-NEXT:    v_or_b32_e32 v5, v7, v10
1597; GFX11-NEXT:    v_or_b32_e32 v4, v11, v14
1598; GFX11-NEXT:    v_or_b32_e32 v3, v12, v13
1599; GFX11-NEXT:    v_or_b32_e32 v2, v8, v9
1600; GFX11-NEXT:    v_or_b32_e32 v0, v0, v17
1601; GFX11-NEXT:    s_mov_b32 s1, 0
1602; GFX11-NEXT:    s_mov_b32 s0, 16
1603; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1604; GFX11-NEXT:    s_mov_b32 s2, -1
1605; GFX11-NEXT:    s_waitcnt vmcnt(0)
1606; GFX11-NEXT:    v_lshlrev_b16 v1, 8, v31
1607; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1608; GFX11-NEXT:    v_or_b32_e32 v1, v30, v1
1609; GFX11-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1610; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1611; GFX11-NEXT:    v_or_b32_e32 v7, v18, v1
1612; GFX11-NEXT:    v_or_b32_e32 v1, v15, v16
1613; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
1614; GFX11-NEXT:    s_mov_b32 s0, s1
1615; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1616; GFX11-NEXT:    s_setpc_b64 s[30:31]
1617  store <32 x i8> %arg0, ptr addrspace(1) null
1618  ret void
1619}
1620
; Stores a <3 x i16> argument to an undef addrspace(1) pointer.
; The CI checks read v0, v1 and v2 separately: v0/v1 are packed into one dword
; (shift/and/or) and v2 is stored as a short. The GFX89 and GFX11 checks store
; v0 (dword) and v1 (short) directly, with no repacking.
1621define void @void_func_v3i16(<3 x i16> %arg0) #0 {
1622; CI-LABEL: void_func_v3i16:
1623; CI:       ; %bb.0:
1624; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1626; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1627; CI-NEXT:    s_mov_b32 s7, 0xf000
1628; CI-NEXT:    s_mov_b32 s6, -1
1629; CI-NEXT:    v_or_b32_e32 v0, v0, v1
1630; CI-NEXT:    buffer_store_short v2, off, s[4:7], 0
1631; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1632; CI-NEXT:    s_waitcnt vmcnt(0)
1633; CI-NEXT:    s_setpc_b64 s[30:31]
1634;
1635; GFX89-LABEL: void_func_v3i16:
1636; GFX89:       ; %bb.0:
1637; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1639; GFX89-NEXT:    s_mov_b32 s6, -1
1640; GFX89-NEXT:    buffer_store_short v1, off, s[4:7], 0
1641; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
1642; GFX89-NEXT:    s_waitcnt vmcnt(0)
1643; GFX89-NEXT:    s_setpc_b64 s[30:31]
1644;
1645; GFX11-LABEL: void_func_v3i16:
1646; GFX11:       ; %bb.0:
1647; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1648; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1649; GFX11-NEXT:    s_mov_b32 s2, -1
1650; GFX11-NEXT:    s_clause 0x1
1651; GFX11-NEXT:    buffer_store_b16 v1, off, s[0:3], 0
1652; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
1653; GFX11-NEXT:    s_setpc_b64 s[30:31]
1654  store <3 x i16> %arg0, ptr addrspace(1) undef
1655  ret void
1656}
1657
; Stores a <4 x i16> argument to an undef addrspace(1) pointer.
; The CI checks pack v0..v3 pairwise into v[1:2] (shift/and/or) before a single
; dwordx2 store; the GFX89 and GFX11 checks store v[0:1] directly.
1658define void @void_func_v4i16(<4 x i16> %arg0) #0 {
1659; CI-LABEL: void_func_v4i16:
1660; CI:       ; %bb.0:
1661; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1662; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1663; CI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1664; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1665; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1666; CI-NEXT:    v_or_b32_e32 v2, v2, v3
1667; CI-NEXT:    v_or_b32_e32 v1, v0, v1
1668; CI-NEXT:    s_mov_b32 s7, 0xf000
1669; CI-NEXT:    s_mov_b32 s6, -1
1670; CI-NEXT:    buffer_store_dwordx2 v[1:2], off, s[4:7], 0
1671; CI-NEXT:    s_waitcnt vmcnt(0)
1672; CI-NEXT:    s_setpc_b64 s[30:31]
1673;
1674; GFX89-LABEL: void_func_v4i16:
1675; GFX89:       ; %bb.0:
1676; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1677; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1678; GFX89-NEXT:    s_mov_b32 s6, -1
1679; GFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1680; GFX89-NEXT:    s_waitcnt vmcnt(0)
1681; GFX89-NEXT:    s_setpc_b64 s[30:31]
1682;
1683; GFX11-LABEL: void_func_v4i16:
1684; GFX11:       ; %bb.0:
1685; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1686; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1687; GFX11-NEXT:    s_mov_b32 s2, -1
1688; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
1689; GFX11-NEXT:    s_setpc_b64 s[30:31]
1690  store <4 x i16> %arg0, ptr addrspace(1) undef
1691  ret void
1692}
1693
; Stores a <5 x i16> argument to an undef addrspace(1) pointer.
; The CI checks pack v0..v3 pairwise into v[1:2] and store v4 as a separate
; short; the GFX89 and GFX11 checks store v[0:1] plus v2 (short) directly.
1694define void @void_func_v5i16(<5 x i16> %arg0) #0 {
1695; CI-LABEL: void_func_v5i16:
1696; CI:       ; %bb.0:
1697; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1698; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1699; CI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1700; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1701; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1702; CI-NEXT:    s_mov_b32 s7, 0xf000
1703; CI-NEXT:    s_mov_b32 s6, -1
1704; CI-NEXT:    v_or_b32_e32 v2, v2, v3
1705; CI-NEXT:    v_or_b32_e32 v1, v0, v1
1706; CI-NEXT:    buffer_store_short v4, off, s[4:7], 0
1707; CI-NEXT:    buffer_store_dwordx2 v[1:2], off, s[4:7], 0
1708; CI-NEXT:    s_waitcnt vmcnt(0)
1709; CI-NEXT:    s_setpc_b64 s[30:31]
1710;
1711; GFX89-LABEL: void_func_v5i16:
1712; GFX89:       ; %bb.0:
1713; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1714; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1715; GFX89-NEXT:    s_mov_b32 s6, -1
1716; GFX89-NEXT:    buffer_store_short v2, off, s[4:7], 0
1717; GFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1718; GFX89-NEXT:    s_waitcnt vmcnt(0)
1719; GFX89-NEXT:    s_setpc_b64 s[30:31]
1720;
1721; GFX11-LABEL: void_func_v5i16:
1722; GFX11:       ; %bb.0:
1723; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1724; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1725; GFX11-NEXT:    s_mov_b32 s2, -1
1726; GFX11-NEXT:    s_clause 0x1
1727; GFX11-NEXT:    buffer_store_b16 v2, off, s[0:3], 0
1728; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
1729; GFX11-NEXT:    s_setpc_b64 s[30:31]
1730  store <5 x i16> %arg0, ptr addrspace(1) undef
1731  ret void
1732}
1733
; Stores a <8 x i16> argument to an undef addrspace(1) pointer.
; The CI checks pack v0..v7 pairwise into v[3:6] (shift/and/or) before a single
; dwordx4 store; the GFX89 and GFX11 checks store v[0:3] directly.
1734define void @void_func_v8i16(<8 x i16> %arg0) #0 {
1735; CI-LABEL: void_func_v8i16:
1736; CI:       ; %bb.0:
1737; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1738; CI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
1739; CI-NEXT:    v_and_b32_e32 v6, 0xffff, v6
1740; CI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1741; CI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1742; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1743; CI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1744; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1745; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1746; CI-NEXT:    v_or_b32_e32 v6, v6, v7
1747; CI-NEXT:    v_or_b32_e32 v5, v4, v5
1748; CI-NEXT:    v_or_b32_e32 v4, v2, v3
1749; CI-NEXT:    v_or_b32_e32 v3, v0, v1
1750; CI-NEXT:    s_mov_b32 s7, 0xf000
1751; CI-NEXT:    s_mov_b32 s6, -1
1752; CI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[4:7], 0
1753; CI-NEXT:    s_waitcnt vmcnt(0)
1754; CI-NEXT:    s_setpc_b64 s[30:31]
1755;
1756; GFX89-LABEL: void_func_v8i16:
1757; GFX89:       ; %bb.0:
1758; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1760; GFX89-NEXT:    s_mov_b32 s6, -1
1761; GFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1762; GFX89-NEXT:    s_waitcnt vmcnt(0)
1763; GFX89-NEXT:    s_setpc_b64 s[30:31]
1764;
1765; GFX11-LABEL: void_func_v8i16:
1766; GFX11:       ; %bb.0:
1767; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1768; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1769; GFX11-NEXT:    s_mov_b32 s2, -1
1770; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1771; GFX11-NEXT:    s_setpc_b64 s[30:31]
1772  store <8 x i16> %arg0, ptr addrspace(1) undef
1773  ret void
1774}
1775
; Stores a <16 x i16> argument to an undef addrspace(1) pointer.
; The CI checks pack v0..v15 pairwise into v[3:6] and v[11:14] (shift/and/or)
; and emit two dwordx4 stores; the GFX89 and GFX11 checks store v[4:7] and
; v[0:3] directly.
1776define void @void_func_v16i16(<16 x i16> %arg0) #0 {
1777; CI-LABEL: void_func_v16i16:
1778; CI:       ; %bb.0:
1779; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1780; CI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
1781; CI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
1782; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
1783; CI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
1784; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1785; CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
1786; CI-NEXT:    v_or_b32_e32 v5, v4, v5
1787; CI-NEXT:    v_or_b32_e32 v4, v2, v3
1788; CI-NEXT:    v_or_b32_e32 v3, v0, v1
1789; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v15
1790; CI-NEXT:    v_and_b32_e32 v1, 0xffff, v14
1791; CI-NEXT:    v_or_b32_e32 v14, v1, v0
1792; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v13
1793; CI-NEXT:    v_and_b32_e32 v1, 0xffff, v12
1794; CI-NEXT:    v_or_b32_e32 v13, v1, v0
1795; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v11
1796; CI-NEXT:    v_and_b32_e32 v1, 0xffff, v10
1797; CI-NEXT:    v_or_b32_e32 v12, v1, v0
1798; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v9
1799; CI-NEXT:    v_and_b32_e32 v1, 0xffff, v8
1800; CI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
1801; CI-NEXT:    v_and_b32_e32 v6, 0xffff, v6
1802; CI-NEXT:    v_or_b32_e32 v11, v1, v0
1803; CI-NEXT:    s_mov_b32 s7, 0xf000
1804; CI-NEXT:    s_mov_b32 s6, -1
1805; CI-NEXT:    v_or_b32_e32 v6, v6, v7
1806; CI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[4:7], 0
1807; CI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[4:7], 0
1808; CI-NEXT:    s_waitcnt vmcnt(0)
1809; CI-NEXT:    s_setpc_b64 s[30:31]
1810;
1811; GFX89-LABEL: void_func_v16i16:
1812; GFX89:       ; %bb.0:
1813; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1814; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1815; GFX89-NEXT:    s_mov_b32 s6, -1
1816; GFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1817; GFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1818; GFX89-NEXT:    s_waitcnt vmcnt(0)
1819; GFX89-NEXT:    s_setpc_b64 s[30:31]
1820;
1821; GFX11-LABEL: void_func_v16i16:
1822; GFX11:       ; %bb.0:
1823; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1824; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1825; GFX11-NEXT:    s_mov_b32 s2, -1
1826; GFX11-NEXT:    s_clause 0x1
1827; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
1828; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1829; GFX11-NEXT:    s_setpc_b64 s[30:31]
1830  store <16 x i16> %arg0, ptr addrspace(1) undef
1831  ret void
1832}
1833
; Extracts both elements of a <2 x i24> argument, adds them as i24, and stores
; the i24 sum to an undef addrspace(1) pointer.
; Every target's checks add v0 and v1, then store the result as a short plus a
; byte obtained by shifting the sum right by 16. CI, VI, GFX9 and GFX11 need
; separate check prefixes here because the expected add instruction differs
; (v_add_i32 / v_add_u32 with vcc / v_add_u32 / v_add_nc_u32).
1834define void @void_func_v2i24(<2 x i24> %arg0) #0 {
1835; CI-LABEL: void_func_v2i24:
1836; CI:       ; %bb.0:
1837; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1838; CI-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
1839; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1840; CI-NEXT:    s_mov_b32 s7, 0xf000
1841; CI-NEXT:    s_mov_b32 s6, -1
1842; CI-NEXT:    buffer_store_byte v1, off, s[4:7], 0
1843; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1844; CI-NEXT:    s_waitcnt vmcnt(0)
1845; CI-NEXT:    s_setpc_b64 s[30:31]
1846;
1847; VI-LABEL: void_func_v2i24:
1848; VI:       ; %bb.0:
1849; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1850; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
1851; VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1852; VI-NEXT:    s_mov_b32 s7, 0xf000
1853; VI-NEXT:    s_mov_b32 s6, -1
1854; VI-NEXT:    buffer_store_byte v1, off, s[4:7], 0
1855; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
1856; VI-NEXT:    s_waitcnt vmcnt(0)
1857; VI-NEXT:    s_setpc_b64 s[30:31]
1858;
1859; GFX9-LABEL: void_func_v2i24:
1860; GFX9:       ; %bb.0:
1861; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1862; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
1863; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1864; GFX9-NEXT:    s_mov_b32 s7, 0xf000
1865; GFX9-NEXT:    s_mov_b32 s6, -1
1866; GFX9-NEXT:    buffer_store_byte v1, off, s[4:7], 0
1867; GFX9-NEXT:    buffer_store_short v0, off, s[4:7], 0
1868; GFX9-NEXT:    s_waitcnt vmcnt(0)
1869; GFX9-NEXT:    s_setpc_b64 s[30:31]
1870;
1871; GFX11-LABEL: void_func_v2i24:
1872; GFX11:       ; %bb.0:
1873; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1874; GFX11-NEXT:    v_add_nc_u32_e32 v0, v0, v1
1875; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1876; GFX11-NEXT:    s_mov_b32 s2, -1
1877; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1878; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1879; GFX11-NEXT:    s_clause 0x1
1880; GFX11-NEXT:    buffer_store_b8 v1, off, s[0:3], 0
1881; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
1882; GFX11-NEXT:    s_setpc_b64 s[30:31]
1883  %elt0 = extractelement <2 x i24> %arg0, i32 0
1884  %elt1 = extractelement <2 x i24> %arg0, i32 1
1885  %add = add i24 %elt0, %elt1
1886  store i24 %add, ptr addrspace(1) undef
1887  ret void
1888}
1889
; Stores a <2 x float> argument to an undef addrspace(1) pointer.
; All targets' checks store v[0:1] with a single 64-bit buffer store.
1890define void @void_func_v2f32(<2 x float> %arg0) #0 {
1891; CIGFX89-LABEL: void_func_v2f32:
1892; CIGFX89:       ; %bb.0:
1893; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1894; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
1895; CIGFX89-NEXT:    s_mov_b32 s6, -1
1896; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
1897; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
1898; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
1899;
1900; GFX11-LABEL: void_func_v2f32:
1901; GFX11:       ; %bb.0:
1902; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1903; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1904; GFX11-NEXT:    s_mov_b32 s2, -1
1905; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
1906; GFX11-NEXT:    s_setpc_b64 s[30:31]
1907  store <2 x float> %arg0, ptr addrspace(1) undef
1908  ret void
1909}
1910
; Stores a <3 x float> argument to an undef addrspace(1) pointer.
; All targets' checks store v[0:2] with a single 96-bit buffer store
; (buffer_store_dwordx3 / buffer_store_b96).
1911define void @void_func_v3f32(<3 x float> %arg0) #0 {
1912; CIGFX89-LABEL: void_func_v3f32:
1913; CIGFX89:       ; %bb.0:
1914; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1915; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
1916; CIGFX89-NEXT:    s_mov_b32 s6, -1
1917; CIGFX89-NEXT:    buffer_store_dwordx3 v[0:2], off, s[4:7], 0
1918; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
1919; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
1920;
1921; GFX11-LABEL: void_func_v3f32:
1922; GFX11:       ; %bb.0:
1923; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1924; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1925; GFX11-NEXT:    s_mov_b32 s2, -1
1926; GFX11-NEXT:    buffer_store_b96 v[0:2], off, s[0:3], 0
1927; GFX11-NEXT:    s_setpc_b64 s[30:31]
1928  store <3 x float> %arg0, ptr addrspace(1) undef
1929  ret void
1930}
1931
; Stores a <4 x float> argument to an undef addrspace(1) pointer.
; All targets' checks store v[0:3] with a single 128-bit buffer store.
1932define void @void_func_v4f32(<4 x float> %arg0) #0 {
1933; CIGFX89-LABEL: void_func_v4f32:
1934; CIGFX89:       ; %bb.0:
1935; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1936; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
1937; CIGFX89-NEXT:    s_mov_b32 s6, -1
1938; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1939; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
1940; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
1941;
1942; GFX11-LABEL: void_func_v4f32:
1943; GFX11:       ; %bb.0:
1944; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1945; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1946; GFX11-NEXT:    s_mov_b32 s2, -1
1947; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1948; GFX11-NEXT:    s_setpc_b64 s[30:31]
1949  store <4 x float> %arg0, ptr addrspace(1) undef
1950  ret void
1951}
1952
; Stores a <8 x float> argument to an undef addrspace(1) pointer.
; All targets' checks emit two 128-bit stores, v[4:7] then v[0:3]; the GFX11
; checks additionally expect an s_clause 0x1 in front of the pair.
1953define void @void_func_v8f32(<8 x float> %arg0) #0 {
1954; CIGFX89-LABEL: void_func_v8f32:
1955; CIGFX89:       ; %bb.0:
1956; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1957; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
1958; CIGFX89-NEXT:    s_mov_b32 s6, -1
1959; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1960; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1961; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
1962; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
1963;
1964; GFX11-LABEL: void_func_v8f32:
1965; GFX11:       ; %bb.0:
1966; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1967; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1968; GFX11-NEXT:    s_mov_b32 s2, -1
1969; GFX11-NEXT:    s_clause 0x1
1970; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
1971; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
1972; GFX11-NEXT:    s_setpc_b64 s[30:31]
1973  store <8 x float> %arg0, ptr addrspace(1) undef
1974  ret void
1975}
1976
; Stores a <16 x float> argument to an undef addrspace(1) pointer.
; All targets' checks emit four 128-bit stores covering v[12:15] down to
; v[0:3]; the GFX11 checks additionally expect an s_clause 0x3 in front of them.
1977define void @void_func_v16f32(<16 x float> %arg0) #0 {
1978; CIGFX89-LABEL: void_func_v16f32:
1979; CIGFX89:       ; %bb.0:
1980; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1981; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
1982; CIGFX89-NEXT:    s_mov_b32 s6, -1
1983; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
1984; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
1985; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
1986; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
1987; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
1988; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
1989;
1990; GFX11-LABEL: void_func_v16f32:
1991; GFX11:       ; %bb.0:
1992; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1993; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1994; GFX11-NEXT:    s_mov_b32 s2, -1
1995; GFX11-NEXT:    s_clause 0x3
1996; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
1997; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
1998; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
1999; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
2000; GFX11-NEXT:    s_setpc_b64 s[30:31]
2001  store <16 x float> %arg0, ptr addrspace(1) undef
2002  ret void
2003}
2004
; Stores a <2 x double> argument to an undef addrspace(1) pointer.
; All targets' checks store v[0:3] with a single 128-bit buffer store.
2005define void @void_func_v2f64(<2 x double> %arg0) #0 {
2006; CIGFX89-LABEL: void_func_v2f64:
2007; CIGFX89:       ; %bb.0:
2008; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2009; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2010; CIGFX89-NEXT:    s_mov_b32 s6, -1
2011; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2012; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2013; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2014;
2015; GFX11-LABEL: void_func_v2f64:
2016; GFX11:       ; %bb.0:
2017; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2018; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2019; GFX11-NEXT:    s_mov_b32 s2, -1
2020; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
2021; GFX11-NEXT:    s_setpc_b64 s[30:31]
2022  store <2 x double> %arg0, ptr addrspace(1) undef
2023  ret void
2024}
2025
; Stores a <3 x double> argument to an undef addrspace(1) pointer.
; All targets' checks split the store in two: a 64-bit store of v[4:5] and a
; 128-bit store of v[0:3]; the GFX11 checks expect an s_clause 0x1 before them.
2026define void @void_func_v3f64(<3 x double> %arg0) #0 {
2027; CIGFX89-LABEL: void_func_v3f64:
2028; CIGFX89:       ; %bb.0:
2029; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2030; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2031; CIGFX89-NEXT:    s_mov_b32 s6, -1
2032; CIGFX89-NEXT:    buffer_store_dwordx2 v[4:5], off, s[4:7], 0
2033; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2034; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2035; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2036;
2037; GFX11-LABEL: void_func_v3f64:
2038; GFX11:       ; %bb.0:
2039; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2040; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2041; GFX11-NEXT:    s_mov_b32 s2, -1
2042; GFX11-NEXT:    s_clause 0x1
2043; GFX11-NEXT:    buffer_store_b64 v[4:5], off, s[0:3], 0
2044; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
2045; GFX11-NEXT:    s_setpc_b64 s[30:31]
2046  store <3 x double> %arg0, ptr addrspace(1) undef
2047  ret void
2048}
2049
; Stores a <4 x double> argument to an undef addrspace(1) pointer.
; All targets' checks emit two 128-bit stores, v[4:7] then v[0:3]; the GFX11
; checks additionally expect an s_clause 0x1 in front of the pair.
2050define void @void_func_v4f64(<4 x double> %arg0) #0 {
2051; CIGFX89-LABEL: void_func_v4f64:
2052; CIGFX89:       ; %bb.0:
2053; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2054; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2055; CIGFX89-NEXT:    s_mov_b32 s6, -1
2056; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2057; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2058; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2059; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2060;
2061; GFX11-LABEL: void_func_v4f64:
2062; GFX11:       ; %bb.0:
2063; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2064; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2065; GFX11-NEXT:    s_mov_b32 s2, -1
2066; GFX11-NEXT:    s_clause 0x1
2067; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
2068; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
2069; GFX11-NEXT:    s_setpc_b64 s[30:31]
2070  store <4 x double> %arg0, ptr addrspace(1) undef
2071  ret void
2072}
2073
; Stores a <8 x double> argument to an undef addrspace(1) pointer.
; All targets' checks emit four 128-bit stores covering v[12:15] down to
; v[0:3]; the GFX11 checks additionally expect an s_clause 0x3 in front of them.
2074define void @void_func_v8f64(<8 x double> %arg0) #0 {
2075; CIGFX89-LABEL: void_func_v8f64:
2076; CIGFX89:       ; %bb.0:
2077; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2078; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2079; CIGFX89-NEXT:    s_mov_b32 s6, -1
2080; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
2081; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
2082; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2083; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2084; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2085; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2086;
2087; GFX11-LABEL: void_func_v8f64:
2088; GFX11:       ; %bb.0:
2089; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2090; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2091; GFX11-NEXT:    s_mov_b32 s2, -1
2092; GFX11-NEXT:    s_clause 0x3
2093; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
2094; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
2095; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
2096; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
2097; GFX11-NEXT:    s_setpc_b64 s[30:31]
2098  store <8 x double> %arg0, ptr addrspace(1) undef
2099  ret void
2100}
2101
; Stores a <16 x double> argument (32 dwords) to an undef addrspace(1) pointer.
; Unlike the smaller vectors, the checks first load v31 from memory at offset
; s32 (buffer_load_dword / scratch_load_b32) and place an s_waitcnt before the
; store of v[28:31]; v0..v30 are stored without any preceding load.
; NOTE(review): presumably the 32nd dword is passed on the stack because the
; argument no longer fits in VGPRs - confirm against the AMDGPU calling
; convention.
2102define void @void_func_v16f64(<16 x double> %arg0) #0 {
2103; CIGFX89-LABEL: void_func_v16f64:
2104; CIGFX89:       ; %bb.0:
2105; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2106; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
2107; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2108; CIGFX89-NEXT:    s_mov_b32 s6, -1
2109; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
2110; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
2111; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
2112; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
2113; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
2114; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2115; CIGFX89-NEXT:    s_waitcnt vmcnt(6)
2116; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
2117; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2118; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2119; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2120;
2121; GFX11-LABEL: void_func_v16f64:
2122; GFX11:       ; %bb.0:
2123; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2124; GFX11-NEXT:    scratch_load_b32 v31, off, s32
2125; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2126; GFX11-NEXT:    s_mov_b32 s2, -1
2127; GFX11-NEXT:    s_clause 0x3
2128; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0
2129; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0
2130; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0
2131; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0
2132; GFX11-NEXT:    s_waitcnt vmcnt(0)
2133; GFX11-NEXT:    s_clause 0x3
2134; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0
2135; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0
2136; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
2137; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
2138; GFX11-NEXT:    s_setpc_b64 s[30:31]
2139  store <16 x double> %arg0, ptr addrspace(1) undef
2140  ret void
2141}
2142
; Stores a <2 x half> argument to an undef addrspace(1) pointer.
; The CI checks run v_cvt_f16_f32 on v0 and v1 and pack the two results into
; one dword (shift/or) before storing; the GFX89 and GFX11 checks store v0
; directly as a dword.
2143define void @void_func_v2f16(<2 x half> %arg0) #0 {
2144; CI-LABEL: void_func_v2f16:
2145; CI:       ; %bb.0:
2146; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2147; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
2148; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
2149; CI-NEXT:    s_mov_b32 s7, 0xf000
2150; CI-NEXT:    s_mov_b32 s6, -1
2151; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2152; CI-NEXT:    v_or_b32_e32 v0, v0, v1
2153; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2154; CI-NEXT:    s_waitcnt vmcnt(0)
2155; CI-NEXT:    s_setpc_b64 s[30:31]
2156;
2157; GFX89-LABEL: void_func_v2f16:
2158; GFX89:       ; %bb.0:
2159; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2160; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2161; GFX89-NEXT:    s_mov_b32 s6, -1
2162; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2163; GFX89-NEXT:    s_waitcnt vmcnt(0)
2164; GFX89-NEXT:    s_setpc_b64 s[30:31]
2165;
2166; GFX11-LABEL: void_func_v2f16:
2167; GFX11:       ; %bb.0:
2168; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2170; GFX11-NEXT:    s_mov_b32 s2, -1
2171; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
2172; GFX11-NEXT:    s_setpc_b64 s[30:31]
2173  store <2 x half> %arg0, ptr addrspace(1) undef
2174  ret void
2175}
2176
2177; FIXME: The ABI is different if f16 is legal.
; Stores a <3 x half> argument to an undef addrspace(1) pointer.
; The CI checks run v_cvt_f16_f32 on v0, v1 and v2, pack v0/v1 into one dword
; and store v2 as a short; the GFX89 and GFX11 checks store v0 (dword) and
; v1 (short) directly, so the argument occupies different registers per target.
2178define void @void_func_v3f16(<3 x half> %arg0) #0 {
2179; CI-LABEL: void_func_v3f16:
2180; CI:       ; %bb.0:
2181; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2182; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
2183; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
2184; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
2185; CI-NEXT:    s_mov_b32 s7, 0xf000
2186; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2187; CI-NEXT:    s_mov_b32 s6, -1
2188; CI-NEXT:    v_or_b32_e32 v0, v0, v1
2189; CI-NEXT:    buffer_store_short v2, off, s[4:7], 0
2190; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2191; CI-NEXT:    s_waitcnt vmcnt(0)
2192; CI-NEXT:    s_setpc_b64 s[30:31]
2193;
2194; GFX89-LABEL: void_func_v3f16:
2195; GFX89:       ; %bb.0:
2196; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2197; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2198; GFX89-NEXT:    s_mov_b32 s6, -1
2199; GFX89-NEXT:    buffer_store_short v1, off, s[4:7], 0
2200; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2201; GFX89-NEXT:    s_waitcnt vmcnt(0)
2202; GFX89-NEXT:    s_setpc_b64 s[30:31]
2203;
2204; GFX11-LABEL: void_func_v3f16:
2205; GFX11:       ; %bb.0:
2206; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2207; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2208; GFX11-NEXT:    s_mov_b32 s2, -1
2209; GFX11-NEXT:    s_clause 0x1
2210; GFX11-NEXT:    buffer_store_b16 v1, off, s[0:3], 0
2211; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
2212; GFX11-NEXT:    s_setpc_b64 s[30:31]
2213  store <3 x half> %arg0, ptr addrspace(1) undef
2214  ret void
2215}
2216
; Stores a <4 x half> argument to an undef addrspace(1) pointer.
; The CI checks run v_cvt_f16_f32 on v0..v3 and pack the results pairwise into
; v[0:1] (shift/or) before a single dwordx2 store; the GFX89 and GFX11 checks
; store v[0:1] directly.
2217define void @void_func_v4f16(<4 x half> %arg0) #0 {
2218; CI-LABEL: void_func_v4f16:
2219; CI:       ; %bb.0:
2220; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2221; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
2222; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
2223; CI-NEXT:    v_cvt_f16_f32_e32 v4, v1
2224; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
2225; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
2226; CI-NEXT:    v_or_b32_e32 v1, v2, v1
2227; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
2228; CI-NEXT:    v_or_b32_e32 v0, v0, v2
2229; CI-NEXT:    s_mov_b32 s7, 0xf000
2230; CI-NEXT:    s_mov_b32 s6, -1
2231; CI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
2232; CI-NEXT:    s_waitcnt vmcnt(0)
2233; CI-NEXT:    s_setpc_b64 s[30:31]
2234;
2235; GFX89-LABEL: void_func_v4f16:
2236; GFX89:       ; %bb.0:
2237; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2238; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2239; GFX89-NEXT:    s_mov_b32 s6, -1
2240; GFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
2241; GFX89-NEXT:    s_waitcnt vmcnt(0)
2242; GFX89-NEXT:    s_setpc_b64 s[30:31]
2243;
2244; GFX11-LABEL: void_func_v4f16:
2245; GFX11:       ; %bb.0:
2246; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2247; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2248; GFX11-NEXT:    s_mov_b32 s2, -1
2249; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
2250; GFX11-NEXT:    s_setpc_b64 s[30:31]
2251  store <4 x half> %arg0, ptr addrspace(1) undef
2252  ret void
2253}
2254
; <8 x half> argument stored to an undef global pointer.
; Per the checks below, hawaii (CI) reads one element per VGPR (v0-v7),
; converts each with v_cvt_f16_f32 and packs them pairwise into v[2:5]
; for one 128-bit store. The other targets store v[0:3] exactly as received.
2255define void @void_func_v8f16(<8 x half> %arg0) #0 {
2256; CI-LABEL: void_func_v8f16:
2257; CI:       ; %bb.0:
2258; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2259; CI-NEXT:    v_cvt_f16_f32_e32 v7, v7
2260; CI-NEXT:    v_cvt_f16_f32_e32 v6, v6
2261; CI-NEXT:    v_cvt_f16_f32_e32 v8, v5
2262; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
2263; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
2264; CI-NEXT:    v_cvt_f16_f32_e32 v4, v4
2265; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
2266; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
2267; CI-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
2268; CI-NEXT:    v_or_b32_e32 v5, v6, v5
2269; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v8
2270; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
2271; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2272; CI-NEXT:    v_or_b32_e32 v4, v4, v6
2273; CI-NEXT:    v_or_b32_e32 v3, v2, v3
2274; CI-NEXT:    v_or_b32_e32 v2, v0, v1
2275; CI-NEXT:    s_mov_b32 s7, 0xf000
2276; CI-NEXT:    s_mov_b32 s6, -1
2277; CI-NEXT:    buffer_store_dwordx4 v[2:5], off, s[4:7], 0
2278; CI-NEXT:    s_waitcnt vmcnt(0)
2279; CI-NEXT:    s_setpc_b64 s[30:31]
2280;
2281; GFX89-LABEL: void_func_v8f16:
2282; GFX89:       ; %bb.0:
2283; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2284; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2285; GFX89-NEXT:    s_mov_b32 s6, -1
2286; GFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2287; GFX89-NEXT:    s_waitcnt vmcnt(0)
2288; GFX89-NEXT:    s_setpc_b64 s[30:31]
2289;
2290; GFX11-LABEL: void_func_v8f16:
2291; GFX11:       ; %bb.0:
2292; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2293; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2294; GFX11-NEXT:    s_mov_b32 s2, -1
2295; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
2296; GFX11-NEXT:    s_setpc_b64 s[30:31]
2297  store <8 x half> %arg0, ptr addrspace(1) undef
2298  ret void
2299}
2300
; <16 x half> argument stored to an undef global pointer.
; Per the checks below, hawaii (CI) reads one element per VGPR (v0-v15),
; converts each with v_cvt_f16_f32 and packs elements 0-7 into v[2:5] and
; elements 8-15 into v[10:13], then issues two 128-bit stores. The other
; targets store v[4:7] and v[0:3] exactly as received.
2301define void @void_func_v16f16(<16 x half> %arg0) #0 {
2302; CI-LABEL: void_func_v16f16:
2303; CI:       ; %bb.0:
2304; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2305; CI-NEXT:    v_cvt_f16_f32_e32 v7, v7
2306; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
2307; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
2308; CI-NEXT:    v_cvt_f16_f32_e32 v6, v6
2309; CI-NEXT:    v_cvt_f16_f32_e32 v16, v5
2310; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
2311; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
2312; CI-NEXT:    v_cvt_f16_f32_e32 v4, v4
2313; CI-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
2314; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
2315; CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
2316; CI-NEXT:    v_or_b32_e32 v5, v6, v5
2317; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v16
2318; CI-NEXT:    v_or_b32_e32 v3, v2, v3
2319; CI-NEXT:    v_or_b32_e32 v2, v0, v1
2320; CI-NEXT:    v_cvt_f16_f32_e32 v0, v15
2321; CI-NEXT:    v_or_b32_e32 v4, v4, v6
2322; CI-NEXT:    v_cvt_f16_f32_e32 v1, v14
2323; CI-NEXT:    v_cvt_f16_f32_e32 v6, v13
2324; CI-NEXT:    v_cvt_f16_f32_e32 v7, v12
2325; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2326; CI-NEXT:    v_or_b32_e32 v13, v1, v0
2327; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v6
2328; CI-NEXT:    v_or_b32_e32 v12, v7, v0
2329; CI-NEXT:    v_cvt_f16_f32_e32 v0, v11
2330; CI-NEXT:    v_cvt_f16_f32_e32 v1, v10
2331; CI-NEXT:    v_cvt_f16_f32_e32 v6, v9
2332; CI-NEXT:    v_cvt_f16_f32_e32 v7, v8
2333; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2334; CI-NEXT:    v_or_b32_e32 v11, v1, v0
2335; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v6
2336; CI-NEXT:    v_or_b32_e32 v10, v7, v0
2337; CI-NEXT:    s_mov_b32 s7, 0xf000
2338; CI-NEXT:    s_mov_b32 s6, -1
2339; CI-NEXT:    buffer_store_dwordx4 v[10:13], off, s[4:7], 0
2340; CI-NEXT:    buffer_store_dwordx4 v[2:5], off, s[4:7], 0
2341; CI-NEXT:    s_waitcnt vmcnt(0)
2342; CI-NEXT:    s_setpc_b64 s[30:31]
2343;
2344; GFX89-LABEL: void_func_v16f16:
2345; GFX89:       ; %bb.0:
2346; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2347; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2348; GFX89-NEXT:    s_mov_b32 s6, -1
2349; GFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2350; GFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2351; GFX89-NEXT:    s_waitcnt vmcnt(0)
2352; GFX89-NEXT:    s_setpc_b64 s[30:31]
2353;
2354; GFX11-LABEL: void_func_v16f16:
2355; GFX11:       ; %bb.0:
2356; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2357; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2358; GFX11-NEXT:    s_mov_b32 s2, -1
2359; GFX11-NEXT:    s_clause 0x1
2360; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
2361; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
2362; GFX11-NEXT:    s_setpc_b64 s[30:31]
2363  store <16 x half> %arg0, ptr addrspace(1) undef
2364  ret void
2365}
2366
2367; Make sure there is no alignment requirement for passed vgprs.
; The checks expect %arg0 in v0, the i64 %arg1 in the odd-based pair v[1:2]
; directly after it, and %arg2 in v3. The three stores are volatile, and the
; expected output has a wait after each one.
2368define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 {
2369; CIGFX89-LABEL: void_func_i32_i64_i32:
2370; CIGFX89:       ; %bb.0:
2371; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2372; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2373; CIGFX89-NEXT:    s_mov_b32 s6, -1
2374; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2375; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2376; CIGFX89-NEXT:    buffer_store_dwordx2 v[1:2], off, s[4:7], 0
2377; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2378; CIGFX89-NEXT:    buffer_store_dword v3, off, s[4:7], 0
2379; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2380; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2381;
2382; GFX11-LABEL: void_func_i32_i64_i32:
2383; GFX11:       ; %bb.0:
2384; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2385; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2386; GFX11-NEXT:    s_mov_b32 s2, -1
2387; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0 dlc
2388; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2389; GFX11-NEXT:    buffer_store_b64 v[1:2], off, s[0:3], 0 dlc
2390; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2391; GFX11-NEXT:    buffer_store_b32 v3, off, s[0:3], 0 dlc
2392; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2393; GFX11-NEXT:    s_setpc_b64 s[30:31]
2394  store volatile i32 %arg0, ptr addrspace(1) undef
2395  store volatile i64 %arg1, ptr addrspace(1) undef
2396  store volatile i32 %arg2, ptr addrspace(1) undef
2397  ret void
2398}
2399
; Single-member struct { i32 } passed by value.
; The checks expect the member in v0 and one 32-bit store on every target.
2400define void @void_func_struct_i32({ i32 } %arg0) #0 {
2401; CIGFX89-LABEL: void_func_struct_i32:
2402; CIGFX89:       ; %bb.0:
2403; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2404; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2405; CIGFX89-NEXT:    s_mov_b32 s6, -1
2406; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2407; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2408; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2409;
2410; GFX11-LABEL: void_func_struct_i32:
2411; GFX11:       ; %bb.0:
2412; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2413; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2414; GFX11-NEXT:    s_mov_b32 s2, -1
2415; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
2416; GFX11-NEXT:    s_setpc_b64 s[30:31]
2417  store { i32 } %arg0, ptr addrspace(1) undef
2418  ret void
2419}
2420
; Two-member struct { i8, i32 } passed by value.
; The checks expect the i8 member in v0 (byte store) and the i32 member in
; v1 (32-bit store), so each member gets its own VGPR.
2421define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
2422; CIGFX89-LABEL: void_func_struct_i8_i32:
2423; CIGFX89:       ; %bb.0:
2424; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2425; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2426; CIGFX89-NEXT:    s_mov_b32 s6, -1
2427; CIGFX89-NEXT:    buffer_store_dword v1, off, s[4:7], 0
2428; CIGFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
2429; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2430; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2431;
2432; GFX11-LABEL: void_func_struct_i8_i32:
2433; GFX11:       ; %bb.0:
2434; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2435; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2436; GFX11-NEXT:    s_mov_b32 s2, -1
2437; GFX11-NEXT:    s_clause 0x1
2438; GFX11-NEXT:    buffer_store_b32 v1, off, s[0:3], 0
2439; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0
2440; GFX11-NEXT:    s_setpc_b64 s[30:31]
2441  store { i8, i32 } %arg0, ptr addrspace(1) undef
2442  ret void
2443}
2444
; byval { i8, i32 } argument in the private address space (addrspace(5)).
; The checks expect the i8 member to be loaded from s32 with no offset and
; the i32 member from s32 offset:4, then both written to the global pointer.
; gfx1100 uses scratch_load_* where the older targets use buffer_load_* via
; s[0:3].
2445define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 {
2446; CIGFX89-LABEL: void_func_byval_struct_i8_i32:
2447; CIGFX89:       ; %bb.0:
2448; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2449; CIGFX89-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:4
2450; CIGFX89-NEXT:    buffer_load_ubyte v1, off, s[0:3], s32
2451; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2452; CIGFX89-NEXT:    s_mov_b32 s6, -1
2453; CIGFX89-NEXT:    s_waitcnt vmcnt(1)
2454; CIGFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
2455; CIGFX89-NEXT:    s_waitcnt vmcnt(1)
2456; CIGFX89-NEXT:    buffer_store_byte v1, off, s[4:7], 0
2457; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2458; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2459;
2460; GFX11-LABEL: void_func_byval_struct_i8_i32:
2461; GFX11:       ; %bb.0:
2462; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2463; GFX11-NEXT:    s_clause 0x1
2464; GFX11-NEXT:    scratch_load_b32 v0, off, s32 offset:4
2465; GFX11-NEXT:    scratch_load_u8 v1, off, s32
2466; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2467; GFX11-NEXT:    s_mov_b32 s2, -1
2468; GFX11-NEXT:    s_waitcnt vmcnt(1)
2469; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
2470; GFX11-NEXT:    s_waitcnt vmcnt(0)
2471; GFX11-NEXT:    buffer_store_b8 v1, off, s[0:3], 0
2472; GFX11-NEXT:    s_setpc_b64 s[30:31]
2473  %arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0
2474  store { i8, i32 } %arg0.load, ptr addrspace(1) undef
2475  ret void
2476}
2477
; Two byval { i8, i32 } arguments followed by a plain i32.
; The checks expect the first struct at s32 offsets 0 and 4 and the second
; at s32 offsets 8 and 12. %arg2 is expected in v0 and is written to LDS
; (addrspace(3)) with a DS write. All loads and stores are volatile, and the
; expected output has a wait after each one.
; The hawaii and fiji checks additionally expect `s_mov_b32 m0, -1` before
; the DS write. The gfx900 and gfx1100 checks do not.
2478define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 {
2479; CI-LABEL: void_func_byval_struct_i8_i32_x2:
2480; CI:       ; %bb.0:
2481; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2482; CI-NEXT:    buffer_load_ubyte v1, off, s[0:3], s32 glc
2483; CI-NEXT:    s_waitcnt vmcnt(0)
2484; CI-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
2485; CI-NEXT:    s_waitcnt vmcnt(0)
2486; CI-NEXT:    buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
2487; CI-NEXT:    s_waitcnt vmcnt(0)
2488; CI-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
2489; CI-NEXT:    s_waitcnt vmcnt(0)
2490; CI-NEXT:    s_mov_b32 s7, 0xf000
2491; CI-NEXT:    s_mov_b32 s6, -1
2492; CI-NEXT:    s_mov_b32 m0, -1
2493; CI-NEXT:    buffer_store_dword v2, off, s[4:7], 0
2494; CI-NEXT:    s_waitcnt vmcnt(0)
2495; CI-NEXT:    buffer_store_byte v1, off, s[4:7], 0
2496; CI-NEXT:    s_waitcnt vmcnt(0)
2497; CI-NEXT:    buffer_store_dword v4, off, s[4:7], 0
2498; CI-NEXT:    s_waitcnt vmcnt(0)
2499; CI-NEXT:    buffer_store_byte v3, off, s[4:7], 0
2500; CI-NEXT:    s_waitcnt vmcnt(0)
2501; CI-NEXT:    ds_write_b32 v0, v0
2502; CI-NEXT:    s_waitcnt lgkmcnt(0)
2503; CI-NEXT:    s_setpc_b64 s[30:31]
2504;
2505; VI-LABEL: void_func_byval_struct_i8_i32_x2:
2506; VI:       ; %bb.0:
2507; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2508; VI-NEXT:    buffer_load_ubyte v1, off, s[0:3], s32 glc
2509; VI-NEXT:    s_waitcnt vmcnt(0)
2510; VI-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
2511; VI-NEXT:    s_waitcnt vmcnt(0)
2512; VI-NEXT:    buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
2513; VI-NEXT:    s_waitcnt vmcnt(0)
2514; VI-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
2515; VI-NEXT:    s_waitcnt vmcnt(0)
2516; VI-NEXT:    s_mov_b32 s7, 0xf000
2517; VI-NEXT:    s_mov_b32 s6, -1
2518; VI-NEXT:    s_mov_b32 m0, -1
2519; VI-NEXT:    buffer_store_dword v2, off, s[4:7], 0
2520; VI-NEXT:    s_waitcnt vmcnt(0)
2521; VI-NEXT:    buffer_store_byte v1, off, s[4:7], 0
2522; VI-NEXT:    s_waitcnt vmcnt(0)
2523; VI-NEXT:    buffer_store_dword v4, off, s[4:7], 0
2524; VI-NEXT:    s_waitcnt vmcnt(0)
2525; VI-NEXT:    buffer_store_byte v3, off, s[4:7], 0
2526; VI-NEXT:    s_waitcnt vmcnt(0)
2527; VI-NEXT:    ds_write_b32 v0, v0
2528; VI-NEXT:    s_waitcnt lgkmcnt(0)
2529; VI-NEXT:    s_setpc_b64 s[30:31]
2530;
2531; GFX9-LABEL: void_func_byval_struct_i8_i32_x2:
2532; GFX9:       ; %bb.0:
2533; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2534; GFX9-NEXT:    buffer_load_ubyte v1, off, s[0:3], s32 glc
2535; GFX9-NEXT:    s_waitcnt vmcnt(0)
2536; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:4 glc
2537; GFX9-NEXT:    s_waitcnt vmcnt(0)
2538; GFX9-NEXT:    buffer_load_ubyte v3, off, s[0:3], s32 offset:8 glc
2539; GFX9-NEXT:    s_waitcnt vmcnt(0)
2540; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:12 glc
2541; GFX9-NEXT:    s_waitcnt vmcnt(0)
2542; GFX9-NEXT:    s_mov_b32 s7, 0xf000
2543; GFX9-NEXT:    s_mov_b32 s6, -1
2544; GFX9-NEXT:    buffer_store_dword v2, off, s[4:7], 0
2545; GFX9-NEXT:    s_waitcnt vmcnt(0)
2546; GFX9-NEXT:    buffer_store_byte v1, off, s[4:7], 0
2547; GFX9-NEXT:    s_waitcnt vmcnt(0)
2548; GFX9-NEXT:    buffer_store_dword v4, off, s[4:7], 0
2549; GFX9-NEXT:    s_waitcnt vmcnt(0)
2550; GFX9-NEXT:    buffer_store_byte v3, off, s[4:7], 0
2551; GFX9-NEXT:    s_waitcnt vmcnt(0)
2552; GFX9-NEXT:    ds_write_b32 v0, v0
2553; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2554; GFX9-NEXT:    s_setpc_b64 s[30:31]
2555;
2556; GFX11-LABEL: void_func_byval_struct_i8_i32_x2:
2557; GFX11:       ; %bb.0:
2558; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2559; GFX11-NEXT:    scratch_load_u8 v1, off, s32 glc dlc
2560; GFX11-NEXT:    s_waitcnt vmcnt(0)
2561; GFX11-NEXT:    scratch_load_b32 v2, off, s32 offset:4 glc dlc
2562; GFX11-NEXT:    s_waitcnt vmcnt(0)
2563; GFX11-NEXT:    scratch_load_u8 v3, off, s32 offset:8 glc dlc
2564; GFX11-NEXT:    s_waitcnt vmcnt(0)
2565; GFX11-NEXT:    scratch_load_b32 v4, off, s32 offset:12 glc dlc
2566; GFX11-NEXT:    s_waitcnt vmcnt(0)
2567; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2568; GFX11-NEXT:    s_mov_b32 s2, -1
2569; GFX11-NEXT:    buffer_store_b32 v2, off, s[0:3], 0 dlc
2570; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2571; GFX11-NEXT:    buffer_store_b8 v1, off, s[0:3], 0 dlc
2572; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2573; GFX11-NEXT:    buffer_store_b32 v4, off, s[0:3], 0 dlc
2574; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2575; GFX11-NEXT:    buffer_store_b8 v3, off, s[0:3], 0 dlc
2576; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2577; GFX11-NEXT:    ds_store_b32 v0, v0
2578; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2579; GFX11-NEXT:    s_setpc_b64 s[30:31]
2580  %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0
2581  %arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1
2582  store volatile { i8, i32 } %arg0.load, ptr addrspace(1) undef
2583  store volatile { i8, i32 } %arg1.load, ptr addrspace(1) undef
2584  store volatile i32 %arg2, ptr addrspace(3) undef
2585  ret void
2586}
2587
; byval(i32) followed by byval(i64), both in the private address space.
; The checks expect the i32 at s32 with no offset and the i64 at s32
; offset:8, i.e. the 4 bytes at offset 4 are not read. hawaii/fiji/gfx900
; read the i64 as two dwords at offsets 8 and 12, while gfx1100 uses one
; 64-bit scratch load.
2588define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 {
2589; CIGFX89-LABEL: void_func_byval_i32_byval_i64:
2590; CIGFX89:       ; %bb.0:
2591; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2592; CIGFX89-NEXT:    buffer_load_dword v2, off, s[0:3], s32
2593; CIGFX89-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:8
2594; CIGFX89-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:12
2595; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2596; CIGFX89-NEXT:    s_mov_b32 s6, -1
2597; CIGFX89-NEXT:    s_waitcnt vmcnt(2)
2598; CIGFX89-NEXT:    buffer_store_dword v2, off, s[4:7], 0
2599; CIGFX89-NEXT:    s_waitcnt vmcnt(1)
2600; CIGFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
2601; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2602; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2603;
2604; GFX11-LABEL: void_func_byval_i32_byval_i64:
2605; GFX11:       ; %bb.0:
2606; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2607; GFX11-NEXT:    s_clause 0x1
2608; GFX11-NEXT:    scratch_load_b32 v2, off, s32
2609; GFX11-NEXT:    scratch_load_b64 v[0:1], off, s32 offset:8
2610; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2611; GFX11-NEXT:    s_mov_b32 s2, -1
2612; GFX11-NEXT:    s_waitcnt vmcnt(1)
2613; GFX11-NEXT:    buffer_store_b32 v2, off, s[0:3], 0
2614; GFX11-NEXT:    s_waitcnt vmcnt(0)
2615; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
2616; GFX11-NEXT:    s_setpc_b64 s[30:31]
2617  %arg0.load = load i32, ptr addrspace(5) %arg0
2618  %arg1.load = load i64, ptr addrspace(5) %arg1
2619  store i32 %arg0.load, ptr addrspace(1) undef
2620  store i64 %arg1.load, ptr addrspace(1) undef
2621  ret void
2622}
2623
; <32 x i32> followed by an i32 and an i64, so the trailing arguments no
; longer fit in argument VGPRs.
; The checks expect vector elements 0-30 in v0-v30 and the last element to
; be loaded into v31 from s32 with no offset. %arg1 is read from s32
; offset:4 and %arg2 from s32 offsets 8 and 12. All stores are volatile, and
; the expected output has a wait after each one.
2624define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 {
2625; CIGFX89-LABEL: void_func_v32i32_i32_i64:
2626; CIGFX89:       ; %bb.0:
2627; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2628; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
2629; CIGFX89-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:12
2630; CIGFX89-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:8
2631; CIGFX89-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:4
2632; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2633; CIGFX89-NEXT:    s_mov_b32 s6, -1
2634; CIGFX89-NEXT:    s_waitcnt vmcnt(3)
2635; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
2636; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2637; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
2638; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2639; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
2640; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2641; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
2642; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2643; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
2644; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2645; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
2646; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2647; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2648; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2649; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2650; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2651; CIGFX89-NEXT:    buffer_store_dword v34, off, s[4:7], 0
2652; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2653; CIGFX89-NEXT:    buffer_store_dwordx2 v[32:33], off, s[4:7], 0
2654; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2655; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2656;
2657; GFX11-LABEL: void_func_v32i32_i32_i64:
2658; GFX11:       ; %bb.0:
2659; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2660; GFX11-NEXT:    s_clause 0x3
2661; GFX11-NEXT:    scratch_load_b32 v31, off, s32
2662; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:12
2663; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:4
2664; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:8
2665; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2666; GFX11-NEXT:    s_mov_b32 s2, -1
2667; GFX11-NEXT:    s_waitcnt vmcnt(3)
2668; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
2669; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2670; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
2671; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2672; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
2673; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2674; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
2675; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2676; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
2677; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2678; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
2679; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2680; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
2681; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2682; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
2683; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2684; GFX11-NEXT:    s_waitcnt vmcnt(1)
2685; GFX11-NEXT:    buffer_store_b32 v34, off, s[0:3], 0 dlc
2686; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2687; GFX11-NEXT:    s_waitcnt vmcnt(0)
2688; GFX11-NEXT:    buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
2689; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2690; GFX11-NEXT:    s_setpc_b64 s[30:31]
2691  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
2692  store volatile i32 %arg1, ptr addrspace(1) undef
2693  store volatile i64 %arg2, ptr addrspace(1) undef
2694  ret void
2695}
2696
2697; FIXME: Different ext load types on CI vs. VI
; <32 x i32> followed by i1, i8, i16, half and bfloat arguments. Despite the
; function name, a half (%arg4) is passed before the bfloat.
; The checks expect the five small arguments at s32 offsets 4, 8, 12, 16 and
; 20, one 4-byte slot each. On hawaii the i8, i16, half and bfloat slots are
; loaded as full dwords: the half is then produced with v_cvt_f16_f32 and the
; bfloat by taking the upper 16 bits after a v_mul_f32 by 1.0. On fiji,
; gfx900 and gfx1100 those four slots are loaded as 16-bit values and stored
; without conversion.
2698define void @void_func_v32i32_i1_i8_i16_bf16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i16 %arg3, half %arg4, bfloat %arg5) #0 {
2699; CI-LABEL: void_func_v32i32_i1_i8_i16_bf16:
2700; CI:       ; %bb.0:
2701; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2702; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
2703; CI-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:20
2704; CI-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:16
2705; CI-NEXT:    buffer_load_ubyte v34, off, s[0:3], s32 offset:4
2706; CI-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:8
2707; CI-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:12
2708; CI-NEXT:    s_mov_b32 s7, 0xf000
2709; CI-NEXT:    s_mov_b32 s6, -1
2710; CI-NEXT:    s_waitcnt vmcnt(5)
2711; CI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
2712; CI-NEXT:    s_waitcnt vmcnt(0)
2713; CI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
2714; CI-NEXT:    s_waitcnt vmcnt(0)
2715; CI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
2716; CI-NEXT:    s_waitcnt vmcnt(0)
2717; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
2718; CI-NEXT:    s_waitcnt vmcnt(0)
2719; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
2720; CI-NEXT:    s_waitcnt vmcnt(0)
2721; CI-NEXT:    v_mul_f32_e32 v12, 1.0, v32
2722; CI-NEXT:    v_cvt_f16_f32_e32 v13, v33
2723; CI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
2724; CI-NEXT:    s_waitcnt vmcnt(0)
2725; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2726; CI-NEXT:    s_waitcnt vmcnt(0)
2727; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2728; CI-NEXT:    s_waitcnt vmcnt(0)
2729; CI-NEXT:    v_and_b32_e32 v0, 1, v34
2730; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v12
2731; CI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
2732; CI-NEXT:    s_waitcnt vmcnt(0)
2733; CI-NEXT:    buffer_store_byte v35, off, s[4:7], 0
2734; CI-NEXT:    s_waitcnt vmcnt(0)
2735; CI-NEXT:    buffer_store_short v36, off, s[4:7], 0
2736; CI-NEXT:    s_waitcnt vmcnt(0)
2737; CI-NEXT:    buffer_store_short v13, off, s[4:7], 0
2738; CI-NEXT:    s_waitcnt vmcnt(0)
2739; CI-NEXT:    buffer_store_short v1, off, s[4:7], 0
2740; CI-NEXT:    s_waitcnt vmcnt(0)
2741; CI-NEXT:    s_setpc_b64 s[30:31]
2742;
2743; GFX89-LABEL: void_func_v32i32_i1_i8_i16_bf16:
2744; GFX89:       ; %bb.0:
2745; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2746; GFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
2747; GFX89-NEXT:    buffer_load_ubyte v32, off, s[0:3], s32 offset:4
2748; GFX89-NEXT:    buffer_load_ushort v33, off, s[0:3], s32 offset:8
2749; GFX89-NEXT:    buffer_load_ushort v34, off, s[0:3], s32 offset:12
2750; GFX89-NEXT:    buffer_load_ushort v35, off, s[0:3], s32 offset:16
2751; GFX89-NEXT:    buffer_load_ushort v36, off, s[0:3], s32 offset:20
2752; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2753; GFX89-NEXT:    s_mov_b32 s6, -1
2754; GFX89-NEXT:    s_waitcnt vmcnt(5)
2755; GFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
2756; GFX89-NEXT:    s_waitcnt vmcnt(0)
2757; GFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
2758; GFX89-NEXT:    s_waitcnt vmcnt(0)
2759; GFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
2760; GFX89-NEXT:    s_waitcnt vmcnt(0)
2761; GFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
2762; GFX89-NEXT:    s_waitcnt vmcnt(0)
2763; GFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
2764; GFX89-NEXT:    s_waitcnt vmcnt(0)
2765; GFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
2766; GFX89-NEXT:    s_waitcnt vmcnt(0)
2767; GFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2768; GFX89-NEXT:    s_waitcnt vmcnt(0)
2769; GFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2770; GFX89-NEXT:    s_waitcnt vmcnt(0)
2771; GFX89-NEXT:    v_and_b32_e32 v0, 1, v32
2772; GFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
2773; GFX89-NEXT:    s_waitcnt vmcnt(0)
2774; GFX89-NEXT:    buffer_store_byte v33, off, s[4:7], 0
2775; GFX89-NEXT:    s_waitcnt vmcnt(0)
2776; GFX89-NEXT:    buffer_store_short v34, off, s[4:7], 0
2777; GFX89-NEXT:    s_waitcnt vmcnt(0)
2778; GFX89-NEXT:    buffer_store_short v35, off, s[4:7], 0
2779; GFX89-NEXT:    s_waitcnt vmcnt(0)
2780; GFX89-NEXT:    buffer_store_short v36, off, s[4:7], 0
2781; GFX89-NEXT:    s_waitcnt vmcnt(0)
2782; GFX89-NEXT:    s_setpc_b64 s[30:31]
2783;
2784; GFX11-LABEL: void_func_v32i32_i1_i8_i16_bf16:
2785; GFX11:       ; %bb.0:
2786; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2787; GFX11-NEXT:    s_clause 0x5
2788; GFX11-NEXT:    scratch_load_b32 v31, off, s32
2789; GFX11-NEXT:    scratch_load_u8 v32, off, s32 offset:4
2790; GFX11-NEXT:    scratch_load_u16 v33, off, s32 offset:8
2791; GFX11-NEXT:    scratch_load_u16 v34, off, s32 offset:12
2792; GFX11-NEXT:    scratch_load_u16 v35, off, s32 offset:16
2793; GFX11-NEXT:    scratch_load_u16 v36, off, s32 offset:20
2794; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2795; GFX11-NEXT:    s_mov_b32 s2, -1
2796; GFX11-NEXT:    s_waitcnt vmcnt(5)
2797; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
2798; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2799; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
2800; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2801; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
2802; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2803; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
2804; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2805; GFX11-NEXT:    s_waitcnt vmcnt(4)
2806; GFX11-NEXT:    v_and_b32_e32 v16, 1, v32
2807; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
2808; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2809; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
2810; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2811; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
2812; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2813; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
2814; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2815; GFX11-NEXT:    buffer_store_b8 v16, off, s[0:3], 0 dlc
2816; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2817; GFX11-NEXT:    s_waitcnt vmcnt(3)
2818; GFX11-NEXT:    buffer_store_b8 v33, off, s[0:3], 0 dlc
2819; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2820; GFX11-NEXT:    s_waitcnt vmcnt(2)
2821; GFX11-NEXT:    buffer_store_b16 v34, off, s[0:3], 0 dlc
2822; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2823; GFX11-NEXT:    s_waitcnt vmcnt(1)
2824; GFX11-NEXT:    buffer_store_b16 v35, off, s[0:3], 0 dlc
2825; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2826; GFX11-NEXT:    s_waitcnt vmcnt(0)
2827; GFX11-NEXT:    buffer_store_b16 v36, off, s[0:3], 0 dlc
2828; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2829; GFX11-NEXT:    s_setpc_b64 s[30:31]
2830  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
2831  store volatile i1 %arg1, ptr addrspace(1) undef
2832  store volatile i8 %arg2, ptr addrspace(1) undef
2833  store volatile i16 %arg3, ptr addrspace(1) undef
2834  store volatile half %arg4, ptr addrspace(1) undef
2835  store volatile bfloat %arg5, ptr addrspace(1) undef
2836  ret void
2837}
2838
; <32 x i32> followed by <2 x i32> and <2 x float>.
; The checks expect the last <32 x i32> element at s32 with no offset, the
; two <2 x i32> elements at s32 offsets 4 and 8 (loaded into v[32:33]) and
; the two <2 x float> elements at offsets 12 and 16 (loaded into v[34:35]).
; Each trailing vector is then written with one 64-bit store.
2839define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 x float> %arg2) #0 {
2840; CIGFX89-LABEL: void_func_v32i32_v2i32_v2f32:
2841; CIGFX89:       ; %bb.0:
2842; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2843; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
2844; CIGFX89-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
2845; CIGFX89-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
2846; CIGFX89-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:16
2847; CIGFX89-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:12
2848; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
2849; CIGFX89-NEXT:    s_mov_b32 s6, -1
2850; CIGFX89-NEXT:    s_waitcnt vmcnt(4)
2851; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
2852; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2853; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
2854; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2855; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
2856; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2857; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
2858; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2859; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
2860; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2861; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
2862; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2863; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2864; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2865; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2866; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2867; CIGFX89-NEXT:    buffer_store_dwordx2 v[32:33], off, s[4:7], 0
2868; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2869; CIGFX89-NEXT:    buffer_store_dwordx2 v[34:35], off, s[4:7], 0
2870; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
2871; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
2872;
2873; GFX11-LABEL: void_func_v32i32_v2i32_v2f32:
2874; GFX11:       ; %bb.0:
2875; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2876; GFX11-NEXT:    s_clause 0x4
2877; GFX11-NEXT:    scratch_load_b32 v31, off, s32
2878; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:8
2879; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:4
2880; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:16
2881; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:12
2882; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2883; GFX11-NEXT:    s_mov_b32 s2, -1
2884; GFX11-NEXT:    s_waitcnt vmcnt(4)
2885; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
2886; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2887; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
2888; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2889; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
2890; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2891; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
2892; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2893; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
2894; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2895; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
2896; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2897; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
2898; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2899; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
2900; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2901; GFX11-NEXT:    s_waitcnt vmcnt(2)
2902; GFX11-NEXT:    buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
2903; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2904; GFX11-NEXT:    s_waitcnt vmcnt(0)
2905; GFX11-NEXT:    buffer_store_b64 v[34:35], off, s[0:3], 0 dlc
2906; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2907; GFX11-NEXT:    s_setpc_b64 s[30:31]
2908  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
2909  store volatile <2 x i32> %arg1, ptr addrspace(1) undef
2910  store volatile <2 x float> %arg2, ptr addrspace(1) undef
2911  ret void
2912}
2913
; Passes a <32 x i32> followed by <2 x i16>, <2 x half>, <2 x bfloat> and
; <4 x bfloat> arguments, and volatile-stores each one, in argument order, to
; an undef addrspace(1) pointer.
; In every expected sequence below, %arg0 is stored from v[0:31] with eight
; 128-bit stores, after v31 has been loaded from s32 (offset 0); all later
; arguments are loaded from non-zero s32 offsets.
; The hawaii checks (CI prefix) load ten further dwords (offsets 4 through 40)
; and emit ten 16-bit stores; two values go through v_cvt_f16_f32 and six are
; multiplied by 1.0 and then shifted right by 16. Presumably each 16-bit
; element arrives promoted in its own dword on this target -- confirm against
; the calling-convention lowering.
; The fiji/gfx900 checks (shared GFX89 prefix) and the gfx1100 checks load only
; five further dwords (offsets 4 through 20) and emit three 32-bit stores plus
; one 64-bit store.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
2914define void @void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16(<32 x i32> %arg0, <2 x i16> %arg1, <2 x half> %arg2, <2 x bfloat> %arg3, <4 x bfloat> %arg4) #0 {
2915; CI-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
2916; CI:       ; %bb.0:
2917; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2918; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
2919; CI-NEXT:    s_mov_b32 s7, 0xf000
2920; CI-NEXT:    s_mov_b32 s6, -1
2921; CI-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:28
2922; CI-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:32
2923; CI-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:36
2924; CI-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:40
2925; CI-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:20
2926; CI-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:24
2927; CI-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:12
2928; CI-NEXT:    s_waitcnt vmcnt(7)
2929; CI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
2930; CI-NEXT:    s_waitcnt vmcnt(0)
2931; CI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
2932; CI-NEXT:    s_waitcnt vmcnt(0)
2933; CI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
2934; CI-NEXT:    s_waitcnt vmcnt(0)
2935; CI-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:16
2936; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
2937; CI-NEXT:    s_waitcnt vmcnt(0)
2938; CI-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:8
2939; CI-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:4
2940; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
2941; CI-NEXT:    s_waitcnt vmcnt(0)
2942; CI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
2943; CI-NEXT:    s_waitcnt vmcnt(0)
2944; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
2945; CI-NEXT:    s_waitcnt vmcnt(0)
2946; CI-NEXT:    v_cvt_f16_f32_e32 v10, v38
2947; CI-NEXT:    v_mul_f32_e32 v4, 1.0, v32
2948; CI-NEXT:    v_mul_f32_e32 v5, 1.0, v33
2949; CI-NEXT:    v_mul_f32_e32 v6, 1.0, v34
2950; CI-NEXT:    v_mul_f32_e32 v7, 1.0, v35
2951; CI-NEXT:    v_mul_f32_e32 v8, 1.0, v36
2952; CI-NEXT:    v_mul_f32_e32 v9, 1.0, v37
2953; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
2954; CI-NEXT:    s_waitcnt vmcnt(0)
2955; CI-NEXT:    buffer_store_short v16, off, s[4:7], 0
2956; CI-NEXT:    s_waitcnt vmcnt(0)
2957; CI-NEXT:    buffer_store_short v17, off, s[4:7], 0
2958; CI-NEXT:    s_waitcnt vmcnt(0)
2959; CI-NEXT:    v_cvt_f16_f32_e32 v11, v20
2960; CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v4
2961; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v5
2962; CI-NEXT:    v_lshrrev_b32_e32 v2, 16, v6
2963; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v7
2964; CI-NEXT:    v_lshrrev_b32_e32 v4, 16, v8
2965; CI-NEXT:    v_lshrrev_b32_e32 v5, 16, v9
2966; CI-NEXT:    buffer_store_short v11, off, s[4:7], 0
2967; CI-NEXT:    s_waitcnt vmcnt(0)
2968; CI-NEXT:    buffer_store_short v10, off, s[4:7], 0
2969; CI-NEXT:    s_waitcnt vmcnt(0)
2970; CI-NEXT:    buffer_store_short v5, off, s[4:7], 0
2971; CI-NEXT:    s_waitcnt vmcnt(0)
2972; CI-NEXT:    buffer_store_short v4, off, s[4:7], 0
2973; CI-NEXT:    s_waitcnt vmcnt(0)
2974; CI-NEXT:    buffer_store_short v3, off, s[4:7], 0
2975; CI-NEXT:    s_waitcnt vmcnt(0)
2976; CI-NEXT:    buffer_store_short v2, off, s[4:7], 0
2977; CI-NEXT:    s_waitcnt vmcnt(0)
2978; CI-NEXT:    buffer_store_short v1, off, s[4:7], 0
2979; CI-NEXT:    s_waitcnt vmcnt(0)
2980; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
2981; CI-NEXT:    s_waitcnt vmcnt(0)
2982; CI-NEXT:    s_setpc_b64 s[30:31]
2983;
2984; GFX89-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
2985; GFX89:       ; %bb.0:
2986; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2987; GFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
2988; GFX89-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:20
2989; GFX89-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:16
2990; GFX89-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:4
2991; GFX89-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:8
2992; GFX89-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:12
2993; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2994; GFX89-NEXT:    s_mov_b32 s6, -1
2995; GFX89-NEXT:    s_waitcnt vmcnt(5)
2996; GFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
2997; GFX89-NEXT:    s_waitcnt vmcnt(0)
2998; GFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
2999; GFX89-NEXT:    s_waitcnt vmcnt(0)
3000; GFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3001; GFX89-NEXT:    s_waitcnt vmcnt(0)
3002; GFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3003; GFX89-NEXT:    s_waitcnt vmcnt(0)
3004; GFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3005; GFX89-NEXT:    s_waitcnt vmcnt(0)
3006; GFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3007; GFX89-NEXT:    s_waitcnt vmcnt(0)
3008; GFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3009; GFX89-NEXT:    s_waitcnt vmcnt(0)
3010; GFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3011; GFX89-NEXT:    s_waitcnt vmcnt(0)
3012; GFX89-NEXT:    buffer_store_dword v34, off, s[4:7], 0
3013; GFX89-NEXT:    s_waitcnt vmcnt(0)
3014; GFX89-NEXT:    buffer_store_dword v35, off, s[4:7], 0
3015; GFX89-NEXT:    s_waitcnt vmcnt(0)
3016; GFX89-NEXT:    buffer_store_dword v36, off, s[4:7], 0
3017; GFX89-NEXT:    s_waitcnt vmcnt(0)
3018; GFX89-NEXT:    buffer_store_dwordx2 v[32:33], off, s[4:7], 0
3019; GFX89-NEXT:    s_waitcnt vmcnt(0)
3020; GFX89-NEXT:    s_setpc_b64 s[30:31]
3021;
3022; GFX11-LABEL: void_func_v32i32_v2i16_v2f16_v2bf16_v4bf16:
3023; GFX11:       ; %bb.0:
3024; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3025; GFX11-NEXT:    s_clause 0x5
3026; GFX11-NEXT:    scratch_load_b32 v31, off, s32
3027; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:20
3028; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:4
3029; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:8
3030; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:12
3031; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:16
3032; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
3033; GFX11-NEXT:    s_mov_b32 s2, -1
3034; GFX11-NEXT:    s_waitcnt vmcnt(5)
3035; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
3036; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3037; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
3038; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3039; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
3040; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3041; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
3042; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3043; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
3044; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3045; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
3046; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3047; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
3048; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3049; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
3050; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3051; GFX11-NEXT:    s_waitcnt vmcnt(3)
3052; GFX11-NEXT:    buffer_store_b32 v34, off, s[0:3], 0 dlc
3053; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3054; GFX11-NEXT:    s_waitcnt vmcnt(2)
3055; GFX11-NEXT:    buffer_store_b32 v35, off, s[0:3], 0 dlc
3056; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3057; GFX11-NEXT:    s_waitcnt vmcnt(1)
3058; GFX11-NEXT:    buffer_store_b32 v36, off, s[0:3], 0 dlc
3059; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3060; GFX11-NEXT:    s_waitcnt vmcnt(0)
3061; GFX11-NEXT:    buffer_store_b64 v[32:33], off, s[0:3], 0 dlc
3062; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3063; GFX11-NEXT:    s_setpc_b64 s[30:31]
3064  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
3065  store volatile <2 x i16> %arg1, ptr addrspace(1) undef
3066  store volatile <2 x half> %arg2, ptr addrspace(1) undef
3067  store volatile <2 x bfloat> %arg3, ptr addrspace(1) undef
3068  store volatile <4 x bfloat> %arg4, ptr addrspace(1) undef
3069  ret void
3070}
3071
; Passes a <32 x i32> followed by <2 x i64> and <2 x double> arguments and
; volatile-stores each one, in argument order, to an undef addrspace(1)
; pointer.
; Expected code: v31 is loaded from s32 (offset 0), eight further dwords are
; loaded from s32 offsets 4 through 32, and ten 128-bit stores are emitted
; (eight for %arg0 from v[0:31], then one for each of the two remaining
; arguments).
; hawaii, fiji and gfx900 share one check sequence (CIGFX89 prefix); gfx1100
; has its own sequence.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3072define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 x double> %arg2) #0 {
3073; CIGFX89-LABEL: void_func_v32i32_v2i64_v2f64:
3074; CIGFX89:       ; %bb.0:
3075; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3076; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
3077; CIGFX89-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:20
3078; CIGFX89-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:16
3079; CIGFX89-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:12
3080; CIGFX89-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
3081; CIGFX89-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
3082; CIGFX89-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:32
3083; CIGFX89-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:28
3084; CIGFX89-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:24
3085; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
3086; CIGFX89-NEXT:    s_mov_b32 s6, -1
3087; CIGFX89-NEXT:    s_waitcnt vmcnt(8)
3088; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
3089; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3090; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
3091; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3092; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3093; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3094; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3095; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3096; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3097; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3098; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3099; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3100; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3101; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3102; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3103; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3104; CIGFX89-NEXT:    buffer_store_dwordx4 v[32:35], off, s[4:7], 0
3105; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3106; CIGFX89-NEXT:    buffer_store_dwordx4 v[36:39], off, s[4:7], 0
3107; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3108; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
3109;
3110; GFX11-LABEL: void_func_v32i32_v2i64_v2f64:
3111; GFX11:       ; %bb.0:
3112; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3113; GFX11-NEXT:    s_clause 0x8
3114; GFX11-NEXT:    scratch_load_b32 v31, off, s32
3115; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:32
3116; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:28
3117; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:24
3118; GFX11-NEXT:    scratch_load_b32 v39, off, s32 offset:16
3119; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:12
3120; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:8
3121; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:4
3122; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:20
3123; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
3124; GFX11-NEXT:    s_mov_b32 s2, -1
3125; GFX11-NEXT:    s_waitcnt vmcnt(8)
3126; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
3127; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3128; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
3129; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3130; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
3131; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3132; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
3133; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3134; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
3135; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3136; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
3137; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3138; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
3139; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3140; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
3141; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3142; GFX11-NEXT:    s_waitcnt vmcnt(1)
3143; GFX11-NEXT:    buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
3144; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3145; GFX11-NEXT:    s_waitcnt vmcnt(0)
3146; GFX11-NEXT:    buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
3147; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3148; GFX11-NEXT:    s_setpc_b64 s[30:31]
3149  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
3150  store volatile <2 x i64> %arg1, ptr addrspace(1) undef
3151  store volatile <2 x double> %arg2, ptr addrspace(1) undef
3152  ret void
3153}
3154
; Passes a <32 x i32> followed by <4 x i32> and <4 x float> arguments and
; volatile-stores each one, in argument order, to an undef addrspace(1)
; pointer.
; Expected code: v31 is loaded from s32 (offset 0), eight further dwords are
; loaded from s32 offsets 4 through 32, and ten 128-bit stores are emitted
; (eight for %arg0 from v[0:31], then one for each of the two remaining
; arguments).
; hawaii, fiji and gfx900 share one check sequence (CIGFX89 prefix); gfx1100
; has its own sequence.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3155define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 x float> %arg2) #0 {
3156; CIGFX89-LABEL: void_func_v32i32_v4i32_v4f32:
3157; CIGFX89:       ; %bb.0:
3158; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3159; CIGFX89-NEXT:    buffer_load_dword v31, off, s[0:3], s32
3160; CIGFX89-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:20
3161; CIGFX89-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:16
3162; CIGFX89-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:12
3163; CIGFX89-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:8
3164; CIGFX89-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:4
3165; CIGFX89-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:32
3166; CIGFX89-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:28
3167; CIGFX89-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:24
3168; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
3169; CIGFX89-NEXT:    s_mov_b32 s6, -1
3170; CIGFX89-NEXT:    s_waitcnt vmcnt(8)
3171; CIGFX89-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
3172; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3173; CIGFX89-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
3174; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3175; CIGFX89-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3176; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3177; CIGFX89-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3178; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3179; CIGFX89-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3180; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3181; CIGFX89-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3182; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3183; CIGFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3184; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3185; CIGFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3186; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3187; CIGFX89-NEXT:    buffer_store_dwordx4 v[32:35], off, s[4:7], 0
3188; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3189; CIGFX89-NEXT:    buffer_store_dwordx4 v[36:39], off, s[4:7], 0
3190; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3191; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
3192;
3193; GFX11-LABEL: void_func_v32i32_v4i32_v4f32:
3194; GFX11:       ; %bb.0:
3195; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3196; GFX11-NEXT:    s_clause 0x8
3197; GFX11-NEXT:    scratch_load_b32 v31, off, s32
3198; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:16
3199; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:12
3200; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:8
3201; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:4
3202; GFX11-NEXT:    scratch_load_b32 v39, off, s32 offset:32
3203; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:28
3204; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:24
3205; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:20
3206; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
3207; GFX11-NEXT:    s_mov_b32 s2, -1
3208; GFX11-NEXT:    s_waitcnt vmcnt(8)
3209; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
3210; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3211; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
3212; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3213; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
3214; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3215; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
3216; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3217; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
3218; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3219; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
3220; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3221; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
3222; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3223; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
3224; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3225; GFX11-NEXT:    s_waitcnt vmcnt(4)
3226; GFX11-NEXT:    buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
3227; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3228; GFX11-NEXT:    s_waitcnt vmcnt(0)
3229; GFX11-NEXT:    buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
3230; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3231; GFX11-NEXT:    s_setpc_b64 s[30:31]
3232  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
3233  store volatile <4 x i32> %arg1, ptr addrspace(1) undef
3234  store volatile <4 x float> %arg2, ptr addrspace(1) undef
3235  ret void
3236}
3237
; Passes a <32 x i32> followed by <8 x i32> and <8 x float> arguments and
; volatile-stores each one, in argument order, to an undef addrspace(1)
; pointer.
; Expected code: v31 is loaded from s32 (offset 0), sixteen further dwords are
; loaded from s32 offsets 4 through 64, and twelve 128-bit stores are emitted
; (eight for %arg0 from v[0:31], then two for each of the remaining arguments).
; hawaii, fiji and gfx900 each have their own prefix here. Their sequences
; reload into v[12:19] only after those registers have been stored; the gfx900
; sequence additionally expects an s_nop 0 before the store of v[8:11].
; The gfx1100 sequence expects an s_clause 0x10 followed by all seventeen
; scratch_load_b32 instructions, using v[48:55] instead of reusing argument
; registers.
; The assertions are autogenerated: regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3238define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 x float> %arg2) #0 {
3239; CI-LABEL: void_func_v32i32_v8i32_v8f32:
3240; CI:       ; %bb.0:
3241; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3242; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
3243; CI-NEXT:    s_mov_b32 s7, 0xf000
3244; CI-NEXT:    s_mov_b32 s6, -1
3245; CI-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:64
3246; CI-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:60
3247; CI-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:56
3248; CI-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:52
3249; CI-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:16
3250; CI-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:12
3251; CI-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:8
3252; CI-NEXT:    s_waitcnt vmcnt(7)
3253; CI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
3254; CI-NEXT:    s_waitcnt vmcnt(0)
3255; CI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
3256; CI-NEXT:    s_waitcnt vmcnt(0)
3257; CI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3258; CI-NEXT:    s_waitcnt vmcnt(0)
3259; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3260; CI-NEXT:    s_waitcnt vmcnt(0)
3261; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3262; CI-NEXT:    s_waitcnt vmcnt(0)
3263; CI-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:4
3264; CI-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:32
3265; CI-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:28
3266; CI-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:24
3267; CI-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:20
3268; CI-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48
3269; CI-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44
3270; CI-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:40
3271; CI-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:36
3272; CI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3273; CI-NEXT:    s_waitcnt vmcnt(0)
3274; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3275; CI-NEXT:    s_waitcnt vmcnt(0)
3276; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3277; CI-NEXT:    s_waitcnt vmcnt(0)
3278; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3279; CI-NEXT:    s_waitcnt vmcnt(0)
3280; CI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[4:7], 0
3281; CI-NEXT:    s_waitcnt vmcnt(0)
3282; CI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[4:7], 0
3283; CI-NEXT:    s_waitcnt vmcnt(0)
3284; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3285; CI-NEXT:    s_waitcnt vmcnt(0)
3286; CI-NEXT:    s_setpc_b64 s[30:31]
3287;
3288; VI-LABEL: void_func_v32i32_v8i32_v8f32:
3289; VI:       ; %bb.0:
3290; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3291; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
3292; VI-NEXT:    s_mov_b32 s7, 0xf000
3293; VI-NEXT:    s_mov_b32 s6, -1
3294; VI-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:64
3295; VI-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:60
3296; VI-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:56
3297; VI-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:52
3298; VI-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:16
3299; VI-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:12
3300; VI-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:8
3301; VI-NEXT:    s_waitcnt vmcnt(7)
3302; VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
3303; VI-NEXT:    s_waitcnt vmcnt(0)
3304; VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
3305; VI-NEXT:    s_waitcnt vmcnt(0)
3306; VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3307; VI-NEXT:    s_waitcnt vmcnt(0)
3308; VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3309; VI-NEXT:    s_waitcnt vmcnt(0)
3310; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3311; VI-NEXT:    s_waitcnt vmcnt(0)
3312; VI-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:4
3313; VI-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:32
3314; VI-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:28
3315; VI-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:24
3316; VI-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:20
3317; VI-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48
3318; VI-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44
3319; VI-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:40
3320; VI-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:36
3321; VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3322; VI-NEXT:    s_waitcnt vmcnt(0)
3323; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3324; VI-NEXT:    s_waitcnt vmcnt(0)
3325; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3326; VI-NEXT:    s_waitcnt vmcnt(0)
3327; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3328; VI-NEXT:    s_waitcnt vmcnt(0)
3329; VI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[4:7], 0
3330; VI-NEXT:    s_waitcnt vmcnt(0)
3331; VI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[4:7], 0
3332; VI-NEXT:    s_waitcnt vmcnt(0)
3333; VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3334; VI-NEXT:    s_waitcnt vmcnt(0)
3335; VI-NEXT:    s_setpc_b64 s[30:31]
3336;
3337; GFX9-LABEL: void_func_v32i32_v8i32_v8f32:
3338; GFX9:       ; %bb.0:
3339; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3340; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32
3341; GFX9-NEXT:    s_mov_b32 s7, 0xf000
3342; GFX9-NEXT:    s_mov_b32 s6, -1
3343; GFX9-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:64
3344; GFX9-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:60
3345; GFX9-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:56
3346; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:52
3347; GFX9-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:16
3348; GFX9-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:12
3349; GFX9-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:8
3350; GFX9-NEXT:    s_waitcnt vmcnt(7)
3351; GFX9-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
3352; GFX9-NEXT:    s_waitcnt vmcnt(0)
3353; GFX9-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
3354; GFX9-NEXT:    s_waitcnt vmcnt(0)
3355; GFX9-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3356; GFX9-NEXT:    s_waitcnt vmcnt(0)
3357; GFX9-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3358; GFX9-NEXT:    s_waitcnt vmcnt(0)
3359; GFX9-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3360; GFX9-NEXT:    s_waitcnt vmcnt(0)
3361; GFX9-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:4
3362; GFX9-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:32
3363; GFX9-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:28
3364; GFX9-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:24
3365; GFX9-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:20
3366; GFX9-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:48
3367; GFX9-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:44
3368; GFX9-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:40
3369; GFX9-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:36
3370; GFX9-NEXT:    s_nop 0
3371; GFX9-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3372; GFX9-NEXT:    s_waitcnt vmcnt(0)
3373; GFX9-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3374; GFX9-NEXT:    s_waitcnt vmcnt(0)
3375; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3376; GFX9-NEXT:    s_waitcnt vmcnt(0)
3377; GFX9-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3378; GFX9-NEXT:    s_waitcnt vmcnt(0)
3379; GFX9-NEXT:    buffer_store_dwordx4 v[36:39], off, s[4:7], 0
3380; GFX9-NEXT:    s_waitcnt vmcnt(0)
3381; GFX9-NEXT:    buffer_store_dwordx4 v[32:35], off, s[4:7], 0
3382; GFX9-NEXT:    s_waitcnt vmcnt(0)
3383; GFX9-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3384; GFX9-NEXT:    s_waitcnt vmcnt(0)
3385; GFX9-NEXT:    s_setpc_b64 s[30:31]
3386;
3387; GFX11-LABEL: void_func_v32i32_v8i32_v8f32:
3388; GFX11:       ; %bb.0:
3389; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3390; GFX11-NEXT:    s_clause 0x10
3391; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:48
3392; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:44
3393; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:40
3394; GFX11-NEXT:    scratch_load_b32 v39, off, s32 offset:64
3395; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:60
3396; GFX11-NEXT:    scratch_load_b32 v31, off, s32
3397; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:56
3398; GFX11-NEXT:    scratch_load_b32 v51, off, s32 offset:16
3399; GFX11-NEXT:    scratch_load_b32 v50, off, s32 offset:12
3400; GFX11-NEXT:    scratch_load_b32 v49, off, s32 offset:8
3401; GFX11-NEXT:    scratch_load_b32 v55, off, s32 offset:32
3402; GFX11-NEXT:    scratch_load_b32 v54, off, s32 offset:28
3403; GFX11-NEXT:    scratch_load_b32 v53, off, s32 offset:24
3404; GFX11-NEXT:    scratch_load_b32 v52, off, s32 offset:20
3405; GFX11-NEXT:    scratch_load_b32 v48, off, s32 offset:4
3406; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:52
3407; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:36
3408; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
3409; GFX11-NEXT:    s_mov_b32 s2, -1
3410; GFX11-NEXT:    s_waitcnt vmcnt(11)
3411; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
3412; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3413; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
3414; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3415; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
3416; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3417; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
3418; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3419; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
3420; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3421; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
3422; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3423; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
3424; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3425; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
3426; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3427; GFX11-NEXT:    s_waitcnt vmcnt(3)
3428; GFX11-NEXT:    buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
3429; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3430; GFX11-NEXT:    s_waitcnt vmcnt(2)
3431; GFX11-NEXT:    buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
3432; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3433; GFX11-NEXT:    s_waitcnt vmcnt(1)
3434; GFX11-NEXT:    buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
3435; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3436; GFX11-NEXT:    s_waitcnt vmcnt(0)
3437; GFX11-NEXT:    buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
3438; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3439; GFX11-NEXT:    s_setpc_b64 s[30:31]
3440  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
3441  store volatile <8 x i32> %arg1, ptr addrspace(1) undef
3442  store volatile <8 x float> %arg2, ptr addrspace(1) undef
3443  ret void
3444}
3445
3446define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, <16 x float> %arg2) #0 {
3447; CI-LABEL: void_func_v32i32_v16i32_v16f32:
3448; CI:       ; %bb.0:
3449; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3450; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
3451; CI-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:64
3452; CI-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:60
3453; CI-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:56
3454; CI-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:52
3455; CI-NEXT:    s_mov_b32 s7, 0xf000
3456; CI-NEXT:    s_mov_b32 s6, -1
3457; CI-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:48
3458; CI-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:44
3459; CI-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:40
3460; CI-NEXT:    s_waitcnt vmcnt(7)
3461; CI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
3462; CI-NEXT:    s_waitcnt vmcnt(0)
3463; CI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
3464; CI-NEXT:    s_waitcnt vmcnt(0)
3465; CI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3466; CI-NEXT:    s_waitcnt vmcnt(0)
3467; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3468; CI-NEXT:    s_waitcnt vmcnt(0)
3469; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3470; CI-NEXT:    s_waitcnt vmcnt(0)
3471; CI-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:36
3472; CI-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:32
3473; CI-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:28
3474; CI-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:24
3475; CI-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:20
3476; CI-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:16
3477; CI-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:12
3478; CI-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:8
3479; CI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3480; CI-NEXT:    s_waitcnt vmcnt(0)
3481; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3482; CI-NEXT:    s_waitcnt vmcnt(0)
3483; CI-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:4
3484; CI-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:96
3485; CI-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:92
3486; CI-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:88
3487; CI-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:84
3488; CI-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:112
3489; CI-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:108
3490; CI-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:104
3491; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3492; CI-NEXT:    s_waitcnt vmcnt(0)
3493; CI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[4:7], 0
3494; CI-NEXT:    s_waitcnt vmcnt(0)
3495; CI-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:100
3496; CI-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:128
3497; CI-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:124
3498; CI-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120
3499; CI-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116
3500; CI-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:80
3501; CI-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:76
3502; CI-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:72
3503; CI-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:68
3504; CI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[4:7], 0
3505; CI-NEXT:    s_waitcnt vmcnt(0)
3506; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3507; CI-NEXT:    s_waitcnt vmcnt(0)
3508; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3509; CI-NEXT:    s_waitcnt vmcnt(0)
3510; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3511; CI-NEXT:    s_waitcnt vmcnt(0)
3512; CI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3513; CI-NEXT:    s_waitcnt vmcnt(0)
3514; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3515; CI-NEXT:    s_waitcnt vmcnt(0)
3516; CI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3517; CI-NEXT:    s_waitcnt vmcnt(0)
3518; CI-NEXT:    s_setpc_b64 s[30:31]
3519;
3520; VI-LABEL: void_func_v32i32_v16i32_v16f32:
3521; VI:       ; %bb.0:
3522; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3523; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
3524; VI-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:64
3525; VI-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:60
3526; VI-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:56
3527; VI-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:52
3528; VI-NEXT:    s_mov_b32 s7, 0xf000
3529; VI-NEXT:    s_mov_b32 s6, -1
3530; VI-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:48
3531; VI-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:44
3532; VI-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:40
3533; VI-NEXT:    s_waitcnt vmcnt(7)
3534; VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
3535; VI-NEXT:    s_waitcnt vmcnt(0)
3536; VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
3537; VI-NEXT:    s_waitcnt vmcnt(0)
3538; VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3539; VI-NEXT:    s_waitcnt vmcnt(0)
3540; VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3541; VI-NEXT:    s_waitcnt vmcnt(0)
3542; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3543; VI-NEXT:    s_waitcnt vmcnt(0)
3544; VI-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:36
3545; VI-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:32
3546; VI-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:28
3547; VI-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:24
3548; VI-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:20
3549; VI-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:16
3550; VI-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:12
3551; VI-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:8
3552; VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3553; VI-NEXT:    s_waitcnt vmcnt(0)
3554; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3555; VI-NEXT:    s_waitcnt vmcnt(0)
3556; VI-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:4
3557; VI-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:96
3558; VI-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:92
3559; VI-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:88
3560; VI-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:84
3561; VI-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:112
3562; VI-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:108
3563; VI-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:104
3564; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3565; VI-NEXT:    s_waitcnt vmcnt(0)
3566; VI-NEXT:    buffer_store_dwordx4 v[32:35], off, s[4:7], 0
3567; VI-NEXT:    s_waitcnt vmcnt(0)
3568; VI-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:100
3569; VI-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:128
3570; VI-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:124
3571; VI-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120
3572; VI-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116
3573; VI-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:80
3574; VI-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:76
3575; VI-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:72
3576; VI-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:68
3577; VI-NEXT:    buffer_store_dwordx4 v[36:39], off, s[4:7], 0
3578; VI-NEXT:    s_waitcnt vmcnt(0)
3579; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3580; VI-NEXT:    s_waitcnt vmcnt(0)
3581; VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3582; VI-NEXT:    s_waitcnt vmcnt(0)
3583; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3584; VI-NEXT:    s_waitcnt vmcnt(0)
3585; VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3586; VI-NEXT:    s_waitcnt vmcnt(0)
3587; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3588; VI-NEXT:    s_waitcnt vmcnt(0)
3589; VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3590; VI-NEXT:    s_waitcnt vmcnt(0)
3591; VI-NEXT:    s_setpc_b64 s[30:31]
3592;
3593; GFX9-LABEL: void_func_v32i32_v16i32_v16f32:
3594; GFX9:       ; %bb.0:
3595; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3596; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32
3597; GFX9-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:64
3598; GFX9-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:60
3599; GFX9-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:56
3600; GFX9-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:52
3601; GFX9-NEXT:    s_mov_b32 s7, 0xf000
3602; GFX9-NEXT:    s_mov_b32 s6, -1
3603; GFX9-NEXT:    buffer_load_dword v39, off, s[0:3], s32 offset:48
3604; GFX9-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:44
3605; GFX9-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:40
3606; GFX9-NEXT:    s_waitcnt vmcnt(7)
3607; GFX9-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
3608; GFX9-NEXT:    s_waitcnt vmcnt(0)
3609; GFX9-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
3610; GFX9-NEXT:    s_waitcnt vmcnt(0)
3611; GFX9-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3612; GFX9-NEXT:    s_waitcnt vmcnt(0)
3613; GFX9-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3614; GFX9-NEXT:    s_waitcnt vmcnt(0)
3615; GFX9-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3616; GFX9-NEXT:    s_waitcnt vmcnt(0)
3617; GFX9-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:36
3618; GFX9-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:32
3619; GFX9-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:28
3620; GFX9-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:24
3621; GFX9-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:20
3622; GFX9-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:16
3623; GFX9-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:12
3624; GFX9-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:8
3625; GFX9-NEXT:    s_nop 0
3626; GFX9-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3627; GFX9-NEXT:    s_waitcnt vmcnt(0)
3628; GFX9-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3629; GFX9-NEXT:    s_waitcnt vmcnt(0)
3630; GFX9-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:4
3631; GFX9-NEXT:    buffer_load_dword v7, off, s[0:3], s32 offset:96
3632; GFX9-NEXT:    buffer_load_dword v6, off, s[0:3], s32 offset:92
3633; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], s32 offset:88
3634; GFX9-NEXT:    buffer_load_dword v4, off, s[0:3], s32 offset:84
3635; GFX9-NEXT:    buffer_load_dword v11, off, s[0:3], s32 offset:112
3636; GFX9-NEXT:    buffer_load_dword v10, off, s[0:3], s32 offset:108
3637; GFX9-NEXT:    buffer_load_dword v9, off, s[0:3], s32 offset:104
3638; GFX9-NEXT:    s_nop 0
3639; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3640; GFX9-NEXT:    s_waitcnt vmcnt(0)
3641; GFX9-NEXT:    buffer_store_dwordx4 v[32:35], off, s[4:7], 0
3642; GFX9-NEXT:    s_waitcnt vmcnt(0)
3643; GFX9-NEXT:    buffer_load_dword v8, off, s[0:3], s32 offset:100
3644; GFX9-NEXT:    buffer_load_dword v3, off, s[0:3], s32 offset:128
3645; GFX9-NEXT:    buffer_load_dword v2, off, s[0:3], s32 offset:124
3646; GFX9-NEXT:    buffer_load_dword v1, off, s[0:3], s32 offset:120
3647; GFX9-NEXT:    buffer_load_dword v0, off, s[0:3], s32 offset:116
3648; GFX9-NEXT:    buffer_load_dword v23, off, s[0:3], s32 offset:80
3649; GFX9-NEXT:    buffer_load_dword v22, off, s[0:3], s32 offset:76
3650; GFX9-NEXT:    buffer_load_dword v21, off, s[0:3], s32 offset:72
3651; GFX9-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:68
3652; GFX9-NEXT:    s_nop 0
3653; GFX9-NEXT:    buffer_store_dwordx4 v[36:39], off, s[4:7], 0
3654; GFX9-NEXT:    s_waitcnt vmcnt(0)
3655; GFX9-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3656; GFX9-NEXT:    s_waitcnt vmcnt(0)
3657; GFX9-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3658; GFX9-NEXT:    s_waitcnt vmcnt(0)
3659; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3660; GFX9-NEXT:    s_waitcnt vmcnt(0)
3661; GFX9-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3662; GFX9-NEXT:    s_waitcnt vmcnt(0)
3663; GFX9-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3664; GFX9-NEXT:    s_waitcnt vmcnt(0)
3665; GFX9-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3666; GFX9-NEXT:    s_waitcnt vmcnt(0)
3667; GFX9-NEXT:    s_setpc_b64 s[30:31]
3668;
3669; GFX11-LABEL: void_func_v32i32_v16i32_v16f32:
3670; GFX11:       ; %bb.0:
3671; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3672; GFX11-NEXT:    s_clause 0x1f
3673; GFX11-NEXT:    scratch_load_b32 v35, off, s32 offset:80
3674; GFX11-NEXT:    scratch_load_b32 v34, off, s32 offset:76
3675; GFX11-NEXT:    scratch_load_b32 v33, off, s32 offset:72
3676; GFX11-NEXT:    scratch_load_b32 v39, off, s32 offset:96
3677; GFX11-NEXT:    scratch_load_b32 v38, off, s32 offset:92
3678; GFX11-NEXT:    scratch_load_b32 v37, off, s32 offset:88
3679; GFX11-NEXT:    scratch_load_b32 v51, off, s32 offset:112
3680; GFX11-NEXT:    scratch_load_b32 v50, off, s32 offset:108
3681; GFX11-NEXT:    scratch_load_b32 v49, off, s32 offset:104
3682; GFX11-NEXT:    scratch_load_b32 v55, off, s32 offset:128
3683; GFX11-NEXT:    scratch_load_b32 v54, off, s32 offset:124
3684; GFX11-NEXT:    scratch_load_b32 v53, off, s32 offset:120
3685; GFX11-NEXT:    scratch_load_b32 v67, off, s32 offset:16
3686; GFX11-NEXT:    scratch_load_b32 v66, off, s32 offset:12
3687; GFX11-NEXT:    scratch_load_b32 v65, off, s32 offset:8
3688; GFX11-NEXT:    scratch_load_b32 v71, off, s32 offset:32
3689; GFX11-NEXT:    scratch_load_b32 v70, off, s32 offset:28
3690; GFX11-NEXT:    scratch_load_b32 v31, off, s32
3691; GFX11-NEXT:    scratch_load_b32 v69, off, s32 offset:24
3692; GFX11-NEXT:    scratch_load_b32 v83, off, s32 offset:48
3693; GFX11-NEXT:    scratch_load_b32 v82, off, s32 offset:44
3694; GFX11-NEXT:    scratch_load_b32 v81, off, s32 offset:40
3695; GFX11-NEXT:    scratch_load_b32 v87, off, s32 offset:64
3696; GFX11-NEXT:    scratch_load_b32 v86, off, s32 offset:60
3697; GFX11-NEXT:    scratch_load_b32 v85, off, s32 offset:56
3698; GFX11-NEXT:    scratch_load_b32 v84, off, s32 offset:52
3699; GFX11-NEXT:    scratch_load_b32 v80, off, s32 offset:36
3700; GFX11-NEXT:    scratch_load_b32 v68, off, s32 offset:20
3701; GFX11-NEXT:    scratch_load_b32 v64, off, s32 offset:4
3702; GFX11-NEXT:    scratch_load_b32 v52, off, s32 offset:116
3703; GFX11-NEXT:    scratch_load_b32 v48, off, s32 offset:100
3704; GFX11-NEXT:    scratch_load_b32 v36, off, s32 offset:84
3705; GFX11-NEXT:    scratch_load_b32 v32, off, s32 offset:68
3706; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
3707; GFX11-NEXT:    s_mov_b32 s2, -1
3708; GFX11-NEXT:    s_waitcnt vmcnt(15)
3709; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
3710; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3711; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
3712; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3713; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
3714; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3715; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
3716; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3717; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
3718; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3719; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
3720; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3721; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
3722; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3723; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
3724; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3725; GFX11-NEXT:    s_waitcnt vmcnt(7)
3726; GFX11-NEXT:    buffer_store_b128 v[84:87], off, s[0:3], 0 dlc
3727; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3728; GFX11-NEXT:    s_waitcnt vmcnt(6)
3729; GFX11-NEXT:    buffer_store_b128 v[80:83], off, s[0:3], 0 dlc
3730; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3731; GFX11-NEXT:    s_waitcnt vmcnt(5)
3732; GFX11-NEXT:    buffer_store_b128 v[68:71], off, s[0:3], 0 dlc
3733; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3734; GFX11-NEXT:    s_waitcnt vmcnt(4)
3735; GFX11-NEXT:    buffer_store_b128 v[64:67], off, s[0:3], 0 dlc
3736; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3737; GFX11-NEXT:    s_waitcnt vmcnt(3)
3738; GFX11-NEXT:    buffer_store_b128 v[52:55], off, s[0:3], 0 dlc
3739; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3740; GFX11-NEXT:    s_waitcnt vmcnt(2)
3741; GFX11-NEXT:    buffer_store_b128 v[48:51], off, s[0:3], 0 dlc
3742; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3743; GFX11-NEXT:    s_waitcnt vmcnt(1)
3744; GFX11-NEXT:    buffer_store_b128 v[36:39], off, s[0:3], 0 dlc
3745; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3746; GFX11-NEXT:    s_waitcnt vmcnt(0)
3747; GFX11-NEXT:    buffer_store_b128 v[32:35], off, s[0:3], 0 dlc
3748; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3749; GFX11-NEXT:    s_setpc_b64 s[30:31]
3750  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
3751  store volatile <16 x i32> %arg1, ptr addrspace(1) undef
3752  store volatile <16 x float> %arg2, ptr addrspace(1) undef
3753  ret void
3754}
3755
3756; Make sure register v3 isn't wasted because 3-element vector types are promoted to 4-element vectors.
; The <3 x float> argument is followed by a scalar i32. The expected output for
; every target writes data from v0, v1, v2 and then v3, matching the three
; vector lanes followed by %arg1, so %arg1 is expected directly in v3.
3757define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 {
3758; CI-LABEL: void_func_v3f32_wasted_reg:
3759; CI:       ; %bb.0:
3760; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3761; CI-NEXT:    s_mov_b32 m0, -1
3762; CI-NEXT:    ds_write_b32 v0, v0
3763; CI-NEXT:    ds_write_b32 v0, v1
3764; CI-NEXT:    ds_write_b32 v0, v2
3765; CI-NEXT:    ds_write_b32 v0, v3
3766; CI-NEXT:    s_waitcnt lgkmcnt(0)
3767; CI-NEXT:    s_setpc_b64 s[30:31]
3768;
3769; VI-LABEL: void_func_v3f32_wasted_reg:
3770; VI:       ; %bb.0:
3771; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3772; VI-NEXT:    s_mov_b32 m0, -1
3773; VI-NEXT:    ds_write_b32 v0, v0
3774; VI-NEXT:    ds_write_b32 v0, v1
3775; VI-NEXT:    ds_write_b32 v0, v2
3776; VI-NEXT:    ds_write_b32 v0, v3
3777; VI-NEXT:    s_waitcnt lgkmcnt(0)
3778; VI-NEXT:    s_setpc_b64 s[30:31]
3779;
3780; GFX9-LABEL: void_func_v3f32_wasted_reg:
3781; GFX9:       ; %bb.0:
3782; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3783; GFX9-NEXT:    ds_write_b32 v0, v0
3784; GFX9-NEXT:    ds_write_b32 v0, v1
3785; GFX9-NEXT:    ds_write_b32 v0, v2
3786; GFX9-NEXT:    ds_write_b32 v0, v3
3787; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3788; GFX9-NEXT:    s_setpc_b64 s[30:31]
3789;
3790; GFX11-LABEL: void_func_v3f32_wasted_reg:
3791; GFX11:       ; %bb.0:
3792; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3793; GFX11-NEXT:    ds_store_b32 v0, v0
3794; GFX11-NEXT:    ds_store_b32 v0, v1
3795; GFX11-NEXT:    ds_store_b32 v0, v2
3796; GFX11-NEXT:    ds_store_b32 v0, v3
3797; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3798; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Pull the three lanes out individually so each one is stored on its own.
3799  %arg0.0 = extractelement <3 x float> %arg0, i32 0
3800  %arg0.1 = extractelement <3 x float> %arg0, i32 1
3801  %arg0.2 = extractelement <3 x float> %arg0, i32 2
  ; One volatile store per value to an undef addrspace(3) pointer; the expected
  ; output above has one ds write per store, in this same order.
3802  store volatile float %arg0.0, ptr addrspace(3) undef
3803  store volatile float %arg0.1, ptr addrspace(3) undef
3804  store volatile float %arg0.2, ptr addrspace(3) undef
  ; The trailing scalar argument is stored last; its data operand is v3 above.
3805  store volatile i32 %arg1, ptr addrspace(3) undef
3806  ret void
3807}
3808
; Same shape as void_func_v3f32_wasted_reg, but with a <3 x i32> vector argument
; followed by a scalar i32. The expected output for every target writes data
; from v0, v1, v2 and then v3, matching the three lanes followed by %arg1.
3809define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 {
3810; CI-LABEL: void_func_v3i32_wasted_reg:
3811; CI:       ; %bb.0:
3812; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3813; CI-NEXT:    s_mov_b32 m0, -1
3814; CI-NEXT:    ds_write_b32 v0, v0
3815; CI-NEXT:    ds_write_b32 v0, v1
3816; CI-NEXT:    ds_write_b32 v0, v2
3817; CI-NEXT:    ds_write_b32 v0, v3
3818; CI-NEXT:    s_waitcnt lgkmcnt(0)
3819; CI-NEXT:    s_setpc_b64 s[30:31]
3820;
3821; VI-LABEL: void_func_v3i32_wasted_reg:
3822; VI:       ; %bb.0:
3823; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3824; VI-NEXT:    s_mov_b32 m0, -1
3825; VI-NEXT:    ds_write_b32 v0, v0
3826; VI-NEXT:    ds_write_b32 v0, v1
3827; VI-NEXT:    ds_write_b32 v0, v2
3828; VI-NEXT:    ds_write_b32 v0, v3
3829; VI-NEXT:    s_waitcnt lgkmcnt(0)
3830; VI-NEXT:    s_setpc_b64 s[30:31]
3831;
3832; GFX9-LABEL: void_func_v3i32_wasted_reg:
3833; GFX9:       ; %bb.0:
3834; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3835; GFX9-NEXT:    ds_write_b32 v0, v0
3836; GFX9-NEXT:    ds_write_b32 v0, v1
3837; GFX9-NEXT:    ds_write_b32 v0, v2
3838; GFX9-NEXT:    ds_write_b32 v0, v3
3839; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
3840; GFX9-NEXT:    s_setpc_b64 s[30:31]
3841;
3842; GFX11-LABEL: void_func_v3i32_wasted_reg:
3843; GFX11:       ; %bb.0:
3844; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3845; GFX11-NEXT:    ds_store_b32 v0, v0
3846; GFX11-NEXT:    ds_store_b32 v0, v1
3847; GFX11-NEXT:    ds_store_b32 v0, v2
3848; GFX11-NEXT:    ds_store_b32 v0, v3
3849; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
3850; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Pull the three lanes out individually so each one is stored on its own.
3851  %arg0.0 = extractelement <3 x i32> %arg0, i32 0
3852  %arg0.1 = extractelement <3 x i32> %arg0, i32 1
3853  %arg0.2 = extractelement <3 x i32> %arg0, i32 2
  ; One volatile store per value to an undef addrspace(3) pointer; the expected
  ; output above has one ds write per store, in this same order.
3854  store volatile i32 %arg0.0, ptr addrspace(3) undef
3855  store volatile i32 %arg0.1, ptr addrspace(3) undef
3856  store volatile i32 %arg0.2, ptr addrspace(3) undef
  ; The trailing scalar argument is stored last; its data operand is v3 above.
3857  store volatile i32 %arg1, ptr addrspace(3) undef
3858  ret void
3859}
3860
3861; Check there is no crash for a volatile store of a <16 x i8> argument.
; The expected output for every target is sixteen single-byte buffer stores,
; issued from v15 down to v0, with a wait instruction after each store.
3862define void @void_func_volatile_v16i8(<16 x i8> %arg0) #0 {
3863; CIGFX89-LABEL: void_func_volatile_v16i8:
3864; CIGFX89:       ; %bb.0:
3865; CIGFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3866; CIGFX89-NEXT:    s_mov_b32 s7, 0xf000
3867; CIGFX89-NEXT:    s_mov_b32 s6, -1
3868; CIGFX89-NEXT:    buffer_store_byte v15, off, s[4:7], 0
3869; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3870; CIGFX89-NEXT:    buffer_store_byte v14, off, s[4:7], 0
3871; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3872; CIGFX89-NEXT:    buffer_store_byte v13, off, s[4:7], 0
3873; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3874; CIGFX89-NEXT:    buffer_store_byte v12, off, s[4:7], 0
3875; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3876; CIGFX89-NEXT:    buffer_store_byte v11, off, s[4:7], 0
3877; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3878; CIGFX89-NEXT:    buffer_store_byte v10, off, s[4:7], 0
3879; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3880; CIGFX89-NEXT:    buffer_store_byte v9, off, s[4:7], 0
3881; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3882; CIGFX89-NEXT:    buffer_store_byte v8, off, s[4:7], 0
3883; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3884; CIGFX89-NEXT:    buffer_store_byte v7, off, s[4:7], 0
3885; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3886; CIGFX89-NEXT:    buffer_store_byte v6, off, s[4:7], 0
3887; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3888; CIGFX89-NEXT:    buffer_store_byte v5, off, s[4:7], 0
3889; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3890; CIGFX89-NEXT:    buffer_store_byte v4, off, s[4:7], 0
3891; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3892; CIGFX89-NEXT:    buffer_store_byte v3, off, s[4:7], 0
3893; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3894; CIGFX89-NEXT:    buffer_store_byte v2, off, s[4:7], 0
3895; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3896; CIGFX89-NEXT:    buffer_store_byte v1, off, s[4:7], 0
3897; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3898; CIGFX89-NEXT:    buffer_store_byte v0, off, s[4:7], 0
3899; CIGFX89-NEXT:    s_waitcnt vmcnt(0)
3900; CIGFX89-NEXT:    s_setpc_b64 s[30:31]
3901;
3902; GFX11-LABEL: void_func_volatile_v16i8:
3903; GFX11:       ; %bb.0:
3904; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3905; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
3906; GFX11-NEXT:    s_mov_b32 s2, -1
3907; GFX11-NEXT:    buffer_store_b8 v15, off, s[0:3], 0 dlc
3908; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3909; GFX11-NEXT:    buffer_store_b8 v14, off, s[0:3], 0 dlc
3910; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3911; GFX11-NEXT:    buffer_store_b8 v13, off, s[0:3], 0 dlc
3912; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3913; GFX11-NEXT:    buffer_store_b8 v12, off, s[0:3], 0 dlc
3914; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3915; GFX11-NEXT:    buffer_store_b8 v11, off, s[0:3], 0 dlc
3916; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3917; GFX11-NEXT:    buffer_store_b8 v10, off, s[0:3], 0 dlc
3918; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3919; GFX11-NEXT:    buffer_store_b8 v9, off, s[0:3], 0 dlc
3920; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3921; GFX11-NEXT:    buffer_store_b8 v8, off, s[0:3], 0 dlc
3922; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3923; GFX11-NEXT:    buffer_store_b8 v7, off, s[0:3], 0 dlc
3924; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3925; GFX11-NEXT:    buffer_store_b8 v6, off, s[0:3], 0 dlc
3926; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3927; GFX11-NEXT:    buffer_store_b8 v5, off, s[0:3], 0 dlc
3928; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3929; GFX11-NEXT:    buffer_store_b8 v4, off, s[0:3], 0 dlc
3930; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3931; GFX11-NEXT:    buffer_store_b8 v3, off, s[0:3], 0 dlc
3932; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3933; GFX11-NEXT:    buffer_store_b8 v2, off, s[0:3], 0 dlc
3934; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3935; GFX11-NEXT:    buffer_store_b8 v1, off, s[0:3], 0 dlc
3936; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3937; GFX11-NEXT:    buffer_store_b8 v0, off, s[0:3], 0 dlc
3938; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
3939; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; A single volatile vector store to an undef addrspace(1) pointer; the
  ; expected output above shows it emitted as sixteen separate byte stores.
3940  store volatile <16 x i8> %arg0, ptr addrspace(1) undef
3941  ret void
3942}
3943
3944; Check there is no crash.
3945define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 {
3946; CI-LABEL: void_func_v32i32_v16i8:
3947; CI:       ; %bb.0:
3948; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3949; CI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
3950; CI-NEXT:    s_mov_b32 s7, 0xf000
3951; CI-NEXT:    s_mov_b32 s6, -1
3952; CI-NEXT:    buffer_load_dword v32, off, s[0:3], s32 offset:60
3953; CI-NEXT:    buffer_load_dword v33, off, s[0:3], s32 offset:64
3954; CI-NEXT:    buffer_load_dword v34, off, s[0:3], s32 offset:48
3955; CI-NEXT:    buffer_load_dword v35, off, s[0:3], s32 offset:52
3956; CI-NEXT:    buffer_load_dword v36, off, s[0:3], s32 offset:56
3957; CI-NEXT:    buffer_load_dword v37, off, s[0:3], s32 offset:36
3958; CI-NEXT:    buffer_load_dword v38, off, s[0:3], s32 offset:40
3959; CI-NEXT:    s_waitcnt vmcnt(7)
3960; CI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
3961; CI-NEXT:    s_waitcnt vmcnt(0)
3962; CI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
3963; CI-NEXT:    s_waitcnt vmcnt(0)
3964; CI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
3965; CI-NEXT:    s_waitcnt vmcnt(0)
3966; CI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
3967; CI-NEXT:    s_waitcnt vmcnt(0)
3968; CI-NEXT:    buffer_load_dword v16, off, s[0:3], s32 offset:28
3969; CI-NEXT:    buffer_load_dword v17, off, s[0:3], s32 offset:32
3970; CI-NEXT:    buffer_load_dword v18, off, s[0:3], s32 offset:20
3971; CI-NEXT:    buffer_load_dword v19, off, s[0:3], s32 offset:24
3972; CI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
3973; CI-NEXT:    s_waitcnt vmcnt(0)
3974; CI-NEXT:    buffer_load_dword v12, off, s[0:3], s32 offset:16
3975; CI-NEXT:    buffer_load_dword v13, off, s[0:3], s32 offset:12
3976; CI-NEXT:    buffer_load_dword v14, off, s[0:3], s32 offset:8
3977; CI-NEXT:    buffer_load_dword v15, off, s[0:3], s32 offset:4
3978; CI-NEXT:    buffer_load_dword v20, off, s[0:3], s32 offset:44
3979; CI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
3980; CI-NEXT:    s_waitcnt vmcnt(0)
3981; CI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
3982; CI-NEXT:    s_waitcnt vmcnt(0)
3983; CI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
3984; CI-NEXT:    s_waitcnt vmcnt(0)
3985; CI-NEXT:    buffer_store_byte v33, off, s[4:7], 0
3986; CI-NEXT:    s_waitcnt vmcnt(0)
3987; CI-NEXT:    buffer_store_byte v32, off, s[4:7], 0
3988; CI-NEXT:    s_waitcnt vmcnt(0)
3989; CI-NEXT:    buffer_store_byte v36, off, s[4:7], 0
3990; CI-NEXT:    s_waitcnt vmcnt(0)
3991; CI-NEXT:    buffer_store_byte v35, off, s[4:7], 0
3992; CI-NEXT:    s_waitcnt vmcnt(0)
3993; CI-NEXT:    buffer_store_byte v34, off, s[4:7], 0
3994; CI-NEXT:    s_waitcnt vmcnt(0)
3995; CI-NEXT:    buffer_store_byte v20, off, s[4:7], 0
3996; CI-NEXT:    s_waitcnt vmcnt(0)
3997; CI-NEXT:    buffer_store_byte v38, off, s[4:7], 0
3998; CI-NEXT:    s_waitcnt vmcnt(0)
3999; CI-NEXT:    buffer_store_byte v37, off, s[4:7], 0
4000; CI-NEXT:    s_waitcnt vmcnt(0)
4001; CI-NEXT:    buffer_store_byte v17, off, s[4:7], 0
4002; CI-NEXT:    s_waitcnt vmcnt(0)
4003; CI-NEXT:    buffer_store_byte v16, off, s[4:7], 0
4004; CI-NEXT:    s_waitcnt vmcnt(0)
4005; CI-NEXT:    buffer_store_byte v19, off, s[4:7], 0
4006; CI-NEXT:    s_waitcnt vmcnt(0)
4007; CI-NEXT:    buffer_store_byte v18, off, s[4:7], 0
4008; CI-NEXT:    s_waitcnt vmcnt(0)
4009; CI-NEXT:    buffer_store_byte v12, off, s[4:7], 0
4010; CI-NEXT:    s_waitcnt vmcnt(0)
4011; CI-NEXT:    buffer_store_byte v13, off, s[4:7], 0
4012; CI-NEXT:    s_waitcnt vmcnt(0)
4013; CI-NEXT:    buffer_store_byte v14, off, s[4:7], 0
4014; CI-NEXT:    s_waitcnt vmcnt(0)
4015; CI-NEXT:    buffer_store_byte v15, off, s[4:7], 0
4016; CI-NEXT:    s_waitcnt vmcnt(0)
4017; CI-NEXT:    s_setpc_b64 s[30:31]
4018;
4019; VI-LABEL: void_func_v32i32_v16i8:
4020; VI:       ; %bb.0:
4021; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4022; VI-NEXT:    buffer_load_dword v31, off, s[0:3], s32
4023; VI-NEXT:    s_mov_b32 s7, 0xf000
4024; VI-NEXT:    s_mov_b32 s6, -1
4025; VI-NEXT:    buffer_load_ubyte v32, off, s[0:3], s32 offset:60
4026; VI-NEXT:    buffer_load_ubyte v33, off, s[0:3], s32 offset:64
4027; VI-NEXT:    buffer_load_ubyte v34, off, s[0:3], s32 offset:48
4028; VI-NEXT:    buffer_load_ubyte v35, off, s[0:3], s32 offset:52
4029; VI-NEXT:    buffer_load_ubyte v36, off, s[0:3], s32 offset:56
4030; VI-NEXT:    buffer_load_ubyte v37, off, s[0:3], s32 offset:36
4031; VI-NEXT:    buffer_load_ubyte v38, off, s[0:3], s32 offset:40
4032; VI-NEXT:    s_waitcnt vmcnt(7)
4033; VI-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
4034; VI-NEXT:    s_waitcnt vmcnt(0)
4035; VI-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
4036; VI-NEXT:    s_waitcnt vmcnt(0)
4037; VI-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
4038; VI-NEXT:    s_waitcnt vmcnt(0)
4039; VI-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
4040; VI-NEXT:    s_waitcnt vmcnt(0)
4041; VI-NEXT:    buffer_load_ubyte v16, off, s[0:3], s32 offset:28
4042; VI-NEXT:    buffer_load_ubyte v17, off, s[0:3], s32 offset:32
4043; VI-NEXT:    buffer_load_ubyte v18, off, s[0:3], s32 offset:20
4044; VI-NEXT:    buffer_load_ubyte v19, off, s[0:3], s32 offset:24
4045; VI-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
4046; VI-NEXT:    s_waitcnt vmcnt(0)
4047; VI-NEXT:    buffer_load_ubyte v12, off, s[0:3], s32 offset:16
4048; VI-NEXT:    buffer_load_ubyte v13, off, s[0:3], s32 offset:12
4049; VI-NEXT:    buffer_load_ubyte v14, off, s[0:3], s32 offset:8
4050; VI-NEXT:    buffer_load_ubyte v15, off, s[0:3], s32 offset:4
4051; VI-NEXT:    buffer_load_ubyte v20, off, s[0:3], s32 offset:44
4052; VI-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
4053; VI-NEXT:    s_waitcnt vmcnt(0)
4054; VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
4055; VI-NEXT:    s_waitcnt vmcnt(0)
4056; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4057; VI-NEXT:    s_waitcnt vmcnt(0)
4058; VI-NEXT:    buffer_store_byte v33, off, s[4:7], 0
4059; VI-NEXT:    s_waitcnt vmcnt(0)
4060; VI-NEXT:    buffer_store_byte v32, off, s[4:7], 0
4061; VI-NEXT:    s_waitcnt vmcnt(0)
4062; VI-NEXT:    buffer_store_byte v36, off, s[4:7], 0
4063; VI-NEXT:    s_waitcnt vmcnt(0)
4064; VI-NEXT:    buffer_store_byte v35, off, s[4:7], 0
4065; VI-NEXT:    s_waitcnt vmcnt(0)
4066; VI-NEXT:    buffer_store_byte v34, off, s[4:7], 0
4067; VI-NEXT:    s_waitcnt vmcnt(0)
4068; VI-NEXT:    buffer_store_byte v20, off, s[4:7], 0
4069; VI-NEXT:    s_waitcnt vmcnt(0)
4070; VI-NEXT:    buffer_store_byte v38, off, s[4:7], 0
4071; VI-NEXT:    s_waitcnt vmcnt(0)
4072; VI-NEXT:    buffer_store_byte v37, off, s[4:7], 0
4073; VI-NEXT:    s_waitcnt vmcnt(0)
4074; VI-NEXT:    buffer_store_byte v17, off, s[4:7], 0
4075; VI-NEXT:    s_waitcnt vmcnt(0)
4076; VI-NEXT:    buffer_store_byte v16, off, s[4:7], 0
4077; VI-NEXT:    s_waitcnt vmcnt(0)
4078; VI-NEXT:    buffer_store_byte v19, off, s[4:7], 0
4079; VI-NEXT:    s_waitcnt vmcnt(0)
4080; VI-NEXT:    buffer_store_byte v18, off, s[4:7], 0
4081; VI-NEXT:    s_waitcnt vmcnt(0)
4082; VI-NEXT:    buffer_store_byte v12, off, s[4:7], 0
4083; VI-NEXT:    s_waitcnt vmcnt(0)
4084; VI-NEXT:    buffer_store_byte v13, off, s[4:7], 0
4085; VI-NEXT:    s_waitcnt vmcnt(0)
4086; VI-NEXT:    buffer_store_byte v14, off, s[4:7], 0
4087; VI-NEXT:    s_waitcnt vmcnt(0)
4088; VI-NEXT:    buffer_store_byte v15, off, s[4:7], 0
4089; VI-NEXT:    s_waitcnt vmcnt(0)
4090; VI-NEXT:    s_setpc_b64 s[30:31]
4091;
4092; GFX9-LABEL: void_func_v32i32_v16i8:
4093; GFX9:       ; %bb.0:
4094; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4095; GFX9-NEXT:    buffer_load_dword v31, off, s[0:3], s32
4096; GFX9-NEXT:    s_mov_b32 s7, 0xf000
4097; GFX9-NEXT:    s_mov_b32 s6, -1
4098; GFX9-NEXT:    buffer_load_ubyte v32, off, s[0:3], s32 offset:60
4099; GFX9-NEXT:    buffer_load_ubyte v33, off, s[0:3], s32 offset:64
4100; GFX9-NEXT:    buffer_load_ubyte v34, off, s[0:3], s32 offset:48
4101; GFX9-NEXT:    buffer_load_ubyte v35, off, s[0:3], s32 offset:52
4102; GFX9-NEXT:    buffer_load_ubyte v36, off, s[0:3], s32 offset:56
4103; GFX9-NEXT:    buffer_load_ubyte v37, off, s[0:3], s32 offset:36
4104; GFX9-NEXT:    buffer_load_ubyte v38, off, s[0:3], s32 offset:40
4105; GFX9-NEXT:    s_waitcnt vmcnt(7)
4106; GFX9-NEXT:    buffer_store_dwordx4 v[28:31], off, s[4:7], 0
4107; GFX9-NEXT:    s_waitcnt vmcnt(0)
4108; GFX9-NEXT:    buffer_store_dwordx4 v[24:27], off, s[4:7], 0
4109; GFX9-NEXT:    s_waitcnt vmcnt(0)
4110; GFX9-NEXT:    buffer_store_dwordx4 v[20:23], off, s[4:7], 0
4111; GFX9-NEXT:    s_waitcnt vmcnt(0)
4112; GFX9-NEXT:    buffer_store_dwordx4 v[16:19], off, s[4:7], 0
4113; GFX9-NEXT:    s_waitcnt vmcnt(0)
4114; GFX9-NEXT:    buffer_load_ubyte v16, off, s[0:3], s32 offset:28
4115; GFX9-NEXT:    buffer_load_ubyte v17, off, s[0:3], s32 offset:32
4116; GFX9-NEXT:    buffer_load_ubyte v18, off, s[0:3], s32 offset:20
4117; GFX9-NEXT:    buffer_load_ubyte v19, off, s[0:3], s32 offset:24
4118; GFX9-NEXT:    buffer_load_ubyte v20, off, s[0:3], s32 offset:44
4119; GFX9-NEXT:    s_nop 0
4120; GFX9-NEXT:    buffer_store_dwordx4 v[12:15], off, s[4:7], 0
4121; GFX9-NEXT:    s_waitcnt vmcnt(0)
4122; GFX9-NEXT:    buffer_load_ubyte v12, off, s[0:3], s32 offset:16
4123; GFX9-NEXT:    buffer_load_ubyte v13, off, s[0:3], s32 offset:12
4124; GFX9-NEXT:    buffer_load_ubyte v14, off, s[0:3], s32 offset:8
4125; GFX9-NEXT:    buffer_load_ubyte v15, off, s[0:3], s32 offset:4
4126; GFX9-NEXT:    s_nop 0
4127; GFX9-NEXT:    buffer_store_dwordx4 v[8:11], off, s[4:7], 0
4128; GFX9-NEXT:    s_waitcnt vmcnt(0)
4129; GFX9-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
4130; GFX9-NEXT:    s_waitcnt vmcnt(0)
4131; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4132; GFX9-NEXT:    s_waitcnt vmcnt(0)
4133; GFX9-NEXT:    buffer_store_byte v33, off, s[4:7], 0
4134; GFX9-NEXT:    s_waitcnt vmcnt(0)
4135; GFX9-NEXT:    buffer_store_byte v32, off, s[4:7], 0
4136; GFX9-NEXT:    s_waitcnt vmcnt(0)
4137; GFX9-NEXT:    buffer_store_byte v36, off, s[4:7], 0
4138; GFX9-NEXT:    s_waitcnt vmcnt(0)
4139; GFX9-NEXT:    buffer_store_byte v35, off, s[4:7], 0
4140; GFX9-NEXT:    s_waitcnt vmcnt(0)
4141; GFX9-NEXT:    buffer_store_byte v34, off, s[4:7], 0
4142; GFX9-NEXT:    s_waitcnt vmcnt(0)
4143; GFX9-NEXT:    buffer_store_byte v20, off, s[4:7], 0
4144; GFX9-NEXT:    s_waitcnt vmcnt(0)
4145; GFX9-NEXT:    buffer_store_byte v38, off, s[4:7], 0
4146; GFX9-NEXT:    s_waitcnt vmcnt(0)
4147; GFX9-NEXT:    buffer_store_byte v37, off, s[4:7], 0
4148; GFX9-NEXT:    s_waitcnt vmcnt(0)
4149; GFX9-NEXT:    buffer_store_byte v17, off, s[4:7], 0
4150; GFX9-NEXT:    s_waitcnt vmcnt(0)
4151; GFX9-NEXT:    buffer_store_byte v16, off, s[4:7], 0
4152; GFX9-NEXT:    s_waitcnt vmcnt(0)
4153; GFX9-NEXT:    buffer_store_byte v19, off, s[4:7], 0
4154; GFX9-NEXT:    s_waitcnt vmcnt(0)
4155; GFX9-NEXT:    buffer_store_byte v18, off, s[4:7], 0
4156; GFX9-NEXT:    s_waitcnt vmcnt(0)
4157; GFX9-NEXT:    buffer_store_byte v12, off, s[4:7], 0
4158; GFX9-NEXT:    s_waitcnt vmcnt(0)
4159; GFX9-NEXT:    buffer_store_byte v13, off, s[4:7], 0
4160; GFX9-NEXT:    s_waitcnt vmcnt(0)
4161; GFX9-NEXT:    buffer_store_byte v14, off, s[4:7], 0
4162; GFX9-NEXT:    s_waitcnt vmcnt(0)
4163; GFX9-NEXT:    buffer_store_byte v15, off, s[4:7], 0
4164; GFX9-NEXT:    s_waitcnt vmcnt(0)
4165; GFX9-NEXT:    s_setpc_b64 s[30:31]
4166;
4167; GFX11-LABEL: void_func_v32i32_v16i8:
4168; GFX11:       ; %bb.0:
4169; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4170; GFX11-NEXT:    s_clause 0x10
4171; GFX11-NEXT:    scratch_load_b32 v31, off, s32
4172; GFX11-NEXT:    scratch_load_u8 v32, off, s32 offset:64
4173; GFX11-NEXT:    scratch_load_u8 v33, off, s32 offset:60
4174; GFX11-NEXT:    scratch_load_u8 v34, off, s32 offset:56
4175; GFX11-NEXT:    scratch_load_u8 v35, off, s32 offset:52
4176; GFX11-NEXT:    scratch_load_u8 v36, off, s32 offset:48
4177; GFX11-NEXT:    scratch_load_u8 v37, off, s32 offset:44
4178; GFX11-NEXT:    scratch_load_u8 v38, off, s32 offset:40
4179; GFX11-NEXT:    scratch_load_u8 v39, off, s32 offset:36
4180; GFX11-NEXT:    scratch_load_u8 v48, off, s32 offset:32
4181; GFX11-NEXT:    scratch_load_u8 v49, off, s32 offset:28
4182; GFX11-NEXT:    scratch_load_u8 v50, off, s32 offset:24
4183; GFX11-NEXT:    scratch_load_u8 v51, off, s32 offset:20
4184; GFX11-NEXT:    scratch_load_u8 v52, off, s32 offset:16
4185; GFX11-NEXT:    scratch_load_u8 v53, off, s32 offset:12
4186; GFX11-NEXT:    scratch_load_u8 v54, off, s32 offset:8
4187; GFX11-NEXT:    scratch_load_u8 v55, off, s32 offset:4
4188; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
4189; GFX11-NEXT:    s_mov_b32 s2, -1
4190; GFX11-NEXT:    s_waitcnt vmcnt(16)
4191; GFX11-NEXT:    buffer_store_b128 v[28:31], off, s[0:3], 0 dlc
4192; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4193; GFX11-NEXT:    buffer_store_b128 v[24:27], off, s[0:3], 0 dlc
4194; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4195; GFX11-NEXT:    buffer_store_b128 v[20:23], off, s[0:3], 0 dlc
4196; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4197; GFX11-NEXT:    buffer_store_b128 v[16:19], off, s[0:3], 0 dlc
4198; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4199; GFX11-NEXT:    buffer_store_b128 v[12:15], off, s[0:3], 0 dlc
4200; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4201; GFX11-NEXT:    buffer_store_b128 v[8:11], off, s[0:3], 0 dlc
4202; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4203; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0 dlc
4204; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4205; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0 dlc
4206; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4207; GFX11-NEXT:    s_waitcnt vmcnt(15)
4208; GFX11-NEXT:    buffer_store_b8 v32, off, s[0:3], 0 dlc
4209; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4210; GFX11-NEXT:    s_waitcnt vmcnt(14)
4211; GFX11-NEXT:    buffer_store_b8 v33, off, s[0:3], 0 dlc
4212; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4213; GFX11-NEXT:    s_waitcnt vmcnt(13)
4214; GFX11-NEXT:    buffer_store_b8 v34, off, s[0:3], 0 dlc
4215; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4216; GFX11-NEXT:    s_waitcnt vmcnt(12)
4217; GFX11-NEXT:    buffer_store_b8 v35, off, s[0:3], 0 dlc
4218; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4219; GFX11-NEXT:    s_waitcnt vmcnt(11)
4220; GFX11-NEXT:    buffer_store_b8 v36, off, s[0:3], 0 dlc
4221; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4222; GFX11-NEXT:    s_waitcnt vmcnt(10)
4223; GFX11-NEXT:    buffer_store_b8 v37, off, s[0:3], 0 dlc
4224; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4225; GFX11-NEXT:    s_waitcnt vmcnt(9)
4226; GFX11-NEXT:    buffer_store_b8 v38, off, s[0:3], 0 dlc
4227; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4228; GFX11-NEXT:    s_waitcnt vmcnt(8)
4229; GFX11-NEXT:    buffer_store_b8 v39, off, s[0:3], 0 dlc
4230; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4231; GFX11-NEXT:    s_waitcnt vmcnt(7)
4232; GFX11-NEXT:    buffer_store_b8 v48, off, s[0:3], 0 dlc
4233; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4234; GFX11-NEXT:    s_waitcnt vmcnt(6)
4235; GFX11-NEXT:    buffer_store_b8 v49, off, s[0:3], 0 dlc
4236; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4237; GFX11-NEXT:    s_waitcnt vmcnt(5)
4238; GFX11-NEXT:    buffer_store_b8 v50, off, s[0:3], 0 dlc
4239; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4240; GFX11-NEXT:    s_waitcnt vmcnt(4)
4241; GFX11-NEXT:    buffer_store_b8 v51, off, s[0:3], 0 dlc
4242; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4243; GFX11-NEXT:    s_waitcnt vmcnt(3)
4244; GFX11-NEXT:    buffer_store_b8 v52, off, s[0:3], 0 dlc
4245; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4246; GFX11-NEXT:    s_waitcnt vmcnt(2)
4247; GFX11-NEXT:    buffer_store_b8 v53, off, s[0:3], 0 dlc
4248; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4249; GFX11-NEXT:    s_waitcnt vmcnt(1)
4250; GFX11-NEXT:    buffer_store_b8 v54, off, s[0:3], 0 dlc
4251; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4252; GFX11-NEXT:    s_waitcnt vmcnt(0)
4253; GFX11-NEXT:    buffer_store_b8 v55, off, s[0:3], 0 dlc
4254; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4255; GFX11-NEXT:    s_setpc_b64 s[30:31]
4256  store volatile <32 x i32> %arg0, ptr addrspace(1) undef
4257  store volatile <16 x i8> %arg1, ptr addrspace(1) undef
4258  ret void
4259}
4260
4261
; Test case for a single bfloat function argument. The IR body stores %arg0
; to an undef addrspace(1) pointer and returns void.
; Expected code per check prefix (prefixes come from the RUN lines at the top
; of the file)
;   CI    (hawaii run) multiplies v0 by 1.0 and shifts it right by 16 before a
;         buffer_store_short.
;   GFX89 (fiji and gfx900 runs) stores v0 directly with buffer_store_short.
;   GFX11 (gfx1100 run) stores v0 directly with buffer_store_b16.
; NOTE(review) the extra multiply/shift on CI presumably means the bfloat
; arrives widened in a 32-bit register on that target; confirm against the
; backend before relying on this.
4262define void @void_func_bf16(bfloat %arg0) #0 {
4263; CI-LABEL: void_func_bf16:
4264; CI:       ; %bb.0:
4265; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4266; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
4267; CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4268; CI-NEXT:    s_mov_b32 s7, 0xf000
4269; CI-NEXT:    s_mov_b32 s6, -1
4270; CI-NEXT:    buffer_store_short v0, off, s[4:7], 0
4271; CI-NEXT:    s_waitcnt vmcnt(0)
4272; CI-NEXT:    s_setpc_b64 s[30:31]
4273;
4274; GFX89-LABEL: void_func_bf16:
4275; GFX89:       ; %bb.0:
4276; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4277; GFX89-NEXT:    s_mov_b32 s7, 0xf000
4278; GFX89-NEXT:    s_mov_b32 s6, -1
4279; GFX89-NEXT:    buffer_store_short v0, off, s[4:7], 0
4280; GFX89-NEXT:    s_waitcnt vmcnt(0)
4281; GFX89-NEXT:    s_setpc_b64 s[30:31]
4282;
4283; GFX11-LABEL: void_func_bf16:
4284; GFX11:       ; %bb.0:
4285; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4286; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
4287; GFX11-NEXT:    s_mov_b32 s2, -1
4288; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
4289; GFX11-NEXT:    s_setpc_b64 s[30:31]
4290  store bfloat %arg0, ptr addrspace(1) undef
4291  ret void
4292}
4293
; Test case for a <2 x bfloat> function argument. The IR body stores %arg0 to
; an undef addrspace(1) pointer and returns void.
; Expected code per check prefix
;   CI    (hawaii run) multiplies v0 and v1 by 1.0, shifts v1 right by 16 and
;         merges the pair into v0 with v_alignbit_b32 before a
;         buffer_store_dword.
;   GFX89 (fiji and gfx900 runs) stores v0 directly with buffer_store_dword.
;   GFX11 (gfx1100 run) stores v0 directly with buffer_store_b32.
; NOTE(review) the CI sequence suggests each element arrives in its own
; register there while the other targets receive the pair already packed in
; v0; confirm against the calling-convention lowering.
4294define void @void_func_v2bf16(<2 x bfloat> %arg0) #0 {
4295; CI-LABEL: void_func_v2bf16:
4296; CI:       ; %bb.0:
4297; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4298; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
4299; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4300; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
4301; CI-NEXT:    v_alignbit_b32 v0, v1, v0, 16
4302; CI-NEXT:    s_mov_b32 s7, 0xf000
4303; CI-NEXT:    s_mov_b32 s6, -1
4304; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
4305; CI-NEXT:    s_waitcnt vmcnt(0)
4306; CI-NEXT:    s_setpc_b64 s[30:31]
4307;
4308; GFX89-LABEL: void_func_v2bf16:
4309; GFX89:       ; %bb.0:
4310; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4311; GFX89-NEXT:    s_mov_b32 s7, 0xf000
4312; GFX89-NEXT:    s_mov_b32 s6, -1
4313; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
4314; GFX89-NEXT:    s_waitcnt vmcnt(0)
4315; GFX89-NEXT:    s_setpc_b64 s[30:31]
4316;
4317; GFX11-LABEL: void_func_v2bf16:
4318; GFX11:       ; %bb.0:
4319; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4320; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
4321; GFX11-NEXT:    s_mov_b32 s2, -1
4322; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
4323; GFX11-NEXT:    s_setpc_b64 s[30:31]
4324  store <2 x bfloat> %arg0, ptr addrspace(1) undef
4325  ret void
4326}
4327
; Test case for a <3 x bfloat> function argument. The IR body stores %arg0 to
; an undef addrspace(1) pointer and returns void.
; Every prefix expects the vector store to be split into a 16-bit store of v1
; followed by a 32-bit store of v0.
;   CI    (hawaii run) first multiplies v0, v1 and v2 by 1.0, merges v1/v0
;         into v0 with v_alignbit_b32, and shifts the v2 product into v1.
;   GFX89 (fiji and gfx900 runs) stores v1 and v0 without any preparation.
;   GFX11 (gfx1100 run) does the same and groups both stores under
;         s_clause 0x1.
4328define void @void_func_v3bf16(<3 x bfloat> %arg0) #0 {
4329; CI-LABEL: void_func_v3bf16:
4330; CI:       ; %bb.0:
4331; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4332; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
4333; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4334; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
4335; CI-NEXT:    v_alignbit_b32 v0, v1, v0, 16
4336; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v2
4337; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4338; CI-NEXT:    s_mov_b32 s7, 0xf000
4339; CI-NEXT:    s_mov_b32 s6, -1
4340; CI-NEXT:    buffer_store_short v1, off, s[4:7], 0
4341; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
4342; CI-NEXT:    s_waitcnt vmcnt(0)
4343; CI-NEXT:    s_setpc_b64 s[30:31]
4344;
4345; GFX89-LABEL: void_func_v3bf16:
4346; GFX89:       ; %bb.0:
4347; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4348; GFX89-NEXT:    s_mov_b32 s7, 0xf000
4349; GFX89-NEXT:    s_mov_b32 s6, -1
4350; GFX89-NEXT:    buffer_store_short v1, off, s[4:7], 0
4351; GFX89-NEXT:    buffer_store_dword v0, off, s[4:7], 0
4352; GFX89-NEXT:    s_waitcnt vmcnt(0)
4353; GFX89-NEXT:    s_setpc_b64 s[30:31]
4354;
4355; GFX11-LABEL: void_func_v3bf16:
4356; GFX11:       ; %bb.0:
4357; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4358; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
4359; GFX11-NEXT:    s_mov_b32 s2, -1
4360; GFX11-NEXT:    s_clause 0x1
4361; GFX11-NEXT:    buffer_store_b16 v1, off, s[0:3], 0
4362; GFX11-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
4363; GFX11-NEXT:    s_setpc_b64 s[30:31]
4364  store <3 x bfloat> %arg0, ptr addrspace(1) undef
4365  ret void
4366}
4367
; Test case for a <4 x bfloat> function argument. The IR body stores %arg0 to
; an undef addrspace(1) pointer and returns void.
; Expected code per check prefix
;   CI    (hawaii run) multiplies v0..v3 by 1.0, shifts the odd registers
;         right by 16 and merges the pairs with v_alignbit_b32 into v[1:2],
;         which is then written with buffer_store_dwordx2.
;   GFX89 (fiji and gfx900 runs) stores v[0:1] directly with
;         buffer_store_dwordx2.
;   GFX11 (gfx1100 run) stores v[0:1] directly with buffer_store_b64.
4368define void @void_func_v4bf16(<4 x bfloat> %arg0) #0 {
4369; CI-LABEL: void_func_v4bf16:
4370; CI:       ; %bb.0:
4371; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4372; CI-NEXT:    v_mul_f32_e32 v3, 1.0, v3
4373; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
4374; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
4375; CI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
4376; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4377; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
4378; CI-NEXT:    v_alignbit_b32 v2, v3, v2, 16
4379; CI-NEXT:    v_alignbit_b32 v1, v1, v0, 16
4380; CI-NEXT:    s_mov_b32 s7, 0xf000
4381; CI-NEXT:    s_mov_b32 s6, -1
4382; CI-NEXT:    buffer_store_dwordx2 v[1:2], off, s[4:7], 0
4383; CI-NEXT:    s_waitcnt vmcnt(0)
4384; CI-NEXT:    s_setpc_b64 s[30:31]
4385;
4386; GFX89-LABEL: void_func_v4bf16:
4387; GFX89:       ; %bb.0:
4388; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4389; GFX89-NEXT:    s_mov_b32 s7, 0xf000
4390; GFX89-NEXT:    s_mov_b32 s6, -1
4391; GFX89-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
4392; GFX89-NEXT:    s_waitcnt vmcnt(0)
4393; GFX89-NEXT:    s_setpc_b64 s[30:31]
4394;
4395; GFX11-LABEL: void_func_v4bf16:
4396; GFX11:       ; %bb.0:
4397; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4398; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
4399; GFX11-NEXT:    s_mov_b32 s2, -1
4400; GFX11-NEXT:    buffer_store_b64 v[0:1], off, s[0:3], 0
4401; GFX11-NEXT:    s_setpc_b64 s[30:31]
4402  store <4 x bfloat> %arg0, ptr addrspace(1) undef
4403  ret void
4404}
4405
; Test case for an <8 x bfloat> function argument. The IR body stores %arg0 to
; an undef addrspace(1) pointer and returns void.
; Expected code per check prefix
;   CI    (hawaii run) multiplies v0..v7 by 1.0, shifts the odd registers
;         right by 16 and merges the pairs with v_alignbit_b32 into v[3:6],
;         which is then written with a single buffer_store_dwordx4.
;   GFX89 (fiji and gfx900 runs) stores v[0:3] directly with
;         buffer_store_dwordx4.
;   GFX11 (gfx1100 run) stores v[0:3] directly with buffer_store_b128.
4406define void @void_func_v8bf16(<8 x bfloat> %arg0) #0 {
4407; CI-LABEL: void_func_v8bf16:
4408; CI:       ; %bb.0:
4409; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4410; CI-NEXT:    v_mul_f32_e32 v7, 1.0, v7
4411; CI-NEXT:    v_mul_f32_e32 v5, 1.0, v5
4412; CI-NEXT:    v_mul_f32_e32 v3, 1.0, v3
4413; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
4414; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
4415; CI-NEXT:    v_mul_f32_e32 v6, 1.0, v6
4416; CI-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
4417; CI-NEXT:    v_mul_f32_e32 v4, 1.0, v4
4418; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
4419; CI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
4420; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4421; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
4422; CI-NEXT:    v_alignbit_b32 v6, v7, v6, 16
4423; CI-NEXT:    v_alignbit_b32 v5, v5, v4, 16
4424; CI-NEXT:    v_alignbit_b32 v4, v3, v2, 16
4425; CI-NEXT:    v_alignbit_b32 v3, v1, v0, 16
4426; CI-NEXT:    s_mov_b32 s7, 0xf000
4427; CI-NEXT:    s_mov_b32 s6, -1
4428; CI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[4:7], 0
4429; CI-NEXT:    s_waitcnt vmcnt(0)
4430; CI-NEXT:    s_setpc_b64 s[30:31]
4431;
4432; GFX89-LABEL: void_func_v8bf16:
4433; GFX89:       ; %bb.0:
4434; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4435; GFX89-NEXT:    s_mov_b32 s7, 0xf000
4436; GFX89-NEXT:    s_mov_b32 s6, -1
4437; GFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4438; GFX89-NEXT:    s_waitcnt vmcnt(0)
4439; GFX89-NEXT:    s_setpc_b64 s[30:31]
4440;
4441; GFX11-LABEL: void_func_v8bf16:
4442; GFX11:       ; %bb.0:
4443; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4444; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
4445; GFX11-NEXT:    s_mov_b32 s2, -1
4446; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
4447; GFX11-NEXT:    s_setpc_b64 s[30:31]
4448  store <8 x bfloat> %arg0, ptr addrspace(1) undef
4449  ret void
4450}
4451
; Test case for a <16 x bfloat> function argument. The IR body stores %arg0 to
; an undef addrspace(1) pointer and returns void.
; Every prefix expects the store to be emitted as two 128-bit stores, the
; upper half of the vector first.
;   CI    (hawaii run) multiplies v0..v15 by 1.0, shifts the odd registers
;         right by 16 and merges the pairs with v_alignbit_b32 into v[11:14]
;         and v[3:6], each written with buffer_store_dwordx4.
;   GFX89 (fiji and gfx900 runs) stores v[4:7] and then v[0:3] directly with
;         buffer_store_dwordx4.
;   GFX11 (gfx1100 run) stores v[4:7] and then v[0:3] with buffer_store_b128,
;         grouped under s_clause 0x1.
4452define void @void_func_v16bf16(<16 x bfloat> %arg0) #0 {
4453; CI-LABEL: void_func_v16bf16:
4454; CI:       ; %bb.0:
4455; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4456; CI-NEXT:    v_mul_f32_e32 v5, 1.0, v5
4457; CI-NEXT:    v_mul_f32_e32 v3, 1.0, v3
4458; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v1
4459; CI-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
4460; CI-NEXT:    v_mul_f32_e32 v4, 1.0, v4
4461; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
4462; CI-NEXT:    v_mul_f32_e32 v2, 1.0, v2
4463; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4464; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
4465; CI-NEXT:    v_alignbit_b32 v5, v5, v4, 16
4466; CI-NEXT:    v_alignbit_b32 v4, v3, v2, 16
4467; CI-NEXT:    v_alignbit_b32 v3, v1, v0, 16
4468; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v15
4469; CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4470; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v14
4471; CI-NEXT:    v_alignbit_b32 v14, v0, v1, 16
4472; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v13
4473; CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4474; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v12
4475; CI-NEXT:    v_alignbit_b32 v13, v0, v1, 16
4476; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v11
4477; CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4478; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v10
4479; CI-NEXT:    v_alignbit_b32 v12, v0, v1, 16
4480; CI-NEXT:    v_mul_f32_e32 v0, 1.0, v9
4481; CI-NEXT:    v_mul_f32_e32 v7, 1.0, v7
4482; CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
4483; CI-NEXT:    v_mul_f32_e32 v1, 1.0, v8
4484; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
4485; CI-NEXT:    v_mul_f32_e32 v6, 1.0, v6
4486; CI-NEXT:    v_alignbit_b32 v11, v0, v1, 16
4487; CI-NEXT:    s_mov_b32 s7, 0xf000
4488; CI-NEXT:    s_mov_b32 s6, -1
4489; CI-NEXT:    v_alignbit_b32 v6, v7, v6, 16
4490; CI-NEXT:    buffer_store_dwordx4 v[11:14], off, s[4:7], 0
4491; CI-NEXT:    buffer_store_dwordx4 v[3:6], off, s[4:7], 0
4492; CI-NEXT:    s_waitcnt vmcnt(0)
4493; CI-NEXT:    s_setpc_b64 s[30:31]
4494;
4495; GFX89-LABEL: void_func_v16bf16:
4496; GFX89:       ; %bb.0:
4497; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4498; GFX89-NEXT:    s_mov_b32 s7, 0xf000
4499; GFX89-NEXT:    s_mov_b32 s6, -1
4500; GFX89-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
4501; GFX89-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
4502; GFX89-NEXT:    s_waitcnt vmcnt(0)
4503; GFX89-NEXT:    s_setpc_b64 s[30:31]
4504;
4505; GFX11-LABEL: void_func_v16bf16:
4506; GFX11:       ; %bb.0:
4507; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4508; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
4509; GFX11-NEXT:    s_mov_b32 s2, -1
4510; GFX11-NEXT:    s_clause 0x1
4511; GFX11-NEXT:    buffer_store_b128 v[4:7], off, s[0:3], 0
4512; GFX11-NEXT:    buffer_store_b128 v[0:3], off, s[0:3], 0
4513; GFX11-NEXT:    s_setpc_b64 s[30:31]
4514  store <16 x bfloat> %arg0, ptr addrspace(1) undef
4515  ret void
4516}
4517
; Attribute group #0, referenced as `#0` by the function definitions in this
; file; it marks them nounwind.
4518attributes #0 = { nounwind }
4519