xref: /llvm-project/llvm/test/CodeGen/AMDGPU/function-returns.ll (revision 6206f5444fc0732e6495703c75a67f1f90f5b418)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,CI %s
3; RUN: llc -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX8 %s
4; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX789,GFX89,GFX9 %s
5; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
6
; Return an i1 loaded from an undef global (addrspace 1) pointer, with no
; extension attribute on the return value. Both check groups expect a single
; byte load into v0 (buffer_load_ubyte, or buffer_load_u8 on GFX11), a wait on
; vmcnt, and then the return through s_setpc_b64.
7define i1 @i1_func_void() #0 {
8; GFX789-LABEL: i1_func_void:
9; GFX789:       ; %bb.0:
10; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX789-NEXT:    s_mov_b32 s7, 0xf000
12; GFX789-NEXT:    s_mov_b32 s6, -1
13; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
14; GFX789-NEXT:    s_waitcnt vmcnt(0)
15; GFX789-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX11-LABEL: i1_func_void:
18; GFX11:       ; %bb.0:
19; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
21; GFX11-NEXT:    s_mov_b32 s2, -1
22; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
23; GFX11-NEXT:    s_waitcnt vmcnt(0)
24; GFX11-NEXT:    s_setpc_b64 s[30:31]
25  %val = load i1, ptr addrspace(1) undef
26  ret i1 %val
27}
28
29; FIXME: Missing 'and'? The zeroext i1 result below is returned straight from the byte load with no masking instruction.
; Same i1 load as the plain case, but the return value is marked zeroext.
; The expected code is the byte load into v0 followed directly by the return;
; the checks contain no instruction that masks the loaded value.
30define zeroext i1 @i1_zeroext_func_void() #0 {
31; GFX789-LABEL: i1_zeroext_func_void:
32; GFX789:       ; %bb.0:
33; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX789-NEXT:    s_mov_b32 s7, 0xf000
35; GFX789-NEXT:    s_mov_b32 s6, -1
36; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
37; GFX789-NEXT:    s_waitcnt vmcnt(0)
38; GFX789-NEXT:    s_setpc_b64 s[30:31]
39;
40; GFX11-LABEL: i1_zeroext_func_void:
41; GFX11:       ; %bb.0:
42; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
44; GFX11-NEXT:    s_mov_b32 s2, -1
45; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
46; GFX11-NEXT:    s_waitcnt vmcnt(0)
47; GFX11-NEXT:    s_setpc_b64 s[30:31]
48  %val = load i1, ptr addrspace(1) undef
49  ret i1 %val
50}
51
; i1 return marked signext. After the byte load and the vmcnt wait, the checks
; expect "v_bfe_i32 v0, v0, 0, 1" (a signed bitfield extract of width 1 at
; offset 0) before the return, so the extension is done in a separate VALU op.
52define signext i1 @i1_signext_func_void() #0 {
53; GFX789-LABEL: i1_signext_func_void:
54; GFX789:       ; %bb.0:
55; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56; GFX789-NEXT:    s_mov_b32 s7, 0xf000
57; GFX789-NEXT:    s_mov_b32 s6, -1
58; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
59; GFX789-NEXT:    s_waitcnt vmcnt(0)
60; GFX789-NEXT:    v_bfe_i32 v0, v0, 0, 1
61; GFX789-NEXT:    s_setpc_b64 s[30:31]
62;
63; GFX11-LABEL: i1_signext_func_void:
64; GFX11:       ; %bb.0:
65; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
67; GFX11-NEXT:    s_mov_b32 s2, -1
68; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
69; GFX11-NEXT:    s_waitcnt vmcnt(0)
70; GFX11-NEXT:    v_bfe_i32 v0, v0, 0, 1
71; GFX11-NEXT:    s_setpc_b64 s[30:31]
72  %val = load i1, ptr addrspace(1) undef
73  ret i1 %val
74}
75
; Return an i8 loaded from an undef global (addrspace 1) pointer, with no
; extension attribute. Expected code is one byte load into v0
; (buffer_load_ubyte / buffer_load_u8), a vmcnt wait, and the return.
76define i8 @i8_func_void() #0 {
77; GFX789-LABEL: i8_func_void:
78; GFX789:       ; %bb.0:
79; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GFX789-NEXT:    s_mov_b32 s7, 0xf000
81; GFX789-NEXT:    s_mov_b32 s6, -1
82; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
83; GFX789-NEXT:    s_waitcnt vmcnt(0)
84; GFX789-NEXT:    s_setpc_b64 s[30:31]
85;
86; GFX11-LABEL: i8_func_void:
87; GFX11:       ; %bb.0:
88; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
90; GFX11-NEXT:    s_mov_b32 s2, -1
91; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
92; GFX11-NEXT:    s_waitcnt vmcnt(0)
93; GFX11-NEXT:    s_setpc_b64 s[30:31]
94  %val = load i8, ptr addrspace(1) undef
95  ret i8 %val
96}
97
; i8 return marked zeroext. The expected instruction sequence matches the
; plain i8 case: the ubyte/u8 buffer load is the only instruction producing
; v0, with no separate extension instruction in the checks.
98define zeroext i8 @i8_zeroext_func_void() #0 {
99; GFX789-LABEL: i8_zeroext_func_void:
100; GFX789:       ; %bb.0:
101; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX789-NEXT:    s_mov_b32 s7, 0xf000
103; GFX789-NEXT:    s_mov_b32 s6, -1
104; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
105; GFX789-NEXT:    s_waitcnt vmcnt(0)
106; GFX789-NEXT:    s_setpc_b64 s[30:31]
107;
108; GFX11-LABEL: i8_zeroext_func_void:
109; GFX11:       ; %bb.0:
110; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
111; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
112; GFX11-NEXT:    s_mov_b32 s2, -1
113; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
114; GFX11-NEXT:    s_waitcnt vmcnt(0)
115; GFX11-NEXT:    s_setpc_b64 s[30:31]
116  %val = load i8, ptr addrspace(1) undef
117  ret i8 %val
118}
119
; i8 return marked signext. The checks expect the signed byte-load opcode
; (buffer_load_sbyte, or buffer_load_i8 on GFX11) instead of the ubyte/u8
; form, and no separate extension instruction after the load.
120define signext i8 @i8_signext_func_void() #0 {
121; GFX789-LABEL: i8_signext_func_void:
122; GFX789:       ; %bb.0:
123; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
124; GFX789-NEXT:    s_mov_b32 s7, 0xf000
125; GFX789-NEXT:    s_mov_b32 s6, -1
126; GFX789-NEXT:    buffer_load_sbyte v0, off, s[4:7], 0
127; GFX789-NEXT:    s_waitcnt vmcnt(0)
128; GFX789-NEXT:    s_setpc_b64 s[30:31]
129;
130; GFX11-LABEL: i8_signext_func_void:
131; GFX11:       ; %bb.0:
132; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
134; GFX11-NEXT:    s_mov_b32 s2, -1
135; GFX11-NEXT:    buffer_load_i8 v0, off, s[0:3], 0
136; GFX11-NEXT:    s_waitcnt vmcnt(0)
137; GFX11-NEXT:    s_setpc_b64 s[30:31]
138  %val = load i8, ptr addrspace(1) undef
139  ret i8 %val
140}
141
; Return an i16 loaded from an undef global (addrspace 1) pointer, with no
; extension attribute. Expected code is one 16-bit load into v0
; (buffer_load_ushort / buffer_load_u16), a vmcnt wait, and the return.
142define i16 @i16_func_void() #0 {
143; GFX789-LABEL: i16_func_void:
144; GFX789:       ; %bb.0:
145; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; GFX789-NEXT:    s_mov_b32 s7, 0xf000
147; GFX789-NEXT:    s_mov_b32 s6, -1
148; GFX789-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
149; GFX789-NEXT:    s_waitcnt vmcnt(0)
150; GFX789-NEXT:    s_setpc_b64 s[30:31]
151;
152; GFX11-LABEL: i16_func_void:
153; GFX11:       ; %bb.0:
154; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
156; GFX11-NEXT:    s_mov_b32 s2, -1
157; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
158; GFX11-NEXT:    s_waitcnt vmcnt(0)
159; GFX11-NEXT:    s_setpc_b64 s[30:31]
160  %val = load i16, ptr addrspace(1) undef
161  ret i16 %val
162}
163
; i16 return marked zeroext. The expected sequence matches the plain i16
; case: the ushort/u16 buffer load is the only instruction producing v0.
164define zeroext i16 @i16_zeroext_func_void() #0 {
165; GFX789-LABEL: i16_zeroext_func_void:
166; GFX789:       ; %bb.0:
167; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168; GFX789-NEXT:    s_mov_b32 s7, 0xf000
169; GFX789-NEXT:    s_mov_b32 s6, -1
170; GFX789-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
171; GFX789-NEXT:    s_waitcnt vmcnt(0)
172; GFX789-NEXT:    s_setpc_b64 s[30:31]
173;
174; GFX11-LABEL: i16_zeroext_func_void:
175; GFX11:       ; %bb.0:
176; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
177; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
178; GFX11-NEXT:    s_mov_b32 s2, -1
179; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
180; GFX11-NEXT:    s_waitcnt vmcnt(0)
181; GFX11-NEXT:    s_setpc_b64 s[30:31]
182  %val = load i16, ptr addrspace(1) undef
183  ret i16 %val
184}
185
; i16 return marked signext. The checks expect the signed 16-bit load opcode
; (buffer_load_sshort, or buffer_load_i16 on GFX11) and no separate extension
; instruction after the load.
186define signext i16 @i16_signext_func_void() #0 {
187; GFX789-LABEL: i16_signext_func_void:
188; GFX789:       ; %bb.0:
189; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190; GFX789-NEXT:    s_mov_b32 s7, 0xf000
191; GFX789-NEXT:    s_mov_b32 s6, -1
192; GFX789-NEXT:    buffer_load_sshort v0, off, s[4:7], 0
193; GFX789-NEXT:    s_waitcnt vmcnt(0)
194; GFX789-NEXT:    s_setpc_b64 s[30:31]
195;
196; GFX11-LABEL: i16_signext_func_void:
197; GFX11:       ; %bb.0:
198; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
200; GFX11-NEXT:    s_mov_b32 s2, -1
201; GFX11-NEXT:    buffer_load_i16 v0, off, s[0:3], 0
202; GFX11-NEXT:    s_waitcnt vmcnt(0)
203; GFX11-NEXT:    s_setpc_b64 s[30:31]
204  %val = load i16, ptr addrspace(1) undef
205  ret i16 %val
206}
207
; Return an i32 loaded from an undef global (addrspace 1) pointer. Expected
; code is a single 32-bit load into v0 (buffer_load_dword / buffer_load_b32),
; a vmcnt wait, and the return.
208define i32 @i32_func_void() #0 {
209; GFX789-LABEL: i32_func_void:
210; GFX789:       ; %bb.0:
211; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212; GFX789-NEXT:    s_mov_b32 s7, 0xf000
213; GFX789-NEXT:    s_mov_b32 s6, -1
214; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
215; GFX789-NEXT:    s_waitcnt vmcnt(0)
216; GFX789-NEXT:    s_setpc_b64 s[30:31]
217;
218; GFX11-LABEL: i32_func_void:
219; GFX11:       ; %bb.0:
220; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
221; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
222; GFX11-NEXT:    s_mov_b32 s2, -1
223; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
224; GFX11-NEXT:    s_waitcnt vmcnt(0)
225; GFX11-NEXT:    s_setpc_b64 s[30:31]
226  %val = load i32, ptr addrspace(1) undef
227  ret i32 %val
228}
229
; Return an i48 loaded (align 8) from an undef global pointer. The value is
; expected in two registers: a 32-bit load into v0 and a 16-bit ushort/u16
; load into v1. The GFX11 checks additionally expect the two loads to be
; preceded by "s_clause 0x1".
230define i48 @i48_func_void() #0 {
231; GFX789-LABEL: i48_func_void:
232; GFX789:       ; %bb.0:
233; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234; GFX789-NEXT:    s_mov_b32 s7, 0xf000
235; GFX789-NEXT:    s_mov_b32 s6, -1
236; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
237; GFX789-NEXT:    buffer_load_ushort v1, off, s[4:7], 0
238; GFX789-NEXT:    s_waitcnt vmcnt(0)
239; GFX789-NEXT:    s_setpc_b64 s[30:31]
240;
241; GFX11-LABEL: i48_func_void:
242; GFX11:       ; %bb.0:
243; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
245; GFX11-NEXT:    s_mov_b32 s2, -1
246; GFX11-NEXT:    s_clause 0x1
247; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
248; GFX11-NEXT:    buffer_load_u16 v1, off, s[0:3], 0
249; GFX11-NEXT:    s_waitcnt vmcnt(0)
250; GFX11-NEXT:    s_setpc_b64 s[30:31]
251  %val = load i48, ptr addrspace(1) undef, align 8
252  ret i48 %val
253}
254
; i48 return marked zeroext. The expected sequence matches the plain i48
; case: a 32-bit load into v0 plus a ushort/u16 load into v1, with no further
; instruction touching v1.
255define zeroext i48 @i48_zeroext_func_void() #0 {
256; GFX789-LABEL: i48_zeroext_func_void:
257; GFX789:       ; %bb.0:
258; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259; GFX789-NEXT:    s_mov_b32 s7, 0xf000
260; GFX789-NEXT:    s_mov_b32 s6, -1
261; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
262; GFX789-NEXT:    buffer_load_ushort v1, off, s[4:7], 0
263; GFX789-NEXT:    s_waitcnt vmcnt(0)
264; GFX789-NEXT:    s_setpc_b64 s[30:31]
265;
266; GFX11-LABEL: i48_zeroext_func_void:
267; GFX11:       ; %bb.0:
268; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
269; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
270; GFX11-NEXT:    s_mov_b32 s2, -1
271; GFX11-NEXT:    s_clause 0x1
272; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
273; GFX11-NEXT:    buffer_load_u16 v1, off, s[0:3], 0
274; GFX11-NEXT:    s_waitcnt vmcnt(0)
275; GFX11-NEXT:    s_setpc_b64 s[30:31]
276  %val = load i48, ptr addrspace(1) undef, align 8
277  ret i48 %val
278}
279
; i48 return marked signext. Compared with the plain i48 case, the load that
; fills v1 uses the signed 16-bit opcode (buffer_load_sshort, or
; buffer_load_i16 on GFX11); the 32-bit load into v0 is unchanged.
280define signext i48 @i48_signext_func_void() #0 {
281; GFX789-LABEL: i48_signext_func_void:
282; GFX789:       ; %bb.0:
283; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284; GFX789-NEXT:    s_mov_b32 s7, 0xf000
285; GFX789-NEXT:    s_mov_b32 s6, -1
286; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
287; GFX789-NEXT:    buffer_load_sshort v1, off, s[4:7], 0
288; GFX789-NEXT:    s_waitcnt vmcnt(0)
289; GFX789-NEXT:    s_setpc_b64 s[30:31]
290;
291; GFX11-LABEL: i48_signext_func_void:
292; GFX11:       ; %bb.0:
293; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
294; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
295; GFX11-NEXT:    s_mov_b32 s2, -1
296; GFX11-NEXT:    s_clause 0x1
297; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
298; GFX11-NEXT:    buffer_load_i16 v1, off, s[0:3], 0
299; GFX11-NEXT:    s_waitcnt vmcnt(0)
300; GFX11-NEXT:    s_setpc_b64 s[30:31]
301  %val = load i48, ptr addrspace(1) undef, align 8
302  ret i48 %val
303}
304
; Return the incoming i63 argument unchanged (despite the "_void" suffix this
; function takes a parameter and performs no load). The checks expect only
; the entry s_waitcnt and the return; no instruction modifies the value.
305define i63 @i63_func_void(i63 %val) #0 {
306; GFX789-LABEL: i63_func_void:
307; GFX789:       ; %bb.0:
308; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309; GFX789-NEXT:    s_setpc_b64 s[30:31]
310;
311; GFX11-LABEL: i63_func_void:
312; GFX11:       ; %bb.0:
313; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314; GFX11-NEXT:    s_setpc_b64 s[30:31]
315  ret i63 %val
316}
317
; Return the incoming i63 argument with the return marked zeroext. The checks
; expect a single v_and_b32 of v1 with 0x7fffffff, i.e. the top bit of the
; high dword is cleared; v0 is left untouched.
318define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
319; GFX789-LABEL: i63_zeroext_func_void:
320; GFX789:       ; %bb.0:
321; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX789-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
323; GFX789-NEXT:    s_setpc_b64 s[30:31]
324;
325; GFX11-LABEL: i63_zeroext_func_void:
326; GFX11:       ; %bb.0:
327; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
329; GFX11-NEXT:    s_setpc_b64 s[30:31]
330  ret i63 %val
331}
332
; Return the incoming i63 argument with the return marked signext. The
; expected code shifts v[0:1] left by 1 and then arithmetic-shifts it right
; by 1. This is the only function in this part of the file whose checks are
; split three ways: the CI group uses v_lshl_b64 / v_ashr_i64, the GFX89 and
; GFX11 groups use the "rev" operand-order forms, and GFX11 also expects an
; s_delay_alu between the two shifts.
333define signext i63 @i63_signext_func_void(i63 %val) #0 {
334; CI-LABEL: i63_signext_func_void:
335; CI:       ; %bb.0:
336; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
337; CI-NEXT:    v_lshl_b64 v[0:1], v[0:1], 1
338; CI-NEXT:    v_ashr_i64 v[0:1], v[0:1], 1
339; CI-NEXT:    s_setpc_b64 s[30:31]
340;
341; GFX89-LABEL: i63_signext_func_void:
342; GFX89:       ; %bb.0:
343; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; GFX89-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
345; GFX89-NEXT:    v_ashrrev_i64 v[0:1], 1, v[0:1]
346; GFX89-NEXT:    s_setpc_b64 s[30:31]
347;
348; GFX11-LABEL: i63_signext_func_void:
349; GFX11:       ; %bb.0:
350; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351; GFX11-NEXT:    v_lshlrev_b64 v[0:1], 1, v[0:1]
352; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
353; GFX11-NEXT:    v_ashrrev_i64 v[0:1], 1, v[0:1]
354; GFX11-NEXT:    s_setpc_b64 s[30:31]
355  ret i63 %val
356}
357
; Return an i64 loaded from an undef global (addrspace 1) pointer. Expected
; code is a single 64-bit load into v[0:1] (buffer_load_dwordx2 /
; buffer_load_b64), a vmcnt wait, and the return.
358define i64 @i64_func_void() #0 {
359; GFX789-LABEL: i64_func_void:
360; GFX789:       ; %bb.0:
361; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362; GFX789-NEXT:    s_mov_b32 s7, 0xf000
363; GFX789-NEXT:    s_mov_b32 s6, -1
364; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
365; GFX789-NEXT:    s_waitcnt vmcnt(0)
366; GFX789-NEXT:    s_setpc_b64 s[30:31]
367;
368; GFX11-LABEL: i64_func_void:
369; GFX11:       ; %bb.0:
370; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
371; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
372; GFX11-NEXT:    s_mov_b32 s2, -1
373; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
374; GFX11-NEXT:    s_waitcnt vmcnt(0)
375; GFX11-NEXT:    s_setpc_b64 s[30:31]
376  %val = load i64, ptr addrspace(1) undef
377  ret i64 %val
378}
379
; Return an i65 loaded from an undef global pointer. The value is expected in
; three registers: a 64-bit load into v[0:1] plus a byte load (ubyte/u8) into
; v2. The GFX11 checks expect the two loads to be preceded by "s_clause 0x1".
380define i65 @i65_func_void() #0 {
381; GFX789-LABEL: i65_func_void:
382; GFX789:       ; %bb.0:
383; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384; GFX789-NEXT:    s_mov_b32 s7, 0xf000
385; GFX789-NEXT:    s_mov_b32 s6, -1
386; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
387; GFX789-NEXT:    buffer_load_ubyte v2, off, s[4:7], 0
388; GFX789-NEXT:    s_waitcnt vmcnt(0)
389; GFX789-NEXT:    s_setpc_b64 s[30:31]
390;
391; GFX11-LABEL: i65_func_void:
392; GFX11:       ; %bb.0:
393; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
395; GFX11-NEXT:    s_mov_b32 s2, -1
396; GFX11-NEXT:    s_clause 0x1
397; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
398; GFX11-NEXT:    buffer_load_u8 v2, off, s[0:3], 0
399; GFX11-NEXT:    s_waitcnt vmcnt(0)
400; GFX11-NEXT:    s_setpc_b64 s[30:31]
401  %val = load i65, ptr addrspace(1) undef
402  ret i65 %val
403}
404
; Return a float loaded from an undef global (addrspace 1) pointer. The
; expected code is the same as for the i32 case: one 32-bit buffer load into
; v0, a vmcnt wait, and the return.
405define float @f32_func_void() #0 {
406; GFX789-LABEL: f32_func_void:
407; GFX789:       ; %bb.0:
408; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
409; GFX789-NEXT:    s_mov_b32 s7, 0xf000
410; GFX789-NEXT:    s_mov_b32 s6, -1
411; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
412; GFX789-NEXT:    s_waitcnt vmcnt(0)
413; GFX789-NEXT:    s_setpc_b64 s[30:31]
414;
415; GFX11-LABEL: f32_func_void:
416; GFX11:       ; %bb.0:
417; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
418; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
419; GFX11-NEXT:    s_mov_b32 s2, -1
420; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
421; GFX11-NEXT:    s_waitcnt vmcnt(0)
422; GFX11-NEXT:    s_setpc_b64 s[30:31]
423  %val = load float, ptr addrspace(1) undef
424  ret float %val
425}
426
; Return a double loaded from an undef global (addrspace 1) pointer. The
; expected code is the same as for the i64 case: one 64-bit buffer load into
; v[0:1], a vmcnt wait, and the return.
427define double @f64_func_void() #0 {
428; GFX789-LABEL: f64_func_void:
429; GFX789:       ; %bb.0:
430; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431; GFX789-NEXT:    s_mov_b32 s7, 0xf000
432; GFX789-NEXT:    s_mov_b32 s6, -1
433; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
434; GFX789-NEXT:    s_waitcnt vmcnt(0)
435; GFX789-NEXT:    s_setpc_b64 s[30:31]
436;
437; GFX11-LABEL: f64_func_void:
438; GFX11:       ; %bb.0:
439; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
441; GFX11-NEXT:    s_mov_b32 s2, -1
442; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
443; GFX11-NEXT:    s_waitcnt vmcnt(0)
444; GFX11-NEXT:    s_setpc_b64 s[30:31]
445  %val = load double, ptr addrspace(1) undef
446  ret double %val
447}
448
; Return a <2 x double> loaded from an undef global (addrspace 1) pointer.
; Expected code is a single 128-bit load into v[0:3] (buffer_load_dwordx4 /
; buffer_load_b128), a vmcnt wait, and the return.
449define <2 x double> @v2f64_func_void() #0 {
450; GFX789-LABEL: v2f64_func_void:
451; GFX789:       ; %bb.0:
452; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453; GFX789-NEXT:    s_mov_b32 s7, 0xf000
454; GFX789-NEXT:    s_mov_b32 s6, -1
455; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
456; GFX789-NEXT:    s_waitcnt vmcnt(0)
457; GFX789-NEXT:    s_setpc_b64 s[30:31]
458;
459; GFX11-LABEL: v2f64_func_void:
460; GFX11:       ; %bb.0:
461; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
463; GFX11-NEXT:    s_mov_b32 s2, -1
464; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
465; GFX11-NEXT:    s_waitcnt vmcnt(0)
466; GFX11-NEXT:    s_setpc_b64 s[30:31]
467  %val = load <2 x double>, ptr addrspace(1) undef
468  ret <2 x double> %val
469}
470
; Return a <2 x i32> loaded from an undef global (addrspace 1) pointer.
; Expected code is a single 64-bit load into v[0:1] (buffer_load_dwordx2 /
; buffer_load_b64), a vmcnt wait, and the return.
471define <2 x i32> @v2i32_func_void() #0 {
472; GFX789-LABEL: v2i32_func_void:
473; GFX789:       ; %bb.0:
474; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475; GFX789-NEXT:    s_mov_b32 s7, 0xf000
476; GFX789-NEXT:    s_mov_b32 s6, -1
477; GFX789-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
478; GFX789-NEXT:    s_waitcnt vmcnt(0)
479; GFX789-NEXT:    s_setpc_b64 s[30:31]
480;
481; GFX11-LABEL: v2i32_func_void:
482; GFX11:       ; %bb.0:
483; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
485; GFX11-NEXT:    s_mov_b32 s2, -1
486; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
487; GFX11-NEXT:    s_waitcnt vmcnt(0)
488; GFX11-NEXT:    s_setpc_b64 s[30:31]
489  %val = load <2 x i32>, ptr addrspace(1) undef
490  ret <2 x i32> %val
491}
492
; Return a <3 x i32> loaded from an undef global (addrspace 1) pointer.
; Expected code is a single 96-bit load into v[0:2] (buffer_load_dwordx3 /
; buffer_load_b96) for every checked target, followed by a vmcnt wait and the
; return.
493define <3 x i32> @v3i32_func_void() #0 {
494; GFX789-LABEL: v3i32_func_void:
495; GFX789:       ; %bb.0:
496; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
497; GFX789-NEXT:    s_mov_b32 s7, 0xf000
498; GFX789-NEXT:    s_mov_b32 s6, -1
499; GFX789-NEXT:    buffer_load_dwordx3 v[0:2], off, s[4:7], 0
500; GFX789-NEXT:    s_waitcnt vmcnt(0)
501; GFX789-NEXT:    s_setpc_b64 s[30:31]
502;
503; GFX11-LABEL: v3i32_func_void:
504; GFX11:       ; %bb.0:
505; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
506; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
507; GFX11-NEXT:    s_mov_b32 s2, -1
508; GFX11-NEXT:    buffer_load_b96 v[0:2], off, s[0:3], 0
509; GFX11-NEXT:    s_waitcnt vmcnt(0)
510; GFX11-NEXT:    s_setpc_b64 s[30:31]
511  %val = load <3 x i32>, ptr addrspace(1) undef
512  ret <3 x i32> %val
513}
514
; Return a <4 x i32> loaded from an undef global (addrspace 1) pointer.
; Expected code is a single 128-bit load into v[0:3] (buffer_load_dwordx4 /
; buffer_load_b128), a vmcnt wait, and the return.
515define <4 x i32> @v4i32_func_void() #0 {
516; GFX789-LABEL: v4i32_func_void:
517; GFX789:       ; %bb.0:
518; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519; GFX789-NEXT:    s_mov_b32 s7, 0xf000
520; GFX789-NEXT:    s_mov_b32 s6, -1
521; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
522; GFX789-NEXT:    s_waitcnt vmcnt(0)
523; GFX789-NEXT:    s_setpc_b64 s[30:31]
524;
525; GFX11-LABEL: v4i32_func_void:
526; GFX11:       ; %bb.0:
527; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
529; GFX11-NEXT:    s_mov_b32 s2, -1
530; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
531; GFX11-NEXT:    s_waitcnt vmcnt(0)
532; GFX11-NEXT:    s_setpc_b64 s[30:31]
533  %val = load <4 x i32>, ptr addrspace(1) undef
534  ret <4 x i32> %val
535}
536
; Return a <5 x i32> produced by a *volatile* load from an undef global
; pointer. The checks expect two loads: a 32-bit load into v4 first, then a
; 128-bit load into v[0:3]. Each carries the glc modifier (glc dlc on GFX11)
; and each is followed by its own "s_waitcnt vmcnt(0)"; the GFX11 checks
; contain no s_clause here.
537define <5 x i32> @v5i32_func_void() #0 {
538; GFX789-LABEL: v5i32_func_void:
539; GFX789:       ; %bb.0:
540; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541; GFX789-NEXT:    s_mov_b32 s7, 0xf000
542; GFX789-NEXT:    s_mov_b32 s6, -1
543; GFX789-NEXT:    buffer_load_dword v4, off, s[4:7], 0 glc
544; GFX789-NEXT:    s_waitcnt vmcnt(0)
545; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0 glc
546; GFX789-NEXT:    s_waitcnt vmcnt(0)
547; GFX789-NEXT:    s_setpc_b64 s[30:31]
548;
549; GFX11-LABEL: v5i32_func_void:
550; GFX11:       ; %bb.0:
551; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
552; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
553; GFX11-NEXT:    s_mov_b32 s2, -1
554; GFX11-NEXT:    buffer_load_b32 v4, off, s[0:3], 0 glc dlc
555; GFX11-NEXT:    s_waitcnt vmcnt(0)
556; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc
557; GFX11-NEXT:    s_waitcnt vmcnt(0)
558; GFX11-NEXT:    s_setpc_b64 s[30:31]
559  %val = load volatile <5 x i32>, ptr addrspace(1) undef
560  ret <5 x i32> %val
561}
562
; Return an <8 x i32>. Unlike the smaller cases, the global pointer is itself
; loaded (volatile) from an undef addrspace(4) pointer, which the checks
; expect as a scalar load (s_load_dwordx2 / s_load_b64) followed by an
; lgkmcnt wait. The vector is then read with two 128-bit buffer loads at
; offsets 0 and 16 into v[0:7]; GFX11 groups them under "s_clause 0x1".
563define <8 x i32> @v8i32_func_void() #0 {
564; GFX789-LABEL: v8i32_func_void:
565; GFX789:       ; %bb.0:
566; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
567; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
568; GFX789-NEXT:    s_mov_b32 s7, 0xf000
569; GFX789-NEXT:    s_mov_b32 s6, -1
570; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
571; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
572; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
573; GFX789-NEXT:    s_waitcnt vmcnt(0)
574; GFX789-NEXT:    s_setpc_b64 s[30:31]
575;
576; GFX11-LABEL: v8i32_func_void:
577; GFX11:       ; %bb.0:
578; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
580; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
581; GFX11-NEXT:    s_mov_b32 s2, -1
582; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
583; GFX11-NEXT:    s_clause 0x1
584; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
585; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
586; GFX11-NEXT:    s_waitcnt vmcnt(0)
587; GFX11-NEXT:    s_setpc_b64 s[30:31]
588  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
589  %val = load <8 x i32>, ptr addrspace(1) %ptr
590  ret <8 x i32> %val
591}
592
; Return a <16 x i32> through a pointer that is first loaded (volatile) from
; an undef addrspace(4) pointer. After the scalar pointer load, the checks
; expect four 128-bit buffer loads at offsets 0, 16, 32 and 48 filling
; v[0:15]; GFX11 groups them under "s_clause 0x3".
593define <16 x i32> @v16i32_func_void() #0 {
594; GFX789-LABEL: v16i32_func_void:
595; GFX789:       ; %bb.0:
596; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
598; GFX789-NEXT:    s_mov_b32 s7, 0xf000
599; GFX789-NEXT:    s_mov_b32 s6, -1
600; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
601; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
602; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
603; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
604; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
605; GFX789-NEXT:    s_waitcnt vmcnt(0)
606; GFX789-NEXT:    s_setpc_b64 s[30:31]
607;
608; GFX11-LABEL: v16i32_func_void:
609; GFX11:       ; %bb.0:
610; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
612; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
613; GFX11-NEXT:    s_mov_b32 s2, -1
614; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
615; GFX11-NEXT:    s_clause 0x3
616; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
617; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
618; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
619; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
620; GFX11-NEXT:    s_waitcnt vmcnt(0)
621; GFX11-NEXT:    s_setpc_b64 s[30:31]
622  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
623  %val = load <16 x i32>, ptr addrspace(1) %ptr
624  ret <16 x i32> %val
625}
626
; Return a <32 x i32> through a pointer that is first loaded (volatile) from
; an undef addrspace(4) pointer. The checks expect eight 128-bit buffer loads
; at offsets 0 through 112 (step 16) filling v[0:31], with the whole result
; returned in VGPRs; GFX11 groups the loads under "s_clause 0x7".
627define <32 x i32> @v32i32_func_void() #0 {
628; GFX789-LABEL: v32i32_func_void:
629; GFX789:       ; %bb.0:
630; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
632; GFX789-NEXT:    s_mov_b32 s7, 0xf000
633; GFX789-NEXT:    s_mov_b32 s6, -1
634; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
635; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
636; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
637; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
638; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
639; GFX789-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
640; GFX789-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
641; GFX789-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
642; GFX789-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
643; GFX789-NEXT:    s_waitcnt vmcnt(0)
644; GFX789-NEXT:    s_setpc_b64 s[30:31]
645;
646; GFX11-LABEL: v32i32_func_void:
647; GFX11:       ; %bb.0:
648; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
649; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
650; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
651; GFX11-NEXT:    s_mov_b32 s2, -1
652; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
653; GFX11-NEXT:    s_clause 0x7
654; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
655; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
656; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
657; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
658; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64
659; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80
660; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96
661; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112
662; GFX11-NEXT:    s_waitcnt vmcnt(0)
663; GFX11-NEXT:    s_setpc_b64 s[30:31]
664  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
665  %val = load <32 x i32>, ptr addrspace(1) %ptr
666  ret <32 x i32> %val
667}
668
; Return a <2 x i64> loaded from an undef global (addrspace 1) pointer.
; Expected code is a single 128-bit load into v[0:3] (buffer_load_dwordx4 /
; buffer_load_b128), a vmcnt wait, and the return.
669define <2 x i64> @v2i64_func_void() #0 {
670; GFX789-LABEL: v2i64_func_void:
671; GFX789:       ; %bb.0:
672; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
673; GFX789-NEXT:    s_mov_b32 s7, 0xf000
674; GFX789-NEXT:    s_mov_b32 s6, -1
675; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
676; GFX789-NEXT:    s_waitcnt vmcnt(0)
677; GFX789-NEXT:    s_setpc_b64 s[30:31]
678;
679; GFX11-LABEL: v2i64_func_void:
680; GFX11:       ; %bb.0:
681; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
683; GFX11-NEXT:    s_mov_b32 s2, -1
684; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
685; GFX11-NEXT:    s_waitcnt vmcnt(0)
686; GFX11-NEXT:    s_setpc_b64 s[30:31]
687  %val = load <2 x i64>, ptr addrspace(1) undef
688  ret <2 x i64> %val
689}
690
; Return a <3 x i64> through a pointer that is first loaded (volatile) from
; an undef addrspace(4) pointer. The checks expect a 128-bit buffer load into
; v[0:3] at offset 0 and a 64-bit load into v[4:5] at offset 16; GFX11 groups
; them under "s_clause 0x1".
691define <3 x i64> @v3i64_func_void() #0 {
692; GFX789-LABEL: v3i64_func_void:
693; GFX789:       ; %bb.0:
694; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
695; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
696; GFX789-NEXT:    s_mov_b32 s7, 0xf000
697; GFX789-NEXT:    s_mov_b32 s6, -1
698; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
699; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
700; GFX789-NEXT:    buffer_load_dwordx2 v[4:5], off, s[4:7], 0 offset:16
701; GFX789-NEXT:    s_waitcnt vmcnt(0)
702; GFX789-NEXT:    s_setpc_b64 s[30:31]
703;
704; GFX11-LABEL: v3i64_func_void:
705; GFX11:       ; %bb.0:
706; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
708; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
709; GFX11-NEXT:    s_mov_b32 s2, -1
710; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
711; GFX11-NEXT:    s_clause 0x1
712; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
713; GFX11-NEXT:    buffer_load_b64 v[4:5], off, s[0:3], 0 offset:16
714; GFX11-NEXT:    s_waitcnt vmcnt(0)
715; GFX11-NEXT:    s_setpc_b64 s[30:31]
716  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
717  %val = load <3 x i64>, ptr addrspace(1) %ptr
718  ret <3 x i64> %val
719}
720
; Return a <4 x i64> through a pointer that is first loaded (volatile) from
; an undef addrspace(4) pointer. The checks expect two 128-bit buffer loads
; at offsets 0 and 16 filling v[0:7], the same sequence as the <8 x i32>
; case; GFX11 groups them under "s_clause 0x1".
721define <4 x i64> @v4i64_func_void() #0 {
722; GFX789-LABEL: v4i64_func_void:
723; GFX789:       ; %bb.0:
724; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
725; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
726; GFX789-NEXT:    s_mov_b32 s7, 0xf000
727; GFX789-NEXT:    s_mov_b32 s6, -1
728; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
729; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
730; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
731; GFX789-NEXT:    s_waitcnt vmcnt(0)
732; GFX789-NEXT:    s_setpc_b64 s[30:31]
733;
734; GFX11-LABEL: v4i64_func_void:
735; GFX11:       ; %bb.0:
736; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
738; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
739; GFX11-NEXT:    s_mov_b32 s2, -1
740; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
741; GFX11-NEXT:    s_clause 0x1
742; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
743; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
744; GFX11-NEXT:    s_waitcnt vmcnt(0)
745; GFX11-NEXT:    s_setpc_b64 s[30:31]
746  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
747  %val = load <4 x i64>, ptr addrspace(1) %ptr
748  ret <4 x i64> %val
749}
750
; Return a <5 x i64> through a pointer that is first loaded (volatile) from
; an undef addrspace(4) pointer. The checks expect two 128-bit buffer loads
; (offsets 0 and 16, into v[0:7]) plus a 64-bit load at offset 32 into
; v[8:9]; GFX11 groups the three loads under "s_clause 0x2".
751define <5 x i64> @v5i64_func_void() #0 {
752; GFX789-LABEL: v5i64_func_void:
753; GFX789:       ; %bb.0:
754; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
756; GFX789-NEXT:    s_mov_b32 s7, 0xf000
757; GFX789-NEXT:    s_mov_b32 s6, -1
758; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
759; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
760; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
761; GFX789-NEXT:    buffer_load_dwordx2 v[8:9], off, s[4:7], 0 offset:32
762; GFX789-NEXT:    s_waitcnt vmcnt(0)
763; GFX789-NEXT:    s_setpc_b64 s[30:31]
764;
765; GFX11-LABEL: v5i64_func_void:
766; GFX11:       ; %bb.0:
767; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
768; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
769; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
770; GFX11-NEXT:    s_mov_b32 s2, -1
771; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
772; GFX11-NEXT:    s_clause 0x2
773; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
774; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
775; GFX11-NEXT:    buffer_load_b64 v[8:9], off, s[0:3], 0 offset:32
776; GFX11-NEXT:    s_waitcnt vmcnt(0)
777; GFX11-NEXT:    s_setpc_b64 s[30:31]
778  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
779  %val = load <5 x i64>, ptr addrspace(1) %ptr
780  ret <5 x i64> %val
781}
782
; Return an <8 x i64> through a pointer that is first loaded (volatile) from
; an undef addrspace(4) pointer. The checks expect four 128-bit buffer loads
; at offsets 0, 16, 32 and 48 filling v[0:15], the same sequence as the
; <16 x i32> case; GFX11 groups them under "s_clause 0x3".
783define <8 x i64> @v8i64_func_void() #0 {
784; GFX789-LABEL: v8i64_func_void:
785; GFX789:       ; %bb.0:
786; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
787; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
788; GFX789-NEXT:    s_mov_b32 s7, 0xf000
789; GFX789-NEXT:    s_mov_b32 s6, -1
790; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
791; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
792; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
793; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
794; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
795; GFX789-NEXT:    s_waitcnt vmcnt(0)
796; GFX789-NEXT:    s_setpc_b64 s[30:31]
797;
798; GFX11-LABEL: v8i64_func_void:
799; GFX11:       ; %bb.0:
800; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
802; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
803; GFX11-NEXT:    s_mov_b32 s2, -1
804; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
805; GFX11-NEXT:    s_clause 0x3
806; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
807; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
808; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
809; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
810; GFX11-NEXT:    s_waitcnt vmcnt(0)
811; GFX11-NEXT:    s_setpc_b64 s[30:31]
812  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
813  %val = load <8 x i64>, ptr addrspace(1) %ptr
814  ret <8 x i64> %val
815}
816
; Return-value test for <16 x i64>. The pointer comes from a volatile load of
; an undef addrspace(4) pointer; the vector is then loaded from it and
; returned. The checks below expect the value in v[0:31], filled by eight
; 128-bit buffer loads at offsets 0 through 112 in steps of 16.
817define <16 x i64> @v16i64_func_void() #0 {
818; GFX789-LABEL: v16i64_func_void:
819; GFX789:       ; %bb.0:
820; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
821; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
822; GFX789-NEXT:    s_mov_b32 s7, 0xf000
823; GFX789-NEXT:    s_mov_b32 s6, -1
824; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
825; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
826; GFX789-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
827; GFX789-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:32
828; GFX789-NEXT:    buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:48
829; GFX789-NEXT:    buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:64
830; GFX789-NEXT:    buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80
831; GFX789-NEXT:    buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:96
832; GFX789-NEXT:    buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:112
833; GFX789-NEXT:    s_waitcnt vmcnt(0)
834; GFX789-NEXT:    s_setpc_b64 s[30:31]
835;
836; GFX11-LABEL: v16i64_func_void:
837; GFX11:       ; %bb.0:
838; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
839; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
840; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
841; GFX11-NEXT:    s_mov_b32 s2, -1
842; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
843; GFX11-NEXT:    s_clause 0x7
844; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
845; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
846; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 offset:32
847; GFX11-NEXT:    buffer_load_b128 v[12:15], off, s[0:3], 0 offset:48
848; GFX11-NEXT:    buffer_load_b128 v[16:19], off, s[0:3], 0 offset:64
849; GFX11-NEXT:    buffer_load_b128 v[20:23], off, s[0:3], 0 offset:80
850; GFX11-NEXT:    buffer_load_b128 v[24:27], off, s[0:3], 0 offset:96
851; GFX11-NEXT:    buffer_load_b128 v[28:31], off, s[0:3], 0 offset:112
852; GFX11-NEXT:    s_waitcnt vmcnt(0)
853; GFX11-NEXT:    s_setpc_b64 s[30:31]
854  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
855  %val = load <16 x i64>, ptr addrspace(1) %ptr
856  ret <16 x i64> %val
857}
858
; Return-value test for <2 x i16>, loaded directly from an undef addrspace(1)
; pointer. The CI checks expect one dword load into v0 followed by a 16-bit
; right shift that places the upper half in v1; the GFX89 and GFX11 checks
; expect only the dword load into v0, with no further splitting.
859define <2 x i16> @v2i16_func_void() #0 {
860; CI-LABEL: v2i16_func_void:
861; CI:       ; %bb.0:
862; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863; CI-NEXT:    s_mov_b32 s7, 0xf000
864; CI-NEXT:    s_mov_b32 s6, -1
865; CI-NEXT:    buffer_load_dword v0, off, s[4:7], 0
866; CI-NEXT:    s_waitcnt vmcnt(0)
867; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
868; CI-NEXT:    s_setpc_b64 s[30:31]
869;
870; GFX89-LABEL: v2i16_func_void:
871; GFX89:       ; %bb.0:
872; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
873; GFX89-NEXT:    s_mov_b32 s7, 0xf000
874; GFX89-NEXT:    s_mov_b32 s6, -1
875; GFX89-NEXT:    buffer_load_dword v0, off, s[4:7], 0
876; GFX89-NEXT:    s_waitcnt vmcnt(0)
877; GFX89-NEXT:    s_setpc_b64 s[30:31]
878;
879; GFX11-LABEL: v2i16_func_void:
880; GFX11:       ; %bb.0:
881; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
882; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
883; GFX11-NEXT:    s_mov_b32 s2, -1
884; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
885; GFX11-NEXT:    s_waitcnt vmcnt(0)
886; GFX11-NEXT:    s_setpc_b64 s[30:31]
887  %val = load <2 x i16>, ptr addrspace(1) undef
888  ret <2 x i16> %val
889}
890
; Return-value test for <3 x i16>, loaded directly from an undef addrspace(1)
; pointer. The CI checks expect a 64-bit load into v[2:3], a v_alignbit_b32
; by 16 to form v1, and moves that place the two loaded dwords in v0 and v2.
; The GFX89 and GFX11 checks expect only a 64-bit load into v[0:1].
891define <3 x i16> @v3i16_func_void() #0 {
892; CI-LABEL: v3i16_func_void:
893; CI:       ; %bb.0:
894; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
895; CI-NEXT:    s_mov_b32 s7, 0xf000
896; CI-NEXT:    s_mov_b32 s6, -1
897; CI-NEXT:    buffer_load_dwordx2 v[2:3], off, s[4:7], 0
898; CI-NEXT:    s_waitcnt vmcnt(0)
899; CI-NEXT:    v_alignbit_b32 v1, v3, v2, 16
900; CI-NEXT:    v_mov_b32_e32 v0, v2
901; CI-NEXT:    v_mov_b32_e32 v2, v3
902; CI-NEXT:    s_setpc_b64 s[30:31]
903;
904; GFX89-LABEL: v3i16_func_void:
905; GFX89:       ; %bb.0:
906; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
907; GFX89-NEXT:    s_mov_b32 s7, 0xf000
908; GFX89-NEXT:    s_mov_b32 s6, -1
909; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
910; GFX89-NEXT:    s_waitcnt vmcnt(0)
911; GFX89-NEXT:    s_setpc_b64 s[30:31]
912;
913; GFX11-LABEL: v3i16_func_void:
914; GFX11:       ; %bb.0:
915; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
916; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
917; GFX11-NEXT:    s_mov_b32 s2, -1
918; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
919; GFX11-NEXT:    s_waitcnt vmcnt(0)
920; GFX11-NEXT:    s_setpc_b64 s[30:31]
921  %val = load <3 x i16>, ptr addrspace(1) undef
922  ret <3 x i16> %val
923}
924
; Return-value test for <4 x i16>, loaded directly from an undef addrspace(1)
; pointer. The CI checks expect a 64-bit load into v[0:1] followed by 16-bit
; right shifts and moves that leave results in v0 through v3. The GFX89 and
; GFX11 checks expect only the 64-bit load into v[0:1].
925define <4 x i16> @v4i16_func_void() #0 {
926; CI-LABEL: v4i16_func_void:
927; CI:       ; %bb.0:
928; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
929; CI-NEXT:    s_mov_b32 s7, 0xf000
930; CI-NEXT:    s_mov_b32 s6, -1
931; CI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
932; CI-NEXT:    s_waitcnt vmcnt(0)
933; CI-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
934; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
935; CI-NEXT:    v_mov_b32_e32 v2, v1
936; CI-NEXT:    v_mov_b32_e32 v1, v4
937; CI-NEXT:    s_setpc_b64 s[30:31]
938;
939; GFX89-LABEL: v4i16_func_void:
940; GFX89:       ; %bb.0:
941; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
942; GFX89-NEXT:    s_mov_b32 s7, 0xf000
943; GFX89-NEXT:    s_mov_b32 s6, -1
944; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
945; GFX89-NEXT:    s_waitcnt vmcnt(0)
946; GFX89-NEXT:    s_setpc_b64 s[30:31]
947;
948; GFX11-LABEL: v4i16_func_void:
949; GFX11:       ; %bb.0:
950; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
951; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
952; GFX11-NEXT:    s_mov_b32 s2, -1
953; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
954; GFX11-NEXT:    s_waitcnt vmcnt(0)
955; GFX11-NEXT:    s_setpc_b64 s[30:31]
956  %val = load <4 x i16>, ptr addrspace(1) undef
957  ret <4 x i16> %val
958}
959
; Return-value test for <4 x half>, loaded directly from an undef
; addrspace(1) pointer. The CI checks expect a 64-bit load into v[3:4], 16-bit
; right shifts to isolate the upper halves, and four v_cvt_f32_f16 conversions
; writing v0 through v3. The GFX89 and GFX11 checks expect only a 64-bit load
; into v[0:1] with no conversions.
960define <4 x half> @v4f16_func_void() #0 {
961; CI-LABEL: v4f16_func_void:
962; CI:       ; %bb.0:
963; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
964; CI-NEXT:    s_mov_b32 s7, 0xf000
965; CI-NEXT:    s_mov_b32 s6, -1
966; CI-NEXT:    buffer_load_dwordx2 v[3:4], off, s[4:7], 0
967; CI-NEXT:    s_waitcnt vmcnt(0)
968; CI-NEXT:    v_cvt_f32_f16_e32 v0, v3
969; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
970; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v4
971; CI-NEXT:    v_cvt_f32_f16_e32 v2, v4
972; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
973; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
974; CI-NEXT:    s_setpc_b64 s[30:31]
975;
976; GFX89-LABEL: v4f16_func_void:
977; GFX89:       ; %bb.0:
978; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
979; GFX89-NEXT:    s_mov_b32 s7, 0xf000
980; GFX89-NEXT:    s_mov_b32 s6, -1
981; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
982; GFX89-NEXT:    s_waitcnt vmcnt(0)
983; GFX89-NEXT:    s_setpc_b64 s[30:31]
984;
985; GFX11-LABEL: v4f16_func_void:
986; GFX11:       ; %bb.0:
987; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
989; GFX11-NEXT:    s_mov_b32 s2, -1
990; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
991; GFX11-NEXT:    s_waitcnt vmcnt(0)
992; GFX11-NEXT:    s_setpc_b64 s[30:31]
993  %val = load <4 x half>, ptr addrspace(1) undef
994  ret <4 x half> %val
995}
996
997; FIXME: Mixing buffer and global
998; FIXME: Should not scalarize
; Return-value test for <5 x i16>. The pointer comes from a volatile load of
; an undef addrspace(4) pointer; the vector is then loaded from it and
; returned. The CI checks expect a 64-bit load plus a separate signed-short
; load at offset 8 into v4, with shifts and moves leaving results in v0
; through v4. The GFX89 and GFX11 checks expect a single 128-bit load into
; v[0:3].
; NOTE(review): the checks below contain only buffer loads, so the existing
; "Mixing buffer and global" FIXME may be stale -- confirm before relying on it.
999define <5 x i16> @v5i16_func_void() #0 {
1000; CI-LABEL: v5i16_func_void:
1001; CI:       ; %bb.0:
1002; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1003; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1004; CI-NEXT:    s_mov_b32 s7, 0xf000
1005; CI-NEXT:    s_mov_b32 s6, -1
1006; CI-NEXT:    s_waitcnt lgkmcnt(0)
1007; CI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
1008; CI-NEXT:    buffer_load_sshort v4, off, s[4:7], 0 offset:8
1009; CI-NEXT:    s_waitcnt vmcnt(1)
1010; CI-NEXT:    v_alignbit_b32 v5, v1, v0, 16
1011; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
1012; CI-NEXT:    v_mov_b32_e32 v2, v1
1013; CI-NEXT:    v_mov_b32_e32 v1, v5
1014; CI-NEXT:    s_waitcnt vmcnt(0)
1015; CI-NEXT:    s_setpc_b64 s[30:31]
1016;
1017; GFX89-LABEL: v5i16_func_void:
1018; GFX89:       ; %bb.0:
1019; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1020; GFX89-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1021; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1022; GFX89-NEXT:    s_mov_b32 s6, -1
1023; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
1024; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
1025; GFX89-NEXT:    s_waitcnt vmcnt(0)
1026; GFX89-NEXT:    s_setpc_b64 s[30:31]
1027;
1028; GFX11-LABEL: v5i16_func_void:
1029; GFX11:       ; %bb.0:
1030; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1031; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
1032; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1033; GFX11-NEXT:    s_mov_b32 s2, -1
1034; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1035; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
1036; GFX11-NEXT:    s_waitcnt vmcnt(0)
1037; GFX11-NEXT:    s_setpc_b64 s[30:31]
1038  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
1039  %val = load <5 x i16>, ptr addrspace(1) %ptr
1040  ret <5 x i16> %val
1041}
1042
; Return-value test for <8 x i16>. The pointer comes from a volatile load of
; an undef addrspace(4) pointer; the vector is then loaded from it and
; returned. The CI checks expect a 128-bit load into v[8:11], then 16-bit
; right shifts writing the odd registers v1..v7 and moves writing the even
; registers v0..v6. The GFX89 and GFX11 checks expect only a 128-bit load into
; v[0:3].
1043define <8 x i16> @v8i16_func_void() #0 {
1044; CI-LABEL: v8i16_func_void:
1045; CI:       ; %bb.0:
1046; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1047; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1048; CI-NEXT:    s_mov_b32 s7, 0xf000
1049; CI-NEXT:    s_mov_b32 s6, -1
1050; CI-NEXT:    s_waitcnt lgkmcnt(0)
1051; CI-NEXT:    buffer_load_dwordx4 v[8:11], off, s[4:7], 0
1052; CI-NEXT:    s_waitcnt vmcnt(0)
1053; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v8
1054; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v9
1055; CI-NEXT:    v_lshrrev_b32_e32 v5, 16, v10
1056; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v11
1057; CI-NEXT:    v_mov_b32_e32 v0, v8
1058; CI-NEXT:    v_mov_b32_e32 v2, v9
1059; CI-NEXT:    v_mov_b32_e32 v4, v10
1060; CI-NEXT:    v_mov_b32_e32 v6, v11
1061; CI-NEXT:    s_setpc_b64 s[30:31]
1062;
1063; GFX89-LABEL: v8i16_func_void:
1064; GFX89:       ; %bb.0:
1065; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1066; GFX89-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1067; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1068; GFX89-NEXT:    s_mov_b32 s6, -1
1069; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
1070; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
1071; GFX89-NEXT:    s_waitcnt vmcnt(0)
1072; GFX89-NEXT:    s_setpc_b64 s[30:31]
1073;
1074; GFX11-LABEL: v8i16_func_void:
1075; GFX11:       ; %bb.0:
1076; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1077; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
1078; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1079; GFX11-NEXT:    s_mov_b32 s2, -1
1080; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1081; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
1082; GFX11-NEXT:    s_waitcnt vmcnt(0)
1083; GFX11-NEXT:    s_setpc_b64 s[30:31]
1084  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
1085  %val = load <8 x i16>, ptr addrspace(1) %ptr
1086  ret <8 x i16> %val
1087}
1088
; Return-value test for <16 x i16>. The pointer comes from a volatile load of
; an undef addrspace(4) pointer; the vector is then loaded from it and
; returned. The CI checks expect two 128-bit loads into v[22:25] and v[18:21],
; then 16-bit right shifts writing the odd registers v1..v15 and moves writing
; the even registers v0..v14. The GFX89 and GFX11 checks expect only two
; 128-bit loads into v[0:7].
1089define <16 x i16> @v16i16_func_void() #0 {
1090; CI-LABEL: v16i16_func_void:
1091; CI:       ; %bb.0:
1092; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1093; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1094; CI-NEXT:    s_mov_b32 s7, 0xf000
1095; CI-NEXT:    s_mov_b32 s6, -1
1096; CI-NEXT:    s_waitcnt lgkmcnt(0)
1097; CI-NEXT:    buffer_load_dwordx4 v[22:25], off, s[4:7], 0
1098; CI-NEXT:    buffer_load_dwordx4 v[18:21], off, s[4:7], 0 offset:16
1099; CI-NEXT:    s_waitcnt vmcnt(1)
1100; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v22
1101; CI-NEXT:    v_lshrrev_b32_e32 v3, 16, v23
1102; CI-NEXT:    v_lshrrev_b32_e32 v5, 16, v24
1103; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v25
1104; CI-NEXT:    s_waitcnt vmcnt(0)
1105; CI-NEXT:    v_lshrrev_b32_e32 v9, 16, v18
1106; CI-NEXT:    v_lshrrev_b32_e32 v11, 16, v19
1107; CI-NEXT:    v_lshrrev_b32_e32 v13, 16, v20
1108; CI-NEXT:    v_lshrrev_b32_e32 v15, 16, v21
1109; CI-NEXT:    v_mov_b32_e32 v0, v22
1110; CI-NEXT:    v_mov_b32_e32 v2, v23
1111; CI-NEXT:    v_mov_b32_e32 v4, v24
1112; CI-NEXT:    v_mov_b32_e32 v6, v25
1113; CI-NEXT:    v_mov_b32_e32 v8, v18
1114; CI-NEXT:    v_mov_b32_e32 v10, v19
1115; CI-NEXT:    v_mov_b32_e32 v12, v20
1116; CI-NEXT:    v_mov_b32_e32 v14, v21
1117; CI-NEXT:    s_setpc_b64 s[30:31]
1118;
1119; GFX89-LABEL: v16i16_func_void:
1120; GFX89:       ; %bb.0:
1121; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1122; GFX89-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1123; GFX89-NEXT:    s_mov_b32 s7, 0xf000
1124; GFX89-NEXT:    s_mov_b32 s6, -1
1125; GFX89-NEXT:    s_waitcnt lgkmcnt(0)
1126; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
1127; GFX89-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0 offset:16
1128; GFX89-NEXT:    s_waitcnt vmcnt(0)
1129; GFX89-NEXT:    s_setpc_b64 s[30:31]
1130;
1131; GFX11-LABEL: v16i16_func_void:
1132; GFX11:       ; %bb.0:
1133; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1134; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
1135; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1136; GFX11-NEXT:    s_mov_b32 s2, -1
1137; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1138; GFX11-NEXT:    s_clause 0x1
1139; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
1140; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 offset:16
1141; GFX11-NEXT:    s_waitcnt vmcnt(0)
1142; GFX11-NEXT:    s_setpc_b64 s[30:31]
1143  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
1144  %val = load <16 x i16>, ptr addrspace(1) %ptr
1145  ret <16 x i16> %val
1146}
1147
1148; FIXME: Should pack
; Return-value test for <16 x i8>. The pointer comes from a volatile load of
; an undef addrspace(4) pointer; the vector is then loaded from it and
; returned. On every checked target the expected code is one 128-bit load into
; v[0:3] followed by right shifts by 8, 16 and 24 and moves that spread the
; result over v0 through v15 (one register per element). The GFX11 checks
; differ only in pairing some of the moves as v_dual_mov_b32.
1149define <16 x i8> @v16i8_func_void() #0 {
1150; GFX789-LABEL: v16i8_func_void:
1151; GFX789:       ; %bb.0:
1152; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1154; GFX789-NEXT:    s_mov_b32 s7, 0xf000
1155; GFX789-NEXT:    s_mov_b32 s6, -1
1156; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
1157; GFX789-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
1158; GFX789-NEXT:    s_waitcnt vmcnt(0)
1159; GFX789-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
1160; GFX789-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
1161; GFX789-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
1162; GFX789-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
1163; GFX789-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
1164; GFX789-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
1165; GFX789-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
1166; GFX789-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
1167; GFX789-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
1168; GFX789-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
1169; GFX789-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
1170; GFX789-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
1171; GFX789-NEXT:    v_mov_b32_e32 v4, v1
1172; GFX789-NEXT:    v_mov_b32_e32 v8, v2
1173; GFX789-NEXT:    v_mov_b32_e32 v12, v3
1174; GFX789-NEXT:    v_mov_b32_e32 v1, v16
1175; GFX789-NEXT:    v_mov_b32_e32 v2, v17
1176; GFX789-NEXT:    v_mov_b32_e32 v3, v18
1177; GFX789-NEXT:    s_setpc_b64 s[30:31]
1178;
1179; GFX11-LABEL: v16i8_func_void:
1180; GFX11:       ; %bb.0:
1181; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1182; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
1183; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1184; GFX11-NEXT:    s_mov_b32 s2, -1
1185; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1186; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
1187; GFX11-NEXT:    s_waitcnt vmcnt(0)
1188; GFX11-NEXT:    v_lshrrev_b32_e32 v16, 8, v0
1189; GFX11-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
1190; GFX11-NEXT:    v_lshrrev_b32_e32 v18, 24, v0
1191; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
1192; GFX11-NEXT:    v_lshrrev_b32_e32 v6, 16, v1
1193; GFX11-NEXT:    v_lshrrev_b32_e32 v7, 24, v1
1194; GFX11-NEXT:    v_lshrrev_b32_e32 v9, 8, v2
1195; GFX11-NEXT:    v_lshrrev_b32_e32 v10, 16, v2
1196; GFX11-NEXT:    v_lshrrev_b32_e32 v11, 24, v2
1197; GFX11-NEXT:    v_lshrrev_b32_e32 v13, 8, v3
1198; GFX11-NEXT:    v_lshrrev_b32_e32 v14, 16, v3
1199; GFX11-NEXT:    v_lshrrev_b32_e32 v15, 24, v3
1200; GFX11-NEXT:    v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v1, v16
1201; GFX11-NEXT:    v_mov_b32_e32 v8, v2
1202; GFX11-NEXT:    v_dual_mov_b32 v12, v3 :: v_dual_mov_b32 v3, v18
1203; GFX11-NEXT:    v_mov_b32_e32 v2, v17
1204; GFX11-NEXT:    s_setpc_b64 s[30:31]
1205  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
1206  %val = load <16 x i8>, ptr addrspace(1) %ptr
1207  ret <16 x i8> %val
1208}
1209
1210; FIXME: Should pack
; Return-value test for <4 x i8>. The pointer comes from a volatile load of an
; undef addrspace(4) pointer; the vector is then loaded from it and returned.
; On every checked target the expected code is one 32-bit load into v0
; followed by right shifts by 8, 16 and 24 into v1, v2 and v3. Only the order
; of the first two shifts differs between the GFX789 and GFX11 checks.
1211define <4  x i8> @v4i8_func_void() #0 {
1212; GFX789-LABEL: v4i8_func_void:
1213; GFX789:       ; %bb.0:
1214; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215; GFX789-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1216; GFX789-NEXT:    s_mov_b32 s7, 0xf000
1217; GFX789-NEXT:    s_mov_b32 s6, -1
1218; GFX789-NEXT:    s_waitcnt lgkmcnt(0)
1219; GFX789-NEXT:    buffer_load_dword v0, off, s[4:7], 0
1220; GFX789-NEXT:    s_waitcnt vmcnt(0)
1221; GFX789-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
1222; GFX789-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1223; GFX789-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1224; GFX789-NEXT:    s_setpc_b64 s[30:31]
1225;
1226; GFX11-LABEL: v4i8_func_void:
1227; GFX11:       ; %bb.0:
1228; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1229; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
1230; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1231; GFX11-NEXT:    s_mov_b32 s2, -1
1232; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1233; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
1234; GFX11-NEXT:    s_waitcnt vmcnt(0)
1235; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 8, v0
1236; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
1237; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 24, v0
1238; GFX11-NEXT:    s_setpc_b64 s[30:31]
1239  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
1240  %val = load <4  x i8>, ptr addrspace(1) %ptr
1241  ret <4  x i8> %val
1242}
1243
; Return-value test for the aggregate { i8, i32 }, loaded directly from an
; undef addrspace(1) pointer and returned by value. The checks below expect
; the i8 member in v0 (unsigned byte load) and the i32 member in v1 (dword
; load); the GFX11 checks group the two loads under s_clause 0x1.
1244define {i8, i32} @struct_i8_i32_func_void() #0 {
1245; GFX789-LABEL: struct_i8_i32_func_void:
1246; GFX789:       ; %bb.0:
1247; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248; GFX789-NEXT:    s_mov_b32 s7, 0xf000
1249; GFX789-NEXT:    s_mov_b32 s6, -1
1250; GFX789-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
1251; GFX789-NEXT:    buffer_load_dword v1, off, s[4:7], 0
1252; GFX789-NEXT:    s_waitcnt vmcnt(0)
1253; GFX789-NEXT:    s_setpc_b64 s[30:31]
1254;
1255; GFX11-LABEL: struct_i8_i32_func_void:
1256; GFX11:       ; %bb.0:
1257; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1258; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1259; GFX11-NEXT:    s_mov_b32 s2, -1
1260; GFX11-NEXT:    s_clause 0x1
1261; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
1262; GFX11-NEXT:    buffer_load_b32 v1, off, s[0:3], 0
1263; GFX11-NEXT:    s_waitcnt vmcnt(0)
1264; GFX11-NEXT:    s_setpc_b64 s[30:31]
1265  %val = load { i8, i32 }, ptr addrspace(1) undef
1266  ret { i8, i32 } %val
1267}
1268
; sret counterpart of the { i8, i32 } return test: the result is written
; through the addrspace(5) sret pointer %arg0 instead of being returned.
; An i8 and an i32 are each read with a volatile load from an undef
; addrspace(1) pointer and stored to fields 0 and 1 of %arg0. The checks
; expect each load to be followed by its own s_waitcnt vmcnt(0), and the two
; stores to use v0 as the address with offsets 0 and 4 (buffer stores with
; offen in the GFX789 checks, scratch stores in the GFX11 checks).
1269define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 {
1270; GFX789-LABEL: void_func_sret_struct_i8_i32:
1271; GFX789:       ; %bb.0:
1272; GFX789-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273; GFX789-NEXT:    s_mov_b32 s7, 0xf000
1274; GFX789-NEXT:    s_mov_b32 s6, -1
1275; GFX789-NEXT:    buffer_load_ubyte v1, off, s[4:7], 0 glc
1276; GFX789-NEXT:    s_waitcnt vmcnt(0)
1277; GFX789-NEXT:    buffer_load_dword v2, off, s[4:7], 0 glc
1278; GFX789-NEXT:    s_waitcnt vmcnt(0)
1279; GFX789-NEXT:    buffer_store_byte v1, v0, s[0:3], 0 offen
1280; GFX789-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
1281; GFX789-NEXT:    s_waitcnt vmcnt(0)
1282; GFX789-NEXT:    s_setpc_b64 s[30:31]
1283;
1284; GFX11-LABEL: void_func_sret_struct_i8_i32:
1285; GFX11:       ; %bb.0:
1286; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1287; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1288; GFX11-NEXT:    s_mov_b32 s2, -1
1289; GFX11-NEXT:    buffer_load_u8 v1, off, s[0:3], 0 glc dlc
1290; GFX11-NEXT:    s_waitcnt vmcnt(0)
1291; GFX11-NEXT:    buffer_load_b32 v2, off, s[0:3], 0 glc dlc
1292; GFX11-NEXT:    s_waitcnt vmcnt(0)
1293; GFX11-NEXT:    s_clause 0x1
1294; GFX11-NEXT:    scratch_store_b8 v0, v1, off
1295; GFX11-NEXT:    scratch_store_b32 v0, v2, off offset:4
1296; GFX11-NEXT:    s_setpc_b64 s[30:31]
1297  %val0 = load volatile i8, ptr addrspace(1) undef
1298  %val1 = load volatile i32, ptr addrspace(1) undef
1299  %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0
1300  %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1
1301  store i8 %val0, ptr addrspace(5) %gep0
1302  store i32 %val1, ptr addrspace(5) %gep1
1303  ret void
1304}
1305
1306; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
1307; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
1308; AssertZext inserted. Not using it introduces the spills.
1309define <33 x i32> @v33i32_func_void() #0 {
1310; CI-LABEL: v33i32_func_void:
1311; CI:       ; %bb.0:
1312; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1314; CI-NEXT:    s_mov_b32 s7, 0xf000
1315; CI-NEXT:    s_mov_b32 s6, -1
1316; CI-NEXT:    v_add_i32_e32 v34, vcc, 0x80, v0
1317; CI-NEXT:    s_waitcnt lgkmcnt(0)
1318; CI-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
1319; CI-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
1320; CI-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
1321; CI-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
1322; CI-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
1323; CI-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
1324; CI-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
1325; CI-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
1326; CI-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
1327; CI-NEXT:    s_waitcnt vmcnt(8)
1328; CI-NEXT:    buffer_store_dword v33, v34, s[0:3], 0 offen
1329; CI-NEXT:    v_add_i32_e32 v33, vcc, 0x7c, v0
1330; CI-NEXT:    s_waitcnt vmcnt(8)
1331; CI-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
1332; CI-NEXT:    v_add_i32_e32 v4, vcc, 0x78, v0
1333; CI-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
1334; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x74, v0
1335; CI-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
1336; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x70, v0
1337; CI-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
1338; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x6c, v0
1339; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x68, v0
1340; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x64, v0
1341; CI-NEXT:    s_waitcnt vmcnt(11)
1342; CI-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
1343; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x60, v0
1344; CI-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
1345; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x5c, v0
1346; CI-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
1347; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x58, v0
1348; CI-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
1349; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x54, v0
1350; CI-NEXT:    v_add_i32_e32 v4, vcc, 0x50, v0
1351; CI-NEXT:    v_add_i32_e32 v5, vcc, 0x4c, v0
1352; CI-NEXT:    s_waitcnt vmcnt(14)
1353; CI-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
1354; CI-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
1355; CI-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
1356; CI-NEXT:    v_add_i32_e32 v1, vcc, 56, v0
1357; CI-NEXT:    v_add_i32_e32 v6, vcc, 0x48, v0
1358; CI-NEXT:    v_add_i32_e32 v7, vcc, 0x44, v0
1359; CI-NEXT:    v_add_i32_e32 v2, vcc, 64, v0
1360; CI-NEXT:    v_add_i32_e32 v3, vcc, 60, v0
1361; CI-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
1362; CI-NEXT:    v_add_i32_e32 v4, vcc, 52, v0
1363; CI-NEXT:    v_add_i32_e32 v8, vcc, 48, v0
1364; CI-NEXT:    v_add_i32_e32 v9, vcc, 44, v0
1365; CI-NEXT:    v_add_i32_e32 v10, vcc, 40, v0
1366; CI-NEXT:    v_add_i32_e32 v11, vcc, 36, v0
1367; CI-NEXT:    s_waitcnt vmcnt(14)
1368; CI-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
1369; CI-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
1370; CI-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
1371; CI-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
1372; CI-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
1373; CI-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
1374; CI-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
1375; CI-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
1376; CI-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
1377; CI-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
1378; CI-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
1379; CI-NEXT:    v_add_i32_e32 v1, vcc, 32, v0
1380; CI-NEXT:    buffer_store_dword v21, v1, s[0:3], 0 offen
1381; CI-NEXT:    v_add_i32_e32 v1, vcc, 28, v0
1382; CI-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
1383; CI-NEXT:    v_add_i32_e32 v1, vcc, 24, v0
1384; CI-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
1385; CI-NEXT:    v_add_i32_e32 v1, vcc, 20, v0
1386; CI-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
1387; CI-NEXT:    v_add_i32_e32 v1, vcc, 16, v0
1388; CI-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
1389; CI-NEXT:    v_add_i32_e32 v1, vcc, 12, v0
1390; CI-NEXT:    s_waitcnt vmcnt(14)
1391; CI-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
1392; CI-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
1393; CI-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
1394; CI-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
1395; CI-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
1396; CI-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
1397; CI-NEXT:    s_waitcnt vmcnt(0)
1398; CI-NEXT:    s_setpc_b64 s[30:31]
1399;
1400; GFX8-LABEL: v33i32_func_void:
1401; GFX8:       ; %bb.0:
1402; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1403; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1404; GFX8-NEXT:    s_mov_b32 s7, 0xf000
1405; GFX8-NEXT:    s_mov_b32 s6, -1
1406; GFX8-NEXT:    v_add_u32_e32 v34, vcc, 0x80, v0
1407; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1408; GFX8-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
1409; GFX8-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
1410; GFX8-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
1411; GFX8-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
1412; GFX8-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
1413; GFX8-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
1414; GFX8-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
1415; GFX8-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
1416; GFX8-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
1417; GFX8-NEXT:    s_waitcnt vmcnt(8)
1418; GFX8-NEXT:    buffer_store_dword v33, v34, s[0:3], 0 offen
1419; GFX8-NEXT:    v_add_u32_e32 v33, vcc, 0x7c, v0
1420; GFX8-NEXT:    s_waitcnt vmcnt(8)
1421; GFX8-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
1422; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x78, v0
1423; GFX8-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
1424; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x74, v0
1425; GFX8-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
1426; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x70, v0
1427; GFX8-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
1428; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x6c, v0
1429; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x68, v0
1430; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x64, v0
1431; GFX8-NEXT:    s_waitcnt vmcnt(11)
1432; GFX8-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
1433; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x60, v0
1434; GFX8-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
1435; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x5c, v0
1436; GFX8-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
1437; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x58, v0
1438; GFX8-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
1439; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x54, v0
1440; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x50, v0
1441; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x4c, v0
1442; GFX8-NEXT:    s_waitcnt vmcnt(14)
1443; GFX8-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
1444; GFX8-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
1445; GFX8-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
1446; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 56, v0
1447; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0x48, v0
1448; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x44, v0
1449; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 64, v0
1450; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 60, v0
1451; GFX8-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
1452; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 52, v0
1453; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 48, v0
1454; GFX8-NEXT:    v_add_u32_e32 v9, vcc, 44, v0
1455; GFX8-NEXT:    v_add_u32_e32 v10, vcc, 40, v0
1456; GFX8-NEXT:    v_add_u32_e32 v11, vcc, 36, v0
1457; GFX8-NEXT:    s_waitcnt vmcnt(14)
1458; GFX8-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
1459; GFX8-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
1460; GFX8-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
1461; GFX8-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
1462; GFX8-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
1463; GFX8-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
1464; GFX8-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
1465; GFX8-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
1466; GFX8-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
1467; GFX8-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
1468; GFX8-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
1469; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 32, v0
1470; GFX8-NEXT:    buffer_store_dword v21, v1, s[0:3], 0 offen
1471; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 28, v0
1472; GFX8-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
1473; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 24, v0
1474; GFX8-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
1475; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 20, v0
1476; GFX8-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
1477; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 16, v0
1478; GFX8-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
1479; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 12, v0
1480; GFX8-NEXT:    s_waitcnt vmcnt(14)
1481; GFX8-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
1482; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 8, v0
1483; GFX8-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
1484; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 4, v0
1485; GFX8-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
1486; GFX8-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
1487; GFX8-NEXT:    s_waitcnt vmcnt(0)
1488; GFX8-NEXT:    s_setpc_b64 s[30:31]
1489;
1490; GFX9-LABEL: v33i32_func_void:
1491; GFX9:       ; %bb.0:
1492; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1493; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1494; GFX9-NEXT:    s_mov_b32 s7, 0xf000
1495; GFX9-NEXT:    s_mov_b32 s6, -1
1496; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1497; GFX9-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
1498; GFX9-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
1499; GFX9-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
1500; GFX9-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
1501; GFX9-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
1502; GFX9-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
1503; GFX9-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
1504; GFX9-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
1505; GFX9-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
1506; GFX9-NEXT:    s_waitcnt vmcnt(8)
1507; GFX9-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:124
1508; GFX9-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:120
1509; GFX9-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:116
1510; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
1511; GFX9-NEXT:    s_waitcnt vmcnt(11)
1512; GFX9-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:108
1513; GFX9-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:104
1514; GFX9-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:100
1515; GFX9-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:96
1516; GFX9-NEXT:    s_waitcnt vmcnt(14)
1517; GFX9-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:92
1518; GFX9-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:88
1519; GFX9-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:84
1520; GFX9-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:80
1521; GFX9-NEXT:    s_waitcnt vmcnt(17)
1522; GFX9-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:76
1523; GFX9-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:72
1524; GFX9-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:68
1525; GFX9-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:64
1526; GFX9-NEXT:    s_waitcnt vmcnt(20)
1527; GFX9-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen offset:128
1528; GFX9-NEXT:    s_waitcnt vmcnt(20)
1529; GFX9-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:60
1530; GFX9-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:56
1531; GFX9-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:52
1532; GFX9-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:48
1533; GFX9-NEXT:    s_waitcnt vmcnt(23)
1534; GFX9-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:44
1535; GFX9-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:40
1536; GFX9-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:36
1537; GFX9-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:32
1538; GFX9-NEXT:    s_waitcnt vmcnt(26)
1539; GFX9-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:28
1540; GFX9-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:24
1541; GFX9-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:20
1542; GFX9-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:16
1543; GFX9-NEXT:    s_waitcnt vmcnt(29)
1544; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:12
1545; GFX9-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:8
1546; GFX9-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:4
1547; GFX9-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
1548; GFX9-NEXT:    s_waitcnt vmcnt(0)
1549; GFX9-NEXT:    s_setpc_b64 s[30:31]
1550;
1551; GFX11-LABEL: v33i32_func_void:
1552; GFX11:       ; %bb.0:
1553; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1554; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
1555; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1556; GFX11-NEXT:    s_mov_b32 s2, -1
1557; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1558; GFX11-NEXT:    s_clause 0x8
1559; GFX11-NEXT:    buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112
1560; GFX11-NEXT:    buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96
1561; GFX11-NEXT:    buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80
1562; GFX11-NEXT:    buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64
1563; GFX11-NEXT:    buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48
1564; GFX11-NEXT:    buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32
1565; GFX11-NEXT:    buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16
1566; GFX11-NEXT:    buffer_load_b128 v[29:32], off, s[0:3], 0
1567; GFX11-NEXT:    buffer_load_b32 v33, off, s[0:3], 0 offset:128
1568; GFX11-NEXT:    s_waitcnt vmcnt(8)
1569; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:112
1570; GFX11-NEXT:    s_waitcnt vmcnt(7)
1571; GFX11-NEXT:    scratch_store_b128 v0, v[5:8], off offset:96
1572; GFX11-NEXT:    s_waitcnt vmcnt(6)
1573; GFX11-NEXT:    scratch_store_b128 v0, v[9:12], off offset:80
1574; GFX11-NEXT:    s_waitcnt vmcnt(5)
1575; GFX11-NEXT:    scratch_store_b128 v0, v[13:16], off offset:64
1576; GFX11-NEXT:    s_waitcnt vmcnt(4)
1577; GFX11-NEXT:    scratch_store_b128 v0, v[17:20], off offset:48
1578; GFX11-NEXT:    s_waitcnt vmcnt(3)
1579; GFX11-NEXT:    scratch_store_b128 v0, v[21:24], off offset:32
1580; GFX11-NEXT:    s_waitcnt vmcnt(2)
1581; GFX11-NEXT:    scratch_store_b128 v0, v[25:28], off offset:16
1582; GFX11-NEXT:    s_waitcnt vmcnt(1)
1583; GFX11-NEXT:    scratch_store_b128 v0, v[29:32], off
1584; GFX11-NEXT:    s_waitcnt vmcnt(0)
1585; GFX11-NEXT:    scratch_store_b32 v0, v33, off offset:128
1586; GFX11-NEXT:    s_setpc_b64 s[30:31]
1587  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
1588  %val = load <33 x i32>, ptr addrspace(1) %ptr
1589  ret <33 x i32> %val
1590}
1591
; Returns a { <32 x i32>, i32 } struct loaded from global memory.
; The expected code for every checked target writes the result to memory
; addressed relative to v0: the 32 vector dwords at byte offsets 0..124 and
; the trailing i32 member at byte offset 128 (0x80).
; NOTE(review): v0 is presumably the caller-provided return-slot address --
; confirm against the AMDGPU calling convention.
; The check lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script rather than editing by hand.
1592define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
1593; CI-LABEL: struct_v32i32_i32_func_void:
1594; CI:       ; %bb.0:
1595; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1596; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1597; CI-NEXT:    s_mov_b32 s7, 0xf000
1598; CI-NEXT:    s_mov_b32 s6, -1
1599; CI-NEXT:    v_add_i32_e32 v34, vcc, 0x80, v0
1600; CI-NEXT:    s_waitcnt lgkmcnt(0)
1601; CI-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
1602; CI-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
1603; CI-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
1604; CI-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
1605; CI-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
1606; CI-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
1607; CI-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
1608; CI-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
1609; CI-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
1610; CI-NEXT:    s_waitcnt vmcnt(8)
1611; CI-NEXT:    buffer_store_dword v33, v34, s[0:3], 0 offen
1612; CI-NEXT:    v_add_i32_e32 v33, vcc, 0x7c, v0
1613; CI-NEXT:    s_waitcnt vmcnt(8)
1614; CI-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
1615; CI-NEXT:    v_add_i32_e32 v4, vcc, 0x78, v0
1616; CI-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
1617; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x74, v0
1618; CI-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
1619; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x70, v0
1620; CI-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
1621; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x6c, v0
1622; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x68, v0
1623; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x64, v0
1624; CI-NEXT:    s_waitcnt vmcnt(11)
1625; CI-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
1626; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x60, v0
1627; CI-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
1628; CI-NEXT:    v_add_i32_e32 v2, vcc, 0x5c, v0
1629; CI-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
1630; CI-NEXT:    v_add_i32_e32 v3, vcc, 0x58, v0
1631; CI-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
1632; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x54, v0
1633; CI-NEXT:    v_add_i32_e32 v4, vcc, 0x50, v0
1634; CI-NEXT:    v_add_i32_e32 v5, vcc, 0x4c, v0
1635; CI-NEXT:    s_waitcnt vmcnt(14)
1636; CI-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
1637; CI-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
1638; CI-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
1639; CI-NEXT:    v_add_i32_e32 v1, vcc, 56, v0
1640; CI-NEXT:    v_add_i32_e32 v6, vcc, 0x48, v0
1641; CI-NEXT:    v_add_i32_e32 v7, vcc, 0x44, v0
1642; CI-NEXT:    v_add_i32_e32 v2, vcc, 64, v0
1643; CI-NEXT:    v_add_i32_e32 v3, vcc, 60, v0
1644; CI-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
1645; CI-NEXT:    v_add_i32_e32 v4, vcc, 52, v0
1646; CI-NEXT:    v_add_i32_e32 v8, vcc, 48, v0
1647; CI-NEXT:    v_add_i32_e32 v9, vcc, 44, v0
1648; CI-NEXT:    v_add_i32_e32 v10, vcc, 40, v0
1649; CI-NEXT:    v_add_i32_e32 v11, vcc, 36, v0
1650; CI-NEXT:    s_waitcnt vmcnt(14)
1651; CI-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
1652; CI-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
1653; CI-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
1654; CI-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
1655; CI-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
1656; CI-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
1657; CI-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
1658; CI-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
1659; CI-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
1660; CI-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
1661; CI-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
1662; CI-NEXT:    v_add_i32_e32 v1, vcc, 32, v0
1663; CI-NEXT:    buffer_store_dword v21, v1, s[0:3], 0 offen
1664; CI-NEXT:    v_add_i32_e32 v1, vcc, 28, v0
1665; CI-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
1666; CI-NEXT:    v_add_i32_e32 v1, vcc, 24, v0
1667; CI-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
1668; CI-NEXT:    v_add_i32_e32 v1, vcc, 20, v0
1669; CI-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
1670; CI-NEXT:    v_add_i32_e32 v1, vcc, 16, v0
1671; CI-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
1672; CI-NEXT:    v_add_i32_e32 v1, vcc, 12, v0
1673; CI-NEXT:    s_waitcnt vmcnt(14)
1674; CI-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
1675; CI-NEXT:    v_add_i32_e32 v1, vcc, 8, v0
1676; CI-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
1677; CI-NEXT:    v_add_i32_e32 v1, vcc, 4, v0
1678; CI-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
1679; CI-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
1680; CI-NEXT:    s_waitcnt vmcnt(0)
1681; CI-NEXT:    s_setpc_b64 s[30:31]
1682;
1683; GFX8-LABEL: struct_v32i32_i32_func_void:
1684; GFX8:       ; %bb.0:
1685; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1686; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1687; GFX8-NEXT:    s_mov_b32 s7, 0xf000
1688; GFX8-NEXT:    s_mov_b32 s6, -1
1689; GFX8-NEXT:    v_add_u32_e32 v34, vcc, 0x80, v0
1690; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1691; GFX8-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
1692; GFX8-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
1693; GFX8-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
1694; GFX8-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
1695; GFX8-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
1696; GFX8-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
1697; GFX8-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
1698; GFX8-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
1699; GFX8-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
1700; GFX8-NEXT:    s_waitcnt vmcnt(8)
1701; GFX8-NEXT:    buffer_store_dword v33, v34, s[0:3], 0 offen
1702; GFX8-NEXT:    v_add_u32_e32 v33, vcc, 0x7c, v0
1703; GFX8-NEXT:    s_waitcnt vmcnt(8)
1704; GFX8-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
1705; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x78, v0
1706; GFX8-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
1707; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x74, v0
1708; GFX8-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
1709; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x70, v0
1710; GFX8-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
1711; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x6c, v0
1712; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x68, v0
1713; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x64, v0
1714; GFX8-NEXT:    s_waitcnt vmcnt(11)
1715; GFX8-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
1716; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x60, v0
1717; GFX8-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
1718; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x5c, v0
1719; GFX8-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
1720; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0x58, v0
1721; GFX8-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
1722; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x54, v0
1723; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0x50, v0
1724; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0x4c, v0
1725; GFX8-NEXT:    s_waitcnt vmcnt(14)
1726; GFX8-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
1727; GFX8-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
1728; GFX8-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
1729; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 56, v0
1730; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0x48, v0
1731; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0x44, v0
1732; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 64, v0
1733; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 60, v0
1734; GFX8-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
1735; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 52, v0
1736; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 48, v0
1737; GFX8-NEXT:    v_add_u32_e32 v9, vcc, 44, v0
1738; GFX8-NEXT:    v_add_u32_e32 v10, vcc, 40, v0
1739; GFX8-NEXT:    v_add_u32_e32 v11, vcc, 36, v0
1740; GFX8-NEXT:    s_waitcnt vmcnt(14)
1741; GFX8-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
1742; GFX8-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
1743; GFX8-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
1744; GFX8-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
1745; GFX8-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
1746; GFX8-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
1747; GFX8-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
1748; GFX8-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
1749; GFX8-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
1750; GFX8-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
1751; GFX8-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
1752; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 32, v0
1753; GFX8-NEXT:    buffer_store_dword v21, v1, s[0:3], 0 offen
1754; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 28, v0
1755; GFX8-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
1756; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 24, v0
1757; GFX8-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
1758; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 20, v0
1759; GFX8-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
1760; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 16, v0
1761; GFX8-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
1762; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 12, v0
1763; GFX8-NEXT:    s_waitcnt vmcnt(14)
1764; GFX8-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
1765; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 8, v0
1766; GFX8-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
1767; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 4, v0
1768; GFX8-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
1769; GFX8-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
1770; GFX8-NEXT:    s_waitcnt vmcnt(0)
1771; GFX8-NEXT:    s_setpc_b64 s[30:31]
1772;
1773; GFX9-LABEL: struct_v32i32_i32_func_void:
1774; GFX9:       ; %bb.0:
1775; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1776; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1777; GFX9-NEXT:    s_mov_b32 s7, 0xf000
1778; GFX9-NEXT:    s_mov_b32 s6, -1
1779; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
1780; GFX9-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:112
1781; GFX9-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:96
1782; GFX9-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:80
1783; GFX9-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:64
1784; GFX9-NEXT:    buffer_load_dword v33, off, s[4:7], 0 offset:128
1785; GFX9-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:48
1786; GFX9-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:32
1787; GFX9-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:16
1788; GFX9-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0
1789; GFX9-NEXT:    s_waitcnt vmcnt(8)
1790; GFX9-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:124
1791; GFX9-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:120
1792; GFX9-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:116
1793; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:112
1794; GFX9-NEXT:    s_waitcnt vmcnt(11)
1795; GFX9-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:108
1796; GFX9-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:104
1797; GFX9-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:100
1798; GFX9-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:96
1799; GFX9-NEXT:    s_waitcnt vmcnt(14)
1800; GFX9-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:92
1801; GFX9-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:88
1802; GFX9-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:84
1803; GFX9-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:80
1804; GFX9-NEXT:    s_waitcnt vmcnt(17)
1805; GFX9-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:76
1806; GFX9-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:72
1807; GFX9-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:68
1808; GFX9-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:64
1809; GFX9-NEXT:    s_waitcnt vmcnt(20)
1810; GFX9-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen offset:128
1811; GFX9-NEXT:    s_waitcnt vmcnt(20)
1812; GFX9-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:60
1813; GFX9-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:56
1814; GFX9-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:52
1815; GFX9-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:48
1816; GFX9-NEXT:    s_waitcnt vmcnt(23)
1817; GFX9-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:44
1818; GFX9-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:40
1819; GFX9-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:36
1820; GFX9-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:32
1821; GFX9-NEXT:    s_waitcnt vmcnt(26)
1822; GFX9-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:28
1823; GFX9-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:24
1824; GFX9-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:20
1825; GFX9-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:16
1826; GFX9-NEXT:    s_waitcnt vmcnt(29)
1827; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:12
1828; GFX9-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:8
1829; GFX9-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:4
1830; GFX9-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
1831; GFX9-NEXT:    s_waitcnt vmcnt(0)
1832; GFX9-NEXT:    s_setpc_b64 s[30:31]
1833;
1834; GFX11-LABEL: struct_v32i32_i32_func_void:
1835; GFX11:       ; %bb.0:
1836; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1837; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
1838; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
1839; GFX11-NEXT:    s_mov_b32 s2, -1
1840; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
1841; GFX11-NEXT:    s_clause 0x8
1842; GFX11-NEXT:    buffer_load_b128 v[1:4], off, s[0:3], 0 offset:112
1843; GFX11-NEXT:    buffer_load_b128 v[5:8], off, s[0:3], 0 offset:96
1844; GFX11-NEXT:    buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80
1845; GFX11-NEXT:    buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64
1846; GFX11-NEXT:    buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48
1847; GFX11-NEXT:    buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32
1848; GFX11-NEXT:    buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16
1849; GFX11-NEXT:    buffer_load_b128 v[29:32], off, s[0:3], 0
1850; GFX11-NEXT:    buffer_load_b32 v33, off, s[0:3], 0 offset:128
1851; GFX11-NEXT:    s_waitcnt vmcnt(8)
1852; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:112
1853; GFX11-NEXT:    s_waitcnt vmcnt(7)
1854; GFX11-NEXT:    scratch_store_b128 v0, v[5:8], off offset:96
1855; GFX11-NEXT:    s_waitcnt vmcnt(6)
1856; GFX11-NEXT:    scratch_store_b128 v0, v[9:12], off offset:80
1857; GFX11-NEXT:    s_waitcnt vmcnt(5)
1858; GFX11-NEXT:    scratch_store_b128 v0, v[13:16], off offset:64
1859; GFX11-NEXT:    s_waitcnt vmcnt(4)
1860; GFX11-NEXT:    scratch_store_b128 v0, v[17:20], off offset:48
1861; GFX11-NEXT:    s_waitcnt vmcnt(3)
1862; GFX11-NEXT:    scratch_store_b128 v0, v[21:24], off offset:32
1863; GFX11-NEXT:    s_waitcnt vmcnt(2)
1864; GFX11-NEXT:    scratch_store_b128 v0, v[25:28], off offset:16
1865; GFX11-NEXT:    s_waitcnt vmcnt(1)
1866; GFX11-NEXT:    scratch_store_b128 v0, v[29:32], off
1867; GFX11-NEXT:    s_waitcnt vmcnt(0)
1868; GFX11-NEXT:    scratch_store_b32 v0, v33, off offset:128
1869; GFX11-NEXT:    s_setpc_b64 s[30:31]
; %ptr is read with a volatile load through an undef addrspace(4) pointer;
; the whole struct is then loaded from that addrspace(1) address and
; returned by value.
1870  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
1871  %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr
1872  ret { <32 x i32>, i32 }%val
1873}
1874
1875define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
1876; CI-LABEL: struct_i32_v32i32_func_void:
1877; CI:       ; %bb.0:
1878; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1879; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1880; CI-NEXT:    s_mov_b32 s7, 0xf000
1881; CI-NEXT:    s_mov_b32 s6, -1
1882; CI-NEXT:    s_waitcnt lgkmcnt(0)
1883; CI-NEXT:    buffer_load_dword v33, off, s[4:7], 0
1884; CI-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240
1885; CI-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224
1886; CI-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208
1887; CI-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192
1888; CI-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176
1889; CI-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160
1890; CI-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144
1891; CI-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128
1892; CI-NEXT:    s_waitcnt vmcnt(8)
1893; CI-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen
1894; CI-NEXT:    v_add_i32_e32 v33, vcc, 0xfc, v0
1895; CI-NEXT:    s_waitcnt vmcnt(8)
1896; CI-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
1897; CI-NEXT:    v_add_i32_e32 v4, vcc, 0xf8, v0
1898; CI-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
1899; CI-NEXT:    v_add_i32_e32 v3, vcc, 0xf4, v0
1900; CI-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
1901; CI-NEXT:    v_add_i32_e32 v2, vcc, 0xf0, v0
1902; CI-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
1903; CI-NEXT:    v_add_i32_e32 v1, vcc, 0xec, v0
1904; CI-NEXT:    v_add_i32_e32 v2, vcc, 0xe8, v0
1905; CI-NEXT:    v_add_i32_e32 v3, vcc, 0xe4, v0
1906; CI-NEXT:    s_waitcnt vmcnt(11)
1907; CI-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
1908; CI-NEXT:    v_add_i32_e32 v1, vcc, 0xe0, v0
1909; CI-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
1910; CI-NEXT:    v_add_i32_e32 v2, vcc, 0xdc, v0
1911; CI-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
1912; CI-NEXT:    v_add_i32_e32 v3, vcc, 0xd8, v0
1913; CI-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
1914; CI-NEXT:    v_add_i32_e32 v1, vcc, 0xd4, v0
1915; CI-NEXT:    v_add_i32_e32 v4, vcc, 0xd0, v0
1916; CI-NEXT:    v_add_i32_e32 v5, vcc, 0xcc, v0
1917; CI-NEXT:    v_add_i32_e32 v6, vcc, 0xc8, v0
1918; CI-NEXT:    s_waitcnt vmcnt(14)
1919; CI-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
1920; CI-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
1921; CI-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
1922; CI-NEXT:    v_add_i32_e32 v1, vcc, 0xb8, v0
1923; CI-NEXT:    v_add_i32_e32 v7, vcc, 0xc4, v0
1924; CI-NEXT:    v_add_i32_e32 v2, vcc, 0xc0, v0
1925; CI-NEXT:    v_add_i32_e32 v3, vcc, 0xbc, v0
1926; CI-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
1927; CI-NEXT:    v_add_i32_e32 v4, vcc, 0xb4, v0
1928; CI-NEXT:    v_add_i32_e32 v8, vcc, 0xb0, v0
1929; CI-NEXT:    v_add_i32_e32 v9, vcc, 0xac, v0
1930; CI-NEXT:    v_add_i32_e32 v10, vcc, 0xa8, v0
1931; CI-NEXT:    v_add_i32_e32 v11, vcc, 0xa4, v0
1932; CI-NEXT:    s_waitcnt vmcnt(14)
1933; CI-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
1934; CI-NEXT:    v_add_i32_e32 v5, vcc, 0xa0, v0
1935; CI-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
1936; CI-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
1937; CI-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
1938; CI-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
1939; CI-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
1940; CI-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
1941; CI-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
1942; CI-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
1943; CI-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
1944; CI-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
1945; CI-NEXT:    buffer_store_dword v21, v5, s[0:3], 0 offen
1946; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x9c, v0
1947; CI-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
1948; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x98, v0
1949; CI-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
1950; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x94, v0
1951; CI-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
1952; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x90, v0
1953; CI-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
1954; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x8c, v0
1955; CI-NEXT:    s_waitcnt vmcnt(14)
1956; CI-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
1957; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x88, v0
1958; CI-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
1959; CI-NEXT:    v_add_i32_e32 v1, vcc, 0x84, v0
1960; CI-NEXT:    v_add_i32_e32 v0, vcc, 0x80, v0
1961; CI-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
1962; CI-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
1963; CI-NEXT:    s_waitcnt vmcnt(0)
1964; CI-NEXT:    s_setpc_b64 s[30:31]
1965;
1966; GFX8-LABEL: struct_i32_v32i32_func_void:
1967; GFX8:       ; %bb.0:
1968; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1969; GFX8-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
1970; GFX8-NEXT:    s_mov_b32 s7, 0xf000
1971; GFX8-NEXT:    s_mov_b32 s6, -1
1972; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
1973; GFX8-NEXT:    buffer_load_dword v33, off, s[4:7], 0
1974; GFX8-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240
1975; GFX8-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224
1976; GFX8-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208
1977; GFX8-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192
1978; GFX8-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176
1979; GFX8-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160
1980; GFX8-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144
1981; GFX8-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128
1982; GFX8-NEXT:    s_waitcnt vmcnt(8)
1983; GFX8-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen
1984; GFX8-NEXT:    v_add_u32_e32 v33, vcc, 0xfc, v0
1985; GFX8-NEXT:    s_waitcnt vmcnt(8)
1986; GFX8-NEXT:    buffer_store_dword v4, v33, s[0:3], 0 offen
1987; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xf8, v0
1988; GFX8-NEXT:    buffer_store_dword v3, v4, s[0:3], 0 offen
1989; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xf4, v0
1990; GFX8-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen
1991; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xf0, v0
1992; GFX8-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
1993; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xec, v0
1994; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xe8, v0
1995; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xe4, v0
1996; GFX8-NEXT:    s_waitcnt vmcnt(11)
1997; GFX8-NEXT:    buffer_store_dword v8, v1, s[0:3], 0 offen
1998; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xe0, v0
1999; GFX8-NEXT:    buffer_store_dword v7, v2, s[0:3], 0 offen
2000; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xdc, v0
2001; GFX8-NEXT:    buffer_store_dword v6, v3, s[0:3], 0 offen
2002; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xd8, v0
2003; GFX8-NEXT:    buffer_store_dword v5, v1, s[0:3], 0 offen
2004; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xd4, v0
2005; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xd0, v0
2006; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0xcc, v0
2007; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 0xc8, v0
2008; GFX8-NEXT:    s_waitcnt vmcnt(14)
2009; GFX8-NEXT:    buffer_store_dword v12, v2, s[0:3], 0 offen
2010; GFX8-NEXT:    buffer_store_dword v11, v3, s[0:3], 0 offen
2011; GFX8-NEXT:    buffer_store_dword v10, v1, s[0:3], 0 offen
2012; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0xb8, v0
2013; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 0xc4, v0
2014; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xc0, v0
2015; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 0xbc, v0
2016; GFX8-NEXT:    buffer_store_dword v9, v4, s[0:3], 0 offen
2017; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 0xb4, v0
2018; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 0xb0, v0
2019; GFX8-NEXT:    v_add_u32_e32 v9, vcc, 0xac, v0
2020; GFX8-NEXT:    v_add_u32_e32 v10, vcc, 0xa8, v0
2021; GFX8-NEXT:    v_add_u32_e32 v11, vcc, 0xa4, v0
2022; GFX8-NEXT:    s_waitcnt vmcnt(14)
2023; GFX8-NEXT:    buffer_store_dword v16, v5, s[0:3], 0 offen
2024; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 0xa0, v0
2025; GFX8-NEXT:    buffer_store_dword v15, v6, s[0:3], 0 offen
2026; GFX8-NEXT:    buffer_store_dword v14, v7, s[0:3], 0 offen
2027; GFX8-NEXT:    buffer_store_dword v13, v2, s[0:3], 0 offen
2028; GFX8-NEXT:    buffer_store_dword v20, v3, s[0:3], 0 offen
2029; GFX8-NEXT:    buffer_store_dword v19, v1, s[0:3], 0 offen
2030; GFX8-NEXT:    buffer_store_dword v18, v4, s[0:3], 0 offen
2031; GFX8-NEXT:    buffer_store_dword v17, v8, s[0:3], 0 offen
2032; GFX8-NEXT:    buffer_store_dword v24, v9, s[0:3], 0 offen
2033; GFX8-NEXT:    buffer_store_dword v23, v10, s[0:3], 0 offen
2034; GFX8-NEXT:    buffer_store_dword v22, v11, s[0:3], 0 offen
2035; GFX8-NEXT:    buffer_store_dword v21, v5, s[0:3], 0 offen
2036; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x9c, v0
2037; GFX8-NEXT:    buffer_store_dword v28, v1, s[0:3], 0 offen
2038; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x98, v0
2039; GFX8-NEXT:    buffer_store_dword v27, v1, s[0:3], 0 offen
2040; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x94, v0
2041; GFX8-NEXT:    buffer_store_dword v26, v1, s[0:3], 0 offen
2042; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x90, v0
2043; GFX8-NEXT:    buffer_store_dword v25, v1, s[0:3], 0 offen
2044; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x8c, v0
2045; GFX8-NEXT:    s_waitcnt vmcnt(14)
2046; GFX8-NEXT:    buffer_store_dword v32, v1, s[0:3], 0 offen
2047; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x88, v0
2048; GFX8-NEXT:    buffer_store_dword v31, v1, s[0:3], 0 offen
2049; GFX8-NEXT:    v_add_u32_e32 v1, vcc, 0x84, v0
2050; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x80, v0
2051; GFX8-NEXT:    buffer_store_dword v30, v1, s[0:3], 0 offen
2052; GFX8-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen
2053; GFX8-NEXT:    s_waitcnt vmcnt(0)
2054; GFX8-NEXT:    s_setpc_b64 s[30:31]
2055;
2056; GFX9-LABEL: struct_i32_v32i32_func_void:
2057; GFX9:       ; %bb.0:
2058; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2059; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
2060; GFX9-NEXT:    s_mov_b32 s7, 0xf000
2061; GFX9-NEXT:    s_mov_b32 s6, -1
2062; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2063; GFX9-NEXT:    buffer_load_dwordx4 v[1:4], off, s[4:7], 0 offset:240
2064; GFX9-NEXT:    buffer_load_dwordx4 v[5:8], off, s[4:7], 0 offset:224
2065; GFX9-NEXT:    buffer_load_dwordx4 v[9:12], off, s[4:7], 0 offset:208
2066; GFX9-NEXT:    buffer_load_dwordx4 v[13:16], off, s[4:7], 0 offset:192
2067; GFX9-NEXT:    buffer_load_dword v33, off, s[4:7], 0
2068; GFX9-NEXT:    buffer_load_dwordx4 v[17:20], off, s[4:7], 0 offset:176
2069; GFX9-NEXT:    buffer_load_dwordx4 v[21:24], off, s[4:7], 0 offset:160
2070; GFX9-NEXT:    buffer_load_dwordx4 v[25:28], off, s[4:7], 0 offset:144
2071; GFX9-NEXT:    buffer_load_dwordx4 v[29:32], off, s[4:7], 0 offset:128
2072; GFX9-NEXT:    s_waitcnt vmcnt(8)
2073; GFX9-NEXT:    buffer_store_dword v4, v0, s[0:3], 0 offen offset:252
2074; GFX9-NEXT:    buffer_store_dword v3, v0, s[0:3], 0 offen offset:248
2075; GFX9-NEXT:    buffer_store_dword v2, v0, s[0:3], 0 offen offset:244
2076; GFX9-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:240
2077; GFX9-NEXT:    s_waitcnt vmcnt(11)
2078; GFX9-NEXT:    buffer_store_dword v8, v0, s[0:3], 0 offen offset:236
2079; GFX9-NEXT:    buffer_store_dword v7, v0, s[0:3], 0 offen offset:232
2080; GFX9-NEXT:    buffer_store_dword v6, v0, s[0:3], 0 offen offset:228
2081; GFX9-NEXT:    buffer_store_dword v5, v0, s[0:3], 0 offen offset:224
2082; GFX9-NEXT:    s_waitcnt vmcnt(14)
2083; GFX9-NEXT:    buffer_store_dword v12, v0, s[0:3], 0 offen offset:220
2084; GFX9-NEXT:    buffer_store_dword v11, v0, s[0:3], 0 offen offset:216
2085; GFX9-NEXT:    buffer_store_dword v10, v0, s[0:3], 0 offen offset:212
2086; GFX9-NEXT:    buffer_store_dword v9, v0, s[0:3], 0 offen offset:208
2087; GFX9-NEXT:    s_waitcnt vmcnt(17)
2088; GFX9-NEXT:    buffer_store_dword v16, v0, s[0:3], 0 offen offset:204
2089; GFX9-NEXT:    buffer_store_dword v15, v0, s[0:3], 0 offen offset:200
2090; GFX9-NEXT:    buffer_store_dword v14, v0, s[0:3], 0 offen offset:196
2091; GFX9-NEXT:    buffer_store_dword v13, v0, s[0:3], 0 offen offset:192
2092; GFX9-NEXT:    s_waitcnt vmcnt(20)
2093; GFX9-NEXT:    buffer_store_dword v33, v0, s[0:3], 0 offen
2094; GFX9-NEXT:    s_waitcnt vmcnt(20)
2095; GFX9-NEXT:    buffer_store_dword v20, v0, s[0:3], 0 offen offset:188
2096; GFX9-NEXT:    buffer_store_dword v19, v0, s[0:3], 0 offen offset:184
2097; GFX9-NEXT:    buffer_store_dword v18, v0, s[0:3], 0 offen offset:180
2098; GFX9-NEXT:    buffer_store_dword v17, v0, s[0:3], 0 offen offset:176
2099; GFX9-NEXT:    s_waitcnt vmcnt(23)
2100; GFX9-NEXT:    buffer_store_dword v24, v0, s[0:3], 0 offen offset:172
2101; GFX9-NEXT:    buffer_store_dword v23, v0, s[0:3], 0 offen offset:168
2102; GFX9-NEXT:    buffer_store_dword v22, v0, s[0:3], 0 offen offset:164
2103; GFX9-NEXT:    buffer_store_dword v21, v0, s[0:3], 0 offen offset:160
2104; GFX9-NEXT:    s_waitcnt vmcnt(26)
2105; GFX9-NEXT:    buffer_store_dword v28, v0, s[0:3], 0 offen offset:156
2106; GFX9-NEXT:    buffer_store_dword v27, v0, s[0:3], 0 offen offset:152
2107; GFX9-NEXT:    buffer_store_dword v26, v0, s[0:3], 0 offen offset:148
2108; GFX9-NEXT:    buffer_store_dword v25, v0, s[0:3], 0 offen offset:144
2109; GFX9-NEXT:    s_waitcnt vmcnt(29)
2110; GFX9-NEXT:    buffer_store_dword v32, v0, s[0:3], 0 offen offset:140
2111; GFX9-NEXT:    buffer_store_dword v31, v0, s[0:3], 0 offen offset:136
2112; GFX9-NEXT:    buffer_store_dword v30, v0, s[0:3], 0 offen offset:132
2113; GFX9-NEXT:    buffer_store_dword v29, v0, s[0:3], 0 offen offset:128
2114; GFX9-NEXT:    s_waitcnt vmcnt(0)
2115; GFX9-NEXT:    s_setpc_b64 s[30:31]
2116;
2117; GFX11-LABEL: struct_i32_v32i32_func_void:
2118; GFX11:       ; %bb.0:
2119; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2120; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
2121; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2122; GFX11-NEXT:    s_mov_b32 s2, -1
2123; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2124; GFX11-NEXT:    s_clause 0x8
2125; GFX11-NEXT:    buffer_load_b128 v[1:4], off, s[0:3], 0 offset:240
2126; GFX11-NEXT:    buffer_load_b128 v[5:8], off, s[0:3], 0 offset:224
2127; GFX11-NEXT:    buffer_load_b128 v[9:12], off, s[0:3], 0 offset:208
2128; GFX11-NEXT:    buffer_load_b128 v[13:16], off, s[0:3], 0 offset:192
2129; GFX11-NEXT:    buffer_load_b128 v[17:20], off, s[0:3], 0 offset:176
2130; GFX11-NEXT:    buffer_load_b128 v[21:24], off, s[0:3], 0 offset:160
2131; GFX11-NEXT:    buffer_load_b128 v[25:28], off, s[0:3], 0 offset:144
2132; GFX11-NEXT:    buffer_load_b128 v[29:32], off, s[0:3], 0 offset:128
2133; GFX11-NEXT:    buffer_load_b32 v33, off, s[0:3], 0
2134; GFX11-NEXT:    s_waitcnt vmcnt(8)
2135; GFX11-NEXT:    scratch_store_b128 v0, v[1:4], off offset:240
2136; GFX11-NEXT:    s_waitcnt vmcnt(7)
2137; GFX11-NEXT:    scratch_store_b128 v0, v[5:8], off offset:224
2138; GFX11-NEXT:    s_waitcnt vmcnt(6)
2139; GFX11-NEXT:    scratch_store_b128 v0, v[9:12], off offset:208
2140; GFX11-NEXT:    s_waitcnt vmcnt(5)
2141; GFX11-NEXT:    scratch_store_b128 v0, v[13:16], off offset:192
2142; GFX11-NEXT:    s_waitcnt vmcnt(4)
2143; GFX11-NEXT:    scratch_store_b128 v0, v[17:20], off offset:176
2144; GFX11-NEXT:    s_waitcnt vmcnt(3)
2145; GFX11-NEXT:    scratch_store_b128 v0, v[21:24], off offset:160
2146; GFX11-NEXT:    s_waitcnt vmcnt(2)
2147; GFX11-NEXT:    scratch_store_b128 v0, v[25:28], off offset:144
2148; GFX11-NEXT:    s_waitcnt vmcnt(1)
2149; GFX11-NEXT:    scratch_store_b128 v0, v[29:32], off offset:128
2150; GFX11-NEXT:    s_waitcnt vmcnt(0)
2151; GFX11-NEXT:    scratch_store_b32 v0, v33, off
2152; GFX11-NEXT:    s_setpc_b64 s[30:31]
2153  %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef
2154  %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr
2155  ret { i32, <32 x i32> }%val
2156}
2157
2158; Make sure the last struct component is returned in v3, not v4.
; Returns { <3 x i32>, i32 } assembled from four volatile i32 loads through an
; undef addrspace(3) pointer: loads 0-2 fill the three vector lanes and load 3
; becomes the trailing scalar member. The checks for every run line expect the
; four loaded values to land in v0, v1, v2 and v3, in that order.
2159define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
2160; CI-LABEL: v3i32_struct_func_void_wasted_reg:
2161; CI:       ; %bb.0:
2162; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2163; CI-NEXT:    s_mov_b32 m0, -1
2164; CI-NEXT:    ds_read_b32 v0, v0
2165; CI-NEXT:    s_waitcnt lgkmcnt(0)
2166; CI-NEXT:    ds_read_b32 v1, v0
2167; CI-NEXT:    ds_read_b32 v2, v0
2168; CI-NEXT:    ds_read_b32 v3, v0
2169; CI-NEXT:    s_waitcnt lgkmcnt(0)
2170; CI-NEXT:    s_setpc_b64 s[30:31]
2171;
2172; GFX8-LABEL: v3i32_struct_func_void_wasted_reg:
2173; GFX8:       ; %bb.0:
2174; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2175; GFX8-NEXT:    s_mov_b32 m0, -1
2176; GFX8-NEXT:    ds_read_b32 v0, v0
2177; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2178; GFX8-NEXT:    ds_read_b32 v1, v0
2179; GFX8-NEXT:    ds_read_b32 v2, v0
2180; GFX8-NEXT:    ds_read_b32 v3, v0
2181; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2182; GFX8-NEXT:    s_setpc_b64 s[30:31]
2183;
2184; GFX9-LABEL: v3i32_struct_func_void_wasted_reg:
2185; GFX9:       ; %bb.0:
2186; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2187; GFX9-NEXT:    ds_read_b32 v0, v0
2188; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2189; GFX9-NEXT:    ds_read_b32 v1, v0
2190; GFX9-NEXT:    ds_read_b32 v2, v0
2191; GFX9-NEXT:    ds_read_b32 v3, v0
2192; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2193; GFX9-NEXT:    s_setpc_b64 s[30:31]
2194;
2195; GFX11-LABEL: v3i32_struct_func_void_wasted_reg:
2196; GFX11:       ; %bb.0:
2197; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2198; GFX11-NEXT:    ds_load_b32 v0, v0
2199; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2200; GFX11-NEXT:    ds_load_b32 v1, v0
2201; GFX11-NEXT:    ds_load_b32 v2, v0
2202; GFX11-NEXT:    ds_load_b32 v3, v0
2203; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2204; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Four independent volatile loads; being volatile, none of them may be dropped or merged.
2205  %load0 = load volatile i32, ptr addrspace(3) undef
2206  %load1 = load volatile i32, ptr addrspace(3) undef
2207  %load2 = load volatile i32, ptr addrspace(3) undef
2208  %load3 = load volatile i32, ptr addrspace(3) undef
2209
  ; Lanes 0-2 of the vector member come from load0-load2.
2210  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
2211  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
2212  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
  ; Struct field 0 is the vector, field 1 is the scalar from load3.
2213  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
2214  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
2215  ret { <3 x i32>, i32 } %insert.4
2216}
2217
; Floating-point counterpart of the <3 x i32> struct-return test: three volatile
; float loads fill a <3 x float> and a fourth volatile i32 load supplies the
; trailing scalar member. The checks for every run line expect the four loaded
; values in v0, v1, v2 and v3, in that order.
2218define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
2219; CI-LABEL: v3f32_struct_func_void_wasted_reg:
2220; CI:       ; %bb.0:
2221; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2222; CI-NEXT:    s_mov_b32 m0, -1
2223; CI-NEXT:    ds_read_b32 v0, v0
2224; CI-NEXT:    s_waitcnt lgkmcnt(0)
2225; CI-NEXT:    ds_read_b32 v1, v0
2226; CI-NEXT:    ds_read_b32 v2, v0
2227; CI-NEXT:    ds_read_b32 v3, v0
2228; CI-NEXT:    s_waitcnt lgkmcnt(0)
2229; CI-NEXT:    s_setpc_b64 s[30:31]
2230;
2231; GFX8-LABEL: v3f32_struct_func_void_wasted_reg:
2232; GFX8:       ; %bb.0:
2233; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2234; GFX8-NEXT:    s_mov_b32 m0, -1
2235; GFX8-NEXT:    ds_read_b32 v0, v0
2236; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2237; GFX8-NEXT:    ds_read_b32 v1, v0
2238; GFX8-NEXT:    ds_read_b32 v2, v0
2239; GFX8-NEXT:    ds_read_b32 v3, v0
2240; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2241; GFX8-NEXT:    s_setpc_b64 s[30:31]
2242;
2243; GFX9-LABEL: v3f32_struct_func_void_wasted_reg:
2244; GFX9:       ; %bb.0:
2245; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2246; GFX9-NEXT:    ds_read_b32 v0, v0
2247; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2248; GFX9-NEXT:    ds_read_b32 v1, v0
2249; GFX9-NEXT:    ds_read_b32 v2, v0
2250; GFX9-NEXT:    ds_read_b32 v3, v0
2251; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2252; GFX9-NEXT:    s_setpc_b64 s[30:31]
2253;
2254; GFX11-LABEL: v3f32_struct_func_void_wasted_reg:
2255; GFX11:       ; %bb.0:
2256; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2257; GFX11-NEXT:    ds_load_b32 v0, v0
2258; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2259; GFX11-NEXT:    ds_load_b32 v1, v0
2260; GFX11-NEXT:    ds_load_b32 v2, v0
2261; GFX11-NEXT:    ds_load_b32 v3, v0
2262; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2263; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Three float loads for the vector lanes, then one i32 load for the scalar field.
2264  %load0 = load volatile float, ptr addrspace(3) undef
2265  %load1 = load volatile float, ptr addrspace(3) undef
2266  %load2 = load volatile float, ptr addrspace(3) undef
2267  %load3 = load volatile i32, ptr addrspace(3) undef
2268
  ; Lanes 0-2 of the vector member come from load0-load2.
2269  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
2270  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
2271  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
  ; Struct field 0 is the vector, field 1 is the scalar from load3.
2272  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
2273  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
2274  ret { <3 x float>, i32 } %insert.4
2275}
2276
; Converts the addrspace(5) sret pointer argument to an i32 and volatile-stores
; that integer shifted right by 16, 17 and 18 bits. The hawaii, fiji and gfx900
; checks expect only the 16-bit shift to be computed, with a zero constant
; stored in place of the other two results. The gfx1100 checks expect both the
; 16-bit and 17-bit shifts to be computed and a zero constant stored only for
; the 18-bit shift.
2277define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 {
2278; CI-LABEL: void_func_sret_max_known_zero_bits:
2279; CI:       ; %bb.0:
2280; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2281; CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2282; CI-NEXT:    s_mov_b32 m0, -1
2283; CI-NEXT:    ds_write_b32 v0, v0
2284; CI-NEXT:    v_mov_b32_e32 v0, 0
2285; CI-NEXT:    ds_write_b32 v0, v0
2286; CI-NEXT:    ds_write_b32 v0, v0
2287; CI-NEXT:    s_waitcnt lgkmcnt(0)
2288; CI-NEXT:    s_setpc_b64 s[30:31]
2289;
2290; GFX8-LABEL: void_func_sret_max_known_zero_bits:
2291; GFX8:       ; %bb.0:
2292; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2293; GFX8-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2294; GFX8-NEXT:    s_mov_b32 m0, -1
2295; GFX8-NEXT:    ds_write_b32 v0, v0
2296; GFX8-NEXT:    v_mov_b32_e32 v0, 0
2297; GFX8-NEXT:    ds_write_b32 v0, v0
2298; GFX8-NEXT:    ds_write_b32 v0, v0
2299; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
2300; GFX8-NEXT:    s_setpc_b64 s[30:31]
2301;
2302; GFX9-LABEL: void_func_sret_max_known_zero_bits:
2303; GFX9:       ; %bb.0:
2304; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2305; GFX9-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2306; GFX9-NEXT:    ds_write_b32 v0, v0
2307; GFX9-NEXT:    v_mov_b32_e32 v0, 0
2308; GFX9-NEXT:    ds_write_b32 v0, v0
2309; GFX9-NEXT:    ds_write_b32 v0, v0
2310; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
2311; GFX9-NEXT:    s_setpc_b64 s[30:31]
2312;
2313; GFX11-LABEL: void_func_sret_max_known_zero_bits:
2314; GFX11:       ; %bb.0:
2315; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2316; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
2317; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 17, v0
2318; GFX11-NEXT:    v_mov_b32_e32 v2, 0
2319; GFX11-NEXT:    ds_store_b32 v0, v1
2320; GFX11-NEXT:    ds_store_b32 v0, v0
2321; GFX11-NEXT:    ds_store_b32 v0, v2
2322; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
2323; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ; Reinterpret the sret pointer as a 32-bit integer so its high bits can be inspected.
2324  %arg0.int = ptrtoint ptr addrspace(5) %arg0 to i32
2325
  ; Three successive shift amounts probe where the pointer's high bits become zero.
2326  %lshr0 = lshr i32 %arg0.int, 16
2327  %lshr1 = lshr i32 %arg0.int, 17
2328  %lshr2 = lshr i32 %arg0.int, 18
2329
  ; Volatile stores keep every shifted value observable in the output.
2330  store volatile i32 %lshr0, ptr addrspace(3) undef
2331  store volatile i32 %lshr1, ptr addrspace(3) undef
2332  store volatile i32 %lshr2, ptr addrspace(3) undef
2333  ret void
2334}
2335
; Loads one bfloat through an undef addrspace(1) pointer and returns it.
; The hawaii checks expect the loaded 16-bit value to be shifted left by 16 in
; v0 before returning; the fiji, gfx900 and gfx1100 checks expect the loaded
; value to be returned in v0 with no further instructions.
2336define bfloat @bf16_func_void() #0 {
2337; CI-LABEL: bf16_func_void:
2338; CI:       ; %bb.0:
2339; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2340; CI-NEXT:    s_mov_b32 s7, 0xf000
2341; CI-NEXT:    s_mov_b32 s6, -1
2342; CI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
2343; CI-NEXT:    s_waitcnt vmcnt(0)
2344; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
2345; CI-NEXT:    s_setpc_b64 s[30:31]
2346;
2347; GFX89-LABEL: bf16_func_void:
2348; GFX89:       ; %bb.0:
2349; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2350; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2351; GFX89-NEXT:    s_mov_b32 s6, -1
2352; GFX89-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
2353; GFX89-NEXT:    s_waitcnt vmcnt(0)
2354; GFX89-NEXT:    s_setpc_b64 s[30:31]
2355;
2356; GFX11-LABEL: bf16_func_void:
2357; GFX11:       ; %bb.0:
2358; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2359; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2360; GFX11-NEXT:    s_mov_b32 s2, -1
2361; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
2362; GFX11-NEXT:    s_waitcnt vmcnt(0)
2363; GFX11-NEXT:    s_setpc_b64 s[30:31]
2364  %val = load bfloat, ptr addrspace(1) undef
2365  ret bfloat %val
2366}
2367
; Loads a <2 x bfloat> (one dword) through an undef addrspace(1) pointer and
; returns it. The hawaii checks expect the dword to be split across two
; registers: v0 holds the low half shifted left by 16 and v1 holds the high half
; isolated with the 0xffff0000 mask. The fiji, gfx900 and gfx1100 checks expect
; the dword to be returned as loaded in v0.
2368define <2 x bfloat> @v2bf16_func_void() #0 {
2369; CI-LABEL: v2bf16_func_void:
2370; CI:       ; %bb.0:
2371; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2372; CI-NEXT:    s_mov_b32 s7, 0xf000
2373; CI-NEXT:    s_mov_b32 s6, -1
2374; CI-NEXT:    buffer_load_dword v1, off, s[4:7], 0
2375; CI-NEXT:    s_waitcnt vmcnt(0)
2376; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2377; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
2378; CI-NEXT:    s_setpc_b64 s[30:31]
2379;
2380; GFX89-LABEL: v2bf16_func_void:
2381; GFX89:       ; %bb.0:
2382; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2383; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2384; GFX89-NEXT:    s_mov_b32 s6, -1
2385; GFX89-NEXT:    buffer_load_dword v0, off, s[4:7], 0
2386; GFX89-NEXT:    s_waitcnt vmcnt(0)
2387; GFX89-NEXT:    s_setpc_b64 s[30:31]
2388;
2389; GFX11-LABEL: v2bf16_func_void:
2390; GFX11:       ; %bb.0:
2391; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2392; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2393; GFX11-NEXT:    s_mov_b32 s2, -1
2394; GFX11-NEXT:    buffer_load_b32 v0, off, s[0:3], 0
2395; GFX11-NEXT:    s_waitcnt vmcnt(0)
2396; GFX11-NEXT:    s_setpc_b64 s[30:31]
2397  %val = load <2 x bfloat>, ptr addrspace(1) undef
2398  ret <2 x bfloat> %val
2399}
2400
; Loads a <3 x bfloat> through an undef addrspace(1) pointer and returns it.
; All run lines expect a single two-dword load. The hawaii checks then expand
; the elements into v0, v1 and v2 (low half of dword 0 shifted left by 16, high
; half of dword 0 masked with 0xffff0000, low half of dword 1 shifted left by
; 16). The fiji, gfx900 and gfx1100 checks expect the two dwords to be returned
; as loaded in v[0:1].
2401define <3 x bfloat> @v3bf16_func_void() #0 {
2402; CI-LABEL: v3bf16_func_void:
2403; CI:       ; %bb.0:
2404; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2405; CI-NEXT:    s_mov_b32 s7, 0xf000
2406; CI-NEXT:    s_mov_b32 s6, -1
2407; CI-NEXT:    buffer_load_dwordx2 v[1:2], off, s[4:7], 0
2408; CI-NEXT:    s_waitcnt vmcnt(0)
2409; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2410; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v1
2411; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2412; CI-NEXT:    s_setpc_b64 s[30:31]
2413;
2414; GFX89-LABEL: v3bf16_func_void:
2415; GFX89:       ; %bb.0:
2416; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2417; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2418; GFX89-NEXT:    s_mov_b32 s6, -1
2419; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2420; GFX89-NEXT:    s_waitcnt vmcnt(0)
2421; GFX89-NEXT:    s_setpc_b64 s[30:31]
2422;
2423; GFX11-LABEL: v3bf16_func_void:
2424; GFX11:       ; %bb.0:
2425; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2426; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2427; GFX11-NEXT:    s_mov_b32 s2, -1
2428; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
2429; GFX11-NEXT:    s_waitcnt vmcnt(0)
2430; GFX11-NEXT:    s_setpc_b64 s[30:31]
2431  %val = load <3 x bfloat>, ptr addrspace(1) undef
2432  ret <3 x bfloat> %val
2433}
2434
; Loads a <4 x bfloat> (two dwords) through an undef addrspace(1) pointer and
; returns it. The hawaii checks expect each element in its own register, v0-v3:
; even elements are the low half of a dword shifted left by 16, odd elements the
; high half masked with 0xffff0000. The fiji, gfx900 and gfx1100 checks expect
; the two dwords to be returned as loaded in v[0:1].
2435define <4 x bfloat> @v4bf16_func_void() #0 {
2436; CI-LABEL: v4bf16_func_void:
2437; CI:       ; %bb.0:
2438; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2439; CI-NEXT:    s_mov_b32 s7, 0xf000
2440; CI-NEXT:    s_mov_b32 s6, -1
2441; CI-NEXT:    buffer_load_dwordx2 v[2:3], off, s[4:7], 0
2442; CI-NEXT:    s_waitcnt vmcnt(0)
2443; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v2
2444; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v2
2445; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
2446; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v3
2447; CI-NEXT:    s_setpc_b64 s[30:31]
2448;
2449; GFX89-LABEL: v4bf16_func_void:
2450; GFX89:       ; %bb.0:
2451; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2452; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2453; GFX89-NEXT:    s_mov_b32 s6, -1
2454; GFX89-NEXT:    buffer_load_dwordx2 v[0:1], off, s[4:7], 0
2455; GFX89-NEXT:    s_waitcnt vmcnt(0)
2456; GFX89-NEXT:    s_setpc_b64 s[30:31]
2457;
2458; GFX11-LABEL: v4bf16_func_void:
2459; GFX11:       ; %bb.0:
2460; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2461; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2462; GFX11-NEXT:    s_mov_b32 s2, -1
2463; GFX11-NEXT:    buffer_load_b64 v[0:1], off, s[0:3], 0
2464; GFX11-NEXT:    s_waitcnt vmcnt(0)
2465; GFX11-NEXT:    s_setpc_b64 s[30:31]
2466  %val = load <4 x bfloat>, ptr addrspace(1) undef
2467  ret <4 x bfloat> %val
2468}
2469
; Loads a <6 x bfloat> (three dwords) through an undef addrspace(1) pointer and
; returns it. The hawaii checks expect each element in its own register, v0-v5,
; produced by a shift-left-by-16 (even elements) or a 0xffff0000 mask (odd
; elements) of the loaded dwords. The fiji, gfx900 and gfx1100 checks expect
; the three dwords to be returned as loaded in v[0:2].
2470define <6 x bfloat> @v6bf16_func_void() #0 {
2471; CI-LABEL: v6bf16_func_void:
2472; CI:       ; %bb.0:
2473; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2474; CI-NEXT:    s_mov_b32 s7, 0xf000
2475; CI-NEXT:    s_mov_b32 s6, -1
2476; CI-NEXT:    buffer_load_dwordx3 v[3:5], off, s[4:7], 0
2477; CI-NEXT:    s_waitcnt vmcnt(0)
2478; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v3
2479; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v3
2480; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v4
2481; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v4
2482; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v5
2483; CI-NEXT:    v_and_b32_e32 v5, 0xffff0000, v5
2484; CI-NEXT:    s_setpc_b64 s[30:31]
2485;
2486; GFX89-LABEL: v6bf16_func_void:
2487; GFX89:       ; %bb.0:
2488; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2489; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2490; GFX89-NEXT:    s_mov_b32 s6, -1
2491; GFX89-NEXT:    buffer_load_dwordx3 v[0:2], off, s[4:7], 0
2492; GFX89-NEXT:    s_waitcnt vmcnt(0)
2493; GFX89-NEXT:    s_setpc_b64 s[30:31]
2494;
2495; GFX11-LABEL: v6bf16_func_void:
2496; GFX11:       ; %bb.0:
2497; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2498; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2499; GFX11-NEXT:    s_mov_b32 s2, -1
2500; GFX11-NEXT:    buffer_load_b96 v[0:2], off, s[0:3], 0
2501; GFX11-NEXT:    s_waitcnt vmcnt(0)
2502; GFX11-NEXT:    s_setpc_b64 s[30:31]
2503  %val = load <6 x bfloat>, ptr addrspace(1) undef
2504  ret <6 x bfloat> %val
2505}
2506
; Loads an <8 x bfloat> (four dwords) through an undef addrspace(1) pointer and
; returns it. The hawaii checks expect each element in its own register, v0-v7,
; produced by a shift-left-by-16 (even elements) or a 0xffff0000 mask (odd
; elements) of the loaded dwords. The fiji, gfx900 and gfx1100 checks expect
; the four dwords to be returned as loaded in v[0:3].
2507define <8 x bfloat> @v8bf16_func_void() #0 {
2508; CI-LABEL: v8bf16_func_void:
2509; CI:       ; %bb.0:
2510; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2511; CI-NEXT:    s_mov_b32 s7, 0xf000
2512; CI-NEXT:    s_mov_b32 s6, -1
2513; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0
2514; CI-NEXT:    s_waitcnt vmcnt(0)
2515; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
2516; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
2517; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
2518; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
2519; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
2520; CI-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
2521; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
2522; CI-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
2523; CI-NEXT:    s_setpc_b64 s[30:31]
2524;
2525; GFX89-LABEL: v8bf16_func_void:
2526; GFX89:       ; %bb.0:
2527; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2528; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2529; GFX89-NEXT:    s_mov_b32 s6, -1
2530; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
2531; GFX89-NEXT:    s_waitcnt vmcnt(0)
2532; GFX89-NEXT:    s_setpc_b64 s[30:31]
2533;
2534; GFX11-LABEL: v8bf16_func_void:
2535; GFX11:       ; %bb.0:
2536; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2537; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2538; GFX11-NEXT:    s_mov_b32 s2, -1
2539; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
2540; GFX11-NEXT:    s_waitcnt vmcnt(0)
2541; GFX11-NEXT:    s_setpc_b64 s[30:31]
2542  %val = load <8 x bfloat>, ptr addrspace(1) undef
2543  ret <8 x bfloat> %val
2544}
2545
; Loads a <16 x bfloat> through an undef addrspace(1) pointer and returns it.
; Every run line expects just one four-dword load whose result is replicated to
; fill the return registers. The hawaii checks expand the load into v0-v7 (one
; element per register) and copy those into v8-v15. The fiji, gfx900 and
; gfx1100 checks load into v[0:3] and copy those into v4-v7.
; NOTE(review): the value is 256 bits wide but only a single 128-bit load is
; expected - presumably an artifact of loading through an undef pointer; confirm
; before relying on this output as representative.
2546define <16 x bfloat> @v16bf16_func_void() #0 {
2547; CI-LABEL: v16bf16_func_void:
2548; CI:       ; %bb.0:
2549; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2550; CI-NEXT:    s_mov_b32 s7, 0xf000
2551; CI-NEXT:    s_mov_b32 s6, -1
2552; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0
2553; CI-NEXT:    s_waitcnt vmcnt(0)
2554; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
2555; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
2556; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
2557; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
2558; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
2559; CI-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
2560; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
2561; CI-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
2562; CI-NEXT:    v_mov_b32_e32 v8, v0
2563; CI-NEXT:    v_mov_b32_e32 v9, v1
2564; CI-NEXT:    v_mov_b32_e32 v10, v2
2565; CI-NEXT:    v_mov_b32_e32 v11, v3
2566; CI-NEXT:    v_mov_b32_e32 v12, v4
2567; CI-NEXT:    v_mov_b32_e32 v13, v5
2568; CI-NEXT:    v_mov_b32_e32 v14, v6
2569; CI-NEXT:    v_mov_b32_e32 v15, v7
2570; CI-NEXT:    s_setpc_b64 s[30:31]
2571;
2572; GFX89-LABEL: v16bf16_func_void:
2573; GFX89:       ; %bb.0:
2574; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2575; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2576; GFX89-NEXT:    s_mov_b32 s6, -1
2577; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
2578; GFX89-NEXT:    s_waitcnt vmcnt(0)
2579; GFX89-NEXT:    v_mov_b32_e32 v4, v0
2580; GFX89-NEXT:    v_mov_b32_e32 v5, v1
2581; GFX89-NEXT:    v_mov_b32_e32 v6, v2
2582; GFX89-NEXT:    v_mov_b32_e32 v7, v3
2583; GFX89-NEXT:    s_setpc_b64 s[30:31]
2584;
2585; GFX11-LABEL: v16bf16_func_void:
2586; GFX11:       ; %bb.0:
2587; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2588; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2589; GFX11-NEXT:    s_mov_b32 s2, -1
2590; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
2591; GFX11-NEXT:    s_waitcnt vmcnt(0)
2592; GFX11-NEXT:    v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
2593; GFX11-NEXT:    v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3
2594; GFX11-NEXT:    s_setpc_b64 s[30:31]
2595  %val = load <16 x bfloat>, ptr addrspace(1) undef
2596  ret <16 x bfloat> %val
2597}
2598
; Loads a <32 x bfloat> through an undef addrspace(1) pointer and returns it.
; Every run line expects just one four-dword load whose result is replicated to
; fill the return registers. The hawaii checks expand the load into v0-v7 (one
; element per register) and copy those into v8-v15, v16-v23 and v24-v31. The
; fiji, gfx900 and gfx1100 checks load into v[0:3] and copy those into v4-v7,
; v8-v11 and v12-v15.
; NOTE(review): the value is 512 bits wide but only a single 128-bit load is
; expected - presumably an artifact of loading through an undef pointer; confirm
; before relying on this output as representative.
2599define <32 x bfloat> @v32bf16_func_void() #0 {
2600; CI-LABEL: v32bf16_func_void:
2601; CI:       ; %bb.0:
2602; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2603; CI-NEXT:    s_mov_b32 s7, 0xf000
2604; CI-NEXT:    s_mov_b32 s6, -1
2605; CI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[4:7], 0
2606; CI-NEXT:    s_waitcnt vmcnt(0)
2607; CI-NEXT:    v_lshlrev_b32_e32 v0, 16, v4
2608; CI-NEXT:    v_and_b32_e32 v1, 0xffff0000, v4
2609; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
2610; CI-NEXT:    v_and_b32_e32 v3, 0xffff0000, v5
2611; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
2612; CI-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
2613; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
2614; CI-NEXT:    v_and_b32_e32 v7, 0xffff0000, v7
2615; CI-NEXT:    v_mov_b32_e32 v8, v0
2616; CI-NEXT:    v_mov_b32_e32 v9, v1
2617; CI-NEXT:    v_mov_b32_e32 v10, v2
2618; CI-NEXT:    v_mov_b32_e32 v11, v3
2619; CI-NEXT:    v_mov_b32_e32 v16, v0
2620; CI-NEXT:    v_mov_b32_e32 v17, v1
2621; CI-NEXT:    v_mov_b32_e32 v18, v2
2622; CI-NEXT:    v_mov_b32_e32 v19, v3
2623; CI-NEXT:    v_mov_b32_e32 v24, v0
2624; CI-NEXT:    v_mov_b32_e32 v25, v1
2625; CI-NEXT:    v_mov_b32_e32 v26, v2
2626; CI-NEXT:    v_mov_b32_e32 v27, v3
2627; CI-NEXT:    v_mov_b32_e32 v12, v4
2628; CI-NEXT:    v_mov_b32_e32 v20, v4
2629; CI-NEXT:    v_mov_b32_e32 v28, v4
2630; CI-NEXT:    v_mov_b32_e32 v13, v5
2631; CI-NEXT:    v_mov_b32_e32 v21, v5
2632; CI-NEXT:    v_mov_b32_e32 v29, v5
2633; CI-NEXT:    v_mov_b32_e32 v14, v6
2634; CI-NEXT:    v_mov_b32_e32 v22, v6
2635; CI-NEXT:    v_mov_b32_e32 v30, v6
2636; CI-NEXT:    v_mov_b32_e32 v15, v7
2637; CI-NEXT:    v_mov_b32_e32 v23, v7
2638; CI-NEXT:    v_mov_b32_e32 v31, v7
2639; CI-NEXT:    s_setpc_b64 s[30:31]
2640;
2641; GFX89-LABEL: v32bf16_func_void:
2642; GFX89:       ; %bb.0:
2643; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2644; GFX89-NEXT:    s_mov_b32 s7, 0xf000
2645; GFX89-NEXT:    s_mov_b32 s6, -1
2646; GFX89-NEXT:    buffer_load_dwordx4 v[0:3], off, s[4:7], 0
2647; GFX89-NEXT:    s_waitcnt vmcnt(0)
2648; GFX89-NEXT:    v_mov_b32_e32 v4, v0
2649; GFX89-NEXT:    v_mov_b32_e32 v5, v1
2650; GFX89-NEXT:    v_mov_b32_e32 v6, v2
2651; GFX89-NEXT:    v_mov_b32_e32 v7, v3
2652; GFX89-NEXT:    v_mov_b32_e32 v8, v0
2653; GFX89-NEXT:    v_mov_b32_e32 v9, v1
2654; GFX89-NEXT:    v_mov_b32_e32 v10, v2
2655; GFX89-NEXT:    v_mov_b32_e32 v11, v3
2656; GFX89-NEXT:    v_mov_b32_e32 v12, v0
2657; GFX89-NEXT:    v_mov_b32_e32 v13, v1
2658; GFX89-NEXT:    v_mov_b32_e32 v14, v2
2659; GFX89-NEXT:    v_mov_b32_e32 v15, v3
2660; GFX89-NEXT:    s_setpc_b64 s[30:31]
2661;
2662; GFX11-LABEL: v32bf16_func_void:
2663; GFX11:       ; %bb.0:
2664; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2665; GFX11-NEXT:    s_mov_b32 s3, 0x31016000
2666; GFX11-NEXT:    s_mov_b32 s2, -1
2667; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
2668; GFX11-NEXT:    s_waitcnt vmcnt(0)
2669; GFX11-NEXT:    v_dual_mov_b32 v4, v0 :: v_dual_mov_b32 v5, v1
2670; GFX11-NEXT:    v_dual_mov_b32 v6, v2 :: v_dual_mov_b32 v7, v3
2671; GFX11-NEXT:    v_dual_mov_b32 v8, v0 :: v_dual_mov_b32 v9, v1
2672; GFX11-NEXT:    v_dual_mov_b32 v10, v2 :: v_dual_mov_b32 v11, v3
2673; GFX11-NEXT:    v_dual_mov_b32 v12, v0 :: v_dual_mov_b32 v13, v1
2674; GFX11-NEXT:    v_dual_mov_b32 v14, v2 :: v_dual_mov_b32 v15, v3
2675; GFX11-NEXT:    s_setpc_b64 s[30:31]
2676  %val = load <32 x bfloat>, ptr addrspace(1) undef
2677  ret <32 x bfloat> %val
2678}
2679
; Attribute group referenced as #0 by the test functions above; it marks them
; nounwind.
2680attributes #0 = { nounwind }
2681