xref: /llvm-project/llvm/test/CodeGen/AMDGPU/shufflevector.v4i32.v3i32.ll (revision 585858aeb6247b3892218edb9d353c63f1c33186)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
5
6
; Shuffle with an entirely poison mask: no element of %vec0 is selected, so
; the stored <4 x i32> value is poison. The expected output contains only the
; entry wait and the return; neither the asm def nor a store appears.
7define void @v_shuffle_v4i32_v3i32__u_u_u_u(ptr addrspace(1) inreg %ptr) {
8; GFX9-LABEL: v_shuffle_v4i32_v3i32__u_u_u_u:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    s_setpc_b64 s[30:31]
12  %vec0 = call <3 x i32> asm "; def $0", "=v"()
13  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> poison
14  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
15  ret void
16}
17
; Mask <0, poison, poison, poison>: result lane 0 is %vec0[0]; the remaining
; lanes are poison. The expected code stores v[0:3] directly after the asm def
; of v[0:2], with no register-to-register copy (v3 only holds the zero offset).
18define void @v_shuffle_v4i32_v3i32__0_u_u_u(ptr addrspace(1) inreg %ptr) {
19; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_u_u_u:
20; GFX900:       ; %bb.0:
21; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX900-NEXT:    v_mov_b32_e32 v3, 0
23; GFX900-NEXT:    ;;#ASMSTART
24; GFX900-NEXT:    ; def v[0:2]
25; GFX900-NEXT:    ;;#ASMEND
26; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
27; GFX900-NEXT:    s_waitcnt vmcnt(0)
28; GFX900-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_u_u_u:
31; GFX90A:       ; %bb.0:
32; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
34; GFX90A-NEXT:    ;;#ASMSTART
35; GFX90A-NEXT:    ; def v[0:2]
36; GFX90A-NEXT:    ;;#ASMEND
37; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
38; GFX90A-NEXT:    s_waitcnt vmcnt(0)
39; GFX90A-NEXT:    s_setpc_b64 s[30:31]
40;
41; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_u_u_u:
42; GFX940:       ; %bb.0:
43; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GFX940-NEXT:    v_mov_b32_e32 v3, 0
45; GFX940-NEXT:    ;;#ASMSTART
46; GFX940-NEXT:    ; def v[0:2]
47; GFX940-NEXT:    ;;#ASMEND
48; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
49; GFX940-NEXT:    s_waitcnt vmcnt(0)
50; GFX940-NEXT:    s_setpc_b64 s[30:31]
51  %vec0 = call <3 x i32> asm "; def $0", "=v"()
52  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
53  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
54  ret void
55}
56
; Mask <1, poison, poison, poison>: result lane 0 is %vec0[1]; the remaining
; lanes are poison. The expected code copies v1 into v0 after the asm def of
; v[0:2] and then stores v[0:3].
57define void @v_shuffle_v4i32_v3i32__1_u_u_u(ptr addrspace(1) inreg %ptr) {
58; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u:
59; GFX900:       ; %bb.0:
60; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61; GFX900-NEXT:    ;;#ASMSTART
62; GFX900-NEXT:    ; def v[0:2]
63; GFX900-NEXT:    ;;#ASMEND
64; GFX900-NEXT:    v_mov_b32_e32 v3, 0
65; GFX900-NEXT:    v_mov_b32_e32 v0, v1
66; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
67; GFX900-NEXT:    s_waitcnt vmcnt(0)
68; GFX900-NEXT:    s_setpc_b64 s[30:31]
69;
70; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u:
71; GFX90A:       ; %bb.0:
72; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; GFX90A-NEXT:    ;;#ASMSTART
74; GFX90A-NEXT:    ; def v[0:2]
75; GFX90A-NEXT:    ;;#ASMEND
76; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
77; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
78; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
79; GFX90A-NEXT:    s_waitcnt vmcnt(0)
80; GFX90A-NEXT:    s_setpc_b64 s[30:31]
81;
82; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_u_u_u:
83; GFX940:       ; %bb.0:
84; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85; GFX940-NEXT:    ;;#ASMSTART
86; GFX940-NEXT:    ; def v[0:2]
87; GFX940-NEXT:    ;;#ASMEND
88; GFX940-NEXT:    v_mov_b32_e32 v3, 0
89; GFX940-NEXT:    v_mov_b32_e32 v0, v1
90; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
91; GFX940-NEXT:    s_waitcnt vmcnt(0)
92; GFX940-NEXT:    s_setpc_b64 s[30:31]
93  %vec0 = call <3 x i32> asm "; def $0", "=v"()
94  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
95  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
96  ret void
97}
98
; Mask <2, poison, poison, poison>: result lane 0 is %vec0[2]; the remaining
; lanes are poison. The expected code copies v2 into v0 after the asm def of
; v[0:2] and then stores v[0:3].
99define void @v_shuffle_v4i32_v3i32__2_u_u_u(ptr addrspace(1) inreg %ptr) {
100; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u:
101; GFX900:       ; %bb.0:
102; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX900-NEXT:    ;;#ASMSTART
104; GFX900-NEXT:    ; def v[0:2]
105; GFX900-NEXT:    ;;#ASMEND
106; GFX900-NEXT:    v_mov_b32_e32 v3, 0
107; GFX900-NEXT:    v_mov_b32_e32 v0, v2
108; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
109; GFX900-NEXT:    s_waitcnt vmcnt(0)
110; GFX900-NEXT:    s_setpc_b64 s[30:31]
111;
112; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u:
113; GFX90A:       ; %bb.0:
114; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; GFX90A-NEXT:    ;;#ASMSTART
116; GFX90A-NEXT:    ; def v[0:2]
117; GFX90A-NEXT:    ;;#ASMEND
118; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
119; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
120; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
121; GFX90A-NEXT:    s_waitcnt vmcnt(0)
122; GFX90A-NEXT:    s_setpc_b64 s[30:31]
123;
124; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_u_u_u:
125; GFX940:       ; %bb.0:
126; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX940-NEXT:    ;;#ASMSTART
128; GFX940-NEXT:    ; def v[0:2]
129; GFX940-NEXT:    ;;#ASMEND
130; GFX940-NEXT:    v_mov_b32_e32 v3, 0
131; GFX940-NEXT:    v_mov_b32_e32 v0, v2
132; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
133; GFX940-NEXT:    s_waitcnt vmcnt(0)
134; GFX940-NEXT:    s_setpc_b64 s[30:31]
135  %vec0 = call <3 x i32> asm "; def $0", "=v"()
136  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
137  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
138  ret void
139}
140
; Mask <3, poison, poison, poison>: index 3 names element 0 of the second
; shuffle operand, which is poison in this function, so every result lane is
; poison. The expected output contains only the entry wait and the return.
141define void @v_shuffle_v4i32_v3i32__3_u_u_u(ptr addrspace(1) inreg %ptr) {
142; GFX9-LABEL: v_shuffle_v4i32_v3i32__3_u_u_u:
143; GFX9:       ; %bb.0:
144; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; GFX9-NEXT:    s_setpc_b64 s[30:31]
146  %vec0 = call <3 x i32> asm "; def $0", "=v"()
147  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
148  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
149  ret void
150}
151
; Mask <4, poison, poison, poison>: indices 3-5 select from the second operand,
; so result lane 0 is %vec1[1]; the remaining lanes are poison. %vec0 is not
; selected, and the expected output shows a single asm def followed by a copy
; of v1 into v0.
152define void @v_shuffle_v4i32_v3i32__4_u_u_u(ptr addrspace(1) inreg %ptr) {
153; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u:
154; GFX900:       ; %bb.0:
155; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX900-NEXT:    ;;#ASMSTART
157; GFX900-NEXT:    ; def v[0:2]
158; GFX900-NEXT:    ;;#ASMEND
159; GFX900-NEXT:    v_mov_b32_e32 v3, 0
160; GFX900-NEXT:    v_mov_b32_e32 v0, v1
161; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
162; GFX900-NEXT:    s_waitcnt vmcnt(0)
163; GFX900-NEXT:    s_setpc_b64 s[30:31]
164;
165; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u:
166; GFX90A:       ; %bb.0:
167; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168; GFX90A-NEXT:    ;;#ASMSTART
169; GFX90A-NEXT:    ; def v[0:2]
170; GFX90A-NEXT:    ;;#ASMEND
171; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
172; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
173; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
174; GFX90A-NEXT:    s_waitcnt vmcnt(0)
175; GFX90A-NEXT:    s_setpc_b64 s[30:31]
176;
177; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_u_u_u:
178; GFX940:       ; %bb.0:
179; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; GFX940-NEXT:    ;;#ASMSTART
181; GFX940-NEXT:    ; def v[0:2]
182; GFX940-NEXT:    ;;#ASMEND
183; GFX940-NEXT:    v_mov_b32_e32 v3, 0
184; GFX940-NEXT:    v_mov_b32_e32 v0, v1
185; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
186; GFX940-NEXT:    s_waitcnt vmcnt(0)
187; GFX940-NEXT:    s_setpc_b64 s[30:31]
188  %vec0 = call <3 x i32> asm "; def $0", "=v"()
189  %vec1 = call <3 x i32> asm "; def $0", "=v"()
190  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
191  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
192  ret void
193}
194
; Mask <5, poison, poison, poison>: indices 3-5 select from the second operand,
; so result lane 0 is %vec1[2]; the remaining lanes are poison. %vec0 is not
; selected, and the expected output shows a single asm def followed by a copy
; of v2 into v0.
195define void @v_shuffle_v4i32_v3i32__5_u_u_u(ptr addrspace(1) inreg %ptr) {
196; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u:
197; GFX900:       ; %bb.0:
198; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199; GFX900-NEXT:    ;;#ASMSTART
200; GFX900-NEXT:    ; def v[0:2]
201; GFX900-NEXT:    ;;#ASMEND
202; GFX900-NEXT:    v_mov_b32_e32 v3, 0
203; GFX900-NEXT:    v_mov_b32_e32 v0, v2
204; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
205; GFX900-NEXT:    s_waitcnt vmcnt(0)
206; GFX900-NEXT:    s_setpc_b64 s[30:31]
207;
208; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u:
209; GFX90A:       ; %bb.0:
210; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211; GFX90A-NEXT:    ;;#ASMSTART
212; GFX90A-NEXT:    ; def v[0:2]
213; GFX90A-NEXT:    ;;#ASMEND
214; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
215; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
216; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
217; GFX90A-NEXT:    s_waitcnt vmcnt(0)
218; GFX90A-NEXT:    s_setpc_b64 s[30:31]
219;
220; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_u_u:
221; GFX940:       ; %bb.0:
222; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223; GFX940-NEXT:    ;;#ASMSTART
224; GFX940-NEXT:    ; def v[0:2]
225; GFX940-NEXT:    ;;#ASMEND
226; GFX940-NEXT:    v_mov_b32_e32 v3, 0
227; GFX940-NEXT:    v_mov_b32_e32 v0, v2
228; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
229; GFX940-NEXT:    s_waitcnt vmcnt(0)
230; GFX940-NEXT:    s_setpc_b64 s[30:31]
231  %vec0 = call <3 x i32> asm "; def $0", "=v"()
232  %vec1 = call <3 x i32> asm "; def $0", "=v"()
233  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
234  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
235  ret void
236}
237
; Mask <5, 0, poison, poison>: result lane 0 is %vec1[2] (indices 3-5 select
; from the second operand) and lane 1 is %vec0[0]; lanes 2-3 are poison. Both
; source vectors are selected, so the expected output contains two asm defs
; before v[0:3] is assembled and stored.
238define void @v_shuffle_v4i32_v3i32__5_0_u_u(ptr addrspace(1) inreg %ptr) {
239; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u:
240; GFX900:       ; %bb.0:
241; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242; GFX900-NEXT:    ;;#ASMSTART
243; GFX900-NEXT:    ; def v[1:3]
244; GFX900-NEXT:    ;;#ASMEND
245; GFX900-NEXT:    v_mov_b32_e32 v5, 0
246; GFX900-NEXT:    ;;#ASMSTART
247; GFX900-NEXT:    ; def v[2:4]
248; GFX900-NEXT:    ;;#ASMEND
249; GFX900-NEXT:    v_mov_b32_e32 v0, v4
250; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
251; GFX900-NEXT:    s_waitcnt vmcnt(0)
252; GFX900-NEXT:    s_setpc_b64 s[30:31]
253;
254; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u:
255; GFX90A:       ; %bb.0:
256; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257; GFX90A-NEXT:    ;;#ASMSTART
258; GFX90A-NEXT:    ; def v[2:4]
259; GFX90A-NEXT:    ;;#ASMEND
260; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
261; GFX90A-NEXT:    ;;#ASMSTART
262; GFX90A-NEXT:    ; def v[4:6]
263; GFX90A-NEXT:    ;;#ASMEND
264; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
265; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
266; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
267; GFX90A-NEXT:    s_waitcnt vmcnt(0)
268; GFX90A-NEXT:    s_setpc_b64 s[30:31]
269;
270; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_u_u:
271; GFX940:       ; %bb.0:
272; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; GFX940-NEXT:    ;;#ASMSTART
274; GFX940-NEXT:    ; def v[2:4]
275; GFX940-NEXT:    ;;#ASMEND
276; GFX940-NEXT:    v_mov_b32_e32 v7, 0
277; GFX940-NEXT:    ;;#ASMSTART
278; GFX940-NEXT:    ; def v[4:6]
279; GFX940-NEXT:    ;;#ASMEND
280; GFX940-NEXT:    v_mov_b32_e32 v1, v2
281; GFX940-NEXT:    v_mov_b32_e32 v0, v6
282; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
283; GFX940-NEXT:    s_waitcnt vmcnt(0)
284; GFX940-NEXT:    s_setpc_b64 s[30:31]
285  %vec0 = call <3 x i32> asm "; def $0", "=v"()
286  %vec1 = call <3 x i32> asm "; def $0", "=v"()
287  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
288  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
289  ret void
290}
291
; Mask <5, 1, poison, poison>: result lane 0 is %vec1[2] (indices 3-5 select
; from the second operand) and lane 1 is %vec0[1]; lanes 2-3 are poison. The
; expected output contains two asm defs; the first lands in v[0:2], so only
; lane 0 needs a copy (v4 into v0) before the store.
292define void @v_shuffle_v4i32_v3i32__5_1_u_u(ptr addrspace(1) inreg %ptr) {
293; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u:
294; GFX900:       ; %bb.0:
295; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296; GFX900-NEXT:    ;;#ASMSTART
297; GFX900-NEXT:    ; def v[0:2]
298; GFX900-NEXT:    ;;#ASMEND
299; GFX900-NEXT:    v_mov_b32_e32 v5, 0
300; GFX900-NEXT:    ;;#ASMSTART
301; GFX900-NEXT:    ; def v[2:4]
302; GFX900-NEXT:    ;;#ASMEND
303; GFX900-NEXT:    v_mov_b32_e32 v0, v4
304; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
305; GFX900-NEXT:    s_waitcnt vmcnt(0)
306; GFX900-NEXT:    s_setpc_b64 s[30:31]
307;
308; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u:
309; GFX90A:       ; %bb.0:
310; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311; GFX90A-NEXT:    ;;#ASMSTART
312; GFX90A-NEXT:    ; def v[0:2]
313; GFX90A-NEXT:    ;;#ASMEND
314; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
315; GFX90A-NEXT:    ;;#ASMSTART
316; GFX90A-NEXT:    ; def v[2:4]
317; GFX90A-NEXT:    ;;#ASMEND
318; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
319; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
320; GFX90A-NEXT:    s_waitcnt vmcnt(0)
321; GFX90A-NEXT:    s_setpc_b64 s[30:31]
322;
323; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_u_u:
324; GFX940:       ; %bb.0:
325; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
326; GFX940-NEXT:    ;;#ASMSTART
327; GFX940-NEXT:    ; def v[0:2]
328; GFX940-NEXT:    ;;#ASMEND
329; GFX940-NEXT:    v_mov_b32_e32 v5, 0
330; GFX940-NEXT:    ;;#ASMSTART
331; GFX940-NEXT:    ; def v[2:4]
332; GFX940-NEXT:    ;;#ASMEND
333; GFX940-NEXT:    s_nop 0
334; GFX940-NEXT:    v_mov_b32_e32 v0, v4
335; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
336; GFX940-NEXT:    s_waitcnt vmcnt(0)
337; GFX940-NEXT:    s_setpc_b64 s[30:31]
338  %vec0 = call <3 x i32> asm "; def $0", "=v"()
339  %vec1 = call <3 x i32> asm "; def $0", "=v"()
340  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
341  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
342  ret void
343}
344
; Mask <5, 2, poison, poison>: result lane 0 is %vec1[2] (indices 3-5 select
; from the second operand) and lane 1 is %vec0[2]; lanes 2-3 are poison. The
; expected output contains two asm defs followed by one copy per defined lane
; (last register of the second def into v0, v2 into v1).
345define void @v_shuffle_v4i32_v3i32__5_2_u_u(ptr addrspace(1) inreg %ptr) {
346; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_u_u:
347; GFX900:       ; %bb.0:
348; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349; GFX900-NEXT:    ;;#ASMSTART
350; GFX900-NEXT:    ; def v[0:2]
351; GFX900-NEXT:    ;;#ASMEND
352; GFX900-NEXT:    v_mov_b32_e32 v6, 0
353; GFX900-NEXT:    ;;#ASMSTART
354; GFX900-NEXT:    ; def v[3:5]
355; GFX900-NEXT:    ;;#ASMEND
356; GFX900-NEXT:    v_mov_b32_e32 v0, v5
357; GFX900-NEXT:    v_mov_b32_e32 v1, v2
358; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
359; GFX900-NEXT:    s_waitcnt vmcnt(0)
360; GFX900-NEXT:    s_setpc_b64 s[30:31]
361;
362; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_u_u:
363; GFX90A:       ; %bb.0:
364; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365; GFX90A-NEXT:    ;;#ASMSTART
366; GFX90A-NEXT:    ; def v[0:2]
367; GFX90A-NEXT:    ;;#ASMEND
368; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
369; GFX90A-NEXT:    ;;#ASMSTART
370; GFX90A-NEXT:    ; def v[4:6]
371; GFX90A-NEXT:    ;;#ASMEND
372; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
373; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
374; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
375; GFX90A-NEXT:    s_waitcnt vmcnt(0)
376; GFX90A-NEXT:    s_setpc_b64 s[30:31]
377;
378; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_u_u:
379; GFX940:       ; %bb.0:
380; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381; GFX940-NEXT:    ;;#ASMSTART
382; GFX940-NEXT:    ; def v[0:2]
383; GFX940-NEXT:    ;;#ASMEND
384; GFX940-NEXT:    v_mov_b32_e32 v3, 0
385; GFX940-NEXT:    ;;#ASMSTART
386; GFX940-NEXT:    ; def v[4:6]
387; GFX940-NEXT:    ;;#ASMEND
388; GFX940-NEXT:    v_mov_b32_e32 v1, v2
389; GFX940-NEXT:    v_mov_b32_e32 v0, v6
390; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
391; GFX940-NEXT:    s_waitcnt vmcnt(0)
392; GFX940-NEXT:    s_setpc_b64 s[30:31]
393  %vec0 = call <3 x i32> asm "; def $0", "=v"()
394  %vec1 = call <3 x i32> asm "; def $0", "=v"()
395  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
396  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
397  ret void
398}
399
; Mask <5, 3, poison, poison>: indices 3-5 select from the second operand, so
; result lane 0 is %vec1[2] and lane 1 is %vec1[0]; lanes 2-3 are poison.
; %vec0 is not selected, and the expected output shows a single asm def.
400define void @v_shuffle_v4i32_v3i32__5_3_u_u(ptr addrspace(1) inreg %ptr) {
401; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u:
402; GFX900:       ; %bb.0:
403; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404; GFX900-NEXT:    v_mov_b32_e32 v4, 0
405; GFX900-NEXT:    ;;#ASMSTART
406; GFX900-NEXT:    ; def v[1:3]
407; GFX900-NEXT:    ;;#ASMEND
408; GFX900-NEXT:    v_mov_b32_e32 v0, v3
409; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
410; GFX900-NEXT:    s_waitcnt vmcnt(0)
411; GFX900-NEXT:    s_setpc_b64 s[30:31]
412;
413; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u:
414; GFX90A:       ; %bb.0:
415; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
417; GFX90A-NEXT:    ;;#ASMSTART
418; GFX90A-NEXT:    ; def v[2:4]
419; GFX90A-NEXT:    ;;#ASMEND
420; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
421; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
422; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
423; GFX90A-NEXT:    s_waitcnt vmcnt(0)
424; GFX90A-NEXT:    s_setpc_b64 s[30:31]
425;
426; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_u_u:
427; GFX940:       ; %bb.0:
428; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429; GFX940-NEXT:    v_mov_b32_e32 v5, 0
430; GFX940-NEXT:    ;;#ASMSTART
431; GFX940-NEXT:    ; def v[2:4]
432; GFX940-NEXT:    ;;#ASMEND
433; GFX940-NEXT:    s_nop 0
434; GFX940-NEXT:    v_mov_b32_e32 v0, v4
435; GFX940-NEXT:    v_mov_b32_e32 v1, v2
436; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
437; GFX940-NEXT:    s_waitcnt vmcnt(0)
438; GFX940-NEXT:    s_setpc_b64 s[30:31]
439  %vec0 = call <3 x i32> asm "; def $0", "=v"()
440  %vec1 = call <3 x i32> asm "; def $0", "=v"()
441  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
442  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
443  ret void
444}
445
; Mask <5, 4, poison, poison>: indices 3-5 select from the second operand, so
; result lane 0 is %vec1[2] and lane 1 is %vec1[1]; lanes 2-3 are poison.
; %vec0 is not selected. The expected output shows a single asm def into
; v[0:2] and only one copy (v2 into v0), since v1 already holds element 1.
446define void @v_shuffle_v4i32_v3i32__5_4_u_u(ptr addrspace(1) inreg %ptr) {
447; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u:
448; GFX900:       ; %bb.0:
449; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450; GFX900-NEXT:    ;;#ASMSTART
451; GFX900-NEXT:    ; def v[0:2]
452; GFX900-NEXT:    ;;#ASMEND
453; GFX900-NEXT:    v_mov_b32_e32 v3, 0
454; GFX900-NEXT:    v_mov_b32_e32 v0, v2
455; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
456; GFX900-NEXT:    s_waitcnt vmcnt(0)
457; GFX900-NEXT:    s_setpc_b64 s[30:31]
458;
459; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u:
460; GFX90A:       ; %bb.0:
461; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462; GFX90A-NEXT:    ;;#ASMSTART
463; GFX90A-NEXT:    ; def v[0:2]
464; GFX90A-NEXT:    ;;#ASMEND
465; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
466; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
467; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
468; GFX90A-NEXT:    s_waitcnt vmcnt(0)
469; GFX90A-NEXT:    s_setpc_b64 s[30:31]
470;
471; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_u_u:
472; GFX940:       ; %bb.0:
473; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474; GFX940-NEXT:    ;;#ASMSTART
475; GFX940-NEXT:    ; def v[0:2]
476; GFX940-NEXT:    ;;#ASMEND
477; GFX940-NEXT:    v_mov_b32_e32 v3, 0
478; GFX940-NEXT:    v_mov_b32_e32 v0, v2
479; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
480; GFX940-NEXT:    s_waitcnt vmcnt(0)
481; GFX940-NEXT:    s_setpc_b64 s[30:31]
482  %vec0 = call <3 x i32> asm "; def $0", "=v"()
483  %vec1 = call <3 x i32> asm "; def $0", "=v"()
484  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
485  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
486  ret void
487}
488
; Mask <5, 5, poison, poison>: indices 3-5 select from the second operand, so
; result lanes 0 and 1 are both %vec1[2]; lanes 2-3 are poison. %vec0 is not
; selected. The expected output shows a single asm def into v[0:2] with v2
; copied into both v0 and v1.
489define void @v_shuffle_v4i32_v3i32__5_5_u_u(ptr addrspace(1) inreg %ptr) {
490; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u:
491; GFX900:       ; %bb.0:
492; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493; GFX900-NEXT:    ;;#ASMSTART
494; GFX900-NEXT:    ; def v[0:2]
495; GFX900-NEXT:    ;;#ASMEND
496; GFX900-NEXT:    v_mov_b32_e32 v3, 0
497; GFX900-NEXT:    v_mov_b32_e32 v0, v2
498; GFX900-NEXT:    v_mov_b32_e32 v1, v2
499; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
500; GFX900-NEXT:    s_waitcnt vmcnt(0)
501; GFX900-NEXT:    s_setpc_b64 s[30:31]
502;
503; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u:
504; GFX90A:       ; %bb.0:
505; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
506; GFX90A-NEXT:    ;;#ASMSTART
507; GFX90A-NEXT:    ; def v[0:2]
508; GFX90A-NEXT:    ;;#ASMEND
509; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
510; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
511; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
512; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
513; GFX90A-NEXT:    s_waitcnt vmcnt(0)
514; GFX90A-NEXT:    s_setpc_b64 s[30:31]
515;
516; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_u:
517; GFX940:       ; %bb.0:
518; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
519; GFX940-NEXT:    ;;#ASMSTART
520; GFX940-NEXT:    ; def v[0:2]
521; GFX940-NEXT:    ;;#ASMEND
522; GFX940-NEXT:    v_mov_b32_e32 v3, 0
523; GFX940-NEXT:    v_mov_b32_e32 v0, v2
524; GFX940-NEXT:    v_mov_b32_e32 v1, v2
525; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
526; GFX940-NEXT:    s_waitcnt vmcnt(0)
527; GFX940-NEXT:    s_setpc_b64 s[30:31]
528  %vec0 = call <3 x i32> asm "; def $0", "=v"()
529  %vec1 = call <3 x i32> asm "; def $0", "=v"()
530  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
531  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
532  ret void
533}
534
; Mask <5, 5, 0, poison>: result lanes 0 and 1 are both %vec1[2] (indices 3-5
; select from the second operand) and lane 2 is %vec0[0]; lane 3 is poison.
; In the expected output the first asm def starts at v2, so lane 2 needs no
; copy; the last register of the second def is copied into v0 and v1.
535define void @v_shuffle_v4i32_v3i32__5_5_0_u(ptr addrspace(1) inreg %ptr) {
536; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u:
537; GFX900:       ; %bb.0:
538; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
539; GFX900-NEXT:    ;;#ASMSTART
540; GFX900-NEXT:    ; def v[2:4]
541; GFX900-NEXT:    ;;#ASMEND
542; GFX900-NEXT:    v_mov_b32_e32 v6, 0
543; GFX900-NEXT:    ;;#ASMSTART
544; GFX900-NEXT:    ; def v[3:5]
545; GFX900-NEXT:    ;;#ASMEND
546; GFX900-NEXT:    v_mov_b32_e32 v0, v5
547; GFX900-NEXT:    v_mov_b32_e32 v1, v5
548; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
549; GFX900-NEXT:    s_waitcnt vmcnt(0)
550; GFX900-NEXT:    s_setpc_b64 s[30:31]
551;
552; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u:
553; GFX90A:       ; %bb.0:
554; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555; GFX90A-NEXT:    ;;#ASMSTART
556; GFX90A-NEXT:    ; def v[2:4]
557; GFX90A-NEXT:    ;;#ASMEND
558; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
559; GFX90A-NEXT:    ;;#ASMSTART
560; GFX90A-NEXT:    ; def v[4:6]
561; GFX90A-NEXT:    ;;#ASMEND
562; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
563; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
564; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
565; GFX90A-NEXT:    s_waitcnt vmcnt(0)
566; GFX90A-NEXT:    s_setpc_b64 s[30:31]
567;
568; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_u:
569; GFX940:       ; %bb.0:
570; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571; GFX940-NEXT:    ;;#ASMSTART
572; GFX940-NEXT:    ; def v[2:4]
573; GFX940-NEXT:    ;;#ASMEND
574; GFX940-NEXT:    v_mov_b32_e32 v7, 0
575; GFX940-NEXT:    ;;#ASMSTART
576; GFX940-NEXT:    ; def v[4:6]
577; GFX940-NEXT:    ;;#ASMEND
578; GFX940-NEXT:    s_nop 0
579; GFX940-NEXT:    v_mov_b32_e32 v0, v6
580; GFX940-NEXT:    v_mov_b32_e32 v1, v6
581; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
582; GFX940-NEXT:    s_waitcnt vmcnt(0)
583; GFX940-NEXT:    s_setpc_b64 s[30:31]
584  %vec0 = call <3 x i32> asm "; def $0", "=v"()
585  %vec1 = call <3 x i32> asm "; def $0", "=v"()
586  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
587  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
588  ret void
589}
590
; Mask <5, 5, 1, poison>: result lanes 0 and 1 are both %vec1[2] (indices 3-5
; select from the second operand) and lane 2 is %vec0[1]; lane 3 is poison.
; Both source vectors are selected, so the expected output contains two asm
; defs; the register assignment differs between the checked targets.
591define void @v_shuffle_v4i32_v3i32__5_5_1_u(ptr addrspace(1) inreg %ptr) {
592; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_u:
593; GFX900:       ; %bb.0:
594; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595; GFX900-NEXT:    ;;#ASMSTART
596; GFX900-NEXT:    ; def v[1:3]
597; GFX900-NEXT:    ;;#ASMEND
598; GFX900-NEXT:    v_mov_b32_e32 v6, 0
599; GFX900-NEXT:    ;;#ASMSTART
600; GFX900-NEXT:    ; def v[3:5]
601; GFX900-NEXT:    ;;#ASMEND
602; GFX900-NEXT:    v_mov_b32_e32 v0, v5
603; GFX900-NEXT:    v_mov_b32_e32 v1, v5
604; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
605; GFX900-NEXT:    s_waitcnt vmcnt(0)
606; GFX900-NEXT:    s_setpc_b64 s[30:31]
607;
608; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_u:
609; GFX90A:       ; %bb.0:
610; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611; GFX90A-NEXT:    ;;#ASMSTART
612; GFX90A-NEXT:    ; def v[2:4]
613; GFX90A-NEXT:    ;;#ASMEND
614; GFX90A-NEXT:    ;;#ASMSTART
615; GFX90A-NEXT:    ; def v[0:2]
616; GFX90A-NEXT:    ;;#ASMEND
617; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
618; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
619; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
620; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
621; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
622; GFX90A-NEXT:    s_waitcnt vmcnt(0)
623; GFX90A-NEXT:    s_setpc_b64 s[30:31]
624;
625; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_u:
626; GFX940:       ; %bb.0:
627; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628; GFX940-NEXT:    ;;#ASMSTART
629; GFX940-NEXT:    ; def v[2:4]
630; GFX940-NEXT:    ;;#ASMEND
631; GFX940-NEXT:    v_mov_b32_e32 v5, 0
632; GFX940-NEXT:    ;;#ASMSTART
633; GFX940-NEXT:    ; def v[0:2]
634; GFX940-NEXT:    ;;#ASMEND
635; GFX940-NEXT:    s_nop 0
636; GFX940-NEXT:    v_mov_b32_e32 v0, v2
637; GFX940-NEXT:    v_mov_b32_e32 v1, v2
638; GFX940-NEXT:    v_mov_b32_e32 v2, v3
639; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
640; GFX940-NEXT:    s_waitcnt vmcnt(0)
641; GFX940-NEXT:    s_setpc_b64 s[30:31]
642  %vec0 = call <3 x i32> asm "; def $0", "=v"()
643  %vec1 = call <3 x i32> asm "; def $0", "=v"()
644  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
645  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
646  ret void
647}
648
; Mask <5, 5, 2, poison>: result lanes 0 and 1 are both %vec1[2] (indices 3-5
; select from the second operand) and lane 2 is %vec0[2]; lane 3 is poison.
; In the expected output the first asm def lands in v[0:2], so lane 2 needs no
; copy; the last register of the second def is copied into v0 and v1.
649define void @v_shuffle_v4i32_v3i32__5_5_2_u(ptr addrspace(1) inreg %ptr) {
650; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_u:
651; GFX900:       ; %bb.0:
652; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653; GFX900-NEXT:    ;;#ASMSTART
654; GFX900-NEXT:    ; def v[0:2]
655; GFX900-NEXT:    ;;#ASMEND
656; GFX900-NEXT:    v_mov_b32_e32 v6, 0
657; GFX900-NEXT:    ;;#ASMSTART
658; GFX900-NEXT:    ; def v[3:5]
659; GFX900-NEXT:    ;;#ASMEND
660; GFX900-NEXT:    v_mov_b32_e32 v0, v5
661; GFX900-NEXT:    v_mov_b32_e32 v1, v5
662; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
663; GFX900-NEXT:    s_waitcnt vmcnt(0)
664; GFX900-NEXT:    s_setpc_b64 s[30:31]
665;
666; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_u:
667; GFX90A:       ; %bb.0:
668; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
669; GFX90A-NEXT:    ;;#ASMSTART
670; GFX90A-NEXT:    ; def v[0:2]
671; GFX90A-NEXT:    ;;#ASMEND
672; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
673; GFX90A-NEXT:    ;;#ASMSTART
674; GFX90A-NEXT:    ; def v[4:6]
675; GFX90A-NEXT:    ;;#ASMEND
676; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
677; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
678; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
679; GFX90A-NEXT:    s_waitcnt vmcnt(0)
680; GFX90A-NEXT:    s_setpc_b64 s[30:31]
681;
682; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_u:
683; GFX940:       ; %bb.0:
684; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
685; GFX940-NEXT:    ;;#ASMSTART
686; GFX940-NEXT:    ; def v[0:2]
687; GFX940-NEXT:    ;;#ASMEND
688; GFX940-NEXT:    v_mov_b32_e32 v3, 0
689; GFX940-NEXT:    ;;#ASMSTART
690; GFX940-NEXT:    ; def v[4:6]
691; GFX940-NEXT:    ;;#ASMEND
692; GFX940-NEXT:    s_nop 0
693; GFX940-NEXT:    v_mov_b32_e32 v0, v6
694; GFX940-NEXT:    v_mov_b32_e32 v1, v6
695; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
696; GFX940-NEXT:    s_waitcnt vmcnt(0)
697; GFX940-NEXT:    s_setpc_b64 s[30:31]
698  %vec0 = call <3 x i32> asm "; def $0", "=v"()
699  %vec1 = call <3 x i32> asm "; def $0", "=v"()
700  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
701  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
702  ret void
703}
704
; Mask <5, 5, 3, poison>: indices 3-5 select from the second operand, so
; result lanes 0 and 1 are both %vec1[2] and lane 2 is %vec1[0]; lane 3 is
; poison. %vec0 is not selected. The expected output shows a single asm def
; into v[2:4], leaving element 0 already in v2, with v4 copied to v0 and v1.
705define void @v_shuffle_v4i32_v3i32__5_5_3_u(ptr addrspace(1) inreg %ptr) {
706; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u:
707; GFX900:       ; %bb.0:
708; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
709; GFX900-NEXT:    v_mov_b32_e32 v5, 0
710; GFX900-NEXT:    ;;#ASMSTART
711; GFX900-NEXT:    ; def v[2:4]
712; GFX900-NEXT:    ;;#ASMEND
713; GFX900-NEXT:    v_mov_b32_e32 v0, v4
714; GFX900-NEXT:    v_mov_b32_e32 v1, v4
715; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
716; GFX900-NEXT:    s_waitcnt vmcnt(0)
717; GFX900-NEXT:    s_setpc_b64 s[30:31]
718;
719; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u:
720; GFX90A:       ; %bb.0:
721; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
723; GFX90A-NEXT:    ;;#ASMSTART
724; GFX90A-NEXT:    ; def v[2:4]
725; GFX90A-NEXT:    ;;#ASMEND
726; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
727; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
728; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
729; GFX90A-NEXT:    s_waitcnt vmcnt(0)
730; GFX90A-NEXT:    s_setpc_b64 s[30:31]
731;
732; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_u:
733; GFX940:       ; %bb.0:
734; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
735; GFX940-NEXT:    v_mov_b32_e32 v5, 0
736; GFX940-NEXT:    ;;#ASMSTART
737; GFX940-NEXT:    ; def v[2:4]
738; GFX940-NEXT:    ;;#ASMEND
739; GFX940-NEXT:    s_nop 0
740; GFX940-NEXT:    v_mov_b32_e32 v0, v4
741; GFX940-NEXT:    v_mov_b32_e32 v1, v4
742; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
743; GFX940-NEXT:    s_waitcnt vmcnt(0)
744; GFX940-NEXT:    s_setpc_b64 s[30:31]
745  %vec0 = call <3 x i32> asm "; def $0", "=v"()
746  %vec1 = call <3 x i32> asm "; def $0", "=v"()
747  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
748  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
749  ret void
750}
751
; Mask <5, 5, 4, poison>: indices 3-5 select from the second operand, so
; result lanes 0 and 1 are both %vec1[2] and lane 2 is %vec1[1]; lane 3 is
; poison. %vec0 is not selected, and the expected output shows a single asm
; def; the register assignment differs between the checked targets.
752define void @v_shuffle_v4i32_v3i32__5_5_4_u(ptr addrspace(1) inreg %ptr) {
753; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u:
754; GFX900:       ; %bb.0:
755; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
756; GFX900-NEXT:    ;;#ASMSTART
757; GFX900-NEXT:    ; def v[1:3]
758; GFX900-NEXT:    ;;#ASMEND
759; GFX900-NEXT:    v_mov_b32_e32 v4, 0
760; GFX900-NEXT:    v_mov_b32_e32 v0, v3
761; GFX900-NEXT:    v_mov_b32_e32 v1, v3
762; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
763; GFX900-NEXT:    s_waitcnt vmcnt(0)
764; GFX900-NEXT:    s_setpc_b64 s[30:31]
765;
766; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u:
767; GFX90A:       ; %bb.0:
768; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769; GFX90A-NEXT:    ;;#ASMSTART
770; GFX90A-NEXT:    ; def v[2:4]
771; GFX90A-NEXT:    ;;#ASMEND
772; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
773; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
774; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
775; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
776; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
777; GFX90A-NEXT:    s_waitcnt vmcnt(0)
778; GFX90A-NEXT:    s_setpc_b64 s[30:31]
779;
780; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_u:
781; GFX940:       ; %bb.0:
782; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
783; GFX940-NEXT:    ;;#ASMSTART
784; GFX940-NEXT:    ; def v[2:4]
785; GFX940-NEXT:    ;;#ASMEND
786; GFX940-NEXT:    v_mov_b32_e32 v5, 0
787; GFX940-NEXT:    v_mov_b32_e32 v0, v4
788; GFX940-NEXT:    v_mov_b32_e32 v1, v4
789; GFX940-NEXT:    v_mov_b32_e32 v2, v3
790; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
791; GFX940-NEXT:    s_waitcnt vmcnt(0)
792; GFX940-NEXT:    s_setpc_b64 s[30:31]
793  %vec0 = call <3 x i32> asm "; def $0", "=v"()
794  %vec1 = call <3 x i32> asm "; def $0", "=v"()
795  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
796  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
797  ret void
798}
799
; Shuffle mask <5, 5, 5, poison> over two <3 x i32> inputs produced by inline
; asm with a "=v" output constraint. Indices 3-5 address the second operand, so
; lanes 0-2 all take element 2 of %vec1 and lane 3 is left unspecified. No lane
; reads %vec0. The <4 x i32> result is stored to the global pointer argument.
800define void @v_shuffle_v4i32_v3i32__5_5_5_u(ptr addrspace(1) inreg %ptr) {
801; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u:
802; GFX900:       ; %bb.0:
803; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
804; GFX900-NEXT:    ;;#ASMSTART
805; GFX900-NEXT:    ; def v[0:2]
806; GFX900-NEXT:    ;;#ASMEND
807; GFX900-NEXT:    v_mov_b32_e32 v3, 0
808; GFX900-NEXT:    v_mov_b32_e32 v0, v2
809; GFX900-NEXT:    v_mov_b32_e32 v1, v2
810; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
811; GFX900-NEXT:    s_waitcnt vmcnt(0)
812; GFX900-NEXT:    s_setpc_b64 s[30:31]
813;
814; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u:
815; GFX90A:       ; %bb.0:
816; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817; GFX90A-NEXT:    ;;#ASMSTART
818; GFX90A-NEXT:    ; def v[0:2]
819; GFX90A-NEXT:    ;;#ASMEND
820; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
821; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
822; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
823; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
824; GFX90A-NEXT:    s_waitcnt vmcnt(0)
825; GFX90A-NEXT:    s_setpc_b64 s[30:31]
826;
827; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_u:
828; GFX940:       ; %bb.0:
829; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830; GFX940-NEXT:    ;;#ASMSTART
831; GFX940-NEXT:    ; def v[0:2]
832; GFX940-NEXT:    ;;#ASMEND
833; GFX940-NEXT:    v_mov_b32_e32 v3, 0
834; GFX940-NEXT:    v_mov_b32_e32 v0, v2
835; GFX940-NEXT:    v_mov_b32_e32 v1, v2
836; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
837; GFX940-NEXT:    s_waitcnt vmcnt(0)
838; GFX940-NEXT:    s_setpc_b64 s[30:31]
839  %vec0 = call <3 x i32> asm "; def $0", "=v"()
840  %vec1 = call <3 x i32> asm "; def $0", "=v"()
841  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
842  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
843  ret void
844}
845
; Shuffle mask <5, 5, 5, 0> over two <3 x i32> inputs produced by inline asm
; with a "=v" output constraint. Indices 0-2 address the first operand and 3-5
; the second, so lanes 0-2 take element 2 of %vec1 and lane 3 takes element 0
; of %vec0. Both inputs are read, hence two asm defs in the expected output.
; The <4 x i32> result is stored to the global pointer argument.
846define void @v_shuffle_v4i32_v3i32__5_5_5_0(ptr addrspace(1) inreg %ptr) {
847; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0:
848; GFX900:       ; %bb.0:
849; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
850; GFX900-NEXT:    ;;#ASMSTART
851; GFX900-NEXT:    ; def v[0:2]
852; GFX900-NEXT:    ;;#ASMEND
853; GFX900-NEXT:    v_mov_b32_e32 v6, 0
854; GFX900-NEXT:    v_mov_b32_e32 v0, v2
855; GFX900-NEXT:    v_mov_b32_e32 v1, v2
856; GFX900-NEXT:    ;;#ASMSTART
857; GFX900-NEXT:    ; def v[3:5]
858; GFX900-NEXT:    ;;#ASMEND
859; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
860; GFX900-NEXT:    s_waitcnt vmcnt(0)
861; GFX900-NEXT:    s_setpc_b64 s[30:31]
862;
863; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0:
864; GFX90A:       ; %bb.0:
865; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866; GFX90A-NEXT:    ;;#ASMSTART
867; GFX90A-NEXT:    ; def v[0:2]
868; GFX90A-NEXT:    ;;#ASMEND
869; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
870; GFX90A-NEXT:    ;;#ASMSTART
871; GFX90A-NEXT:    ; def v[4:6]
872; GFX90A-NEXT:    ;;#ASMEND
873; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
874; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
875; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
876; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
877; GFX90A-NEXT:    s_waitcnt vmcnt(0)
878; GFX90A-NEXT:    s_setpc_b64 s[30:31]
879;
880; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_0:
881; GFX940:       ; %bb.0:
882; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
883; GFX940-NEXT:    ;;#ASMSTART
884; GFX940-NEXT:    ; def v[0:2]
885; GFX940-NEXT:    ;;#ASMEND
886; GFX940-NEXT:    v_mov_b32_e32 v7, 0
887; GFX940-NEXT:    ;;#ASMSTART
888; GFX940-NEXT:    ; def v[4:6]
889; GFX940-NEXT:    ;;#ASMEND
890; GFX940-NEXT:    v_mov_b32_e32 v0, v2
891; GFX940-NEXT:    v_mov_b32_e32 v1, v2
892; GFX940-NEXT:    v_mov_b32_e32 v3, v4
893; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
894; GFX940-NEXT:    s_waitcnt vmcnt(0)
895; GFX940-NEXT:    s_setpc_b64 s[30:31]
896  %vec0 = call <3 x i32> asm "; def $0", "=v"()
897  %vec1 = call <3 x i32> asm "; def $0", "=v"()
898  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0>
899  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
900  ret void
901}
902
; Shuffle mask <5, 5, 5, 1> over two <3 x i32> inputs produced by inline asm
; with a "=v" output constraint. Indices 0-2 address the first operand and 3-5
; the second, so lanes 0-2 take element 2 of %vec1 and lane 3 takes element 1
; of %vec0. Both inputs are read, hence two asm defs in the expected output.
; The <4 x i32> result is stored to the global pointer argument.
903define void @v_shuffle_v4i32_v3i32__5_5_5_1(ptr addrspace(1) inreg %ptr) {
904; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_1:
905; GFX900:       ; %bb.0:
906; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
907; GFX900-NEXT:    ;;#ASMSTART
908; GFX900-NEXT:    ; def v[2:4]
909; GFX900-NEXT:    ;;#ASMEND
910; GFX900-NEXT:    ;;#ASMSTART
911; GFX900-NEXT:    ; def v[0:2]
912; GFX900-NEXT:    ;;#ASMEND
913; GFX900-NEXT:    v_mov_b32_e32 v5, 0
914; GFX900-NEXT:    v_mov_b32_e32 v0, v2
915; GFX900-NEXT:    v_mov_b32_e32 v1, v2
916; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
917; GFX900-NEXT:    s_waitcnt vmcnt(0)
918; GFX900-NEXT:    s_setpc_b64 s[30:31]
919;
920; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_1:
921; GFX90A:       ; %bb.0:
922; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
923; GFX90A-NEXT:    ;;#ASMSTART
924; GFX90A-NEXT:    ; def v[2:4]
925; GFX90A-NEXT:    ;;#ASMEND
926; GFX90A-NEXT:    ;;#ASMSTART
927; GFX90A-NEXT:    ; def v[0:2]
928; GFX90A-NEXT:    ;;#ASMEND
929; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
930; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
931; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
932; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
933; GFX90A-NEXT:    s_waitcnt vmcnt(0)
934; GFX90A-NEXT:    s_setpc_b64 s[30:31]
935;
936; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_1:
937; GFX940:       ; %bb.0:
938; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
939; GFX940-NEXT:    ;;#ASMSTART
940; GFX940-NEXT:    ; def v[2:4]
941; GFX940-NEXT:    ;;#ASMEND
942; GFX940-NEXT:    v_mov_b32_e32 v5, 0
943; GFX940-NEXT:    ;;#ASMSTART
944; GFX940-NEXT:    ; def v[0:2]
945; GFX940-NEXT:    ;;#ASMEND
946; GFX940-NEXT:    s_nop 0
947; GFX940-NEXT:    v_mov_b32_e32 v0, v2
948; GFX940-NEXT:    v_mov_b32_e32 v1, v2
949; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
950; GFX940-NEXT:    s_waitcnt vmcnt(0)
951; GFX940-NEXT:    s_setpc_b64 s[30:31]
952  %vec0 = call <3 x i32> asm "; def $0", "=v"()
953  %vec1 = call <3 x i32> asm "; def $0", "=v"()
954  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
955  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
956  ret void
957}
958
; Shuffle mask <5, 5, 5, 2> over two <3 x i32> inputs produced by inline asm
; with a "=v" output constraint. Indices 0-2 address the first operand and 3-5
; the second, so lanes 0-2 take element 2 of %vec1 and lane 3 takes element 2
; of %vec0. Both inputs are read, hence two asm defs in the expected output.
; The <4 x i32> result is stored to the global pointer argument.
959define void @v_shuffle_v4i32_v3i32__5_5_5_2(ptr addrspace(1) inreg %ptr) {
960; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_2:
961; GFX900:       ; %bb.0:
962; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
963; GFX900-NEXT:    ;;#ASMSTART
964; GFX900-NEXT:    ; def v[1:3]
965; GFX900-NEXT:    ;;#ASMEND
966; GFX900-NEXT:    ;;#ASMSTART
967; GFX900-NEXT:    ; def v[0:2]
968; GFX900-NEXT:    ;;#ASMEND
969; GFX900-NEXT:    v_mov_b32_e32 v4, 0
970; GFX900-NEXT:    v_mov_b32_e32 v0, v2
971; GFX900-NEXT:    v_mov_b32_e32 v1, v2
972; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
973; GFX900-NEXT:    s_waitcnt vmcnt(0)
974; GFX900-NEXT:    s_setpc_b64 s[30:31]
975;
976; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_2:
977; GFX90A:       ; %bb.0:
978; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
979; GFX90A-NEXT:    ;;#ASMSTART
980; GFX90A-NEXT:    ; def v[2:4]
981; GFX90A-NEXT:    ;;#ASMEND
982; GFX90A-NEXT:    ;;#ASMSTART
983; GFX90A-NEXT:    ; def v[0:2]
984; GFX90A-NEXT:    ;;#ASMEND
985; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
986; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
987; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
988; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
989; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
990; GFX90A-NEXT:    s_waitcnt vmcnt(0)
991; GFX90A-NEXT:    s_setpc_b64 s[30:31]
992;
993; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_2:
994; GFX940:       ; %bb.0:
995; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
996; GFX940-NEXT:    ;;#ASMSTART
997; GFX940-NEXT:    ; def v[2:4]
998; GFX940-NEXT:    ;;#ASMEND
999; GFX940-NEXT:    v_mov_b32_e32 v5, 0
1000; GFX940-NEXT:    ;;#ASMSTART
1001; GFX940-NEXT:    ; def v[0:2]
1002; GFX940-NEXT:    ;;#ASMEND
1003; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1004; GFX940-NEXT:    v_mov_b32_e32 v0, v2
1005; GFX940-NEXT:    v_mov_b32_e32 v1, v2
1006; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
1007; GFX940-NEXT:    s_waitcnt vmcnt(0)
1008; GFX940-NEXT:    s_setpc_b64 s[30:31]
1009  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1010  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1011  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
1012  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1013  ret void
1014}
1015
; Shuffle mask <5, 5, 5, 3> over two <3 x i32> inputs produced by inline asm
; with a "=v" output constraint. Indices 3-5 address the second operand, so
; lanes 0-2 take element 2 of %vec1 and lane 3 takes element 0 of %vec1. No
; lane reads %vec0, and the expected output contains a single asm def. The
; <4 x i32> result is stored to the global pointer argument.
1016define void @v_shuffle_v4i32_v3i32__5_5_5_3(ptr addrspace(1) inreg %ptr) {
1017; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3:
1018; GFX900:       ; %bb.0:
1019; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1020; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1021; GFX900-NEXT:    ;;#ASMSTART
1022; GFX900-NEXT:    ; def v[3:5]
1023; GFX900-NEXT:    ;;#ASMEND
1024; GFX900-NEXT:    v_mov_b32_e32 v0, v5
1025; GFX900-NEXT:    v_mov_b32_e32 v1, v5
1026; GFX900-NEXT:    v_mov_b32_e32 v2, v5
1027; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1028; GFX900-NEXT:    s_waitcnt vmcnt(0)
1029; GFX900-NEXT:    s_setpc_b64 s[30:31]
1030;
1031; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3:
1032; GFX90A:       ; %bb.0:
1033; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1035; GFX90A-NEXT:    ;;#ASMSTART
1036; GFX90A-NEXT:    ; def v[4:6]
1037; GFX90A-NEXT:    ;;#ASMEND
1038; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
1039; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
1040; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
1041; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1042; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1043; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1044; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1045;
1046; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_3:
1047; GFX940:       ; %bb.0:
1048; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1049; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1050; GFX940-NEXT:    ;;#ASMSTART
1051; GFX940-NEXT:    ; def v[4:6]
1052; GFX940-NEXT:    ;;#ASMEND
1053; GFX940-NEXT:    s_nop 0
1054; GFX940-NEXT:    v_mov_b32_e32 v0, v6
1055; GFX940-NEXT:    v_mov_b32_e32 v1, v6
1056; GFX940-NEXT:    v_mov_b32_e32 v2, v6
1057; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1058; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1059; GFX940-NEXT:    s_waitcnt vmcnt(0)
1060; GFX940-NEXT:    s_setpc_b64 s[30:31]
1061  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1062  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1063  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
1064  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1065  ret void
1066}
1067
; Shuffle mask <5, 5, 5, 4> over two <3 x i32> inputs produced by inline asm
; with a "=v" output constraint. Indices 3-5 address the second operand, so
; lanes 0-2 take element 2 of %vec1 and lane 3 takes element 1 of %vec1. No
; lane reads %vec0, and the expected output contains a single asm def. The
; <4 x i32> result is stored to the global pointer argument.
1068define void @v_shuffle_v4i32_v3i32__5_5_5_4(ptr addrspace(1) inreg %ptr) {
1069; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4:
1070; GFX900:       ; %bb.0:
1071; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1072; GFX900-NEXT:    ;;#ASMSTART
1073; GFX900-NEXT:    ; def v[2:4]
1074; GFX900-NEXT:    ;;#ASMEND
1075; GFX900-NEXT:    v_mov_b32_e32 v5, 0
1076; GFX900-NEXT:    v_mov_b32_e32 v0, v4
1077; GFX900-NEXT:    v_mov_b32_e32 v1, v4
1078; GFX900-NEXT:    v_mov_b32_e32 v2, v4
1079; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
1080; GFX900-NEXT:    s_waitcnt vmcnt(0)
1081; GFX900-NEXT:    s_setpc_b64 s[30:31]
1082;
1083; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4:
1084; GFX90A:       ; %bb.0:
1085; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1086; GFX90A-NEXT:    ;;#ASMSTART
1087; GFX90A-NEXT:    ; def v[2:4]
1088; GFX90A-NEXT:    ;;#ASMEND
1089; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
1090; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
1091; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
1092; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1093; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
1094; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1095; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1096;
1097; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_4:
1098; GFX940:       ; %bb.0:
1099; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1100; GFX940-NEXT:    ;;#ASMSTART
1101; GFX940-NEXT:    ; def v[2:4]
1102; GFX940-NEXT:    ;;#ASMEND
1103; GFX940-NEXT:    v_mov_b32_e32 v5, 0
1104; GFX940-NEXT:    v_mov_b32_e32 v0, v4
1105; GFX940-NEXT:    v_mov_b32_e32 v1, v4
1106; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1107; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
1108; GFX940-NEXT:    s_waitcnt vmcnt(0)
1109; GFX940-NEXT:    s_setpc_b64 s[30:31]
1110  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1111  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1112  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
1113  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1114  ret void
1115}
1116
; Shuffle mask <5, 5, 5, 5> over two <3 x i32> inputs produced by inline asm
; with a "=v" output constraint. Index 5 addresses element 2 of the second
; operand, so all four result lanes are a splat of element 2 of %vec1. No lane
; reads %vec0, and the expected output contains a single asm def. The
; <4 x i32> result is stored to the global pointer argument.
1117define void @v_shuffle_v4i32_v3i32__5_5_5_5(ptr addrspace(1) inreg %ptr) {
1118; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5:
1119; GFX900:       ; %bb.0:
1120; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121; GFX900-NEXT:    ;;#ASMSTART
1122; GFX900-NEXT:    ; def v[0:2]
1123; GFX900-NEXT:    ;;#ASMEND
1124; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1125; GFX900-NEXT:    v_mov_b32_e32 v0, v2
1126; GFX900-NEXT:    v_mov_b32_e32 v1, v2
1127; GFX900-NEXT:    v_mov_b32_e32 v3, v2
1128; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
1129; GFX900-NEXT:    s_waitcnt vmcnt(0)
1130; GFX900-NEXT:    s_setpc_b64 s[30:31]
1131;
1132; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5:
1133; GFX90A:       ; %bb.0:
1134; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1135; GFX90A-NEXT:    ;;#ASMSTART
1136; GFX90A-NEXT:    ; def v[0:2]
1137; GFX90A-NEXT:    ;;#ASMEND
1138; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1139; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
1140; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
1141; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
1142; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
1143; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1144; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1145;
1146; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_5_5:
1147; GFX940:       ; %bb.0:
1148; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1149; GFX940-NEXT:    ;;#ASMSTART
1150; GFX940-NEXT:    ; def v[0:2]
1151; GFX940-NEXT:    ;;#ASMEND
1152; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1153; GFX940-NEXT:    v_mov_b32_e32 v0, v2
1154; GFX940-NEXT:    v_mov_b32_e32 v1, v2
1155; GFX940-NEXT:    v_mov_b32_e32 v3, v2
1156; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
1157; GFX940-NEXT:    s_waitcnt vmcnt(0)
1158; GFX940-NEXT:    s_setpc_b64 s[30:31]
1159  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1160  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1161  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
1162  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1163  ret void
1164}
1165
; Shuffle mask <poison, 0, 0, 0> over a single <3 x i32> input produced by
; inline asm with a "=v" output constraint; the second shuffle operand is
; poison. Lane 0 is left unspecified and lanes 1-3 each take element 0 of
; %vec0. The <4 x i32> result is stored to the global pointer argument.
1166define void @v_shuffle_v4i32_v3i32__u_0_0_0(ptr addrspace(1) inreg %ptr) {
1167; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0:
1168; GFX900:       ; %bb.0:
1169; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1170; GFX900-NEXT:    ;;#ASMSTART
1171; GFX900-NEXT:    ; def v[1:3]
1172; GFX900-NEXT:    ;;#ASMEND
1173; GFX900-NEXT:    v_mov_b32_e32 v0, 0
1174; GFX900-NEXT:    v_mov_b32_e32 v2, v1
1175; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1176; GFX900-NEXT:    global_store_dwordx4 v0, v[0:3], s[16:17]
1177; GFX900-NEXT:    s_waitcnt vmcnt(0)
1178; GFX900-NEXT:    s_setpc_b64 s[30:31]
1179;
1180; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0:
1181; GFX90A:       ; %bb.0:
1182; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1183; GFX90A-NEXT:    ;;#ASMSTART
1184; GFX90A-NEXT:    ; def v[0:2]
1185; GFX90A-NEXT:    ;;#ASMEND
1186; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1187; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
1188; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1189; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
1190; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
1191; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1192; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1193;
1194; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_0_0_0:
1195; GFX940:       ; %bb.0:
1196; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1197; GFX940-NEXT:    ;;#ASMSTART
1198; GFX940-NEXT:    ; def v[0:2]
1199; GFX940-NEXT:    ;;#ASMEND
1200; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1201; GFX940-NEXT:    v_mov_b32_e32 v1, v0
1202; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1203; GFX940-NEXT:    v_mov_b32_e32 v3, v0
1204; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
1205; GFX940-NEXT:    s_waitcnt vmcnt(0)
1206; GFX940-NEXT:    s_setpc_b64 s[30:31]
1207  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1208  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
1209  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1210  ret void
1211}
1212
; All-zero shuffle mask (written as zeroinitializer) over a single <3 x i32>
; input produced by inline asm with a "=v" output constraint; the second
; shuffle operand is poison. Every result lane takes element 0 of %vec0, i.e.
; a 4-wide splat. The <4 x i32> result is stored to the global pointer
; argument.
1213define void @v_shuffle_v4i32_v3i32__0_0_0_0(ptr addrspace(1) inreg %ptr) {
1214; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_0_0_0:
1215; GFX900:       ; %bb.0:
1216; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1217; GFX900-NEXT:    ;;#ASMSTART
1218; GFX900-NEXT:    ; def v[0:2]
1219; GFX900-NEXT:    ;;#ASMEND
1220; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1221; GFX900-NEXT:    v_mov_b32_e32 v1, v0
1222; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1223; GFX900-NEXT:    v_mov_b32_e32 v3, v0
1224; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
1225; GFX900-NEXT:    s_waitcnt vmcnt(0)
1226; GFX900-NEXT:    s_setpc_b64 s[30:31]
1227;
1228; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_0_0_0:
1229; GFX90A:       ; %bb.0:
1230; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1231; GFX90A-NEXT:    ;;#ASMSTART
1232; GFX90A-NEXT:    ; def v[0:2]
1233; GFX90A-NEXT:    ;;#ASMEND
1234; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1235; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
1236; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1237; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
1238; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
1239; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1240; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1241;
1242; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_0_0_0:
1243; GFX940:       ; %bb.0:
1244; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1245; GFX940-NEXT:    ;;#ASMSTART
1246; GFX940-NEXT:    ; def v[0:2]
1247; GFX940-NEXT:    ;;#ASMEND
1248; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1249; GFX940-NEXT:    v_mov_b32_e32 v1, v0
1250; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1251; GFX940-NEXT:    v_mov_b32_e32 v3, v0
1252; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
1253; GFX940-NEXT:    s_waitcnt vmcnt(0)
1254; GFX940-NEXT:    s_setpc_b64 s[30:31]
1255  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1256  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> zeroinitializer
1257  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1258  ret void
1259}
1260
; Shuffle mask <1, 0, 0, 0> over a single <3 x i32> input produced by inline
; asm with a "=v" output constraint; the second shuffle operand is poison.
; Lane 0 takes element 1 of %vec0 and lanes 1-3 each take element 0 of %vec0.
; The <4 x i32> result is stored to the global pointer argument.
1261define void @v_shuffle_v4i32_v3i32__1_0_0_0(ptr addrspace(1) inreg %ptr) {
1262; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0:
1263; GFX900:       ; %bb.0:
1264; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1265; GFX900-NEXT:    ;;#ASMSTART
1266; GFX900-NEXT:    ; def v[1:3]
1267; GFX900-NEXT:    ;;#ASMEND
1268; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1269; GFX900-NEXT:    v_mov_b32_e32 v0, v2
1270; GFX900-NEXT:    v_mov_b32_e32 v2, v1
1271; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1272; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
1273; GFX900-NEXT:    s_waitcnt vmcnt(0)
1274; GFX900-NEXT:    s_setpc_b64 s[30:31]
1275;
1276; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0:
1277; GFX90A:       ; %bb.0:
1278; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1279; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1280; GFX90A-NEXT:    ;;#ASMSTART
1281; GFX90A-NEXT:    ; def v[4:6]
1282; GFX90A-NEXT:    ;;#ASMEND
1283; GFX90A-NEXT:    v_mov_b32_e32 v0, v5
1284; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
1285; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1286; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1287; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1288; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1289; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1290;
1291; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_0_0_0:
1292; GFX940:       ; %bb.0:
1293; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1295; GFX940-NEXT:    ;;#ASMSTART
1296; GFX940-NEXT:    ; def v[4:6]
1297; GFX940-NEXT:    ;;#ASMEND
1298; GFX940-NEXT:    s_nop 0
1299; GFX940-NEXT:    v_mov_b32_e32 v0, v5
1300; GFX940-NEXT:    v_mov_b32_e32 v1, v4
1301; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1302; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1303; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1304; GFX940-NEXT:    s_waitcnt vmcnt(0)
1305; GFX940-NEXT:    s_setpc_b64 s[30:31]
1306  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1307  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
1308  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1309  ret void
1310}
1311
; Shuffle mask <2, 0, 0, 0> over a single <3 x i32> input produced by inline
; asm with a "=v" output constraint; the second shuffle operand is poison.
; Lane 0 takes element 2 of %vec0 and lanes 1-3 each take element 0 of %vec0.
; The <4 x i32> result is stored to the global pointer argument.
1312define void @v_shuffle_v4i32_v3i32__2_0_0_0(ptr addrspace(1) inreg %ptr) {
1313; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0:
1314; GFX900:       ; %bb.0:
1315; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1316; GFX900-NEXT:    ;;#ASMSTART
1317; GFX900-NEXT:    ; def v[1:3]
1318; GFX900-NEXT:    ;;#ASMEND
1319; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1320; GFX900-NEXT:    v_mov_b32_e32 v0, v3
1321; GFX900-NEXT:    v_mov_b32_e32 v2, v1
1322; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1323; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
1324; GFX900-NEXT:    s_waitcnt vmcnt(0)
1325; GFX900-NEXT:    s_setpc_b64 s[30:31]
1326;
1327; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0:
1328; GFX90A:       ; %bb.0:
1329; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1331; GFX90A-NEXT:    ;;#ASMSTART
1332; GFX90A-NEXT:    ; def v[4:6]
1333; GFX90A-NEXT:    ;;#ASMEND
1334; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
1335; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
1336; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1337; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1338; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1339; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1340; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1341;
1342; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_0_0_0:
1343; GFX940:       ; %bb.0:
1344; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1346; GFX940-NEXT:    ;;#ASMSTART
1347; GFX940-NEXT:    ; def v[4:6]
1348; GFX940-NEXT:    ;;#ASMEND
1349; GFX940-NEXT:    s_nop 0
1350; GFX940-NEXT:    v_mov_b32_e32 v0, v6
1351; GFX940-NEXT:    v_mov_b32_e32 v1, v4
1352; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1353; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1354; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1355; GFX940-NEXT:    s_waitcnt vmcnt(0)
1356; GFX940-NEXT:    s_setpc_b64 s[30:31]
1357  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1358  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
1359  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1360  ret void
1361}
1362
; Shuffle mask <3, 0, 0, 0> over a single <3 x i32> input produced by inline
; asm with a "=v" output constraint. Index 3 addresses element 0 of the second
; shuffle operand, which is poison in this function, so lane 0 carries no
; defined value; lanes 1-3 each take element 0 of %vec0. The expected code for
; all three targets is the same as for the <poison, 0, 0, 0> case
; (v_shuffle_v4i32_v3i32__u_0_0_0). The <4 x i32> result is stored to the
; global pointer argument.
1363define void @v_shuffle_v4i32_v3i32__3_0_0_0(ptr addrspace(1) inreg %ptr) {
1364; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0:
1365; GFX900:       ; %bb.0:
1366; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367; GFX900-NEXT:    ;;#ASMSTART
1368; GFX900-NEXT:    ; def v[1:3]
1369; GFX900-NEXT:    ;;#ASMEND
1370; GFX900-NEXT:    v_mov_b32_e32 v0, 0
1371; GFX900-NEXT:    v_mov_b32_e32 v2, v1
1372; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1373; GFX900-NEXT:    global_store_dwordx4 v0, v[0:3], s[16:17]
1374; GFX900-NEXT:    s_waitcnt vmcnt(0)
1375; GFX900-NEXT:    s_setpc_b64 s[30:31]
1376;
1377; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0:
1378; GFX90A:       ; %bb.0:
1379; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380; GFX90A-NEXT:    ;;#ASMSTART
1381; GFX90A-NEXT:    ; def v[0:2]
1382; GFX90A-NEXT:    ;;#ASMEND
1383; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1384; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
1385; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1386; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
1387; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
1388; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1389; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1390;
1391; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_0_0_0:
1392; GFX940:       ; %bb.0:
1393; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1394; GFX940-NEXT:    ;;#ASMSTART
1395; GFX940-NEXT:    ; def v[0:2]
1396; GFX940-NEXT:    ;;#ASMEND
1397; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1398; GFX940-NEXT:    v_mov_b32_e32 v1, v0
1399; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1400; GFX940-NEXT:    v_mov_b32_e32 v3, v0
1401; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
1402; GFX940-NEXT:    s_waitcnt vmcnt(0)
1403; GFX940-NEXT:    s_setpc_b64 s[30:31]
1404  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1405  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
1406  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1407  ret void
1408}
1409
; Shuffle mask <4, 0, 0, 0> over two <3 x i32> inputs produced by inline asm
; with a "=v" output constraint. Indices 0-2 address the first operand and 3-5
; the second, so lane 0 takes element 1 of %vec1 and lanes 1-3 each take
; element 0 of %vec0. Both inputs are read, hence two asm defs in the expected
; output. The <4 x i32> result is stored to the global pointer argument.
1410define void @v_shuffle_v4i32_v3i32__4_0_0_0(ptr addrspace(1) inreg %ptr) {
1411; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_0_0_0:
1412; GFX900:       ; %bb.0:
1413; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1414; GFX900-NEXT:    ;;#ASMSTART
1415; GFX900-NEXT:    ; def v[1:3]
1416; GFX900-NEXT:    ;;#ASMEND
1417; GFX900-NEXT:    ;;#ASMSTART
1418; GFX900-NEXT:    ; def v[2:4]
1419; GFX900-NEXT:    ;;#ASMEND
1420; GFX900-NEXT:    v_mov_b32_e32 v5, 0
1421; GFX900-NEXT:    v_mov_b32_e32 v0, v3
1422; GFX900-NEXT:    v_mov_b32_e32 v2, v1
1423; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1424; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
1425; GFX900-NEXT:    s_waitcnt vmcnt(0)
1426; GFX900-NEXT:    s_setpc_b64 s[30:31]
1427;
1428; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_0_0_0:
1429; GFX90A:       ; %bb.0:
1430; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1431; GFX90A-NEXT:    ;;#ASMSTART
1432; GFX90A-NEXT:    ; def v[0:2]
1433; GFX90A-NEXT:    ;;#ASMEND
1434; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1435; GFX90A-NEXT:    ;;#ASMSTART
1436; GFX90A-NEXT:    ; def v[4:6]
1437; GFX90A-NEXT:    ;;#ASMEND
1438; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
1439; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
1440; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1441; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1442; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1443; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1444; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1445;
1446; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_0_0_0:
1447; GFX940:       ; %bb.0:
1448; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1449; GFX940-NEXT:    ;;#ASMSTART
1450; GFX940-NEXT:    ; def v[0:2]
1451; GFX940-NEXT:    ;;#ASMEND
1452; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1453; GFX940-NEXT:    ;;#ASMSTART
1454; GFX940-NEXT:    ; def v[4:6]
1455; GFX940-NEXT:    ;;#ASMEND
1456; GFX940-NEXT:    v_mov_b32_e32 v0, v1
1457; GFX940-NEXT:    v_mov_b32_e32 v1, v4
1458; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1459; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1460; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1461; GFX940-NEXT:    s_waitcnt vmcnt(0)
1462; GFX940-NEXT:    s_setpc_b64 s[30:31]
1463  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1464  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1465  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
1466  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1467  ret void
1468}
1469
; Shuffle mask <5, 0, 0, 0> over two <3 x i32> inputs produced by inline asm
; with a "=v" output constraint. Indices 0-2 address the first operand and 3-5
; the second, so lane 0 takes element 2 of %vec1 and lanes 1-3 each take
; element 0 of %vec0. Both inputs are read, hence two asm defs in the expected
; output. The <4 x i32> result is stored to the global pointer argument.
1470define void @v_shuffle_v4i32_v3i32__5_0_0_0(ptr addrspace(1) inreg %ptr) {
1471; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_0_0:
1472; GFX900:       ; %bb.0:
1473; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1474; GFX900-NEXT:    ;;#ASMSTART
1475; GFX900-NEXT:    ; def v[1:3]
1476; GFX900-NEXT:    ;;#ASMEND
1477; GFX900-NEXT:    ;;#ASMSTART
1478; GFX900-NEXT:    ; def v[2:4]
1479; GFX900-NEXT:    ;;#ASMEND
1480; GFX900-NEXT:    v_mov_b32_e32 v5, 0
1481; GFX900-NEXT:    v_mov_b32_e32 v0, v4
1482; GFX900-NEXT:    v_mov_b32_e32 v2, v1
1483; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1484; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
1485; GFX900-NEXT:    s_waitcnt vmcnt(0)
1486; GFX900-NEXT:    s_setpc_b64 s[30:31]
1487;
1488; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_0_0:
1489; GFX90A:       ; %bb.0:
1490; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491; GFX90A-NEXT:    ;;#ASMSTART
1492; GFX90A-NEXT:    ; def v[0:2]
1493; GFX90A-NEXT:    ;;#ASMEND
1494; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1495; GFX90A-NEXT:    ;;#ASMSTART
1496; GFX90A-NEXT:    ; def v[4:6]
1497; GFX90A-NEXT:    ;;#ASMEND
1498; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
1499; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
1500; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1501; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1502; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1503; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1504; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1505;
1506; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_0_0:
1507; GFX940:       ; %bb.0:
1508; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1509; GFX940-NEXT:    ;;#ASMSTART
1510; GFX940-NEXT:    ; def v[0:2]
1511; GFX940-NEXT:    ;;#ASMEND
1512; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1513; GFX940-NEXT:    ;;#ASMSTART
1514; GFX940-NEXT:    ; def v[4:6]
1515; GFX940-NEXT:    ;;#ASMEND
1516; GFX940-NEXT:    v_mov_b32_e32 v0, v2
1517; GFX940-NEXT:    v_mov_b32_e32 v1, v4
1518; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1519; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1520; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1521; GFX940-NEXT:    s_waitcnt vmcnt(0)
1522; GFX940-NEXT:    s_setpc_b64 s[30:31]
1523  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1524  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1525  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
1526  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1527  ret void
1528}
1529
; Shuffle mask <5, poison, 0, 0> over two <3 x i32> inputs produced by inline
; asm with a "=v" output constraint. Indices 0-2 address the first operand and
; 3-5 the second, so lane 0 takes element 2 of %vec1, lane 1 is left
; unspecified, and lanes 2-3 each take element 0 of %vec0. Both inputs are
; read, hence two asm defs in the expected output. The <4 x i32> result is
; stored to the global pointer argument.
1530define void @v_shuffle_v4i32_v3i32__5_u_0_0(ptr addrspace(1) inreg %ptr) {
1531; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0:
1532; GFX900:       ; %bb.0:
1533; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1534; GFX900-NEXT:    ;;#ASMSTART
1535; GFX900-NEXT:    ; def v[1:3]
1536; GFX900-NEXT:    ;;#ASMEND
1537; GFX900-NEXT:    ;;#ASMSTART
1538; GFX900-NEXT:    ; def v[2:4]
1539; GFX900-NEXT:    ;;#ASMEND
1540; GFX900-NEXT:    v_mov_b32_e32 v5, 0
1541; GFX900-NEXT:    v_mov_b32_e32 v0, v4
1542; GFX900-NEXT:    v_mov_b32_e32 v2, v1
1543; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1544; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
1545; GFX900-NEXT:    s_waitcnt vmcnt(0)
1546; GFX900-NEXT:    s_setpc_b64 s[30:31]
1547;
1548; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0:
1549; GFX90A:       ; %bb.0:
1550; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1551; GFX90A-NEXT:    ;;#ASMSTART
1552; GFX90A-NEXT:    ; def v[0:2]
1553; GFX90A-NEXT:    ;;#ASMEND
1554; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1555; GFX90A-NEXT:    ;;#ASMSTART
1556; GFX90A-NEXT:    ; def v[4:6]
1557; GFX90A-NEXT:    ;;#ASMEND
1558; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
1559; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1560; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1561; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1562; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1563; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1564;
1565; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_0_0:
1566; GFX940:       ; %bb.0:
1567; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1568; GFX940-NEXT:    ;;#ASMSTART
1569; GFX940-NEXT:    ; def v[0:2]
1570; GFX940-NEXT:    ;;#ASMEND
1571; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1572; GFX940-NEXT:    ;;#ASMSTART
1573; GFX940-NEXT:    ; def v[4:6]
1574; GFX940-NEXT:    ;;#ASMEND
1575; GFX940-NEXT:    v_mov_b32_e32 v0, v2
1576; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1577; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1578; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1579; GFX940-NEXT:    s_waitcnt vmcnt(0)
1580; GFX940-NEXT:    s_setpc_b64 s[30:31]
1581  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1582  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1583  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
1584  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1585  ret void
1586}
1587
1588define void @v_shuffle_v4i32_v3i32__5_1_0_0(ptr addrspace(1) inreg %ptr) {
1589; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_0_0:
1590; GFX900:       ; %bb.0:
1591; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1592; GFX900-NEXT:    ;;#ASMSTART
1593; GFX900-NEXT:    ; def v[0:2]
1594; GFX900-NEXT:    ;;#ASMEND
1595; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1596; GFX900-NEXT:    ;;#ASMSTART
1597; GFX900-NEXT:    ; def v[3:5]
1598; GFX900-NEXT:    ;;#ASMEND
1599; GFX900-NEXT:    v_mov_b32_e32 v0, v2
1600; GFX900-NEXT:    v_mov_b32_e32 v1, v4
1601; GFX900-NEXT:    v_mov_b32_e32 v2, v3
1602; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1603; GFX900-NEXT:    s_waitcnt vmcnt(0)
1604; GFX900-NEXT:    s_setpc_b64 s[30:31]
1605;
1606; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_0_0:
1607; GFX90A:       ; %bb.0:
1608; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1609; GFX90A-NEXT:    ;;#ASMSTART
1610; GFX90A-NEXT:    ; def v[0:2]
1611; GFX90A-NEXT:    ;;#ASMEND
1612; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1613; GFX90A-NEXT:    ;;#ASMSTART
1614; GFX90A-NEXT:    ; def v[4:6]
1615; GFX90A-NEXT:    ;;#ASMEND
1616; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
1617; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
1618; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1619; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1620; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1621; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1622; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1623;
1624; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_0_0:
1625; GFX940:       ; %bb.0:
1626; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1627; GFX940-NEXT:    ;;#ASMSTART
1628; GFX940-NEXT:    ; def v[0:2]
1629; GFX940-NEXT:    ;;#ASMEND
1630; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1631; GFX940-NEXT:    ;;#ASMSTART
1632; GFX940-NEXT:    ; def v[4:6]
1633; GFX940-NEXT:    ;;#ASMEND
1634; GFX940-NEXT:    v_mov_b32_e32 v0, v2
1635; GFX940-NEXT:    v_mov_b32_e32 v1, v5
1636; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1637; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1638; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1639; GFX940-NEXT:    s_waitcnt vmcnt(0)
1640; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec0[1], %vec0[0], %vec0[0]>.
1641  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1642  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1643  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
1644  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1645  ret void
1646}
1647
1648define void @v_shuffle_v4i32_v3i32__5_2_0_0(ptr addrspace(1) inreg %ptr) {
1649; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_0_0:
1650; GFX900:       ; %bb.0:
1651; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1652; GFX900-NEXT:    ;;#ASMSTART
1653; GFX900-NEXT:    ; def v[0:2]
1654; GFX900-NEXT:    ;;#ASMEND
1655; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1656; GFX900-NEXT:    ;;#ASMSTART
1657; GFX900-NEXT:    ; def v[3:5]
1658; GFX900-NEXT:    ;;#ASMEND
1659; GFX900-NEXT:    v_mov_b32_e32 v0, v2
1660; GFX900-NEXT:    v_mov_b32_e32 v1, v5
1661; GFX900-NEXT:    v_mov_b32_e32 v2, v3
1662; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1663; GFX900-NEXT:    s_waitcnt vmcnt(0)
1664; GFX900-NEXT:    s_setpc_b64 s[30:31]
1665;
1666; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_0_0:
1667; GFX90A:       ; %bb.0:
1668; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1669; GFX90A-NEXT:    ;;#ASMSTART
1670; GFX90A-NEXT:    ; def v[0:2]
1671; GFX90A-NEXT:    ;;#ASMEND
1672; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1673; GFX90A-NEXT:    ;;#ASMSTART
1674; GFX90A-NEXT:    ; def v[4:6]
1675; GFX90A-NEXT:    ;;#ASMEND
1676; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
1677; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
1678; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1679; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1680; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1681; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1682; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1683;
1684; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_0_0:
1685; GFX940:       ; %bb.0:
1686; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1687; GFX940-NEXT:    ;;#ASMSTART
1688; GFX940-NEXT:    ; def v[0:2]
1689; GFX940-NEXT:    ;;#ASMEND
1690; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1691; GFX940-NEXT:    ;;#ASMSTART
1692; GFX940-NEXT:    ; def v[4:6]
1693; GFX940-NEXT:    ;;#ASMEND
1694; GFX940-NEXT:    v_mov_b32_e32 v0, v2
1695; GFX940-NEXT:    v_mov_b32_e32 v1, v6
1696; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1697; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1698; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1699; GFX940-NEXT:    s_waitcnt vmcnt(0)
1700; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec0[2], %vec0[0], %vec0[0]>.
1701  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1702  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1703  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
1704  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1705  ret void
1706}
1707
1708define void @v_shuffle_v4i32_v3i32__5_3_0_0(ptr addrspace(1) inreg %ptr) {
1709; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_0_0:
1710; GFX900:       ; %bb.0:
1711; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1712; GFX900-NEXT:    ;;#ASMSTART
1713; GFX900-NEXT:    ; def v[1:3]
1714; GFX900-NEXT:    ;;#ASMEND
1715; GFX900-NEXT:    v_mov_b32_e32 v7, 0
1716; GFX900-NEXT:    ;;#ASMSTART
1717; GFX900-NEXT:    ; def v[4:6]
1718; GFX900-NEXT:    ;;#ASMEND
1719; GFX900-NEXT:    v_mov_b32_e32 v0, v3
1720; GFX900-NEXT:    v_mov_b32_e32 v2, v4
1721; GFX900-NEXT:    v_mov_b32_e32 v3, v4
1722; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1723; GFX900-NEXT:    s_waitcnt vmcnt(0)
1724; GFX900-NEXT:    s_setpc_b64 s[30:31]
1725;
1726; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_0_0:
1727; GFX90A:       ; %bb.0:
1728; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1729; GFX90A-NEXT:    ;;#ASMSTART
1730; GFX90A-NEXT:    ; def v[4:6]
1731; GFX90A-NEXT:    ;;#ASMEND
1732; GFX90A-NEXT:    v_mov_b32_e32 v9, 0
1733; GFX90A-NEXT:    ;;#ASMSTART
1734; GFX90A-NEXT:    ; def v[6:8]
1735; GFX90A-NEXT:    ;;#ASMEND
1736; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
1737; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
1738; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1739; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1740; GFX90A-NEXT:    global_store_dwordx4 v9, v[0:3], s[16:17]
1741; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1742; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1743;
1744; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_0_0:
1745; GFX940:       ; %bb.0:
1746; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1747; GFX940-NEXT:    ;;#ASMSTART
1748; GFX940-NEXT:    ; def v[4:6]
1749; GFX940-NEXT:    ;;#ASMEND
1750; GFX940-NEXT:    v_mov_b32_e32 v9, 0
1751; GFX940-NEXT:    ;;#ASMSTART
1752; GFX940-NEXT:    ; def v[6:8]
1753; GFX940-NEXT:    ;;#ASMEND
1754; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1755; GFX940-NEXT:    v_mov_b32_e32 v0, v8
1756; GFX940-NEXT:    v_mov_b32_e32 v1, v6
1757; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1758; GFX940-NEXT:    global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1
1759; GFX940-NEXT:    s_waitcnt vmcnt(0)
1760; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec1[0], %vec0[0], %vec0[0]>.
1761  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1762  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1763  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
1764  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1765  ret void
1766}
1767
1768define void @v_shuffle_v4i32_v3i32__5_4_0_0(ptr addrspace(1) inreg %ptr) {
1769; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_0_0:
1770; GFX900:       ; %bb.0:
1771; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1772; GFX900-NEXT:    ;;#ASMSTART
1773; GFX900-NEXT:    ; def v[0:2]
1774; GFX900-NEXT:    ;;#ASMEND
1775; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1776; GFX900-NEXT:    ;;#ASMSTART
1777; GFX900-NEXT:    ; def v[3:5]
1778; GFX900-NEXT:    ;;#ASMEND
1779; GFX900-NEXT:    v_mov_b32_e32 v0, v2
1780; GFX900-NEXT:    v_mov_b32_e32 v2, v3
1781; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1782; GFX900-NEXT:    s_waitcnt vmcnt(0)
1783; GFX900-NEXT:    s_setpc_b64 s[30:31]
1784;
1785; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_0_0:
1786; GFX90A:       ; %bb.0:
1787; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1788; GFX90A-NEXT:    ;;#ASMSTART
1789; GFX90A-NEXT:    ; def v[0:2]
1790; GFX90A-NEXT:    ;;#ASMEND
1791; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1792; GFX90A-NEXT:    ;;#ASMSTART
1793; GFX90A-NEXT:    ; def v[4:6]
1794; GFX90A-NEXT:    ;;#ASMEND
1795; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
1796; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1797; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1798; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1799; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1800; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1801;
1802; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_0_0:
1803; GFX940:       ; %bb.0:
1804; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1805; GFX940-NEXT:    ;;#ASMSTART
1806; GFX940-NEXT:    ; def v[0:2]
1807; GFX940-NEXT:    ;;#ASMEND
1808; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1809; GFX940-NEXT:    ;;#ASMSTART
1810; GFX940-NEXT:    ; def v[4:6]
1811; GFX940-NEXT:    ;;#ASMEND
1812; GFX940-NEXT:    v_mov_b32_e32 v0, v2
1813; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1814; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1815; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1816; GFX940-NEXT:    s_waitcnt vmcnt(0)
1817; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec1[1], %vec0[0], %vec0[0]>.
1818  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1819  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1820  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
1821  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1822  ret void
1823}
1824
1825define void @v_shuffle_v4i32_v3i32__5_5_0_0(ptr addrspace(1) inreg %ptr) {
1826; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_0:
1827; GFX900:       ; %bb.0:
1828; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1829; GFX900-NEXT:    ;;#ASMSTART
1830; GFX900-NEXT:    ; def v[0:2]
1831; GFX900-NEXT:    ;;#ASMEND
1832; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1833; GFX900-NEXT:    ;;#ASMSTART
1834; GFX900-NEXT:    ; def v[3:5]
1835; GFX900-NEXT:    ;;#ASMEND
1836; GFX900-NEXT:    v_mov_b32_e32 v0, v2
1837; GFX900-NEXT:    v_mov_b32_e32 v1, v2
1838; GFX900-NEXT:    v_mov_b32_e32 v2, v3
1839; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1840; GFX900-NEXT:    s_waitcnt vmcnt(0)
1841; GFX900-NEXT:    s_setpc_b64 s[30:31]
1842;
1843; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_0:
1844; GFX90A:       ; %bb.0:
1845; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1846; GFX90A-NEXT:    ;;#ASMSTART
1847; GFX90A-NEXT:    ; def v[0:2]
1848; GFX90A-NEXT:    ;;#ASMEND
1849; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1850; GFX90A-NEXT:    ;;#ASMSTART
1851; GFX90A-NEXT:    ; def v[4:6]
1852; GFX90A-NEXT:    ;;#ASMEND
1853; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
1854; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
1855; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1856; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1857; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1858; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1859; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1860;
1861; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_0:
1862; GFX940:       ; %bb.0:
1863; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864; GFX940-NEXT:    ;;#ASMSTART
1865; GFX940-NEXT:    ; def v[0:2]
1866; GFX940-NEXT:    ;;#ASMEND
1867; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1868; GFX940-NEXT:    ;;#ASMSTART
1869; GFX940-NEXT:    ; def v[4:6]
1870; GFX940-NEXT:    ;;#ASMEND
1871; GFX940-NEXT:    v_mov_b32_e32 v0, v2
1872; GFX940-NEXT:    v_mov_b32_e32 v1, v2
1873; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1874; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1875; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1876; GFX940-NEXT:    s_waitcnt vmcnt(0)
1877; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec1[2], %vec0[0], %vec0[0]>.
1878  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1879  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1880  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
1881  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1882  ret void
1883}
1884
1885define void @v_shuffle_v4i32_v3i32__5_5_u_0(ptr addrspace(1) inreg %ptr) {
1886; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_0:
1887; GFX900:       ; %bb.0:
1888; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1889; GFX900-NEXT:    ;;#ASMSTART
1890; GFX900-NEXT:    ; def v[2:4]
1891; GFX900-NEXT:    ;;#ASMEND
1892; GFX900-NEXT:    ;;#ASMSTART
1893; GFX900-NEXT:    ; def v[3:5]
1894; GFX900-NEXT:    ;;#ASMEND
1895; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1896; GFX900-NEXT:    v_mov_b32_e32 v0, v5
1897; GFX900-NEXT:    v_mov_b32_e32 v1, v5
1898; GFX900-NEXT:    v_mov_b32_e32 v3, v2
1899; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1900; GFX900-NEXT:    s_waitcnt vmcnt(0)
1901; GFX900-NEXT:    s_setpc_b64 s[30:31]
1902;
1903; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_0:
1904; GFX90A:       ; %bb.0:
1905; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1906; GFX90A-NEXT:    ;;#ASMSTART
1907; GFX90A-NEXT:    ; def v[2:4]
1908; GFX90A-NEXT:    ;;#ASMEND
1909; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1910; GFX90A-NEXT:    ;;#ASMSTART
1911; GFX90A-NEXT:    ; def v[4:6]
1912; GFX90A-NEXT:    ;;#ASMEND
1913; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
1914; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
1915; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
1916; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1917; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1918; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1919;
1920; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_0:
1921; GFX940:       ; %bb.0:
1922; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1923; GFX940-NEXT:    ;;#ASMSTART
1924; GFX940-NEXT:    ; def v[2:4]
1925; GFX940-NEXT:    ;;#ASMEND
1926; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1927; GFX940-NEXT:    ;;#ASMSTART
1928; GFX940-NEXT:    ; def v[4:6]
1929; GFX940-NEXT:    ;;#ASMEND
1930; GFX940-NEXT:    v_mov_b32_e32 v3, v2
1931; GFX940-NEXT:    v_mov_b32_e32 v0, v6
1932; GFX940-NEXT:    v_mov_b32_e32 v1, v6
1933; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1934; GFX940-NEXT:    s_waitcnt vmcnt(0)
1935; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec1[2], poison, %vec0[0]>.
1936  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1937  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1938  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
1939  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
1940  ret void
1941}
1942
1943define void @v_shuffle_v4i32_v3i32__5_5_1_0(ptr addrspace(1) inreg %ptr) {
1944; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_0:
1945; GFX900:       ; %bb.0:
1946; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1947; GFX900-NEXT:    ;;#ASMSTART
1948; GFX900-NEXT:    ; def v[0:2]
1949; GFX900-NEXT:    ;;#ASMEND
1950; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1951; GFX900-NEXT:    ;;#ASMSTART
1952; GFX900-NEXT:    ; def v[3:5]
1953; GFX900-NEXT:    ;;#ASMEND
1954; GFX900-NEXT:    v_mov_b32_e32 v0, v2
1955; GFX900-NEXT:    v_mov_b32_e32 v1, v2
1956; GFX900-NEXT:    v_mov_b32_e32 v2, v4
1957; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1958; GFX900-NEXT:    s_waitcnt vmcnt(0)
1959; GFX900-NEXT:    s_setpc_b64 s[30:31]
1960;
1961; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_0:
1962; GFX90A:       ; %bb.0:
1963; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1964; GFX90A-NEXT:    ;;#ASMSTART
1965; GFX90A-NEXT:    ; def v[0:2]
1966; GFX90A-NEXT:    ;;#ASMEND
1967; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
1968; GFX90A-NEXT:    ;;#ASMSTART
1969; GFX90A-NEXT:    ; def v[4:6]
1970; GFX90A-NEXT:    ;;#ASMEND
1971; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
1972; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
1973; GFX90A-NEXT:    v_mov_b32_e32 v2, v5
1974; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
1975; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
1976; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1977; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1978;
1979; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_0:
1980; GFX940:       ; %bb.0:
1981; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1982; GFX940-NEXT:    ;;#ASMSTART
1983; GFX940-NEXT:    ; def v[0:2]
1984; GFX940-NEXT:    ;;#ASMEND
1985; GFX940-NEXT:    v_mov_b32_e32 v7, 0
1986; GFX940-NEXT:    ;;#ASMSTART
1987; GFX940-NEXT:    ; def v[4:6]
1988; GFX940-NEXT:    ;;#ASMEND
1989; GFX940-NEXT:    v_mov_b32_e32 v0, v2
1990; GFX940-NEXT:    v_mov_b32_e32 v1, v2
1991; GFX940-NEXT:    v_mov_b32_e32 v2, v5
1992; GFX940-NEXT:    v_mov_b32_e32 v3, v4
1993; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
1994; GFX940-NEXT:    s_waitcnt vmcnt(0)
1995; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec1[2], %vec0[1], %vec0[0]>.
1996  %vec0 = call <3 x i32> asm "; def $0", "=v"()
1997  %vec1 = call <3 x i32> asm "; def $0", "=v"()
1998  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
1999  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2000  ret void
2001}
2002
2003define void @v_shuffle_v4i32_v3i32__5_5_2_0(ptr addrspace(1) inreg %ptr) {
2004; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0:
2005; GFX900:       ; %bb.0:
2006; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007; GFX900-NEXT:    ;;#ASMSTART
2008; GFX900-NEXT:    ; def v[0:2]
2009; GFX900-NEXT:    ;;#ASMEND
2010; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2011; GFX900-NEXT:    ;;#ASMSTART
2012; GFX900-NEXT:    ; def v[3:5]
2013; GFX900-NEXT:    ;;#ASMEND
2014; GFX900-NEXT:    v_mov_b32_e32 v0, v2
2015; GFX900-NEXT:    v_mov_b32_e32 v1, v2
2016; GFX900-NEXT:    v_mov_b32_e32 v2, v5
2017; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2018; GFX900-NEXT:    s_waitcnt vmcnt(0)
2019; GFX900-NEXT:    s_setpc_b64 s[30:31]
2020;
2021; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0:
2022; GFX90A:       ; %bb.0:
2023; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2024; GFX90A-NEXT:    ;;#ASMSTART
2025; GFX90A-NEXT:    ; def v[0:2]
2026; GFX90A-NEXT:    ;;#ASMEND
2027; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
2028; GFX90A-NEXT:    ;;#ASMSTART
2029; GFX90A-NEXT:    ; def v[4:6]
2030; GFX90A-NEXT:    ;;#ASMEND
2031; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
2032; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
2033; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
2034; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
2035; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
2036; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2037; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2038;
2039; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_0:
2040; GFX940:       ; %bb.0:
2041; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2042; GFX940-NEXT:    ;;#ASMSTART
2043; GFX940-NEXT:    ; def v[0:2]
2044; GFX940-NEXT:    ;;#ASMEND
2045; GFX940-NEXT:    v_mov_b32_e32 v7, 0
2046; GFX940-NEXT:    ;;#ASMSTART
2047; GFX940-NEXT:    ; def v[4:6]
2048; GFX940-NEXT:    ;;#ASMEND
2049; GFX940-NEXT:    v_mov_b32_e32 v0, v2
2050; GFX940-NEXT:    v_mov_b32_e32 v1, v2
2051; GFX940-NEXT:    v_mov_b32_e32 v2, v6
2052; GFX940-NEXT:    v_mov_b32_e32 v3, v4
2053; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
2054; GFX940-NEXT:    s_waitcnt vmcnt(0)
2055; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec1[2], %vec0[2], %vec0[0]>.
2056  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2057  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2058  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
2059  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2060  ret void
2061}
2062
2063define void @v_shuffle_v4i32_v3i32__5_5_3_0(ptr addrspace(1) inreg %ptr) {
2064; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0:
2065; GFX900:       ; %bb.0:
2066; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2067; GFX900-NEXT:    ;;#ASMSTART
2068; GFX900-NEXT:    ; def v[3:5]
2069; GFX900-NEXT:    ;;#ASMEND
2070; GFX900-NEXT:    v_mov_b32_e32 v7, 0
2071; GFX900-NEXT:    ;;#ASMSTART
2072; GFX900-NEXT:    ; def v[4:6]
2073; GFX900-NEXT:    ;;#ASMEND
2074; GFX900-NEXT:    v_mov_b32_e32 v0, v6
2075; GFX900-NEXT:    v_mov_b32_e32 v1, v6
2076; GFX900-NEXT:    v_mov_b32_e32 v2, v4
2077; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
2078; GFX900-NEXT:    s_waitcnt vmcnt(0)
2079; GFX900-NEXT:    s_setpc_b64 s[30:31]
2080;
2081; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0:
2082; GFX90A:       ; %bb.0:
2083; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2084; GFX90A-NEXT:    ;;#ASMSTART
2085; GFX90A-NEXT:    ; def v[4:6]
2086; GFX90A-NEXT:    ;;#ASMEND
2087; GFX90A-NEXT:    v_mov_b32_e32 v9, 0
2088; GFX90A-NEXT:    ;;#ASMSTART
2089; GFX90A-NEXT:    ; def v[6:8]
2090; GFX90A-NEXT:    ;;#ASMEND
2091; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
2092; GFX90A-NEXT:    v_mov_b32_e32 v1, v8
2093; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
2094; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
2095; GFX90A-NEXT:    global_store_dwordx4 v9, v[0:3], s[16:17]
2096; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2097; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2098;
2099; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_0:
2100; GFX940:       ; %bb.0:
2101; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2102; GFX940-NEXT:    ;;#ASMSTART
2103; GFX940-NEXT:    ; def v[4:6]
2104; GFX940-NEXT:    ;;#ASMEND
2105; GFX940-NEXT:    v_mov_b32_e32 v9, 0
2106; GFX940-NEXT:    ;;#ASMSTART
2107; GFX940-NEXT:    ; def v[6:8]
2108; GFX940-NEXT:    ;;#ASMEND
2109; GFX940-NEXT:    v_mov_b32_e32 v3, v4
2110; GFX940-NEXT:    v_mov_b32_e32 v0, v8
2111; GFX940-NEXT:    v_mov_b32_e32 v1, v8
2112; GFX940-NEXT:    v_mov_b32_e32 v2, v6
2113; GFX940-NEXT:    global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1
2114; GFX940-NEXT:    s_waitcnt vmcnt(0)
2115; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec1[2], %vec1[0], %vec0[0]>.
2116  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2117  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2118  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
2119  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2120  ret void
2121}
2122
2123define void @v_shuffle_v4i32_v3i32__5_5_4_0(ptr addrspace(1) inreg %ptr) {
2124; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_0:
2125; GFX900:       ; %bb.0:
2126; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2127; GFX900-NEXT:    ;;#ASMSTART
2128; GFX900-NEXT:    ; def v[1:3]
2129; GFX900-NEXT:    ;;#ASMEND
2130; GFX900-NEXT:    v_mov_b32_e32 v7, 0
2131; GFX900-NEXT:    ;;#ASMSTART
2132; GFX900-NEXT:    ; def v[4:6]
2133; GFX900-NEXT:    ;;#ASMEND
2134; GFX900-NEXT:    v_mov_b32_e32 v0, v3
2135; GFX900-NEXT:    v_mov_b32_e32 v1, v3
2136; GFX900-NEXT:    v_mov_b32_e32 v3, v4
2137; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
2138; GFX900-NEXT:    s_waitcnt vmcnt(0)
2139; GFX900-NEXT:    s_setpc_b64 s[30:31]
2140;
2141; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_0:
2142; GFX90A:       ; %bb.0:
2143; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2144; GFX90A-NEXT:    ;;#ASMSTART
2145; GFX90A-NEXT:    ; def v[4:6]
2146; GFX90A-NEXT:    ;;#ASMEND
2147; GFX90A-NEXT:    v_mov_b32_e32 v9, 0
2148; GFX90A-NEXT:    ;;#ASMSTART
2149; GFX90A-NEXT:    ; def v[6:8]
2150; GFX90A-NEXT:    ;;#ASMEND
2151; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
2152; GFX90A-NEXT:    v_mov_b32_e32 v1, v8
2153; GFX90A-NEXT:    v_mov_b32_e32 v2, v7
2154; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
2155; GFX90A-NEXT:    global_store_dwordx4 v9, v[0:3], s[16:17]
2156; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2157; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2158;
2159; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_0:
2160; GFX940:       ; %bb.0:
2161; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2162; GFX940-NEXT:    ;;#ASMSTART
2163; GFX940-NEXT:    ; def v[4:6]
2164; GFX940-NEXT:    ;;#ASMEND
2165; GFX940-NEXT:    v_mov_b32_e32 v9, 0
2166; GFX940-NEXT:    ;;#ASMSTART
2167; GFX940-NEXT:    ; def v[6:8]
2168; GFX940-NEXT:    ;;#ASMEND
2169; GFX940-NEXT:    v_mov_b32_e32 v3, v4
2170; GFX940-NEXT:    v_mov_b32_e32 v0, v8
2171; GFX940-NEXT:    v_mov_b32_e32 v1, v8
2172; GFX940-NEXT:    v_mov_b32_e32 v2, v7
2173; GFX940-NEXT:    global_store_dwordx4 v9, v[0:3], s[0:1] sc0 sc1
2174; GFX940-NEXT:    s_waitcnt vmcnt(0)
2175; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both <3 x i32> inputs are inline-asm outputs ("=v" constraint).
  ; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the stored
  ; vector is <%vec1[2], %vec1[2], %vec1[1], %vec0[0]>.
2176  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2177  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2178  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
2179  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2180  ret void
2181}
2182
2183define void @v_shuffle_v4i32_v3i32__u_1_1_1(ptr addrspace(1) inreg %ptr) {
2184; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_1_1_1:
2185; GFX900:       ; %bb.0:
2186; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2187; GFX900-NEXT:    ;;#ASMSTART
2188; GFX900-NEXT:    ; def v[0:2]
2189; GFX900-NEXT:    ;;#ASMEND
2190; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2191; GFX900-NEXT:    v_mov_b32_e32 v2, v1
2192; GFX900-NEXT:    v_mov_b32_e32 v3, v1
2193; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2194; GFX900-NEXT:    s_waitcnt vmcnt(0)
2195; GFX900-NEXT:    s_setpc_b64 s[30:31]
2196;
2197; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_1_1_1:
2198; GFX90A:       ; %bb.0:
2199; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2200; GFX90A-NEXT:    ;;#ASMSTART
2201; GFX90A-NEXT:    ; def v[0:2]
2202; GFX90A-NEXT:    ;;#ASMEND
2203; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2204; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
2205; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2206; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2207; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2208; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2209;
2210; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_1_1_1:
2211; GFX940:       ; %bb.0:
2212; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2213; GFX940-NEXT:    ;;#ASMSTART
2214; GFX940-NEXT:    ; def v[0:2]
2215; GFX940-NEXT:    ;;#ASMEND
2216; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2217; GFX940-NEXT:    v_mov_b32_e32 v2, v1
2218; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2219; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
2220; GFX940-NEXT:    s_waitcnt vmcnt(0)
2221; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-source shuffle: the second operand is poison and every defined
  ; mask index is below 3, so only %vec0 (an inline-asm "=v" output) is read.
  ; Stored vector is <poison, %vec0[1], %vec0[1], %vec0[1]>.
2222  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2223  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
2224  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2225  ret void
2226}
2227
2228define void @v_shuffle_v4i32_v3i32__0_1_1_1(ptr addrspace(1) inreg %ptr) {
2229; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_1_1_1:
2230; GFX900:       ; %bb.0:
2231; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2232; GFX900-NEXT:    ;;#ASMSTART
2233; GFX900-NEXT:    ; def v[0:2]
2234; GFX900-NEXT:    ;;#ASMEND
2235; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2236; GFX900-NEXT:    v_mov_b32_e32 v2, v1
2237; GFX900-NEXT:    v_mov_b32_e32 v3, v1
2238; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2239; GFX900-NEXT:    s_waitcnt vmcnt(0)
2240; GFX900-NEXT:    s_setpc_b64 s[30:31]
2241;
2242; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_1_1_1:
2243; GFX90A:       ; %bb.0:
2244; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2245; GFX90A-NEXT:    ;;#ASMSTART
2246; GFX90A-NEXT:    ; def v[0:2]
2247; GFX90A-NEXT:    ;;#ASMEND
2248; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2249; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
2250; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2251; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2252; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2253; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2254;
2255; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_1_1_1:
2256; GFX940:       ; %bb.0:
2257; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2258; GFX940-NEXT:    ;;#ASMSTART
2259; GFX940-NEXT:    ; def v[0:2]
2260; GFX940-NEXT:    ;;#ASMEND
2261; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2262; GFX940-NEXT:    v_mov_b32_e32 v2, v1
2263; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2264; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
2265; GFX940-NEXT:    s_waitcnt vmcnt(0)
2266; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-source shuffle: the second operand is poison and every mask index
  ; is below 3, so only %vec0 (an inline-asm "=v" output) is read.
  ; Stored vector is <%vec0[0], %vec0[1], %vec0[1], %vec0[1]>.
2267  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2268  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
2269  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2270  ret void
2271}
2272
2273define void @v_shuffle_v4i32_v3i32__1_1_1_1(ptr addrspace(1) inreg %ptr) {
2274; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_1_1_1:
2275; GFX900:       ; %bb.0:
2276; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2277; GFX900-NEXT:    ;;#ASMSTART
2278; GFX900-NEXT:    ; def v[0:2]
2279; GFX900-NEXT:    ;;#ASMEND
2280; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2281; GFX900-NEXT:    v_mov_b32_e32 v0, v1
2282; GFX900-NEXT:    v_mov_b32_e32 v2, v1
2283; GFX900-NEXT:    v_mov_b32_e32 v3, v1
2284; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2285; GFX900-NEXT:    s_waitcnt vmcnt(0)
2286; GFX900-NEXT:    s_setpc_b64 s[30:31]
2287;
2288; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_1_1_1:
2289; GFX90A:       ; %bb.0:
2290; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2291; GFX90A-NEXT:    ;;#ASMSTART
2292; GFX90A-NEXT:    ; def v[0:2]
2293; GFX90A-NEXT:    ;;#ASMEND
2294; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2295; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
2296; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
2297; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2298; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2299; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2300; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2301;
2302; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_1_1_1:
2303; GFX940:       ; %bb.0:
2304; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2305; GFX940-NEXT:    ;;#ASMSTART
2306; GFX940-NEXT:    ; def v[0:2]
2307; GFX940-NEXT:    ;;#ASMEND
2308; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2309; GFX940-NEXT:    v_mov_b32_e32 v0, v1
2310; GFX940-NEXT:    v_mov_b32_e32 v2, v1
2311; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2312; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
2313; GFX940-NEXT:    s_waitcnt vmcnt(0)
2314; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-source shuffle: the second operand is poison and every mask index
  ; is 1, so all four stored lanes are copies of %vec0[1] (a splat of one
  ; element of the inline-asm "=v" output).
2315  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2316  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2317  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2318  ret void
2319}
2320
2321define void @v_shuffle_v4i32_v3i32__2_1_1_1(ptr addrspace(1) inreg %ptr) {
2322; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1:
2323; GFX900:       ; %bb.0:
2324; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2325; GFX900-NEXT:    ;;#ASMSTART
2326; GFX900-NEXT:    ; def v[0:2]
2327; GFX900-NEXT:    ;;#ASMEND
2328; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2329; GFX900-NEXT:    v_mov_b32_e32 v0, v2
2330; GFX900-NEXT:    v_mov_b32_e32 v2, v1
2331; GFX900-NEXT:    v_mov_b32_e32 v3, v1
2332; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2333; GFX900-NEXT:    s_waitcnt vmcnt(0)
2334; GFX900-NEXT:    s_setpc_b64 s[30:31]
2335;
2336; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1:
2337; GFX90A:       ; %bb.0:
2338; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2339; GFX90A-NEXT:    ;;#ASMSTART
2340; GFX90A-NEXT:    ; def v[0:2]
2341; GFX90A-NEXT:    ;;#ASMEND
2342; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2343; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
2344; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
2345; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2346; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2347; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2348; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2349;
2350; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_1_1_1:
2351; GFX940:       ; %bb.0:
2352; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2353; GFX940-NEXT:    ;;#ASMSTART
2354; GFX940-NEXT:    ; def v[0:2]
2355; GFX940-NEXT:    ;;#ASMEND
2356; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2357; GFX940-NEXT:    v_mov_b32_e32 v0, v2
2358; GFX940-NEXT:    v_mov_b32_e32 v2, v1
2359; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2360; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
2361; GFX940-NEXT:    s_waitcnt vmcnt(0)
2362; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-source shuffle: the second operand is poison and every mask index
  ; is below 3, so only %vec0 (an inline-asm "=v" output) is read.
  ; Stored vector is <%vec0[2], %vec0[1], %vec0[1], %vec0[1]>.
2363  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2364  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
2365  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2366  ret void
2367}
2368
; Codegen test: shuffle a single <3 x i32> input (second operand is poison)
; into a <4 x i32> using mask <3, 1, 1, 1>. Mask index 3 selects element 0 of
; the poison operand, so lane 0 has no defined source; lanes 1-3 are %vec0[1].
; The expected output accordingly contains no move into v0 before the store.
2369define void @v_shuffle_v4i32_v3i32__3_1_1_1(ptr addrspace(1) inreg %ptr) {
2370; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_1_1_1:
2371; GFX900:       ; %bb.0:
2372; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2373; GFX900-NEXT:    ;;#ASMSTART
2374; GFX900-NEXT:    ; def v[0:2]
2375; GFX900-NEXT:    ;;#ASMEND
2376; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2377; GFX900-NEXT:    v_mov_b32_e32 v2, v1
2378; GFX900-NEXT:    v_mov_b32_e32 v3, v1
2379; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2380; GFX900-NEXT:    s_waitcnt vmcnt(0)
2381; GFX900-NEXT:    s_setpc_b64 s[30:31]
2382;
2383; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_1_1_1:
2384; GFX90A:       ; %bb.0:
2385; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2386; GFX90A-NEXT:    ;;#ASMSTART
2387; GFX90A-NEXT:    ; def v[0:2]
2388; GFX90A-NEXT:    ;;#ASMEND
2389; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2390; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
2391; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2392; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
2393; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2394; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2395;
2396; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_1_1_1:
2397; GFX940:       ; %bb.0:
2398; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2399; GFX940-NEXT:    ;;#ASMSTART
2400; GFX940-NEXT:    ; def v[0:2]
2401; GFX940-NEXT:    ;;#ASMEND
2402; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2403; GFX940-NEXT:    v_mov_b32_e32 v2, v1
2404; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2405; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
2406; GFX940-NEXT:    s_waitcnt vmcnt(0)
2407; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm output defines the shuffle input in vector registers.
2408  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2409  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
2410  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2411  ret void
2412}
2413
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <4, 1, 1, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[1] followed by %vec0[1] three times; it is stored via %ptr.
2414define void @v_shuffle_v4i32_v3i32__4_1_1_1(ptr addrspace(1) inreg %ptr) {
2415; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_1_1_1:
2416; GFX900:       ; %bb.0:
2417; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2418; GFX900-NEXT:    ;;#ASMSTART
2419; GFX900-NEXT:    ; def v[0:2]
2420; GFX900-NEXT:    ;;#ASMEND
2421; GFX900-NEXT:    ;;#ASMSTART
2422; GFX900-NEXT:    ; def v[2:4]
2423; GFX900-NEXT:    ;;#ASMEND
2424; GFX900-NEXT:    v_mov_b32_e32 v5, 0
2425; GFX900-NEXT:    v_mov_b32_e32 v0, v3
2426; GFX900-NEXT:    v_mov_b32_e32 v2, v1
2427; GFX900-NEXT:    v_mov_b32_e32 v3, v1
2428; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2429; GFX900-NEXT:    s_waitcnt vmcnt(0)
2430; GFX900-NEXT:    s_setpc_b64 s[30:31]
2431;
2432; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_1_1_1:
2433; GFX90A:       ; %bb.0:
2434; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2435; GFX90A-NEXT:    ;;#ASMSTART
2436; GFX90A-NEXT:    ; def v[0:2]
2437; GFX90A-NEXT:    ;;#ASMEND
2438; GFX90A-NEXT:    ;;#ASMSTART
2439; GFX90A-NEXT:    ; def v[2:4]
2440; GFX90A-NEXT:    ;;#ASMEND
2441; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
2442; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
2443; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
2444; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2445; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2446; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2447; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2448;
2449; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_1_1_1:
2450; GFX940:       ; %bb.0:
2451; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2452; GFX940-NEXT:    ;;#ASMSTART
2453; GFX940-NEXT:    ; def v[0:2]
2454; GFX940-NEXT:    ;;#ASMEND
2455; GFX940-NEXT:    v_mov_b32_e32 v5, 0
2456; GFX940-NEXT:    ;;#ASMSTART
2457; GFX940-NEXT:    ; def v[2:4]
2458; GFX940-NEXT:    ;;#ASMEND
2459; GFX940-NEXT:    s_nop 0
2460; GFX940-NEXT:    v_mov_b32_e32 v0, v3
2461; GFX940-NEXT:    v_mov_b32_e32 v2, v1
2462; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2463; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
2464; GFX940-NEXT:    s_waitcnt vmcnt(0)
2465; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2466  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2467  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2468  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
2469  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2470  ret void
2471}
2472
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 1, 1, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[2] followed by %vec0[1] three times; it is stored via %ptr.
2473define void @v_shuffle_v4i32_v3i32__5_1_1_1(ptr addrspace(1) inreg %ptr) {
2474; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_1_1:
2475; GFX900:       ; %bb.0:
2476; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2477; GFX900-NEXT:    ;;#ASMSTART
2478; GFX900-NEXT:    ; def v[0:2]
2479; GFX900-NEXT:    ;;#ASMEND
2480; GFX900-NEXT:    ;;#ASMSTART
2481; GFX900-NEXT:    ; def v[2:4]
2482; GFX900-NEXT:    ;;#ASMEND
2483; GFX900-NEXT:    v_mov_b32_e32 v5, 0
2484; GFX900-NEXT:    v_mov_b32_e32 v0, v4
2485; GFX900-NEXT:    v_mov_b32_e32 v2, v1
2486; GFX900-NEXT:    v_mov_b32_e32 v3, v1
2487; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2488; GFX900-NEXT:    s_waitcnt vmcnt(0)
2489; GFX900-NEXT:    s_setpc_b64 s[30:31]
2490;
2491; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_1_1:
2492; GFX90A:       ; %bb.0:
2493; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2494; GFX90A-NEXT:    ;;#ASMSTART
2495; GFX90A-NEXT:    ; def v[0:2]
2496; GFX90A-NEXT:    ;;#ASMEND
2497; GFX90A-NEXT:    ;;#ASMSTART
2498; GFX90A-NEXT:    ; def v[2:4]
2499; GFX90A-NEXT:    ;;#ASMEND
2500; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
2501; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
2502; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
2503; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2504; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2505; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2506; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2507;
2508; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_1_1:
2509; GFX940:       ; %bb.0:
2510; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2511; GFX940-NEXT:    ;;#ASMSTART
2512; GFX940-NEXT:    ; def v[0:2]
2513; GFX940-NEXT:    ;;#ASMEND
2514; GFX940-NEXT:    v_mov_b32_e32 v5, 0
2515; GFX940-NEXT:    ;;#ASMSTART
2516; GFX940-NEXT:    ; def v[2:4]
2517; GFX940-NEXT:    ;;#ASMEND
2518; GFX940-NEXT:    s_nop 0
2519; GFX940-NEXT:    v_mov_b32_e32 v0, v4
2520; GFX940-NEXT:    v_mov_b32_e32 v2, v1
2521; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2522; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
2523; GFX940-NEXT:    s_waitcnt vmcnt(0)
2524; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2525  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2526  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2527  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
2528  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2529  ret void
2530}
2531
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, poison, 1, 1>. Lane 0 is %vec1[2], lane 1 is unspecified (the "u" in the
; function name), and lanes 2-3 are %vec0[1]; the result is stored via %ptr.
2532define void @v_shuffle_v4i32_v3i32__5_u_1_1(ptr addrspace(1) inreg %ptr) {
2533; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_1_1:
2534; GFX900:       ; %bb.0:
2535; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2536; GFX900-NEXT:    ;;#ASMSTART
2537; GFX900-NEXT:    ; def v[1:3]
2538; GFX900-NEXT:    ;;#ASMEND
2539; GFX900-NEXT:    ;;#ASMSTART
2540; GFX900-NEXT:    ; def v[3:5]
2541; GFX900-NEXT:    ;;#ASMEND
2542; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2543; GFX900-NEXT:    v_mov_b32_e32 v0, v5
2544; GFX900-NEXT:    v_mov_b32_e32 v3, v2
2545; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2546; GFX900-NEXT:    s_waitcnt vmcnt(0)
2547; GFX900-NEXT:    s_setpc_b64 s[30:31]
2548;
2549; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_1_1:
2550; GFX90A:       ; %bb.0:
2551; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2552; GFX90A-NEXT:    ;;#ASMSTART
2553; GFX90A-NEXT:    ; def v[0:2]
2554; GFX90A-NEXT:    ;;#ASMEND
2555; GFX90A-NEXT:    ;;#ASMSTART
2556; GFX90A-NEXT:    ; def v[2:4]
2557; GFX90A-NEXT:    ;;#ASMEND
2558; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
2559; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
2560; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
2561; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2562; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2563; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2564; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2565;
2566; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_1_1:
2567; GFX940:       ; %bb.0:
2568; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2569; GFX940-NEXT:    ;;#ASMSTART
2570; GFX940-NEXT:    ; def v[0:2]
2571; GFX940-NEXT:    ;;#ASMEND
2572; GFX940-NEXT:    v_mov_b32_e32 v5, 0
2573; GFX940-NEXT:    ;;#ASMSTART
2574; GFX940-NEXT:    ; def v[2:4]
2575; GFX940-NEXT:    ;;#ASMEND
2576; GFX940-NEXT:    s_nop 0
2577; GFX940-NEXT:    v_mov_b32_e32 v0, v4
2578; GFX940-NEXT:    v_mov_b32_e32 v2, v1
2579; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2580; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
2581; GFX940-NEXT:    s_waitcnt vmcnt(0)
2582; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2583  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2584  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2585  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
2586  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2587  ret void
2588}
2589
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 0, 1, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[2], %vec0[0], %vec0[1], %vec0[1]; it is stored via %ptr.
2590define void @v_shuffle_v4i32_v3i32__5_0_1_1(ptr addrspace(1) inreg %ptr) {
2591; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_1_1:
2592; GFX900:       ; %bb.0:
2593; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2594; GFX900-NEXT:    ;;#ASMSTART
2595; GFX900-NEXT:    ; def v[1:3]
2596; GFX900-NEXT:    ;;#ASMEND
2597; GFX900-NEXT:    ;;#ASMSTART
2598; GFX900-NEXT:    ; def v[3:5]
2599; GFX900-NEXT:    ;;#ASMEND
2600; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2601; GFX900-NEXT:    v_mov_b32_e32 v0, v5
2602; GFX900-NEXT:    v_mov_b32_e32 v3, v2
2603; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2604; GFX900-NEXT:    s_waitcnt vmcnt(0)
2605; GFX900-NEXT:    s_setpc_b64 s[30:31]
2606;
2607; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_1_1:
2608; GFX90A:       ; %bb.0:
2609; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2610; GFX90A-NEXT:    ;;#ASMSTART
2611; GFX90A-NEXT:    ; def v[2:4]
2612; GFX90A-NEXT:    ;;#ASMEND
2613; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
2614; GFX90A-NEXT:    ;;#ASMSTART
2615; GFX90A-NEXT:    ; def v[4:6]
2616; GFX90A-NEXT:    ;;#ASMEND
2617; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
2618; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
2619; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
2620; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
2621; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2622; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2623;
2624; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_1_1:
2625; GFX940:       ; %bb.0:
2626; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2627; GFX940-NEXT:    ;;#ASMSTART
2628; GFX940-NEXT:    ; def v[2:4]
2629; GFX940-NEXT:    ;;#ASMEND
2630; GFX940-NEXT:    v_mov_b32_e32 v7, 0
2631; GFX940-NEXT:    ;;#ASMSTART
2632; GFX940-NEXT:    ; def v[4:6]
2633; GFX940-NEXT:    ;;#ASMEND
2634; GFX940-NEXT:    v_mov_b32_e32 v1, v2
2635; GFX940-NEXT:    v_mov_b32_e32 v0, v6
2636; GFX940-NEXT:    v_mov_b32_e32 v2, v3
2637; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
2638; GFX940-NEXT:    s_waitcnt vmcnt(0)
2639; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2640  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2641  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2642  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
2643  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2644  ret void
2645}
2646
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 2, 1, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[2], %vec0[2], %vec0[1], %vec0[1]; it is stored via %ptr.
2647define void @v_shuffle_v4i32_v3i32__5_2_1_1(ptr addrspace(1) inreg %ptr) {
2648; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_1_1:
2649; GFX900:       ; %bb.0:
2650; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2651; GFX900-NEXT:    ;;#ASMSTART
2652; GFX900-NEXT:    ; def v[1:3]
2653; GFX900-NEXT:    ;;#ASMEND
2654; GFX900-NEXT:    v_mov_b32_e32 v7, 0
2655; GFX900-NEXT:    ;;#ASMSTART
2656; GFX900-NEXT:    ; def v[4:6]
2657; GFX900-NEXT:    ;;#ASMEND
2658; GFX900-NEXT:    v_mov_b32_e32 v0, v6
2659; GFX900-NEXT:    v_mov_b32_e32 v1, v3
2660; GFX900-NEXT:    v_mov_b32_e32 v3, v2
2661; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
2662; GFX900-NEXT:    s_waitcnt vmcnt(0)
2663; GFX900-NEXT:    s_setpc_b64 s[30:31]
2664;
2665; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_1_1:
2666; GFX90A:       ; %bb.0:
2667; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2668; GFX90A-NEXT:    ;;#ASMSTART
2669; GFX90A-NEXT:    ; def v[2:4]
2670; GFX90A-NEXT:    ;;#ASMEND
2671; GFX90A-NEXT:    ;;#ASMSTART
2672; GFX90A-NEXT:    ; def v[0:2]
2673; GFX90A-NEXT:    ;;#ASMEND
2674; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
2675; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
2676; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
2677; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
2678; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2679; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2680; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2681;
2682; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_1_1:
2683; GFX940:       ; %bb.0:
2684; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2685; GFX940-NEXT:    ;;#ASMSTART
2686; GFX940-NEXT:    ; def v[2:4]
2687; GFX940-NEXT:    ;;#ASMEND
2688; GFX940-NEXT:    v_mov_b32_e32 v5, 0
2689; GFX940-NEXT:    ;;#ASMSTART
2690; GFX940-NEXT:    ; def v[0:2]
2691; GFX940-NEXT:    ;;#ASMEND
2692; GFX940-NEXT:    s_nop 0
2693; GFX940-NEXT:    v_mov_b32_e32 v0, v2
2694; GFX940-NEXT:    v_mov_b32_e32 v1, v4
2695; GFX940-NEXT:    v_mov_b32_e32 v2, v3
2696; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
2697; GFX940-NEXT:    s_waitcnt vmcnt(0)
2698; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2699  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2700  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2701  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
2702  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2703  ret void
2704}
2705
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 3, 1, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[2], %vec1[0], %vec0[1], %vec0[1]; it is stored via %ptr.
2706define void @v_shuffle_v4i32_v3i32__5_3_1_1(ptr addrspace(1) inreg %ptr) {
2707; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_1_1:
2708; GFX900:       ; %bb.0:
2709; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2710; GFX900-NEXT:    ;;#ASMSTART
2711; GFX900-NEXT:    ; def v[3:5]
2712; GFX900-NEXT:    ;;#ASMEND
2713; GFX900-NEXT:    ;;#ASMSTART
2714; GFX900-NEXT:    ; def v[1:3]
2715; GFX900-NEXT:    ;;#ASMEND
2716; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2717; GFX900-NEXT:    v_mov_b32_e32 v0, v3
2718; GFX900-NEXT:    v_mov_b32_e32 v2, v4
2719; GFX900-NEXT:    v_mov_b32_e32 v3, v4
2720; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2721; GFX900-NEXT:    s_waitcnt vmcnt(0)
2722; GFX900-NEXT:    s_setpc_b64 s[30:31]
2723;
2724; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_1_1:
2725; GFX90A:       ; %bb.0:
2726; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2727; GFX90A-NEXT:    ;;#ASMSTART
2728; GFX90A-NEXT:    ; def v[2:4]
2729; GFX90A-NEXT:    ;;#ASMEND
2730; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
2731; GFX90A-NEXT:    ;;#ASMSTART
2732; GFX90A-NEXT:    ; def v[4:6]
2733; GFX90A-NEXT:    ;;#ASMEND
2734; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
2735; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
2736; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
2737; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
2738; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2739; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2740;
2741; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_1_1:
2742; GFX940:       ; %bb.0:
2743; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2744; GFX940-NEXT:    ;;#ASMSTART
2745; GFX940-NEXT:    ; def v[2:4]
2746; GFX940-NEXT:    ;;#ASMEND
2747; GFX940-NEXT:    v_mov_b32_e32 v7, 0
2748; GFX940-NEXT:    ;;#ASMSTART
2749; GFX940-NEXT:    ; def v[4:6]
2750; GFX940-NEXT:    ;;#ASMEND
2751; GFX940-NEXT:    v_mov_b32_e32 v2, v3
2752; GFX940-NEXT:    v_mov_b32_e32 v0, v6
2753; GFX940-NEXT:    v_mov_b32_e32 v1, v4
2754; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
2755; GFX940-NEXT:    s_waitcnt vmcnt(0)
2756; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2757  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2758  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2759  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
2760  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2761  ret void
2762}
2763
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 4, 1, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[2], %vec1[1], %vec0[1], %vec0[1]; it is stored via %ptr.
2764define void @v_shuffle_v4i32_v3i32__5_4_1_1(ptr addrspace(1) inreg %ptr) {
2765; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_1_1:
2766; GFX900:       ; %bb.0:
2767; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2768; GFX900-NEXT:    ;;#ASMSTART
2769; GFX900-NEXT:    ; def v[2:4]
2770; GFX900-NEXT:    ;;#ASMEND
2771; GFX900-NEXT:    ;;#ASMSTART
2772; GFX900-NEXT:    ; def v[0:2]
2773; GFX900-NEXT:    ;;#ASMEND
2774; GFX900-NEXT:    v_mov_b32_e32 v5, 0
2775; GFX900-NEXT:    v_mov_b32_e32 v0, v2
2776; GFX900-NEXT:    v_mov_b32_e32 v2, v3
2777; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2778; GFX900-NEXT:    s_waitcnt vmcnt(0)
2779; GFX900-NEXT:    s_setpc_b64 s[30:31]
2780;
2781; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_1_1:
2782; GFX90A:       ; %bb.0:
2783; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2784; GFX90A-NEXT:    ;;#ASMSTART
2785; GFX90A-NEXT:    ; def v[2:4]
2786; GFX90A-NEXT:    ;;#ASMEND
2787; GFX90A-NEXT:    ;;#ASMSTART
2788; GFX90A-NEXT:    ; def v[0:2]
2789; GFX90A-NEXT:    ;;#ASMEND
2790; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
2791; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
2792; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
2793; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2794; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2795; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2796;
2797; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_1_1:
2798; GFX940:       ; %bb.0:
2799; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2800; GFX940-NEXT:    ;;#ASMSTART
2801; GFX940-NEXT:    ; def v[2:4]
2802; GFX940-NEXT:    ;;#ASMEND
2803; GFX940-NEXT:    v_mov_b32_e32 v5, 0
2804; GFX940-NEXT:    ;;#ASMSTART
2805; GFX940-NEXT:    ; def v[0:2]
2806; GFX940-NEXT:    ;;#ASMEND
2807; GFX940-NEXT:    s_nop 0
2808; GFX940-NEXT:    v_mov_b32_e32 v0, v2
2809; GFX940-NEXT:    v_mov_b32_e32 v2, v3
2810; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
2811; GFX940-NEXT:    s_waitcnt vmcnt(0)
2812; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2813  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2814  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2815  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
2816  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2817  ret void
2818}
2819
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 5, 1, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[2] twice followed by %vec0[1] twice; it is stored via %ptr.
2820define void @v_shuffle_v4i32_v3i32__5_5_1_1(ptr addrspace(1) inreg %ptr) {
2821; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_1:
2822; GFX900:       ; %bb.0:
2823; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2824; GFX900-NEXT:    ;;#ASMSTART
2825; GFX900-NEXT:    ; def v[1:3]
2826; GFX900-NEXT:    ;;#ASMEND
2827; GFX900-NEXT:    ;;#ASMSTART
2828; GFX900-NEXT:    ; def v[3:5]
2829; GFX900-NEXT:    ;;#ASMEND
2830; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2831; GFX900-NEXT:    v_mov_b32_e32 v0, v5
2832; GFX900-NEXT:    v_mov_b32_e32 v1, v5
2833; GFX900-NEXT:    v_mov_b32_e32 v3, v2
2834; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2835; GFX900-NEXT:    s_waitcnt vmcnt(0)
2836; GFX900-NEXT:    s_setpc_b64 s[30:31]
2837;
2838; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_1:
2839; GFX90A:       ; %bb.0:
2840; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2841; GFX90A-NEXT:    ;;#ASMSTART
2842; GFX90A-NEXT:    ; def v[2:4]
2843; GFX90A-NEXT:    ;;#ASMEND
2844; GFX90A-NEXT:    ;;#ASMSTART
2845; GFX90A-NEXT:    ; def v[0:2]
2846; GFX90A-NEXT:    ;;#ASMEND
2847; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
2848; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
2849; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
2850; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
2851; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2852; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2853; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2854;
2855; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_1:
2856; GFX940:       ; %bb.0:
2857; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2858; GFX940-NEXT:    ;;#ASMSTART
2859; GFX940-NEXT:    ; def v[2:4]
2860; GFX940-NEXT:    ;;#ASMEND
2861; GFX940-NEXT:    v_mov_b32_e32 v5, 0
2862; GFX940-NEXT:    ;;#ASMSTART
2863; GFX940-NEXT:    ; def v[0:2]
2864; GFX940-NEXT:    ;;#ASMEND
2865; GFX940-NEXT:    s_nop 0
2866; GFX940-NEXT:    v_mov_b32_e32 v0, v2
2867; GFX940-NEXT:    v_mov_b32_e32 v1, v2
2868; GFX940-NEXT:    v_mov_b32_e32 v2, v3
2869; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
2870; GFX940-NEXT:    s_waitcnt vmcnt(0)
2871; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2872  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2873  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2874  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
2875  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2876  ret void
2877}
2878
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 5, poison, 1>. Lanes 0-1 are %vec1[2], lane 2 is unspecified (the "u" in
; the function name), and lane 3 is %vec0[1]; the result is stored via %ptr.
2879define void @v_shuffle_v4i32_v3i32__5_5_u_1(ptr addrspace(1) inreg %ptr) {
2880; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1:
2881; GFX900:       ; %bb.0:
2882; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2883; GFX900-NEXT:    ;;#ASMSTART
2884; GFX900-NEXT:    ; def v[1:3]
2885; GFX900-NEXT:    ;;#ASMEND
2886; GFX900-NEXT:    ;;#ASMSTART
2887; GFX900-NEXT:    ; def v[3:5]
2888; GFX900-NEXT:    ;;#ASMEND
2889; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2890; GFX900-NEXT:    v_mov_b32_e32 v0, v5
2891; GFX900-NEXT:    v_mov_b32_e32 v1, v5
2892; GFX900-NEXT:    v_mov_b32_e32 v3, v2
2893; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2894; GFX900-NEXT:    s_waitcnt vmcnt(0)
2895; GFX900-NEXT:    s_setpc_b64 s[30:31]
2896;
2897; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1:
2898; GFX90A:       ; %bb.0:
2899; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2900; GFX90A-NEXT:    ;;#ASMSTART
2901; GFX90A-NEXT:    ; def v[2:4]
2902; GFX90A-NEXT:    ;;#ASMEND
2903; GFX90A-NEXT:    ;;#ASMSTART
2904; GFX90A-NEXT:    ; def v[0:2]
2905; GFX90A-NEXT:    ;;#ASMEND
2906; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
2907; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
2908; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
2909; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
2910; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2911; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2912;
2913; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_1:
2914; GFX940:       ; %bb.0:
2915; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2916; GFX940-NEXT:    ;;#ASMSTART
2917; GFX940-NEXT:    ; def v[2:4]
2918; GFX940-NEXT:    ;;#ASMEND
2919; GFX940-NEXT:    v_mov_b32_e32 v5, 0
2920; GFX940-NEXT:    ;;#ASMSTART
2921; GFX940-NEXT:    ; def v[0:2]
2922; GFX940-NEXT:    ;;#ASMEND
2923; GFX940-NEXT:    s_nop 0
2924; GFX940-NEXT:    v_mov_b32_e32 v0, v2
2925; GFX940-NEXT:    v_mov_b32_e32 v1, v2
2926; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
2927; GFX940-NEXT:    s_waitcnt vmcnt(0)
2928; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2929  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2930  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2931  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
2932  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2933  ret void
2934}
2935
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 5, 0, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[2], %vec1[2], %vec0[0], %vec0[1]; it is stored via %ptr.
2936define void @v_shuffle_v4i32_v3i32__5_5_0_1(ptr addrspace(1) inreg %ptr) {
2937; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1:
2938; GFX900:       ; %bb.0:
2939; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2940; GFX900-NEXT:    ;;#ASMSTART
2941; GFX900-NEXT:    ; def v[2:4]
2942; GFX900-NEXT:    ;;#ASMEND
2943; GFX900-NEXT:    v_mov_b32_e32 v7, 0
2944; GFX900-NEXT:    ;;#ASMSTART
2945; GFX900-NEXT:    ; def v[4:6]
2946; GFX900-NEXT:    ;;#ASMEND
2947; GFX900-NEXT:    v_mov_b32_e32 v0, v6
2948; GFX900-NEXT:    v_mov_b32_e32 v1, v6
2949; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
2950; GFX900-NEXT:    s_waitcnt vmcnt(0)
2951; GFX900-NEXT:    s_setpc_b64 s[30:31]
2952;
2953; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1:
2954; GFX90A:       ; %bb.0:
2955; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2956; GFX90A-NEXT:    ;;#ASMSTART
2957; GFX90A-NEXT:    ; def v[2:4]
2958; GFX90A-NEXT:    ;;#ASMEND
2959; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
2960; GFX90A-NEXT:    ;;#ASMSTART
2961; GFX90A-NEXT:    ; def v[4:6]
2962; GFX90A-NEXT:    ;;#ASMEND
2963; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
2964; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
2965; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
2966; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2967; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2968;
2969; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_1:
2970; GFX940:       ; %bb.0:
2971; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2972; GFX940-NEXT:    ;;#ASMSTART
2973; GFX940-NEXT:    ; def v[2:4]
2974; GFX940-NEXT:    ;;#ASMEND
2975; GFX940-NEXT:    v_mov_b32_e32 v7, 0
2976; GFX940-NEXT:    ;;#ASMSTART
2977; GFX940-NEXT:    ; def v[4:6]
2978; GFX940-NEXT:    ;;#ASMEND
2979; GFX940-NEXT:    s_nop 0
2980; GFX940-NEXT:    v_mov_b32_e32 v0, v6
2981; GFX940-NEXT:    v_mov_b32_e32 v1, v6
2982; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
2983; GFX940-NEXT:    s_waitcnt vmcnt(0)
2984; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
2985  %vec0 = call <3 x i32> asm "; def $0", "=v"()
2986  %vec1 = call <3 x i32> asm "; def $0", "=v"()
2987  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
2988  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
2989  ret void
2990}
2991
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 5, 2, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[2], %vec1[2], %vec0[2], %vec0[1]; it is stored via %ptr.
2992define void @v_shuffle_v4i32_v3i32__5_5_2_1(ptr addrspace(1) inreg %ptr) {
2993; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_1:
2994; GFX900:       ; %bb.0:
2995; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2996; GFX900-NEXT:    ;;#ASMSTART
2997; GFX900-NEXT:    ; def v[2:4]
2998; GFX900-NEXT:    ;;#ASMEND
2999; GFX900-NEXT:    ;;#ASMSTART
3000; GFX900-NEXT:    ; def v[0:2]
3001; GFX900-NEXT:    ;;#ASMEND
3002; GFX900-NEXT:    v_mov_b32_e32 v5, 0
3003; GFX900-NEXT:    v_mov_b32_e32 v0, v2
3004; GFX900-NEXT:    v_mov_b32_e32 v1, v2
3005; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3006; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
3007; GFX900-NEXT:    s_waitcnt vmcnt(0)
3008; GFX900-NEXT:    s_setpc_b64 s[30:31]
3009;
3010; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_1:
3011; GFX90A:       ; %bb.0:
3012; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3013; GFX90A-NEXT:    ;;#ASMSTART
3014; GFX90A-NEXT:    ; def v[2:4]
3015; GFX90A-NEXT:    ;;#ASMEND
3016; GFX90A-NEXT:    ;;#ASMSTART
3017; GFX90A-NEXT:    ; def v[0:2]
3018; GFX90A-NEXT:    ;;#ASMEND
3019; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
3020; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
3021; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
3022; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3023; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
3024; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3025; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3026;
3027; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_1:
3028; GFX940:       ; %bb.0:
3029; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3030; GFX940-NEXT:    ;;#ASMSTART
3031; GFX940-NEXT:    ; def v[2:4]
3032; GFX940-NEXT:    ;;#ASMEND
3033; GFX940-NEXT:    v_mov_b32_e32 v5, 0
3034; GFX940-NEXT:    ;;#ASMSTART
3035; GFX940-NEXT:    ; def v[0:2]
3036; GFX940-NEXT:    ;;#ASMEND
3037; GFX940-NEXT:    s_nop 0
3038; GFX940-NEXT:    v_mov_b32_e32 v0, v2
3039; GFX940-NEXT:    v_mov_b32_e32 v1, v2
3040; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3041; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
3042; GFX940-NEXT:    s_waitcnt vmcnt(0)
3043; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
3044  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3045  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3046  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
3047  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3048  ret void
3049}
3050
; Codegen test: shuffle two <3 x i32> inputs into a <4 x i32> using mask
; <5, 5, 3, 1>. Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so the
; result is %vec1[2], %vec1[2], %vec1[0], %vec0[1]; it is stored via %ptr.
3051define void @v_shuffle_v4i32_v3i32__5_5_3_1(ptr addrspace(1) inreg %ptr) {
3052; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_1:
3053; GFX900:       ; %bb.0:
3054; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3055; GFX900-NEXT:    ;;#ASMSTART
3056; GFX900-NEXT:    ; def v[2:4]
3057; GFX900-NEXT:    ;;#ASMEND
3058; GFX900-NEXT:    v_mov_b32_e32 v7, 0
3059; GFX900-NEXT:    ;;#ASMSTART
3060; GFX900-NEXT:    ; def v[4:6]
3061; GFX900-NEXT:    ;;#ASMEND
3062; GFX900-NEXT:    v_mov_b32_e32 v0, v6
3063; GFX900-NEXT:    v_mov_b32_e32 v1, v6
3064; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3065; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3066; GFX900-NEXT:    s_waitcnt vmcnt(0)
3067; GFX900-NEXT:    s_setpc_b64 s[30:31]
3068;
3069; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_1:
3070; GFX90A:       ; %bb.0:
3071; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3072; GFX90A-NEXT:    ;;#ASMSTART
3073; GFX90A-NEXT:    ; def v[2:4]
3074; GFX90A-NEXT:    ;;#ASMEND
3075; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
3076; GFX90A-NEXT:    ;;#ASMSTART
3077; GFX90A-NEXT:    ; def v[4:6]
3078; GFX90A-NEXT:    ;;#ASMEND
3079; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
3080; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
3081; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3082; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3083; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3084; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3085;
3086; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_1:
3087; GFX940:       ; %bb.0:
3088; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3089; GFX940-NEXT:    ;;#ASMSTART
3090; GFX940-NEXT:    ; def v[2:4]
3091; GFX940-NEXT:    ;;#ASMEND
3092; GFX940-NEXT:    v_mov_b32_e32 v7, 0
3093; GFX940-NEXT:    ;;#ASMSTART
3094; GFX940-NEXT:    ; def v[4:6]
3095; GFX940-NEXT:    ;;#ASMEND
3096; GFX940-NEXT:    s_nop 0
3097; GFX940-NEXT:    v_mov_b32_e32 v0, v6
3098; GFX940-NEXT:    v_mov_b32_e32 v1, v6
3099; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3100; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
3101; GFX940-NEXT:    s_waitcnt vmcnt(0)
3102; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The "=v" asm outputs define both shuffle inputs in vector registers.
3103  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3104  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3105  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
3106  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3107  ret void
3108}
3109
; Two-source shuffle with mask <5, 5, 4, 1>. Mask indices 0-2 select elements
; of %vec0 and indices 3-5 select elements of %vec1, so the stored vector is
; <vec1[2], vec1[2], vec1[1], vec0[1]>. Both sources are produced by opaque
; inline asm with a "=v" constraint; the result is stored through %ptr.
3110define void @v_shuffle_v4i32_v3i32__5_5_4_1(ptr addrspace(1) inreg %ptr) {
3111; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_1:
3112; GFX900:       ; %bb.0:
3113; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3114; GFX900-NEXT:    ;;#ASMSTART
3115; GFX900-NEXT:    ; def v[3:5]
3116; GFX900-NEXT:    ;;#ASMEND
3117; GFX900-NEXT:    ;;#ASMSTART
3118; GFX900-NEXT:    ; def v[1:3]
3119; GFX900-NEXT:    ;;#ASMEND
3120; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3121; GFX900-NEXT:    v_mov_b32_e32 v0, v3
3122; GFX900-NEXT:    v_mov_b32_e32 v1, v3
3123; GFX900-NEXT:    v_mov_b32_e32 v3, v4
3124; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
3125; GFX900-NEXT:    s_waitcnt vmcnt(0)
3126; GFX900-NEXT:    s_setpc_b64 s[30:31]
3127;
3128; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_1:
3129; GFX90A:       ; %bb.0:
3130; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3131; GFX90A-NEXT:    ;;#ASMSTART
3132; GFX90A-NEXT:    ; def v[2:4]
3133; GFX90A-NEXT:    ;;#ASMEND
3134; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
3135; GFX90A-NEXT:    ;;#ASMSTART
3136; GFX90A-NEXT:    ; def v[4:6]
3137; GFX90A-NEXT:    ;;#ASMEND
3138; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
3139; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
3140; GFX90A-NEXT:    v_mov_b32_e32 v2, v5
3141; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3142; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3143; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3144;
3145; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_1:
3146; GFX940:       ; %bb.0:
3147; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3148; GFX940-NEXT:    ;;#ASMSTART
3149; GFX940-NEXT:    ; def v[2:4]
3150; GFX940-NEXT:    ;;#ASMEND
3151; GFX940-NEXT:    v_mov_b32_e32 v7, 0
3152; GFX940-NEXT:    ;;#ASMSTART
3153; GFX940-NEXT:    ; def v[4:6]
3154; GFX940-NEXT:    ;;#ASMEND
3155; GFX940-NEXT:    s_nop 0
3156; GFX940-NEXT:    v_mov_b32_e32 v0, v6
3157; GFX940-NEXT:    v_mov_b32_e32 v1, v6
3158; GFX940-NEXT:    v_mov_b32_e32 v2, v5
3159; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
3160; GFX940-NEXT:    s_waitcnt vmcnt(0)
3161; GFX940-NEXT:    s_setpc_b64 s[30:31]
3162  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3163  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3164  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
3165  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3166  ret void
3167}
3168
; Single-source shuffle with mask <poison, 2, 2, 2>; the second operand is
; poison. Lane 0 is unspecified and lanes 1-3 all take vec0[2]. The source is
; produced by opaque inline asm with a "=v" constraint; the result is stored
; through %ptr.
3169define void @v_shuffle_v4i32_v3i32__u_2_2_2(ptr addrspace(1) inreg %ptr) {
3170; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_2_2_2:
3171; GFX900:       ; %bb.0:
3172; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3173; GFX900-NEXT:    ;;#ASMSTART
3174; GFX900-NEXT:    ; def v[0:2]
3175; GFX900-NEXT:    ;;#ASMEND
3176; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3177; GFX900-NEXT:    v_mov_b32_e32 v1, v2
3178; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3179; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3180; GFX900-NEXT:    s_waitcnt vmcnt(0)
3181; GFX900-NEXT:    s_setpc_b64 s[30:31]
3182;
3183; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_2_2_2:
3184; GFX90A:       ; %bb.0:
3185; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3186; GFX90A-NEXT:    ;;#ASMSTART
3187; GFX90A-NEXT:    ; def v[0:2]
3188; GFX90A-NEXT:    ;;#ASMEND
3189; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3190; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
3191; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3192; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3193; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3194; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3195;
3196; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_2_2_2:
3197; GFX940:       ; %bb.0:
3198; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3199; GFX940-NEXT:    ;;#ASMSTART
3200; GFX940-NEXT:    ; def v[0:2]
3201; GFX940-NEXT:    ;;#ASMEND
3202; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3203; GFX940-NEXT:    v_mov_b32_e32 v1, v2
3204; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3205; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
3206; GFX940-NEXT:    s_waitcnt vmcnt(0)
3207; GFX940-NEXT:    s_setpc_b64 s[30:31]
3208  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3209  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
3210  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3211  ret void
3212}
3213
; Single-source shuffle with mask <0, 2, 2, 2>; the second operand is poison.
; The stored vector is <vec0[0], vec0[2], vec0[2], vec0[2]>. The source is
; produced by opaque inline asm with a "=v" constraint; the result is stored
; through %ptr.
3214define void @v_shuffle_v4i32_v3i32__0_2_2_2(ptr addrspace(1) inreg %ptr) {
3215; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_2_2_2:
3216; GFX900:       ; %bb.0:
3217; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3218; GFX900-NEXT:    ;;#ASMSTART
3219; GFX900-NEXT:    ; def v[0:2]
3220; GFX900-NEXT:    ;;#ASMEND
3221; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3222; GFX900-NEXT:    v_mov_b32_e32 v1, v2
3223; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3224; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3225; GFX900-NEXT:    s_waitcnt vmcnt(0)
3226; GFX900-NEXT:    s_setpc_b64 s[30:31]
3227;
3228; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_2_2_2:
3229; GFX90A:       ; %bb.0:
3230; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3231; GFX90A-NEXT:    ;;#ASMSTART
3232; GFX90A-NEXT:    ; def v[0:2]
3233; GFX90A-NEXT:    ;;#ASMEND
3234; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3235; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
3236; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3237; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3238; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3239; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3240;
3241; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_2_2_2:
3242; GFX940:       ; %bb.0:
3243; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3244; GFX940-NEXT:    ;;#ASMSTART
3245; GFX940-NEXT:    ; def v[0:2]
3246; GFX940-NEXT:    ;;#ASMEND
3247; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3248; GFX940-NEXT:    v_mov_b32_e32 v1, v2
3249; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3250; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
3251; GFX940-NEXT:    s_waitcnt vmcnt(0)
3252; GFX940-NEXT:    s_setpc_b64 s[30:31]
3253  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3254  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
3255  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3256  ret void
3257}
3258
; Single-source shuffle with mask <1, 2, 2, 2>; the second operand is poison.
; The stored vector is <vec0[1], vec0[2], vec0[2], vec0[2]>. The source is
; produced by opaque inline asm with a "=v" constraint; the result is stored
; through %ptr.
3259define void @v_shuffle_v4i32_v3i32__1_2_2_2(ptr addrspace(1) inreg %ptr) {
3260; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2:
3261; GFX900:       ; %bb.0:
3262; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3263; GFX900-NEXT:    ;;#ASMSTART
3264; GFX900-NEXT:    ; def v[0:2]
3265; GFX900-NEXT:    ;;#ASMEND
3266; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3267; GFX900-NEXT:    v_mov_b32_e32 v0, v1
3268; GFX900-NEXT:    v_mov_b32_e32 v1, v2
3269; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3270; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3271; GFX900-NEXT:    s_waitcnt vmcnt(0)
3272; GFX900-NEXT:    s_setpc_b64 s[30:31]
3273;
3274; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2:
3275; GFX90A:       ; %bb.0:
3276; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3277; GFX90A-NEXT:    ;;#ASMSTART
3278; GFX90A-NEXT:    ; def v[0:2]
3279; GFX90A-NEXT:    ;;#ASMEND
3280; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3281; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
3282; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
3283; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3284; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3285; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3286; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3287;
3288; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_2_2_2:
3289; GFX940:       ; %bb.0:
3290; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3291; GFX940-NEXT:    ;;#ASMSTART
3292; GFX940-NEXT:    ; def v[0:2]
3293; GFX940-NEXT:    ;;#ASMEND
3294; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3295; GFX940-NEXT:    v_mov_b32_e32 v0, v1
3296; GFX940-NEXT:    v_mov_b32_e32 v1, v2
3297; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3298; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
3299; GFX940-NEXT:    s_waitcnt vmcnt(0)
3300; GFX940-NEXT:    s_setpc_b64 s[30:31]
3301  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3302  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
3303  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3304  ret void
3305}
3306
; Single-source shuffle with mask <2, 2, 2, 2>; the second operand is poison.
; Every lane of the stored vector is vec0[2], i.e. a splat of the last source
; element. The source is produced by opaque inline asm with a "=v" constraint;
; the result is stored through %ptr.
3307define void @v_shuffle_v4i32_v3i32__2_2_2_2(ptr addrspace(1) inreg %ptr) {
3308; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2:
3309; GFX900:       ; %bb.0:
3310; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3311; GFX900-NEXT:    ;;#ASMSTART
3312; GFX900-NEXT:    ; def v[0:2]
3313; GFX900-NEXT:    ;;#ASMEND
3314; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3315; GFX900-NEXT:    v_mov_b32_e32 v0, v2
3316; GFX900-NEXT:    v_mov_b32_e32 v1, v2
3317; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3318; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3319; GFX900-NEXT:    s_waitcnt vmcnt(0)
3320; GFX900-NEXT:    s_setpc_b64 s[30:31]
3321;
3322; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2:
3323; GFX90A:       ; %bb.0:
3324; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3325; GFX90A-NEXT:    ;;#ASMSTART
3326; GFX90A-NEXT:    ; def v[0:2]
3327; GFX90A-NEXT:    ;;#ASMEND
3328; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3329; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
3330; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
3331; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3332; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3333; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3334; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3335;
3336; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_2_2_2:
3337; GFX940:       ; %bb.0:
3338; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3339; GFX940-NEXT:    ;;#ASMSTART
3340; GFX940-NEXT:    ; def v[0:2]
3341; GFX940-NEXT:    ;;#ASMEND
3342; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3343; GFX940-NEXT:    v_mov_b32_e32 v0, v2
3344; GFX940-NEXT:    v_mov_b32_e32 v1, v2
3345; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3346; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
3347; GFX940-NEXT:    s_waitcnt vmcnt(0)
3348; GFX940-NEXT:    s_setpc_b64 s[30:31]
3349  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3350  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
3351  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3352  ret void
3353}
3354
; Shuffle with mask <3, 2, 2, 2> where the second operand is poison. Mask
; index 3 selects element 0 of that poison operand, so lane 0 carries no
; defined value; lanes 1-3 all take vec0[2]. The expected code accordingly
; contains no move into v0. The result is stored through %ptr.
3355define void @v_shuffle_v4i32_v3i32__3_2_2_2(ptr addrspace(1) inreg %ptr) {
3356; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_2_2_2:
3357; GFX900:       ; %bb.0:
3358; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3359; GFX900-NEXT:    ;;#ASMSTART
3360; GFX900-NEXT:    ; def v[0:2]
3361; GFX900-NEXT:    ;;#ASMEND
3362; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3363; GFX900-NEXT:    v_mov_b32_e32 v1, v2
3364; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3365; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3366; GFX900-NEXT:    s_waitcnt vmcnt(0)
3367; GFX900-NEXT:    s_setpc_b64 s[30:31]
3368;
3369; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_2_2_2:
3370; GFX90A:       ; %bb.0:
3371; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3372; GFX90A-NEXT:    ;;#ASMSTART
3373; GFX90A-NEXT:    ; def v[0:2]
3374; GFX90A-NEXT:    ;;#ASMEND
3375; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3376; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
3377; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3378; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3379; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3380; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3381;
3382; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_2_2_2:
3383; GFX940:       ; %bb.0:
3384; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3385; GFX940-NEXT:    ;;#ASMSTART
3386; GFX940-NEXT:    ; def v[0:2]
3387; GFX940-NEXT:    ;;#ASMEND
3388; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3389; GFX940-NEXT:    v_mov_b32_e32 v1, v2
3390; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3391; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
3392; GFX940-NEXT:    s_waitcnt vmcnt(0)
3393; GFX940-NEXT:    s_setpc_b64 s[30:31]
3394  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3395  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
3396  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3397  ret void
3398}
3399
; Two-source shuffle with mask <4, 2, 2, 2>. Mask indices 0-2 select elements
; of %vec0 and indices 3-5 select elements of %vec1, so the stored vector is
; <vec1[1], vec0[2], vec0[2], vec0[2]>. Both sources are produced by opaque
; inline asm with a "=v" constraint; the result is stored through %ptr.
3400define void @v_shuffle_v4i32_v3i32__4_2_2_2(ptr addrspace(1) inreg %ptr) {
3401; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_2_2_2:
3402; GFX900:       ; %bb.0:
3403; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3404; GFX900-NEXT:    ;;#ASMSTART
3405; GFX900-NEXT:    ; def v[0:2]
3406; GFX900-NEXT:    ;;#ASMEND
3407; GFX900-NEXT:    ;;#ASMSTART
3408; GFX900-NEXT:    ; def v[3:5]
3409; GFX900-NEXT:    ;;#ASMEND
3410; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3411; GFX900-NEXT:    v_mov_b32_e32 v0, v4
3412; GFX900-NEXT:    v_mov_b32_e32 v1, v2
3413; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3414; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
3415; GFX900-NEXT:    s_waitcnt vmcnt(0)
3416; GFX900-NEXT:    s_setpc_b64 s[30:31]
3417;
3418; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_2_2_2:
3419; GFX90A:       ; %bb.0:
3420; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3421; GFX90A-NEXT:    ;;#ASMSTART
3422; GFX90A-NEXT:    ; def v[0:2]
3423; GFX90A-NEXT:    ;;#ASMEND
3424; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
3425; GFX90A-NEXT:    ;;#ASMSTART
3426; GFX90A-NEXT:    ; def v[4:6]
3427; GFX90A-NEXT:    ;;#ASMEND
3428; GFX90A-NEXT:    v_mov_b32_e32 v0, v5
3429; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
3430; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3431; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3432; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3433; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3434;
3435; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_2_2_2:
3436; GFX940:       ; %bb.0:
3437; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3438; GFX940-NEXT:    ;;#ASMSTART
3439; GFX940-NEXT:    ; def v[0:2]
3440; GFX940-NEXT:    ;;#ASMEND
3441; GFX940-NEXT:    v_mov_b32_e32 v7, 0
3442; GFX940-NEXT:    ;;#ASMSTART
3443; GFX940-NEXT:    ; def v[4:6]
3444; GFX940-NEXT:    ;;#ASMEND
3445; GFX940-NEXT:    v_mov_b32_e32 v1, v2
3446; GFX940-NEXT:    v_mov_b32_e32 v0, v5
3447; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3448; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
3449; GFX940-NEXT:    s_waitcnt vmcnt(0)
3450; GFX940-NEXT:    s_setpc_b64 s[30:31]
3451  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3452  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3453  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
3454  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3455  ret void
3456}
3457
; Two-source shuffle with mask <5, 2, 2, 2>. Mask indices 0-2 select elements
; of %vec0 and indices 3-5 select elements of %vec1, so the stored vector is
; <vec1[2], vec0[2], vec0[2], vec0[2]>. Both sources are produced by opaque
; inline asm with a "=v" constraint; the result is stored through %ptr.
3458define void @v_shuffle_v4i32_v3i32__5_2_2_2(ptr addrspace(1) inreg %ptr) {
3459; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_2_2:
3460; GFX900:       ; %bb.0:
3461; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3462; GFX900-NEXT:    ;;#ASMSTART
3463; GFX900-NEXT:    ; def v[0:2]
3464; GFX900-NEXT:    ;;#ASMEND
3465; GFX900-NEXT:    ;;#ASMSTART
3466; GFX900-NEXT:    ; def v[3:5]
3467; GFX900-NEXT:    ;;#ASMEND
3468; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3469; GFX900-NEXT:    v_mov_b32_e32 v0, v5
3470; GFX900-NEXT:    v_mov_b32_e32 v1, v2
3471; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3472; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
3473; GFX900-NEXT:    s_waitcnt vmcnt(0)
3474; GFX900-NEXT:    s_setpc_b64 s[30:31]
3475;
3476; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_2_2:
3477; GFX90A:       ; %bb.0:
3478; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3479; GFX90A-NEXT:    ;;#ASMSTART
3480; GFX90A-NEXT:    ; def v[0:2]
3481; GFX90A-NEXT:    ;;#ASMEND
3482; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
3483; GFX90A-NEXT:    ;;#ASMSTART
3484; GFX90A-NEXT:    ; def v[4:6]
3485; GFX90A-NEXT:    ;;#ASMEND
3486; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
3487; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
3488; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3489; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3490; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3491; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3492;
3493; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_2_2:
3494; GFX940:       ; %bb.0:
3495; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3496; GFX940-NEXT:    ;;#ASMSTART
3497; GFX940-NEXT:    ; def v[0:2]
3498; GFX940-NEXT:    ;;#ASMEND
3499; GFX940-NEXT:    v_mov_b32_e32 v7, 0
3500; GFX940-NEXT:    ;;#ASMSTART
3501; GFX940-NEXT:    ; def v[4:6]
3502; GFX940-NEXT:    ;;#ASMEND
3503; GFX940-NEXT:    v_mov_b32_e32 v1, v2
3504; GFX940-NEXT:    v_mov_b32_e32 v0, v6
3505; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3506; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
3507; GFX940-NEXT:    s_waitcnt vmcnt(0)
3508; GFX940-NEXT:    s_setpc_b64 s[30:31]
3509  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3510  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3511  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
3512  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3513  ret void
3514}
3515
; Two-source shuffle with mask <5, poison, 2, 2>. Mask indices 0-2 select
; elements of %vec0 and indices 3-5 select elements of %vec1, so the stored
; vector is <vec1[2], unspecified, vec0[2], vec0[2]>. Both sources are produced
; by opaque inline asm with a "=v" constraint; the result is stored through
; %ptr.
3516define void @v_shuffle_v4i32_v3i32__5_u_2_2(ptr addrspace(1) inreg %ptr) {
3517; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2:
3518; GFX900:       ; %bb.0:
3519; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3520; GFX900-NEXT:    ;;#ASMSTART
3521; GFX900-NEXT:    ; def v[0:2]
3522; GFX900-NEXT:    ;;#ASMEND
3523; GFX900-NEXT:    ;;#ASMSTART
3524; GFX900-NEXT:    ; def v[3:5]
3525; GFX900-NEXT:    ;;#ASMEND
3526; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3527; GFX900-NEXT:    v_mov_b32_e32 v0, v5
3528; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3529; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
3530; GFX900-NEXT:    s_waitcnt vmcnt(0)
3531; GFX900-NEXT:    s_setpc_b64 s[30:31]
3532;
3533; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2:
3534; GFX90A:       ; %bb.0:
3535; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3536; GFX90A-NEXT:    ;;#ASMSTART
3537; GFX90A-NEXT:    ; def v[0:2]
3538; GFX90A-NEXT:    ;;#ASMEND
3539; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
3540; GFX90A-NEXT:    ;;#ASMSTART
3541; GFX90A-NEXT:    ; def v[4:6]
3542; GFX90A-NEXT:    ;;#ASMEND
3543; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
3544; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3545; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3546; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3547; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3548;
3549; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_2_2:
3550; GFX940:       ; %bb.0:
3551; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3552; GFX940-NEXT:    ;;#ASMSTART
3553; GFX940-NEXT:    ; def v[0:2]
3554; GFX940-NEXT:    ;;#ASMEND
3555; GFX940-NEXT:    v_mov_b32_e32 v7, 0
3556; GFX940-NEXT:    ;;#ASMSTART
3557; GFX940-NEXT:    ; def v[4:6]
3558; GFX940-NEXT:    ;;#ASMEND
3559; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3560; GFX940-NEXT:    v_mov_b32_e32 v0, v6
3561; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
3562; GFX940-NEXT:    s_waitcnt vmcnt(0)
3563; GFX940-NEXT:    s_setpc_b64 s[30:31]
3564  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3565  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3566  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
3567  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3568  ret void
3569}
3570
; Two-source shuffle with mask <5, 0, 2, 2>. Mask indices 0-2 select elements
; of %vec0 and indices 3-5 select elements of %vec1, so the stored vector is
; <vec1[2], vec0[0], vec0[2], vec0[2]>. Both sources are produced by opaque
; inline asm with a "=v" constraint; the result is stored through %ptr.
3571define void @v_shuffle_v4i32_v3i32__5_0_2_2(ptr addrspace(1) inreg %ptr) {
3572; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_2_2:
3573; GFX900:       ; %bb.0:
3574; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3575; GFX900-NEXT:    ;;#ASMSTART
3576; GFX900-NEXT:    ; def v[1:3]
3577; GFX900-NEXT:    ;;#ASMEND
3578; GFX900-NEXT:    v_mov_b32_e32 v7, 0
3579; GFX900-NEXT:    ;;#ASMSTART
3580; GFX900-NEXT:    ; def v[4:6]
3581; GFX900-NEXT:    ;;#ASMEND
3582; GFX900-NEXT:    v_mov_b32_e32 v0, v6
3583; GFX900-NEXT:    v_mov_b32_e32 v2, v3
3584; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3585; GFX900-NEXT:    s_waitcnt vmcnt(0)
3586; GFX900-NEXT:    s_setpc_b64 s[30:31]
3587;
3588; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_2_2:
3589; GFX90A:       ; %bb.0:
3590; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3591; GFX90A-NEXT:    ;;#ASMSTART
3592; GFX90A-NEXT:    ; def v[2:4]
3593; GFX90A-NEXT:    ;;#ASMEND
3594; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
3595; GFX90A-NEXT:    ;;#ASMSTART
3596; GFX90A-NEXT:    ; def v[6:8]
3597; GFX90A-NEXT:    ;;#ASMEND
3598; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
3599; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
3600; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3601; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
3602; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
3603; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3604; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3605;
3606; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_2_2:
3607; GFX940:       ; %bb.0:
3608; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3609; GFX940-NEXT:    ;;#ASMSTART
3610; GFX940-NEXT:    ; def v[2:4]
3611; GFX940-NEXT:    ;;#ASMEND
3612; GFX940-NEXT:    v_mov_b32_e32 v5, 0
3613; GFX940-NEXT:    ;;#ASMSTART
3614; GFX940-NEXT:    ; def v[6:8]
3615; GFX940-NEXT:    ;;#ASMEND
3616; GFX940-NEXT:    v_mov_b32_e32 v1, v2
3617; GFX940-NEXT:    v_mov_b32_e32 v0, v8
3618; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3619; GFX940-NEXT:    v_mov_b32_e32 v3, v4
3620; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
3621; GFX940-NEXT:    s_waitcnt vmcnt(0)
3622; GFX940-NEXT:    s_setpc_b64 s[30:31]
3623  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3624  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3625  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
3626  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3627  ret void
3628}
3629
; Two-source shuffle with mask <5, 1, 2, 2>. Mask indices 0-2 select elements
; of %vec0 and indices 3-5 select elements of %vec1, so the stored vector is
; <vec1[2], vec0[1], vec0[2], vec0[2]>. Both sources are produced by opaque
; inline asm with a "=v" constraint; the result is stored through %ptr.
3630define void @v_shuffle_v4i32_v3i32__5_1_2_2(ptr addrspace(1) inreg %ptr) {
3631; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2:
3632; GFX900:       ; %bb.0:
3633; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3634; GFX900-NEXT:    ;;#ASMSTART
3635; GFX900-NEXT:    ; def v[0:2]
3636; GFX900-NEXT:    ;;#ASMEND
3637; GFX900-NEXT:    ;;#ASMSTART
3638; GFX900-NEXT:    ; def v[3:5]
3639; GFX900-NEXT:    ;;#ASMEND
3640; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3641; GFX900-NEXT:    v_mov_b32_e32 v0, v5
3642; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3643; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
3644; GFX900-NEXT:    s_waitcnt vmcnt(0)
3645; GFX900-NEXT:    s_setpc_b64 s[30:31]
3646;
3647; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2:
3648; GFX90A:       ; %bb.0:
3649; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3650; GFX90A-NEXT:    ;;#ASMSTART
3651; GFX90A-NEXT:    ; def v[0:2]
3652; GFX90A-NEXT:    ;;#ASMEND
3653; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
3654; GFX90A-NEXT:    ;;#ASMSTART
3655; GFX90A-NEXT:    ; def v[4:6]
3656; GFX90A-NEXT:    ;;#ASMEND
3657; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
3658; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3659; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3660; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3661; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3662;
3663; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_2_2:
3664; GFX940:       ; %bb.0:
3665; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3666; GFX940-NEXT:    ;;#ASMSTART
3667; GFX940-NEXT:    ; def v[0:2]
3668; GFX940-NEXT:    ;;#ASMEND
3669; GFX940-NEXT:    v_mov_b32_e32 v7, 0
3670; GFX940-NEXT:    ;;#ASMSTART
3671; GFX940-NEXT:    ; def v[4:6]
3672; GFX940-NEXT:    ;;#ASMEND
3673; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3674; GFX940-NEXT:    v_mov_b32_e32 v0, v6
3675; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
3676; GFX940-NEXT:    s_waitcnt vmcnt(0)
3677; GFX940-NEXT:    s_setpc_b64 s[30:31]
3678  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3679  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3680  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
3681  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3682  ret void
3683}
3684
; Two-source shuffle with mask <5, 3, 2, 2>. Mask indices 0-2 select elements
; of %vec0 and indices 3-5 select elements of %vec1, so the stored vector is
; <vec1[2], vec1[0], vec0[2], vec0[2]>. Both sources are produced by opaque
; inline asm with a "=v" constraint; the result is stored through %ptr.
3685define void @v_shuffle_v4i32_v3i32__5_3_2_2(ptr addrspace(1) inreg %ptr) {
3686; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_2_2:
3687; GFX900:       ; %bb.0:
3688; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3689; GFX900-NEXT:    ;;#ASMSTART
3690; GFX900-NEXT:    ; def v[2:4]
3691; GFX900-NEXT:    ;;#ASMEND
3692; GFX900-NEXT:    ;;#ASMSTART
3693; GFX900-NEXT:    ; def v[1:3]
3694; GFX900-NEXT:    ;;#ASMEND
3695; GFX900-NEXT:    v_mov_b32_e32 v5, 0
3696; GFX900-NEXT:    v_mov_b32_e32 v0, v3
3697; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3698; GFX900-NEXT:    v_mov_b32_e32 v3, v4
3699; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
3700; GFX900-NEXT:    s_waitcnt vmcnt(0)
3701; GFX900-NEXT:    s_setpc_b64 s[30:31]
3702;
3703; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_2_2:
3704; GFX90A:       ; %bb.0:
3705; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3706; GFX90A-NEXT:    ;;#ASMSTART
3707; GFX90A-NEXT:    ; def v[0:2]
3708; GFX90A-NEXT:    ;;#ASMEND
3709; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
3710; GFX90A-NEXT:    ;;#ASMSTART
3711; GFX90A-NEXT:    ; def v[4:6]
3712; GFX90A-NEXT:    ;;#ASMEND
3713; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
3714; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
3715; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3716; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3717; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3718; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3719;
3720; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_2_2:
3721; GFX940:       ; %bb.0:
3722; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3723; GFX940-NEXT:    ;;#ASMSTART
3724; GFX940-NEXT:    ; def v[0:2]
3725; GFX940-NEXT:    ;;#ASMEND
3726; GFX940-NEXT:    v_mov_b32_e32 v7, 0
3727; GFX940-NEXT:    ;;#ASMSTART
3728; GFX940-NEXT:    ; def v[4:6]
3729; GFX940-NEXT:    ;;#ASMEND
3730; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3731; GFX940-NEXT:    v_mov_b32_e32 v0, v6
3732; GFX940-NEXT:    v_mov_b32_e32 v1, v4
3733; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
3734; GFX940-NEXT:    s_waitcnt vmcnt(0)
3735; GFX940-NEXT:    s_setpc_b64 s[30:31]
3736  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3737  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3738  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
3739  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3740  ret void
3741}
3742
; Two-source shuffle with mask <5, 4, 2, 2>. Mask indices 0-2 select elements
; of %vec0 and indices 3-5 select elements of %vec1, so the stored vector is
; <vec1[2], vec1[1], vec0[2], vec0[2]>. Both sources are produced by opaque
; inline asm with a "=v" constraint; the result is stored through %ptr.
3743define void @v_shuffle_v4i32_v3i32__5_4_2_2(ptr addrspace(1) inreg %ptr) {
3744; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_2_2:
3745; GFX900:       ; %bb.0:
3746; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3747; GFX900-NEXT:    ;;#ASMSTART
3748; GFX900-NEXT:    ; def v[1:3]
3749; GFX900-NEXT:    ;;#ASMEND
3750; GFX900-NEXT:    ;;#ASMSTART
3751; GFX900-NEXT:    ; def v[0:2]
3752; GFX900-NEXT:    ;;#ASMEND
3753; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3754; GFX900-NEXT:    v_mov_b32_e32 v0, v2
3755; GFX900-NEXT:    v_mov_b32_e32 v2, v3
3756; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3757; GFX900-NEXT:    s_waitcnt vmcnt(0)
3758; GFX900-NEXT:    s_setpc_b64 s[30:31]
3759;
3760; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_2_2:
3761; GFX90A:       ; %bb.0:
3762; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3763; GFX90A-NEXT:    ;;#ASMSTART
3764; GFX90A-NEXT:    ; def v[2:4]
3765; GFX90A-NEXT:    ;;#ASMEND
3766; GFX90A-NEXT:    ;;#ASMSTART
3767; GFX90A-NEXT:    ; def v[0:2]
3768; GFX90A-NEXT:    ;;#ASMEND
3769; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
3770; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
3771; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3772; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
3773; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
3774; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3775; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3776;
3777; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_2_2:
3778; GFX940:       ; %bb.0:
3779; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3780; GFX940-NEXT:    ;;#ASMSTART
3781; GFX940-NEXT:    ; def v[2:4]
3782; GFX940-NEXT:    ;;#ASMEND
3783; GFX940-NEXT:    v_mov_b32_e32 v5, 0
3784; GFX940-NEXT:    ;;#ASMSTART
3785; GFX940-NEXT:    ; def v[0:2]
3786; GFX940-NEXT:    ;;#ASMEND
3787; GFX940-NEXT:    v_mov_b32_e32 v3, v4
3788; GFX940-NEXT:    v_mov_b32_e32 v0, v2
3789; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3790; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
3791; GFX940-NEXT:    s_waitcnt vmcnt(0)
3792; GFX940-NEXT:    s_setpc_b64 s[30:31]
3793  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3794  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3795  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
3796  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3797  ret void
3798}
3799
; Two-source shuffle with mask <5, 5, 2, 2>. Mask indices 0-2 select elements
; of %vec0 and indices 3-5 select elements of %vec1, so the stored vector is
; <vec1[2], vec1[2], vec0[2], vec0[2]>. Both sources are produced by opaque
; inline asm with a "=v" constraint; the result is stored through %ptr.
3800define void @v_shuffle_v4i32_v3i32__5_5_2_2(ptr addrspace(1) inreg %ptr) {
3801; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_2:
3802; GFX900:       ; %bb.0:
3803; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3804; GFX900-NEXT:    ;;#ASMSTART
3805; GFX900-NEXT:    ; def v[0:2]
3806; GFX900-NEXT:    ;;#ASMEND
3807; GFX900-NEXT:    ;;#ASMSTART
3808; GFX900-NEXT:    ; def v[3:5]
3809; GFX900-NEXT:    ;;#ASMEND
3810; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3811; GFX900-NEXT:    v_mov_b32_e32 v0, v5
3812; GFX900-NEXT:    v_mov_b32_e32 v1, v5
3813; GFX900-NEXT:    v_mov_b32_e32 v3, v2
3814; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
3815; GFX900-NEXT:    s_waitcnt vmcnt(0)
3816; GFX900-NEXT:    s_setpc_b64 s[30:31]
3817;
3818; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_2:
3819; GFX90A:       ; %bb.0:
3820; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3821; GFX90A-NEXT:    ;;#ASMSTART
3822; GFX90A-NEXT:    ; def v[0:2]
3823; GFX90A-NEXT:    ;;#ASMEND
3824; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
3825; GFX90A-NEXT:    ;;#ASMSTART
3826; GFX90A-NEXT:    ; def v[4:6]
3827; GFX90A-NEXT:    ;;#ASMEND
3828; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
3829; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
3830; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3831; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3832; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3833; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3834;
3835; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_2:
3836; GFX940:       ; %bb.0:
3837; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3838; GFX940-NEXT:    ;;#ASMSTART
3839; GFX940-NEXT:    ; def v[0:2]
3840; GFX940-NEXT:    ;;#ASMEND
3841; GFX940-NEXT:    v_mov_b32_e32 v7, 0
3842; GFX940-NEXT:    ;;#ASMSTART
3843; GFX940-NEXT:    ; def v[4:6]
3844; GFX940-NEXT:    ;;#ASMEND
3845; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3846; GFX940-NEXT:    v_mov_b32_e32 v0, v6
3847; GFX940-NEXT:    v_mov_b32_e32 v1, v6
3848; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
3849; GFX940-NEXT:    s_waitcnt vmcnt(0)
3850; GFX940-NEXT:    s_setpc_b64 s[30:31]
3851  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3852  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3853  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
3854  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3855  ret void
3856}
3857
; Two-source shuffle with mask <5, 5, poison, 2>. Mask indices 0-2 select
; elements of %vec0 and indices 3-5 select elements of %vec1, so the stored
; vector is <vec1[2], vec1[2], unspecified, vec0[2]>. Both sources are produced
; by opaque inline asm with a "=v" constraint; the result is stored through
; %ptr.
3858define void @v_shuffle_v4i32_v3i32__5_5_u_2(ptr addrspace(1) inreg %ptr) {
3859; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_2:
3860; GFX900:       ; %bb.0:
3861; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3862; GFX900-NEXT:    ;;#ASMSTART
3863; GFX900-NEXT:    ; def v[1:3]
3864; GFX900-NEXT:    ;;#ASMEND
3865; GFX900-NEXT:    ;;#ASMSTART
3866; GFX900-NEXT:    ; def v[0:2]
3867; GFX900-NEXT:    ;;#ASMEND
3868; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3869; GFX900-NEXT:    v_mov_b32_e32 v0, v2
3870; GFX900-NEXT:    v_mov_b32_e32 v1, v2
3871; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
3872; GFX900-NEXT:    s_waitcnt vmcnt(0)
3873; GFX900-NEXT:    s_setpc_b64 s[30:31]
3874;
3875; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_2:
3876; GFX90A:       ; %bb.0:
3877; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3878; GFX90A-NEXT:    ;;#ASMSTART
3879; GFX90A-NEXT:    ; def v[0:2]
3880; GFX90A-NEXT:    ;;#ASMEND
3881; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
3882; GFX90A-NEXT:    ;;#ASMSTART
3883; GFX90A-NEXT:    ; def v[4:6]
3884; GFX90A-NEXT:    ;;#ASMEND
3885; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
3886; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
3887; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
3888; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3889; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3890; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3891;
3892; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_2:
3893; GFX940:       ; %bb.0:
3894; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3895; GFX940-NEXT:    ;;#ASMSTART
3896; GFX940-NEXT:    ; def v[0:2]
3897; GFX940-NEXT:    ;;#ASMEND
3898; GFX940-NEXT:    v_mov_b32_e32 v7, 0
3899; GFX940-NEXT:    ;;#ASMSTART
3900; GFX940-NEXT:    ; def v[4:6]
3901; GFX940-NEXT:    ;;#ASMEND
3902; GFX940-NEXT:    v_mov_b32_e32 v3, v2
3903; GFX940-NEXT:    v_mov_b32_e32 v0, v6
3904; GFX940-NEXT:    v_mov_b32_e32 v1, v6
3905; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
3906; GFX940-NEXT:    s_waitcnt vmcnt(0)
3907; GFX940-NEXT:    s_setpc_b64 s[30:31]
3908  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3909  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3910  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
3911  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3912  ret void
3913}
3914
; Two-input shuffle with mask <5, 5, 0, 2>: result lanes 0 and 1 take lane 2
; of %vec1 (mask index 5), lane 2 takes lane 0 of %vec0, and lane 3 takes
; lane 2 of %vec0. Both inputs come from opaque inline-asm defs and the
; shuffled <4 x i32> is stored through %ptr.
3915define void @v_shuffle_v4i32_v3i32__5_5_0_2(ptr addrspace(1) inreg %ptr) {
3916; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_2:
3917; GFX900:       ; %bb.0:
3918; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3919; GFX900-NEXT:    ;;#ASMSTART
3920; GFX900-NEXT:    ; def v[2:4]
3921; GFX900-NEXT:    ;;#ASMEND
3922; GFX900-NEXT:    v_mov_b32_e32 v8, 0
3923; GFX900-NEXT:    ;;#ASMSTART
3924; GFX900-NEXT:    ; def v[5:7]
3925; GFX900-NEXT:    ;;#ASMEND
3926; GFX900-NEXT:    v_mov_b32_e32 v0, v7
3927; GFX900-NEXT:    v_mov_b32_e32 v1, v7
3928; GFX900-NEXT:    v_mov_b32_e32 v3, v4
3929; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17]
3930; GFX900-NEXT:    s_waitcnt vmcnt(0)
3931; GFX900-NEXT:    s_setpc_b64 s[30:31]
3932;
3933; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_2:
3934; GFX90A:       ; %bb.0:
3935; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3936; GFX90A-NEXT:    ;;#ASMSTART
3937; GFX90A-NEXT:    ; def v[2:4]
3938; GFX90A-NEXT:    ;;#ASMEND
3939; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
3940; GFX90A-NEXT:    ;;#ASMSTART
3941; GFX90A-NEXT:    ; def v[6:8]
3942; GFX90A-NEXT:    ;;#ASMEND
3943; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
3944; GFX90A-NEXT:    v_mov_b32_e32 v1, v8
3945; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
3946; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
3947; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3948; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3949;
3950; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_2:
3951; GFX940:       ; %bb.0:
3952; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3953; GFX940-NEXT:    ;;#ASMSTART
3954; GFX940-NEXT:    ; def v[2:4]
3955; GFX940-NEXT:    ;;#ASMEND
3956; GFX940-NEXT:    v_mov_b32_e32 v5, 0
3957; GFX940-NEXT:    ;;#ASMSTART
3958; GFX940-NEXT:    ; def v[6:8]
3959; GFX940-NEXT:    ;;#ASMEND
3960; GFX940-NEXT:    v_mov_b32_e32 v3, v4
3961; GFX940-NEXT:    v_mov_b32_e32 v0, v8
3962; GFX940-NEXT:    v_mov_b32_e32 v1, v8
3963; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
3964; GFX940-NEXT:    s_waitcnt vmcnt(0)
3965; GFX940-NEXT:    s_setpc_b64 s[30:31]
3966  %vec0 = call <3 x i32> asm "; def $0", "=v"()
3967  %vec1 = call <3 x i32> asm "; def $0", "=v"()
3968  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
3969  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
3970  ret void
3971}
3972
; Two-input shuffle with mask <5, 5, 1, 2>: result lanes 0 and 1 take lane 2
; of %vec1 (mask index 5), while lanes 2 and 3 take lanes 1 and 2 of %vec0.
; Both inputs come from opaque inline-asm defs and the shuffled <4 x i32> is
; stored through %ptr.
3973define void @v_shuffle_v4i32_v3i32__5_5_1_2(ptr addrspace(1) inreg %ptr) {
3974; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_2:
3975; GFX900:       ; %bb.0:
3976; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3977; GFX900-NEXT:    ;;#ASMSTART
3978; GFX900-NEXT:    ; def v[1:3]
3979; GFX900-NEXT:    ;;#ASMEND
3980; GFX900-NEXT:    v_mov_b32_e32 v7, 0
3981; GFX900-NEXT:    ;;#ASMSTART
3982; GFX900-NEXT:    ; def v[4:6]
3983; GFX900-NEXT:    ;;#ASMEND
3984; GFX900-NEXT:    v_mov_b32_e32 v0, v6
3985; GFX900-NEXT:    v_mov_b32_e32 v1, v6
3986; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
3987; GFX900-NEXT:    s_waitcnt vmcnt(0)
3988; GFX900-NEXT:    s_setpc_b64 s[30:31]
3989;
3990; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_2:
3991; GFX90A:       ; %bb.0:
3992; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3993; GFX90A-NEXT:    ;;#ASMSTART
3994; GFX90A-NEXT:    ; def v[2:4]
3995; GFX90A-NEXT:    ;;#ASMEND
3996; GFX90A-NEXT:    ;;#ASMSTART
3997; GFX90A-NEXT:    ; def v[0:2]
3998; GFX90A-NEXT:    ;;#ASMEND
3999; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
4000; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
4001; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
4002; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
4003; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4004; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
4005; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4006; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4007;
4008; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_2:
4009; GFX940:       ; %bb.0:
4010; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4011; GFX940-NEXT:    ;;#ASMSTART
4012; GFX940-NEXT:    ; def v[2:4]
4013; GFX940-NEXT:    ;;#ASMEND
4014; GFX940-NEXT:    v_mov_b32_e32 v5, 0
4015; GFX940-NEXT:    ;;#ASMSTART
4016; GFX940-NEXT:    ; def v[0:2]
4017; GFX940-NEXT:    ;;#ASMEND
4018; GFX940-NEXT:    s_nop 0
4019; GFX940-NEXT:    v_mov_b32_e32 v0, v2
4020; GFX940-NEXT:    v_mov_b32_e32 v1, v2
4021; GFX940-NEXT:    v_mov_b32_e32 v2, v3
4022; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4023; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
4024; GFX940-NEXT:    s_waitcnt vmcnt(0)
4025; GFX940-NEXT:    s_setpc_b64 s[30:31]
4026  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4027  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4028  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
4029  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4030  ret void
4031}
4032
; Two-input shuffle with mask <5, 5, 3, 2>: result lanes 0 and 1 take lane 2
; of %vec1 (mask index 5), lane 2 takes lane 0 of %vec1 (mask index 3), and
; lane 3 takes lane 2 of %vec0. Both inputs come from opaque inline-asm defs
; and the shuffled <4 x i32> is stored through %ptr.
4033define void @v_shuffle_v4i32_v3i32__5_5_3_2(ptr addrspace(1) inreg %ptr) {
4034; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2:
4035; GFX900:       ; %bb.0:
4036; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4037; GFX900-NEXT:    ;;#ASMSTART
4038; GFX900-NEXT:    ; def v[1:3]
4039; GFX900-NEXT:    ;;#ASMEND
4040; GFX900-NEXT:    v_mov_b32_e32 v7, 0
4041; GFX900-NEXT:    ;;#ASMSTART
4042; GFX900-NEXT:    ; def v[4:6]
4043; GFX900-NEXT:    ;;#ASMEND
4044; GFX900-NEXT:    v_mov_b32_e32 v0, v6
4045; GFX900-NEXT:    v_mov_b32_e32 v1, v6
4046; GFX900-NEXT:    v_mov_b32_e32 v2, v4
4047; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4048; GFX900-NEXT:    s_waitcnt vmcnt(0)
4049; GFX900-NEXT:    s_setpc_b64 s[30:31]
4050;
4051; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2:
4052; GFX90A:       ; %bb.0:
4053; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4054; GFX90A-NEXT:    ;;#ASMSTART
4055; GFX90A-NEXT:    ; def v[2:4]
4056; GFX90A-NEXT:    ;;#ASMEND
4057; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
4058; GFX90A-NEXT:    ;;#ASMSTART
4059; GFX90A-NEXT:    ; def v[6:8]
4060; GFX90A-NEXT:    ;;#ASMEND
4061; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
4062; GFX90A-NEXT:    v_mov_b32_e32 v1, v8
4063; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
4064; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4065; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
4066; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4067; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4068;
4069; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_2:
4070; GFX940:       ; %bb.0:
4071; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4072; GFX940-NEXT:    ;;#ASMSTART
4073; GFX940-NEXT:    ; def v[2:4]
4074; GFX940-NEXT:    ;;#ASMEND
4075; GFX940-NEXT:    v_mov_b32_e32 v5, 0
4076; GFX940-NEXT:    ;;#ASMSTART
4077; GFX940-NEXT:    ; def v[6:8]
4078; GFX940-NEXT:    ;;#ASMEND
4079; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4080; GFX940-NEXT:    v_mov_b32_e32 v0, v8
4081; GFX940-NEXT:    v_mov_b32_e32 v1, v8
4082; GFX940-NEXT:    v_mov_b32_e32 v2, v6
4083; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
4084; GFX940-NEXT:    s_waitcnt vmcnt(0)
4085; GFX940-NEXT:    s_setpc_b64 s[30:31]
4086  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4087  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4088  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
4089  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4090  ret void
4091}
4092
; Two-input shuffle with mask <5, 5, 4, 2>: result lanes 0 and 1 take lane 2
; of %vec1 (mask index 5), lane 2 takes lane 1 of %vec1 (mask index 4), and
; lane 3 takes lane 2 of %vec0. Both inputs come from opaque inline-asm defs
; and the shuffled <4 x i32> is stored through %ptr.
4093define void @v_shuffle_v4i32_v3i32__5_5_4_2(ptr addrspace(1) inreg %ptr) {
4094; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_2:
4095; GFX900:       ; %bb.0:
4096; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4097; GFX900-NEXT:    ;;#ASMSTART
4098; GFX900-NEXT:    ; def v[2:4]
4099; GFX900-NEXT:    ;;#ASMEND
4100; GFX900-NEXT:    ;;#ASMSTART
4101; GFX900-NEXT:    ; def v[1:3]
4102; GFX900-NEXT:    ;;#ASMEND
4103; GFX900-NEXT:    v_mov_b32_e32 v5, 0
4104; GFX900-NEXT:    v_mov_b32_e32 v0, v3
4105; GFX900-NEXT:    v_mov_b32_e32 v1, v3
4106; GFX900-NEXT:    v_mov_b32_e32 v3, v4
4107; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
4108; GFX900-NEXT:    s_waitcnt vmcnt(0)
4109; GFX900-NEXT:    s_setpc_b64 s[30:31]
4110;
4111; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_2:
4112; GFX90A:       ; %bb.0:
4113; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4114; GFX90A-NEXT:    ;;#ASMSTART
4115; GFX90A-NEXT:    ; def v[2:4]
4116; GFX90A-NEXT:    ;;#ASMEND
4117; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
4118; GFX90A-NEXT:    ;;#ASMSTART
4119; GFX90A-NEXT:    ; def v[6:8]
4120; GFX90A-NEXT:    ;;#ASMEND
4121; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
4122; GFX90A-NEXT:    v_mov_b32_e32 v1, v8
4123; GFX90A-NEXT:    v_mov_b32_e32 v2, v7
4124; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4125; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
4126; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4127; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4128;
4129; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_2:
4130; GFX940:       ; %bb.0:
4131; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4132; GFX940-NEXT:    ;;#ASMSTART
4133; GFX940-NEXT:    ; def v[2:4]
4134; GFX940-NEXT:    ;;#ASMEND
4135; GFX940-NEXT:    v_mov_b32_e32 v5, 0
4136; GFX940-NEXT:    ;;#ASMSTART
4137; GFX940-NEXT:    ; def v[6:8]
4138; GFX940-NEXT:    ;;#ASMEND
4139; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4140; GFX940-NEXT:    v_mov_b32_e32 v0, v8
4141; GFX940-NEXT:    v_mov_b32_e32 v1, v8
4142; GFX940-NEXT:    v_mov_b32_e32 v2, v7
4143; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
4144; GFX940-NEXT:    s_waitcnt vmcnt(0)
4145; GFX940-NEXT:    s_setpc_b64 s[30:31]
4146  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4147  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4148  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
4149  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4150  ret void
4151}
4152
; Shuffle with mask <poison, 3, 3, 3> whose second operand is poison. Mask
; index 3 addresses lane 0 of that poison operand, so no result lane selects
; a defined value; the expected output below contains neither the inline-asm
; def nor a store.
; NOTE(review): the function name suggests lane 0 of a real second vector was
; intended (compare the __4_3_3_3 test, which passes %vec1) - confirm against
; the test generator before relying on this case for coverage.
4153define void @v_shuffle_v4i32_v3i32__u_3_3_3(ptr addrspace(1) inreg %ptr) {
4154; GFX9-LABEL: v_shuffle_v4i32_v3i32__u_3_3_3:
4155; GFX9:       ; %bb.0:
4156; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4157; GFX9-NEXT:    s_setpc_b64 s[30:31]
4158  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4159  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
4160  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4161  ret void
4162}
4163
; Shuffle with mask <0, 3, 3, 3> whose second operand is poison. Result lane
; 0 takes lane 0 of %vec0; mask index 3 addresses lane 0 of the poison
; operand, so lanes 1-3 carry no defined value. The expected output stores
; v[0:3] without any lane moves.
; NOTE(review): the function name suggests lane 0 of a real second vector was
; intended for lanes 1-3 - confirm against the test generator.
4164define void @v_shuffle_v4i32_v3i32__0_3_3_3(ptr addrspace(1) inreg %ptr) {
4165; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_3_3_3:
4166; GFX900:       ; %bb.0:
4167; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4168; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4169; GFX900-NEXT:    ;;#ASMSTART
4170; GFX900-NEXT:    ; def v[0:2]
4171; GFX900-NEXT:    ;;#ASMEND
4172; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
4173; GFX900-NEXT:    s_waitcnt vmcnt(0)
4174; GFX900-NEXT:    s_setpc_b64 s[30:31]
4175;
4176; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_3_3_3:
4177; GFX90A:       ; %bb.0:
4178; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4179; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
4180; GFX90A-NEXT:    ;;#ASMSTART
4181; GFX90A-NEXT:    ; def v[0:2]
4182; GFX90A-NEXT:    ;;#ASMEND
4183; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
4184; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4185; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4186;
4187; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_3_3_3:
4188; GFX940:       ; %bb.0:
4189; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4190; GFX940-NEXT:    v_mov_b32_e32 v3, 0
4191; GFX940-NEXT:    ;;#ASMSTART
4192; GFX940-NEXT:    ; def v[0:2]
4193; GFX940-NEXT:    ;;#ASMEND
4194; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
4195; GFX940-NEXT:    s_waitcnt vmcnt(0)
4196; GFX940-NEXT:    s_setpc_b64 s[30:31]
4197  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4198  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
4199  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4200  ret void
4201}
4202
; Shuffle with mask <1, 3, 3, 3> whose second operand is poison. Result lane
; 0 takes lane 1 of %vec0; mask index 3 addresses lane 0 of the poison
; operand, so lanes 1-3 carry no defined value. The expected output performs
; a single move into v0 before the store.
; NOTE(review): the function name suggests lane 0 of a real second vector was
; intended for lanes 1-3 - confirm against the test generator.
4203define void @v_shuffle_v4i32_v3i32__1_3_3_3(ptr addrspace(1) inreg %ptr) {
4204; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3:
4205; GFX900:       ; %bb.0:
4206; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4207; GFX900-NEXT:    ;;#ASMSTART
4208; GFX900-NEXT:    ; def v[0:2]
4209; GFX900-NEXT:    ;;#ASMEND
4210; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4211; GFX900-NEXT:    v_mov_b32_e32 v0, v1
4212; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
4213; GFX900-NEXT:    s_waitcnt vmcnt(0)
4214; GFX900-NEXT:    s_setpc_b64 s[30:31]
4215;
4216; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3:
4217; GFX90A:       ; %bb.0:
4218; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4219; GFX90A-NEXT:    ;;#ASMSTART
4220; GFX90A-NEXT:    ; def v[0:2]
4221; GFX90A-NEXT:    ;;#ASMEND
4222; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
4223; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
4224; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
4225; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4226; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4227;
4228; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_3_3_3:
4229; GFX940:       ; %bb.0:
4230; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4231; GFX940-NEXT:    ;;#ASMSTART
4232; GFX940-NEXT:    ; def v[0:2]
4233; GFX940-NEXT:    ;;#ASMEND
4234; GFX940-NEXT:    v_mov_b32_e32 v3, 0
4235; GFX940-NEXT:    v_mov_b32_e32 v0, v1
4236; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
4237; GFX940-NEXT:    s_waitcnt vmcnt(0)
4238; GFX940-NEXT:    s_setpc_b64 s[30:31]
4239  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4240  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
4241  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4242  ret void
4243}
4244
; Shuffle with mask <2, 3, 3, 3> whose second operand is poison. Result lane
; 0 takes lane 2 of %vec0; mask index 3 addresses lane 0 of the poison
; operand, so lanes 1-3 carry no defined value. The expected output performs
; a single move into v0 before the store.
; NOTE(review): the function name suggests lane 0 of a real second vector was
; intended for lanes 1-3 - confirm against the test generator.
4245define void @v_shuffle_v4i32_v3i32__2_3_3_3(ptr addrspace(1) inreg %ptr) {
4246; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3:
4247; GFX900:       ; %bb.0:
4248; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4249; GFX900-NEXT:    ;;#ASMSTART
4250; GFX900-NEXT:    ; def v[0:2]
4251; GFX900-NEXT:    ;;#ASMEND
4252; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4253; GFX900-NEXT:    v_mov_b32_e32 v0, v2
4254; GFX900-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
4255; GFX900-NEXT:    s_waitcnt vmcnt(0)
4256; GFX900-NEXT:    s_setpc_b64 s[30:31]
4257;
4258; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3:
4259; GFX90A:       ; %bb.0:
4260; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4261; GFX90A-NEXT:    ;;#ASMSTART
4262; GFX90A-NEXT:    ; def v[0:2]
4263; GFX90A-NEXT:    ;;#ASMEND
4264; GFX90A-NEXT:    v_mov_b32_e32 v3, 0
4265; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
4266; GFX90A-NEXT:    global_store_dwordx4 v3, v[0:3], s[16:17]
4267; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4268; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4269;
4270; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_3_3_3:
4271; GFX940:       ; %bb.0:
4272; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4273; GFX940-NEXT:    ;;#ASMSTART
4274; GFX940-NEXT:    ; def v[0:2]
4275; GFX940-NEXT:    ;;#ASMEND
4276; GFX940-NEXT:    v_mov_b32_e32 v3, 0
4277; GFX940-NEXT:    v_mov_b32_e32 v0, v2
4278; GFX940-NEXT:    global_store_dwordx4 v3, v[0:3], s[0:1] sc0 sc1
4279; GFX940-NEXT:    s_waitcnt vmcnt(0)
4280; GFX940-NEXT:    s_setpc_b64 s[30:31]
4281  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4282  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
4283  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4284  ret void
4285}
4286
; Shuffle with mask <3, 3, 3, 3> whose second operand is poison. Mask index 3
; addresses lane 0 of that poison operand, so no result lane selects a
; defined value; the expected output below contains neither the inline-asm
; def nor a store.
; NOTE(review): the function name suggests a splat of lane 0 of a real second
; vector was intended - confirm against the test generator before relying on
; this case for coverage.
4287define void @v_shuffle_v4i32_v3i32__3_3_3_3(ptr addrspace(1) inreg %ptr) {
4288; GFX9-LABEL: v_shuffle_v4i32_v3i32__3_3_3_3:
4289; GFX9:       ; %bb.0:
4290; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4291; GFX9-NEXT:    s_setpc_b64 s[30:31]
4292  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4293  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4294  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4295  ret void
4296}
4297
; Two-input shuffle with mask <4, 3, 3, 3>: result lane 0 takes lane 1 of
; %vec1 (mask index 4) and lanes 1-3 all take lane 0 of %vec1 (mask index 3).
; %vec0 is defined but never selected by the mask; the expected output below
; contains a single inline-asm def. The shuffled <4 x i32> is stored through
; %ptr.
4298define void @v_shuffle_v4i32_v3i32__4_3_3_3(ptr addrspace(1) inreg %ptr) {
4299; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3:
4300; GFX900:       ; %bb.0:
4301; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4302; GFX900-NEXT:    ;;#ASMSTART
4303; GFX900-NEXT:    ; def v[1:3]
4304; GFX900-NEXT:    ;;#ASMEND
4305; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4306; GFX900-NEXT:    v_mov_b32_e32 v0, v2
4307; GFX900-NEXT:    v_mov_b32_e32 v2, v1
4308; GFX900-NEXT:    v_mov_b32_e32 v3, v1
4309; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
4310; GFX900-NEXT:    s_waitcnt vmcnt(0)
4311; GFX900-NEXT:    s_setpc_b64 s[30:31]
4312;
4313; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3:
4314; GFX90A:       ; %bb.0:
4315; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4316; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4317; GFX90A-NEXT:    ;;#ASMSTART
4318; GFX90A-NEXT:    ; def v[4:6]
4319; GFX90A-NEXT:    ;;#ASMEND
4320; GFX90A-NEXT:    v_mov_b32_e32 v0, v5
4321; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
4322; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4323; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4324; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4325; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4326; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4327;
4328; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_3_3_3:
4329; GFX940:       ; %bb.0:
4330; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4331; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4332; GFX940-NEXT:    ;;#ASMSTART
4333; GFX940-NEXT:    ; def v[4:6]
4334; GFX940-NEXT:    ;;#ASMEND
4335; GFX940-NEXT:    s_nop 0
4336; GFX940-NEXT:    v_mov_b32_e32 v0, v5
4337; GFX940-NEXT:    v_mov_b32_e32 v1, v4
4338; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4339; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4340; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4341; GFX940-NEXT:    s_waitcnt vmcnt(0)
4342; GFX940-NEXT:    s_setpc_b64 s[30:31]
4343  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4344  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4345  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
4346  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4347  ret void
4348}
4349
; Two-input shuffle with mask <5, 3, 3, 3>: result lane 0 takes lane 2 of
; %vec1 (mask index 5) and lanes 1-3 all take lane 0 of %vec1 (mask index 3).
; %vec0 is defined but never selected by the mask; the expected output below
; contains a single inline-asm def. The shuffled <4 x i32> is stored through
; %ptr.
4350define void @v_shuffle_v4i32_v3i32__5_3_3_3(ptr addrspace(1) inreg %ptr) {
4351; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3:
4352; GFX900:       ; %bb.0:
4353; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4354; GFX900-NEXT:    ;;#ASMSTART
4355; GFX900-NEXT:    ; def v[1:3]
4356; GFX900-NEXT:    ;;#ASMEND
4357; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4358; GFX900-NEXT:    v_mov_b32_e32 v0, v3
4359; GFX900-NEXT:    v_mov_b32_e32 v2, v1
4360; GFX900-NEXT:    v_mov_b32_e32 v3, v1
4361; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
4362; GFX900-NEXT:    s_waitcnt vmcnt(0)
4363; GFX900-NEXT:    s_setpc_b64 s[30:31]
4364;
4365; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3:
4366; GFX90A:       ; %bb.0:
4367; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4368; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4369; GFX90A-NEXT:    ;;#ASMSTART
4370; GFX90A-NEXT:    ; def v[4:6]
4371; GFX90A-NEXT:    ;;#ASMEND
4372; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4373; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
4374; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4375; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4376; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4377; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4378; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4379;
4380; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_3_3:
4381; GFX940:       ; %bb.0:
4382; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4383; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4384; GFX940-NEXT:    ;;#ASMSTART
4385; GFX940-NEXT:    ; def v[4:6]
4386; GFX940-NEXT:    ;;#ASMEND
4387; GFX940-NEXT:    s_nop 0
4388; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4389; GFX940-NEXT:    v_mov_b32_e32 v1, v4
4390; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4391; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4392; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4393; GFX940-NEXT:    s_waitcnt vmcnt(0)
4394; GFX940-NEXT:    s_setpc_b64 s[30:31]
4395  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4396  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4397  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
4398  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4399  ret void
4400}
4401
; Two-input shuffle with mask <5, poison, 3, 3>: result lane 0 takes lane 2
; of %vec1 (mask index 5), lane 1 is left poison, and lanes 2 and 3 take
; lane 0 of %vec1 (mask index 3). %vec0 is defined but never selected by the
; mask; the expected output below contains a single inline-asm def. The
; shuffled <4 x i32> is stored through %ptr.
4402define void @v_shuffle_v4i32_v3i32__5_u_3_3(ptr addrspace(1) inreg %ptr) {
4403; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3:
4404; GFX900:       ; %bb.0:
4405; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4406; GFX900-NEXT:    ;;#ASMSTART
4407; GFX900-NEXT:    ; def v[1:3]
4408; GFX900-NEXT:    ;;#ASMEND
4409; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4410; GFX900-NEXT:    v_mov_b32_e32 v0, v3
4411; GFX900-NEXT:    v_mov_b32_e32 v2, v1
4412; GFX900-NEXT:    v_mov_b32_e32 v3, v1
4413; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
4414; GFX900-NEXT:    s_waitcnt vmcnt(0)
4415; GFX900-NEXT:    s_setpc_b64 s[30:31]
4416;
4417; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3:
4418; GFX90A:       ; %bb.0:
4419; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4420; GFX90A-NEXT:    v_mov_b32_e32 v1, 0
4421; GFX90A-NEXT:    ;;#ASMSTART
4422; GFX90A-NEXT:    ; def v[4:6]
4423; GFX90A-NEXT:    ;;#ASMEND
4424; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4425; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4426; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4427; GFX90A-NEXT:    global_store_dwordx4 v1, v[0:3], s[16:17]
4428; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4429; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4430;
4431; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_3_3:
4432; GFX940:       ; %bb.0:
4433; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4434; GFX940-NEXT:    v_mov_b32_e32 v1, 0
4435; GFX940-NEXT:    ;;#ASMSTART
4436; GFX940-NEXT:    ; def v[4:6]
4437; GFX940-NEXT:    ;;#ASMEND
4438; GFX940-NEXT:    s_nop 0
4439; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4440; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4441; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4442; GFX940-NEXT:    global_store_dwordx4 v1, v[0:3], s[0:1] sc0 sc1
4443; GFX940-NEXT:    s_waitcnt vmcnt(0)
4444; GFX940-NEXT:    s_setpc_b64 s[30:31]
4445  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4446  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4447  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
4448  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4449  ret void
4450}
4451
; Two-input shuffle with mask <5, 0, 3, 3>: result lane 0 takes lane 2 of
; %vec1 (mask index 5), lane 1 takes lane 0 of %vec0, and lanes 2 and 3 take
; lane 0 of %vec1 (mask index 3). Both inputs come from opaque inline-asm
; defs and the shuffled <4 x i32> is stored through %ptr.
4452define void @v_shuffle_v4i32_v3i32__5_0_3_3(ptr addrspace(1) inreg %ptr) {
4453; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_3_3:
4454; GFX900:       ; %bb.0:
4455; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4456; GFX900-NEXT:    ;;#ASMSTART
4457; GFX900-NEXT:    ; def v[1:3]
4458; GFX900-NEXT:    ;;#ASMEND
4459; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4460; GFX900-NEXT:    ;;#ASMSTART
4461; GFX900-NEXT:    ; def v[3:5]
4462; GFX900-NEXT:    ;;#ASMEND
4463; GFX900-NEXT:    v_mov_b32_e32 v0, v5
4464; GFX900-NEXT:    v_mov_b32_e32 v2, v3
4465; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4466; GFX900-NEXT:    s_waitcnt vmcnt(0)
4467; GFX900-NEXT:    s_setpc_b64 s[30:31]
4468;
4469; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_3_3:
4470; GFX90A:       ; %bb.0:
4471; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4472; GFX90A-NEXT:    ;;#ASMSTART
4473; GFX90A-NEXT:    ; def v[2:4]
4474; GFX90A-NEXT:    ;;#ASMEND
4475; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4476; GFX90A-NEXT:    ;;#ASMSTART
4477; GFX90A-NEXT:    ; def v[4:6]
4478; GFX90A-NEXT:    ;;#ASMEND
4479; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4480; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
4481; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4482; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4483; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4484; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4485; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4486;
4487; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_3_3:
4488; GFX940:       ; %bb.0:
4489; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4490; GFX940-NEXT:    ;;#ASMSTART
4491; GFX940-NEXT:    ; def v[2:4]
4492; GFX940-NEXT:    ;;#ASMEND
4493; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4494; GFX940-NEXT:    ;;#ASMSTART
4495; GFX940-NEXT:    ; def v[4:6]
4496; GFX940-NEXT:    ;;#ASMEND
4497; GFX940-NEXT:    v_mov_b32_e32 v1, v2
4498; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4499; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4500; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4501; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4502; GFX940-NEXT:    s_waitcnt vmcnt(0)
4503; GFX940-NEXT:    s_setpc_b64 s[30:31]
4504  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4505  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4506  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
4507  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4508  ret void
4509}
4510
; Two-input shuffle with mask <5, 1, 3, 3>: result lane 0 takes lane 2 of
; %vec1 (mask index 5), lane 1 takes lane 1 of %vec0, and lanes 2 and 3 take
; lane 0 of %vec1 (mask index 3). Both inputs come from opaque inline-asm
; defs and the shuffled <4 x i32> is stored through %ptr.
4511define void @v_shuffle_v4i32_v3i32__5_1_3_3(ptr addrspace(1) inreg %ptr) {
4512; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_3_3:
4513; GFX900:       ; %bb.0:
4514; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4515; GFX900-NEXT:    ;;#ASMSTART
4516; GFX900-NEXT:    ; def v[0:2]
4517; GFX900-NEXT:    ;;#ASMEND
4518; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4519; GFX900-NEXT:    ;;#ASMSTART
4520; GFX900-NEXT:    ; def v[3:5]
4521; GFX900-NEXT:    ;;#ASMEND
4522; GFX900-NEXT:    v_mov_b32_e32 v0, v5
4523; GFX900-NEXT:    v_mov_b32_e32 v2, v3
4524; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4525; GFX900-NEXT:    s_waitcnt vmcnt(0)
4526; GFX900-NEXT:    s_setpc_b64 s[30:31]
4527;
4528; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_3_3:
4529; GFX90A:       ; %bb.0:
4530; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4531; GFX90A-NEXT:    ;;#ASMSTART
4532; GFX90A-NEXT:    ; def v[0:2]
4533; GFX90A-NEXT:    ;;#ASMEND
4534; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4535; GFX90A-NEXT:    ;;#ASMSTART
4536; GFX90A-NEXT:    ; def v[4:6]
4537; GFX90A-NEXT:    ;;#ASMEND
4538; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4539; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4540; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4541; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4542; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4543; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4544;
4545; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_3_3:
4546; GFX940:       ; %bb.0:
4547; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4548; GFX940-NEXT:    ;;#ASMSTART
4549; GFX940-NEXT:    ; def v[0:2]
4550; GFX940-NEXT:    ;;#ASMEND
4551; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4552; GFX940-NEXT:    ;;#ASMSTART
4553; GFX940-NEXT:    ; def v[4:6]
4554; GFX940-NEXT:    ;;#ASMEND
4555; GFX940-NEXT:    s_nop 0
4556; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4557; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4558; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4559; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4560; GFX940-NEXT:    s_waitcnt vmcnt(0)
4561; GFX940-NEXT:    s_setpc_b64 s[30:31]
4562  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4563  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4564  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
4565  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4566  ret void
4567}
4568
; Two-input shuffle with mask <5, 2, 3, 3>: result lane 0 takes lane 2 of
; %vec1 (mask index 5), lane 1 takes lane 2 of %vec0, and lanes 2 and 3 take
; lane 0 of %vec1 (mask index 3). Both inputs come from opaque inline-asm
; defs and the shuffled <4 x i32> is stored through %ptr.
4569define void @v_shuffle_v4i32_v3i32__5_2_3_3(ptr addrspace(1) inreg %ptr) {
4570; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3:
4571; GFX900:       ; %bb.0:
4572; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4573; GFX900-NEXT:    ;;#ASMSTART
4574; GFX900-NEXT:    ; def v[0:2]
4575; GFX900-NEXT:    ;;#ASMEND
4576; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4577; GFX900-NEXT:    ;;#ASMSTART
4578; GFX900-NEXT:    ; def v[3:5]
4579; GFX900-NEXT:    ;;#ASMEND
4580; GFX900-NEXT:    v_mov_b32_e32 v0, v5
4581; GFX900-NEXT:    v_mov_b32_e32 v1, v2
4582; GFX900-NEXT:    v_mov_b32_e32 v2, v3
4583; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4584; GFX900-NEXT:    s_waitcnt vmcnt(0)
4585; GFX900-NEXT:    s_setpc_b64 s[30:31]
4586;
4587; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3:
4588; GFX90A:       ; %bb.0:
4589; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4590; GFX90A-NEXT:    ;;#ASMSTART
4591; GFX90A-NEXT:    ; def v[0:2]
4592; GFX90A-NEXT:    ;;#ASMEND
4593; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4594; GFX90A-NEXT:    ;;#ASMSTART
4595; GFX90A-NEXT:    ; def v[4:6]
4596; GFX90A-NEXT:    ;;#ASMEND
4597; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4598; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
4599; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4600; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4601; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4602; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4603; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4604;
4605; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_3_3:
4606; GFX940:       ; %bb.0:
4607; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4608; GFX940-NEXT:    ;;#ASMSTART
4609; GFX940-NEXT:    ; def v[0:2]
4610; GFX940-NEXT:    ;;#ASMEND
4611; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4612; GFX940-NEXT:    ;;#ASMSTART
4613; GFX940-NEXT:    ; def v[4:6]
4614; GFX940-NEXT:    ;;#ASMEND
4615; GFX940-NEXT:    v_mov_b32_e32 v1, v2
4616; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4617; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4618; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4619; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4620; GFX940-NEXT:    s_waitcnt vmcnt(0)
4621; GFX940-NEXT:    s_setpc_b64 s[30:31]
4622  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4623  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4624  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
4625  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4626  ret void
4627}
4628
; Two-input shuffle with mask <5, 4, 3, 3>: result lanes 0 and 1 take lanes
; 2 and 1 of %vec1 (mask indices 5 and 4), and lanes 2 and 3 take lane 0 of
; %vec1 (mask index 3). %vec0 is defined but never selected by the mask; the
; expected output below contains a single inline-asm def. The shuffled
; <4 x i32> is stored through %ptr.
4629define void @v_shuffle_v4i32_v3i32__5_4_3_3(ptr addrspace(1) inreg %ptr) {
4630; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_3_3:
4631; GFX900:       ; %bb.0:
4632; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4633; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4634; GFX900-NEXT:    ;;#ASMSTART
4635; GFX900-NEXT:    ; def v[3:5]
4636; GFX900-NEXT:    ;;#ASMEND
4637; GFX900-NEXT:    v_mov_b32_e32 v0, v5
4638; GFX900-NEXT:    v_mov_b32_e32 v1, v4
4639; GFX900-NEXT:    v_mov_b32_e32 v2, v3
4640; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4641; GFX900-NEXT:    s_waitcnt vmcnt(0)
4642; GFX900-NEXT:    s_setpc_b64 s[30:31]
4643;
4644; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_3_3:
4645; GFX90A:       ; %bb.0:
4646; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4647; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4648; GFX90A-NEXT:    ;;#ASMSTART
4649; GFX90A-NEXT:    ; def v[4:6]
4650; GFX90A-NEXT:    ;;#ASMEND
4651; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4652; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
4653; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4654; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4655; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4656; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4657; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4658;
4659; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_3_3:
4660; GFX940:       ; %bb.0:
4661; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4662; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4663; GFX940-NEXT:    ;;#ASMSTART
4664; GFX940-NEXT:    ; def v[4:6]
4665; GFX940-NEXT:    ;;#ASMEND
4666; GFX940-NEXT:    s_nop 0
4667; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4668; GFX940-NEXT:    v_mov_b32_e32 v1, v5
4669; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4670; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4671; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4672; GFX940-NEXT:    s_waitcnt vmcnt(0)
4673; GFX940-NEXT:    s_setpc_b64 s[30:31]
4674  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4675  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4676  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
4677  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4678  ret void
4679}
4680
; Shuffle mask <5, 5, 3, 3>. Mask indices 3-5 address the second shufflevector
; operand, so the stored value is <vec1[2], vec1[2], vec1[0], vec1[0]>.
; %vec0 is not referenced by the mask; the expected output for every target
; shows a single "; def", consistent with the unused asm being dropped.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
4681define void @v_shuffle_v4i32_v3i32__5_5_3_3(ptr addrspace(1) inreg %ptr) {
4682; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3:
4683; GFX900:       ; %bb.0:
4684; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4685; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4686; GFX900-NEXT:    ;;#ASMSTART
4687; GFX900-NEXT:    ; def v[3:5]
4688; GFX900-NEXT:    ;;#ASMEND
4689; GFX900-NEXT:    v_mov_b32_e32 v0, v5
4690; GFX900-NEXT:    v_mov_b32_e32 v1, v5
4691; GFX900-NEXT:    v_mov_b32_e32 v2, v3
4692; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4693; GFX900-NEXT:    s_waitcnt vmcnt(0)
4694; GFX900-NEXT:    s_setpc_b64 s[30:31]
4695;
4696; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3:
4697; GFX90A:       ; %bb.0:
4698; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4699; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4700; GFX90A-NEXT:    ;;#ASMSTART
4701; GFX90A-NEXT:    ; def v[4:6]
4702; GFX90A-NEXT:    ;;#ASMEND
4703; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4704; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
4705; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4706; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4707; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4708; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4709; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4710;
4711; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_3:
4712; GFX940:       ; %bb.0:
4713; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4714; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4715; GFX940-NEXT:    ;;#ASMSTART
4716; GFX940-NEXT:    ; def v[4:6]
4717; GFX940-NEXT:    ;;#ASMEND
4718; GFX940-NEXT:    s_nop 0
4719; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4720; GFX940-NEXT:    v_mov_b32_e32 v1, v6
4721; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4722; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4723; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4724; GFX940-NEXT:    s_waitcnt vmcnt(0)
4725; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
4726  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4727  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4728  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
4729  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4730  ret void
4731}
4732
; Shuffle mask <5, 5, poison, 3>. Mask indices 3-5 address the second
; shufflevector operand, so the stored value is
; <vec1[2], vec1[2], poison, vec1[0]>. The expected output writes v0, v1 and
; v3 only; lane 2 is poison and gets no dedicated move.
; %vec0 is not referenced by the mask; the expected output for every target
; shows a single "; def", consistent with the unused asm being dropped.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
4733define void @v_shuffle_v4i32_v3i32__5_5_u_3(ptr addrspace(1) inreg %ptr) {
4734; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3:
4735; GFX900:       ; %bb.0:
4736; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4737; GFX900-NEXT:    ;;#ASMSTART
4738; GFX900-NEXT:    ; def v[2:4]
4739; GFX900-NEXT:    ;;#ASMEND
4740; GFX900-NEXT:    v_mov_b32_e32 v5, 0
4741; GFX900-NEXT:    v_mov_b32_e32 v0, v4
4742; GFX900-NEXT:    v_mov_b32_e32 v1, v4
4743; GFX900-NEXT:    v_mov_b32_e32 v3, v2
4744; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
4745; GFX900-NEXT:    s_waitcnt vmcnt(0)
4746; GFX900-NEXT:    s_setpc_b64 s[30:31]
4747;
4748; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3:
4749; GFX90A:       ; %bb.0:
4750; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4751; GFX90A-NEXT:    ;;#ASMSTART
4752; GFX90A-NEXT:    ; def v[2:4]
4753; GFX90A-NEXT:    ;;#ASMEND
4754; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
4755; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
4756; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
4757; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
4758; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
4759; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4760; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4761;
4762; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_3:
4763; GFX940:       ; %bb.0:
4764; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4765; GFX940-NEXT:    ;;#ASMSTART
4766; GFX940-NEXT:    ; def v[2:4]
4767; GFX940-NEXT:    ;;#ASMEND
4768; GFX940-NEXT:    v_mov_b32_e32 v5, 0
4769; GFX940-NEXT:    v_mov_b32_e32 v0, v4
4770; GFX940-NEXT:    v_mov_b32_e32 v1, v4
4771; GFX940-NEXT:    v_mov_b32_e32 v3, v2
4772; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
4773; GFX940-NEXT:    s_waitcnt vmcnt(0)
4774; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
4775  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4776  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4777  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
4778  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4779  ret void
4780}
4781
; Shuffle mask <5, 5, 0, 3>. Mask indices 0-2 address %vec0 and 3-5 address
; %vec1, so the stored value is <vec1[2], vec1[2], vec0[0], vec1[0]>.
; Both inputs are live, so the expected output contains two "; def" regions;
; their register ranges overlap, and the checks leave v2 (the first element of
; the first def) untouched as result lane 2.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
4782define void @v_shuffle_v4i32_v3i32__5_5_0_3(ptr addrspace(1) inreg %ptr) {
4783; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_3:
4784; GFX900:       ; %bb.0:
4785; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4786; GFX900-NEXT:    ;;#ASMSTART
4787; GFX900-NEXT:    ; def v[2:4]
4788; GFX900-NEXT:    ;;#ASMEND
4789; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4790; GFX900-NEXT:    ;;#ASMSTART
4791; GFX900-NEXT:    ; def v[3:5]
4792; GFX900-NEXT:    ;;#ASMEND
4793; GFX900-NEXT:    v_mov_b32_e32 v0, v5
4794; GFX900-NEXT:    v_mov_b32_e32 v1, v5
4795; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4796; GFX900-NEXT:    s_waitcnt vmcnt(0)
4797; GFX900-NEXT:    s_setpc_b64 s[30:31]
4798;
4799; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_3:
4800; GFX90A:       ; %bb.0:
4801; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4802; GFX90A-NEXT:    ;;#ASMSTART
4803; GFX90A-NEXT:    ; def v[2:4]
4804; GFX90A-NEXT:    ;;#ASMEND
4805; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4806; GFX90A-NEXT:    ;;#ASMSTART
4807; GFX90A-NEXT:    ; def v[4:6]
4808; GFX90A-NEXT:    ;;#ASMEND
4809; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4810; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
4811; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4812; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4813; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4814; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4815;
4816; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_3:
4817; GFX940:       ; %bb.0:
4818; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4819; GFX940-NEXT:    ;;#ASMSTART
4820; GFX940-NEXT:    ; def v[2:4]
4821; GFX940-NEXT:    ;;#ASMEND
4822; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4823; GFX940-NEXT:    ;;#ASMSTART
4824; GFX940-NEXT:    ; def v[4:6]
4825; GFX940-NEXT:    ;;#ASMEND
4826; GFX940-NEXT:    s_nop 0
4827; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4828; GFX940-NEXT:    v_mov_b32_e32 v1, v6
4829; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4830; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4831; GFX940-NEXT:    s_waitcnt vmcnt(0)
4832; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
4833  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4834  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4835  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
4836  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4837  ret void
4838}
4839
; Shuffle mask <5, 5, 1, 3>. Mask indices 0-2 address %vec0 and 3-5 address
; %vec1, so the stored value is <vec1[2], vec1[2], vec0[1], vec1[0]>.
; Both inputs are live, so the expected output contains two "; def" regions
; with overlapping register ranges.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
4840define void @v_shuffle_v4i32_v3i32__5_5_1_3(ptr addrspace(1) inreg %ptr) {
4841; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_3:
4842; GFX900:       ; %bb.0:
4843; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4844; GFX900-NEXT:    ;;#ASMSTART
4845; GFX900-NEXT:    ; def v[1:3]
4846; GFX900-NEXT:    ;;#ASMEND
4847; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4848; GFX900-NEXT:    ;;#ASMSTART
4849; GFX900-NEXT:    ; def v[3:5]
4850; GFX900-NEXT:    ;;#ASMEND
4851; GFX900-NEXT:    v_mov_b32_e32 v0, v5
4852; GFX900-NEXT:    v_mov_b32_e32 v1, v5
4853; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4854; GFX900-NEXT:    s_waitcnt vmcnt(0)
4855; GFX900-NEXT:    s_setpc_b64 s[30:31]
4856;
4857; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_3:
4858; GFX90A:       ; %bb.0:
4859; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4860; GFX90A-NEXT:    ;;#ASMSTART
4861; GFX90A-NEXT:    ; def v[2:4]
4862; GFX90A-NEXT:    ;;#ASMEND
4863; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4864; GFX90A-NEXT:    ;;#ASMSTART
4865; GFX90A-NEXT:    ; def v[4:6]
4866; GFX90A-NEXT:    ;;#ASMEND
4867; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4868; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
4869; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
4870; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4871; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4872; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4873; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4874;
4875; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_3:
4876; GFX940:       ; %bb.0:
4877; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4878; GFX940-NEXT:    ;;#ASMSTART
4879; GFX940-NEXT:    ; def v[2:4]
4880; GFX940-NEXT:    ;;#ASMEND
4881; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4882; GFX940-NEXT:    ;;#ASMSTART
4883; GFX940-NEXT:    ; def v[4:6]
4884; GFX940-NEXT:    ;;#ASMEND
4885; GFX940-NEXT:    v_mov_b32_e32 v2, v3
4886; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4887; GFX940-NEXT:    v_mov_b32_e32 v1, v6
4888; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4889; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4890; GFX940-NEXT:    s_waitcnt vmcnt(0)
4891; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
4892  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4893  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4894  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
4895  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4896  ret void
4897}
4898
; Shuffle mask <5, 5, 2, 3>. Mask indices 0-2 address %vec0 and 3-5 address
; %vec1, so the stored value is <vec1[2], vec1[2], vec0[2], vec1[0]>.
; Both inputs are live, so the expected output contains two "; def" regions;
; the checks leave v2 (the last element of the first def) untouched as
; result lane 2.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
4899define void @v_shuffle_v4i32_v3i32__5_5_2_3(ptr addrspace(1) inreg %ptr) {
4900; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_3:
4901; GFX900:       ; %bb.0:
4902; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4903; GFX900-NEXT:    ;;#ASMSTART
4904; GFX900-NEXT:    ; def v[0:2]
4905; GFX900-NEXT:    ;;#ASMEND
4906; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4907; GFX900-NEXT:    ;;#ASMSTART
4908; GFX900-NEXT:    ; def v[3:5]
4909; GFX900-NEXT:    ;;#ASMEND
4910; GFX900-NEXT:    v_mov_b32_e32 v0, v5
4911; GFX900-NEXT:    v_mov_b32_e32 v1, v5
4912; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4913; GFX900-NEXT:    s_waitcnt vmcnt(0)
4914; GFX900-NEXT:    s_setpc_b64 s[30:31]
4915;
4916; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_3:
4917; GFX90A:       ; %bb.0:
4918; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4919; GFX90A-NEXT:    ;;#ASMSTART
4920; GFX90A-NEXT:    ; def v[0:2]
4921; GFX90A-NEXT:    ;;#ASMEND
4922; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4923; GFX90A-NEXT:    ;;#ASMSTART
4924; GFX90A-NEXT:    ; def v[4:6]
4925; GFX90A-NEXT:    ;;#ASMEND
4926; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4927; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
4928; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4929; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4930; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4931; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4932;
4933; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_3:
4934; GFX940:       ; %bb.0:
4935; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4936; GFX940-NEXT:    ;;#ASMSTART
4937; GFX940-NEXT:    ; def v[0:2]
4938; GFX940-NEXT:    ;;#ASMEND
4939; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4940; GFX940-NEXT:    ;;#ASMSTART
4941; GFX940-NEXT:    ; def v[4:6]
4942; GFX940-NEXT:    ;;#ASMEND
4943; GFX940-NEXT:    s_nop 0
4944; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4945; GFX940-NEXT:    v_mov_b32_e32 v1, v6
4946; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4947; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
4948; GFX940-NEXT:    s_waitcnt vmcnt(0)
4949; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
4950  %vec0 = call <3 x i32> asm "; def $0", "=v"()
4951  %vec1 = call <3 x i32> asm "; def $0", "=v"()
4952  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
4953  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
4954  ret void
4955}
4956
; Shuffle mask <5, 5, 4, 3>. Mask indices 3-5 address the second shufflevector
; operand, so the stored value is <vec1[2], vec1[2], vec1[1], vec1[0]>.
; %vec0 is not referenced by the mask; the expected output for every target
; shows a single "; def", consistent with the unused asm being dropped.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
4957define void @v_shuffle_v4i32_v3i32__5_5_4_3(ptr addrspace(1) inreg %ptr) {
4958; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3:
4959; GFX900:       ; %bb.0:
4960; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4961; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4962; GFX900-NEXT:    ;;#ASMSTART
4963; GFX900-NEXT:    ; def v[3:5]
4964; GFX900-NEXT:    ;;#ASMEND
4965; GFX900-NEXT:    v_mov_b32_e32 v0, v5
4966; GFX900-NEXT:    v_mov_b32_e32 v1, v5
4967; GFX900-NEXT:    v_mov_b32_e32 v2, v4
4968; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4969; GFX900-NEXT:    s_waitcnt vmcnt(0)
4970; GFX900-NEXT:    s_setpc_b64 s[30:31]
4971;
4972; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3:
4973; GFX90A:       ; %bb.0:
4974; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4975; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
4976; GFX90A-NEXT:    ;;#ASMSTART
4977; GFX90A-NEXT:    ; def v[4:6]
4978; GFX90A-NEXT:    ;;#ASMEND
4979; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
4980; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
4981; GFX90A-NEXT:    v_mov_b32_e32 v2, v5
4982; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
4983; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
4984; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4985; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4986;
4987; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_3:
4988; GFX940:       ; %bb.0:
4989; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4990; GFX940-NEXT:    v_mov_b32_e32 v7, 0
4991; GFX940-NEXT:    ;;#ASMSTART
4992; GFX940-NEXT:    ; def v[4:6]
4993; GFX940-NEXT:    ;;#ASMEND
4994; GFX940-NEXT:    s_nop 0
4995; GFX940-NEXT:    v_mov_b32_e32 v0, v6
4996; GFX940-NEXT:    v_mov_b32_e32 v1, v6
4997; GFX940-NEXT:    v_mov_b32_e32 v2, v5
4998; GFX940-NEXT:    v_mov_b32_e32 v3, v4
4999; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
5000; GFX940-NEXT:    s_waitcnt vmcnt(0)
5001; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
5002  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5003  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5004  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
5005  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5006  ret void
5007}
5008
; Shuffle mask <poison, 4, 4, 4>. Mask index 4 addresses element 1 of the
; second shufflevector operand, so the stored value is
; <poison, vec1[1], vec1[1], vec1[1]>. The expected output writes v2 and v3
; only; lane 0 is poison and gets no dedicated move.
; %vec0 is not referenced by the mask; the expected output for every target
; shows a single "; def", consistent with the unused asm being dropped.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
5009define void @v_shuffle_v4i32_v3i32__u_4_4_4(ptr addrspace(1) inreg %ptr) {
5010; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_4_4_4:
5011; GFX900:       ; %bb.0:
5012; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5013; GFX900-NEXT:    ;;#ASMSTART
5014; GFX900-NEXT:    ; def v[0:2]
5015; GFX900-NEXT:    ;;#ASMEND
5016; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5017; GFX900-NEXT:    v_mov_b32_e32 v2, v1
5018; GFX900-NEXT:    v_mov_b32_e32 v3, v1
5019; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5020; GFX900-NEXT:    s_waitcnt vmcnt(0)
5021; GFX900-NEXT:    s_setpc_b64 s[30:31]
5022;
5023; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_4_4_4:
5024; GFX90A:       ; %bb.0:
5025; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5026; GFX90A-NEXT:    ;;#ASMSTART
5027; GFX90A-NEXT:    ; def v[0:2]
5028; GFX90A-NEXT:    ;;#ASMEND
5029; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5030; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5031; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
5032; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5033; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5034; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5035;
5036; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_4_4_4:
5037; GFX940:       ; %bb.0:
5038; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5039; GFX940-NEXT:    ;;#ASMSTART
5040; GFX940-NEXT:    ; def v[0:2]
5041; GFX940-NEXT:    ;;#ASMEND
5042; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5043; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5044; GFX940-NEXT:    v_mov_b32_e32 v3, v1
5045; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
5046; GFX940-NEXT:    s_waitcnt vmcnt(0)
5047; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
5048  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5049  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5050  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
5051  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5052  ret void
5053}
5054
; Shuffle mask <0, 4, 4, 4>. Mask indices 0-2 address %vec0 and 3-5 address
; %vec1, so the stored value is <vec0[0], vec1[1], vec1[1], vec1[1]>.
; Both inputs are live, so the expected output contains two "; def" regions;
; the checks leave v0 (the first element of the first def) untouched as
; result lane 0.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
5055define void @v_shuffle_v4i32_v3i32__0_4_4_4(ptr addrspace(1) inreg %ptr) {
5056; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_4_4_4:
5057; GFX900:       ; %bb.0:
5058; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5059; GFX900-NEXT:    ;;#ASMSTART
5060; GFX900-NEXT:    ; def v[0:2]
5061; GFX900-NEXT:    ;;#ASMEND
5062; GFX900-NEXT:    ;;#ASMSTART
5063; GFX900-NEXT:    ; def v[1:3]
5064; GFX900-NEXT:    ;;#ASMEND
5065; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5066; GFX900-NEXT:    v_mov_b32_e32 v1, v2
5067; GFX900-NEXT:    v_mov_b32_e32 v3, v2
5068; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5069; GFX900-NEXT:    s_waitcnt vmcnt(0)
5070; GFX900-NEXT:    s_setpc_b64 s[30:31]
5071;
5072; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_4_4_4:
5073; GFX90A:       ; %bb.0:
5074; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5075; GFX90A-NEXT:    ;;#ASMSTART
5076; GFX90A-NEXT:    ; def v[0:2]
5077; GFX90A-NEXT:    ;;#ASMEND
5078; GFX90A-NEXT:    ;;#ASMSTART
5079; GFX90A-NEXT:    ; def v[2:4]
5080; GFX90A-NEXT:    ;;#ASMEND
5081; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
5082; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
5083; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
5084; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5085; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5086; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5087;
5088; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_4_4_4:
5089; GFX940:       ; %bb.0:
5090; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5091; GFX940-NEXT:    ;;#ASMSTART
5092; GFX940-NEXT:    ; def v[0:2]
5093; GFX940-NEXT:    ;;#ASMEND
5094; GFX940-NEXT:    v_mov_b32_e32 v5, 0
5095; GFX940-NEXT:    ;;#ASMSTART
5096; GFX940-NEXT:    ; def v[2:4]
5097; GFX940-NEXT:    ;;#ASMEND
5098; GFX940-NEXT:    s_nop 0
5099; GFX940-NEXT:    v_mov_b32_e32 v1, v3
5100; GFX940-NEXT:    v_mov_b32_e32 v2, v3
5101; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
5102; GFX940-NEXT:    s_waitcnt vmcnt(0)
5103; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
5104  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5105  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5106  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
5107  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5108  ret void
5109}
5110
; Shuffle mask <1, 4, 4, 4>. Mask indices 0-2 address %vec0 and 3-5 address
; %vec1, so the stored value is <vec0[1], vec1[1], vec1[1], vec1[1]>.
; Both inputs are live, so the expected output contains two "; def" regions
; with overlapping register ranges.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
5111define void @v_shuffle_v4i32_v3i32__1_4_4_4(ptr addrspace(1) inreg %ptr) {
5112; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_4_4_4:
5113; GFX900:       ; %bb.0:
5114; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5115; GFX900-NEXT:    ;;#ASMSTART
5116; GFX900-NEXT:    ; def v[2:4]
5117; GFX900-NEXT:    ;;#ASMEND
5118; GFX900-NEXT:    ;;#ASMSTART
5119; GFX900-NEXT:    ; def v[0:2]
5120; GFX900-NEXT:    ;;#ASMEND
5121; GFX900-NEXT:    v_mov_b32_e32 v5, 0
5122; GFX900-NEXT:    v_mov_b32_e32 v0, v3
5123; GFX900-NEXT:    v_mov_b32_e32 v2, v1
5124; GFX900-NEXT:    v_mov_b32_e32 v3, v1
5125; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5126; GFX900-NEXT:    s_waitcnt vmcnt(0)
5127; GFX900-NEXT:    s_setpc_b64 s[30:31]
5128;
5129; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_4_4_4:
5130; GFX90A:       ; %bb.0:
5131; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5132; GFX90A-NEXT:    ;;#ASMSTART
5133; GFX90A-NEXT:    ; def v[2:4]
5134; GFX90A-NEXT:    ;;#ASMEND
5135; GFX90A-NEXT:    ;;#ASMSTART
5136; GFX90A-NEXT:    ; def v[0:2]
5137; GFX90A-NEXT:    ;;#ASMEND
5138; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
5139; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
5140; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5141; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
5142; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5143; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5144; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5145;
5146; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_4_4_4:
5147; GFX940:       ; %bb.0:
5148; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5149; GFX940-NEXT:    ;;#ASMSTART
5150; GFX940-NEXT:    ; def v[2:4]
5151; GFX940-NEXT:    ;;#ASMEND
5152; GFX940-NEXT:    v_mov_b32_e32 v5, 0
5153; GFX940-NEXT:    ;;#ASMSTART
5154; GFX940-NEXT:    ; def v[0:2]
5155; GFX940-NEXT:    ;;#ASMEND
5156; GFX940-NEXT:    s_nop 0
5157; GFX940-NEXT:    v_mov_b32_e32 v0, v3
5158; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5159; GFX940-NEXT:    v_mov_b32_e32 v3, v1
5160; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
5161; GFX940-NEXT:    s_waitcnt vmcnt(0)
5162; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
5163  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5164  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5165  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
5166  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5167  ret void
5168}
5169
; Shuffle mask <2, 4, 4, 4>. Mask indices 0-2 address %vec0 and 3-5 address
; %vec1, so the stored value is <vec0[2], vec1[1], vec1[1], vec1[1]>.
; Both inputs are live, so the expected output contains two "; def" regions
; with overlapping register ranges.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
5170define void @v_shuffle_v4i32_v3i32__2_4_4_4(ptr addrspace(1) inreg %ptr) {
5171; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_4_4_4:
5172; GFX900:       ; %bb.0:
5173; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5174; GFX900-NEXT:    ;;#ASMSTART
5175; GFX900-NEXT:    ; def v[1:3]
5176; GFX900-NEXT:    ;;#ASMEND
5177; GFX900-NEXT:    ;;#ASMSTART
5178; GFX900-NEXT:    ; def v[0:2]
5179; GFX900-NEXT:    ;;#ASMEND
5180; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5181; GFX900-NEXT:    v_mov_b32_e32 v0, v3
5182; GFX900-NEXT:    v_mov_b32_e32 v2, v1
5183; GFX900-NEXT:    v_mov_b32_e32 v3, v1
5184; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5185; GFX900-NEXT:    s_waitcnt vmcnt(0)
5186; GFX900-NEXT:    s_setpc_b64 s[30:31]
5187;
5188; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_4_4_4:
5189; GFX90A:       ; %bb.0:
5190; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5191; GFX90A-NEXT:    ;;#ASMSTART
5192; GFX90A-NEXT:    ; def v[2:4]
5193; GFX90A-NEXT:    ;;#ASMEND
5194; GFX90A-NEXT:    ;;#ASMSTART
5195; GFX90A-NEXT:    ; def v[0:2]
5196; GFX90A-NEXT:    ;;#ASMEND
5197; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
5198; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
5199; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5200; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
5201; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5202; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5203; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5204;
5205; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_4_4_4:
5206; GFX940:       ; %bb.0:
5207; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5208; GFX940-NEXT:    ;;#ASMSTART
5209; GFX940-NEXT:    ; def v[2:4]
5210; GFX940-NEXT:    ;;#ASMEND
5211; GFX940-NEXT:    v_mov_b32_e32 v5, 0
5212; GFX940-NEXT:    ;;#ASMSTART
5213; GFX940-NEXT:    ; def v[0:2]
5214; GFX940-NEXT:    ;;#ASMEND
5215; GFX940-NEXT:    s_nop 0
5216; GFX940-NEXT:    v_mov_b32_e32 v0, v4
5217; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5218; GFX940-NEXT:    v_mov_b32_e32 v3, v1
5219; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
5220; GFX940-NEXT:    s_waitcnt vmcnt(0)
5221; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
5222  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5223  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5224  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
5225  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5226  ret void
5227}
5228
; Shuffle mask <3, 4, 4, 4>. Mask indices 3-5 address the second shufflevector
; operand, so the stored value is <vec1[0], vec1[1], vec1[1], vec1[1]>.
; Lanes 0 and 1 already match the def's first two registers, so the expected
; output only copies v1 into v2 and v3.
; %vec0 is not referenced by the mask; the expected output for every target
; shows a single "; def", consistent with the unused asm being dropped.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
5229define void @v_shuffle_v4i32_v3i32__3_4_4_4(ptr addrspace(1) inreg %ptr) {
5230; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_4_4_4:
5231; GFX900:       ; %bb.0:
5232; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5233; GFX900-NEXT:    ;;#ASMSTART
5234; GFX900-NEXT:    ; def v[0:2]
5235; GFX900-NEXT:    ;;#ASMEND
5236; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5237; GFX900-NEXT:    v_mov_b32_e32 v2, v1
5238; GFX900-NEXT:    v_mov_b32_e32 v3, v1
5239; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5240; GFX900-NEXT:    s_waitcnt vmcnt(0)
5241; GFX900-NEXT:    s_setpc_b64 s[30:31]
5242;
5243; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_4_4_4:
5244; GFX90A:       ; %bb.0:
5245; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5246; GFX90A-NEXT:    ;;#ASMSTART
5247; GFX90A-NEXT:    ; def v[0:2]
5248; GFX90A-NEXT:    ;;#ASMEND
5249; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5250; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5251; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
5252; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5253; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5254; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5255;
5256; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_4_4_4:
5257; GFX940:       ; %bb.0:
5258; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5259; GFX940-NEXT:    ;;#ASMSTART
5260; GFX940-NEXT:    ; def v[0:2]
5261; GFX940-NEXT:    ;;#ASMEND
5262; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5263; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5264; GFX940-NEXT:    v_mov_b32_e32 v3, v1
5265; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
5266; GFX940-NEXT:    s_waitcnt vmcnt(0)
5267; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
5268  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5269  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5270  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
5271  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5272  ret void
5273}
5274
; Shuffle mask <4, 4, 4, 4>. Mask index 4 addresses element 1 of the second
; shufflevector operand, so the stored value is a splat of vec1[1]; the
; expected output copies v1 into v0, v2 and v3.
; %vec0 is not referenced by the mask; the expected output for every target
; shows a single "; def", consistent with the unused asm being dropped.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
5275define void @v_shuffle_v4i32_v3i32__4_4_4_4(ptr addrspace(1) inreg %ptr) {
5276; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_4_4_4:
5277; GFX900:       ; %bb.0:
5278; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5279; GFX900-NEXT:    ;;#ASMSTART
5280; GFX900-NEXT:    ; def v[0:2]
5281; GFX900-NEXT:    ;;#ASMEND
5282; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5283; GFX900-NEXT:    v_mov_b32_e32 v0, v1
5284; GFX900-NEXT:    v_mov_b32_e32 v2, v1
5285; GFX900-NEXT:    v_mov_b32_e32 v3, v1
5286; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5287; GFX900-NEXT:    s_waitcnt vmcnt(0)
5288; GFX900-NEXT:    s_setpc_b64 s[30:31]
5289;
5290; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_4_4_4:
5291; GFX90A:       ; %bb.0:
5292; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5293; GFX90A-NEXT:    ;;#ASMSTART
5294; GFX90A-NEXT:    ; def v[0:2]
5295; GFX90A-NEXT:    ;;#ASMEND
5296; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5297; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
5298; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5299; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
5300; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5301; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5302; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5303;
5304; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_4_4_4:
5305; GFX940:       ; %bb.0:
5306; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5307; GFX940-NEXT:    ;;#ASMSTART
5308; GFX940-NEXT:    ; def v[0:2]
5309; GFX940-NEXT:    ;;#ASMEND
5310; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5311; GFX940-NEXT:    v_mov_b32_e32 v0, v1
5312; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5313; GFX940-NEXT:    v_mov_b32_e32 v3, v1
5314; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
5315; GFX940-NEXT:    s_waitcnt vmcnt(0)
5316; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
5317  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5318  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5319  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
5320  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5321  ret void
5322}
5323
; Shuffle mask <5, 4, 4, 4>. Mask indices 3-5 address the second shufflevector
; operand, so the stored value is <vec1[2], vec1[1], vec1[1], vec1[1]>.
; In the expected output v2 is copied to v0 before v2 is overwritten with v1.
; %vec0 is not referenced by the mask; the expected output for every target
; shows a single "; def", consistent with the unused asm being dropped.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
5324define void @v_shuffle_v4i32_v3i32__5_4_4_4(ptr addrspace(1) inreg %ptr) {
5325; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4:
5326; GFX900:       ; %bb.0:
5327; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5328; GFX900-NEXT:    ;;#ASMSTART
5329; GFX900-NEXT:    ; def v[0:2]
5330; GFX900-NEXT:    ;;#ASMEND
5331; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5332; GFX900-NEXT:    v_mov_b32_e32 v0, v2
5333; GFX900-NEXT:    v_mov_b32_e32 v2, v1
5334; GFX900-NEXT:    v_mov_b32_e32 v3, v1
5335; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5336; GFX900-NEXT:    s_waitcnt vmcnt(0)
5337; GFX900-NEXT:    s_setpc_b64 s[30:31]
5338;
5339; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4:
5340; GFX90A:       ; %bb.0:
5341; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5342; GFX90A-NEXT:    ;;#ASMSTART
5343; GFX90A-NEXT:    ; def v[0:2]
5344; GFX90A-NEXT:    ;;#ASMEND
5345; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5346; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
5347; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5348; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
5349; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5350; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5351; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5352;
5353; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_4_4:
5354; GFX940:       ; %bb.0:
5355; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5356; GFX940-NEXT:    ;;#ASMSTART
5357; GFX940-NEXT:    ; def v[0:2]
5358; GFX940-NEXT:    ;;#ASMEND
5359; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5360; GFX940-NEXT:    v_mov_b32_e32 v0, v2
5361; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5362; GFX940-NEXT:    v_mov_b32_e32 v3, v1
5363; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
5364; GFX940-NEXT:    s_waitcnt vmcnt(0)
5365; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
5366  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5367  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5368  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
5369  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5370  ret void
5371}
5372
; Shuffle mask <5, poison, 4, 4>. Mask indices 3-5 address the second
; shufflevector operand, so the stored value is
; <vec1[2], poison, vec1[1], vec1[1]>. Lane 1 is poison and gets no dedicated
; move in the expected output.
; %vec0 is not referenced by the mask; the expected output for every target
; shows a single "; def", consistent with the unused asm being dropped.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
5373define void @v_shuffle_v4i32_v3i32__5_u_4_4(ptr addrspace(1) inreg %ptr) {
5374; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4:
5375; GFX900:       ; %bb.0:
5376; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5377; GFX900-NEXT:    ;;#ASMSTART
5378; GFX900-NEXT:    ; def v[1:3]
5379; GFX900-NEXT:    ;;#ASMEND
5380; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5381; GFX900-NEXT:    v_mov_b32_e32 v0, v3
5382; GFX900-NEXT:    v_mov_b32_e32 v3, v2
5383; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5384; GFX900-NEXT:    s_waitcnt vmcnt(0)
5385; GFX900-NEXT:    s_setpc_b64 s[30:31]
5386;
5387; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4:
5388; GFX90A:       ; %bb.0:
5389; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5390; GFX90A-NEXT:    ;;#ASMSTART
5391; GFX90A-NEXT:    ; def v[0:2]
5392; GFX90A-NEXT:    ;;#ASMEND
5393; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5394; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
5395; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5396; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
5397; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5398; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5399; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5400;
5401; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_4_4:
5402; GFX940:       ; %bb.0:
5403; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5404; GFX940-NEXT:    ;;#ASMSTART
5405; GFX940-NEXT:    ; def v[0:2]
5406; GFX940-NEXT:    ;;#ASMEND
5407; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5408; GFX940-NEXT:    v_mov_b32_e32 v0, v2
5409; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5410; GFX940-NEXT:    v_mov_b32_e32 v3, v1
5411; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
5412; GFX940-NEXT:    s_waitcnt vmcnt(0)
5413; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each input is produced by inline asm with a "=v" (vector register) output.
5414  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5415  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5416  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
5417  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5418  ret void
5419}
5420
5421define void @v_shuffle_v4i32_v3i32__5_0_4_4(ptr addrspace(1) inreg %ptr) {
5422; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_4_4:
5423; GFX900:       ; %bb.0:
5424; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5425; GFX900-NEXT:    ;;#ASMSTART
5426; GFX900-NEXT:    ; def v[1:3]
5427; GFX900-NEXT:    ;;#ASMEND
5428; GFX900-NEXT:    ;;#ASMSTART
5429; GFX900-NEXT:    ; def v[2:4]
5430; GFX900-NEXT:    ;;#ASMEND
5431; GFX900-NEXT:    v_mov_b32_e32 v5, 0
5432; GFX900-NEXT:    v_mov_b32_e32 v0, v4
5433; GFX900-NEXT:    v_mov_b32_e32 v2, v3
5434; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5435; GFX900-NEXT:    s_waitcnt vmcnt(0)
5436; GFX900-NEXT:    s_setpc_b64 s[30:31]
5437;
5438; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_4_4:
5439; GFX90A:       ; %bb.0:
5440; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5441; GFX90A-NEXT:    ;;#ASMSTART
5442; GFX90A-NEXT:    ; def v[2:4]
5443; GFX90A-NEXT:    ;;#ASMEND
5444; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
5445; GFX90A-NEXT:    ;;#ASMSTART
5446; GFX90A-NEXT:    ; def v[4:6]
5447; GFX90A-NEXT:    ;;#ASMEND
5448; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
5449; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
5450; GFX90A-NEXT:    v_mov_b32_e32 v2, v5
5451; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
5452; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
5453; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5454; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5455;
5456; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_4_4:
5457; GFX940:       ; %bb.0:
5458; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5459; GFX940-NEXT:    ;;#ASMSTART
5460; GFX940-NEXT:    ; def v[2:4]
5461; GFX940-NEXT:    ;;#ASMEND
5462; GFX940-NEXT:    v_mov_b32_e32 v7, 0
5463; GFX940-NEXT:    ;;#ASMSTART
5464; GFX940-NEXT:    ; def v[4:6]
5465; GFX940-NEXT:    ;;#ASMEND
5466; GFX940-NEXT:    v_mov_b32_e32 v1, v2
5467; GFX940-NEXT:    v_mov_b32_e32 v0, v6
5468; GFX940-NEXT:    v_mov_b32_e32 v2, v5
5469; GFX940-NEXT:    v_mov_b32_e32 v3, v5
5470; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
5471; GFX940-NEXT:    s_waitcnt vmcnt(0)
5472; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 0, 4, 4>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lane 0 reads %vec1[2], lane 1 reads %vec0[0], and lanes 2 and 3
  ; both read %vec1[1]. Both inputs are referenced, so two inline-asm defs
  ; appear in the expected output above.
5473  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5474  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5475  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
5476  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5477  ret void
5478}
5479
5480define void @v_shuffle_v4i32_v3i32__5_1_4_4(ptr addrspace(1) inreg %ptr) {
5481; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_4_4:
5482; GFX900:       ; %bb.0:
5483; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5484; GFX900-NEXT:    ;;#ASMSTART
5485; GFX900-NEXT:    ; def v[0:2]
5486; GFX900-NEXT:    ;;#ASMEND
5487; GFX900-NEXT:    ;;#ASMSTART
5488; GFX900-NEXT:    ; def v[2:4]
5489; GFX900-NEXT:    ;;#ASMEND
5490; GFX900-NEXT:    v_mov_b32_e32 v5, 0
5491; GFX900-NEXT:    v_mov_b32_e32 v0, v4
5492; GFX900-NEXT:    v_mov_b32_e32 v2, v3
5493; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5494; GFX900-NEXT:    s_waitcnt vmcnt(0)
5495; GFX900-NEXT:    s_setpc_b64 s[30:31]
5496;
5497; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_4_4:
5498; GFX90A:       ; %bb.0:
5499; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5500; GFX90A-NEXT:    ;;#ASMSTART
5501; GFX90A-NEXT:    ; def v[0:2]
5502; GFX90A-NEXT:    ;;#ASMEND
5503; GFX90A-NEXT:    ;;#ASMSTART
5504; GFX90A-NEXT:    ; def v[2:4]
5505; GFX90A-NEXT:    ;;#ASMEND
5506; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
5507; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
5508; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
5509; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5510; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5511; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5512;
5513; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_4_4:
5514; GFX940:       ; %bb.0:
5515; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5516; GFX940-NEXT:    ;;#ASMSTART
5517; GFX940-NEXT:    ; def v[0:2]
5518; GFX940-NEXT:    ;;#ASMEND
5519; GFX940-NEXT:    v_mov_b32_e32 v5, 0
5520; GFX940-NEXT:    ;;#ASMSTART
5521; GFX940-NEXT:    ; def v[2:4]
5522; GFX940-NEXT:    ;;#ASMEND
5523; GFX940-NEXT:    s_nop 0
5524; GFX940-NEXT:    v_mov_b32_e32 v0, v4
5525; GFX940-NEXT:    v_mov_b32_e32 v2, v3
5526; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
5527; GFX940-NEXT:    s_waitcnt vmcnt(0)
5528; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 1, 4, 4>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lane 0 reads %vec1[2], lane 1 reads %vec0[1], and lanes 2 and 3
  ; both read %vec1[1]. Both inputs are referenced, so two inline-asm defs
  ; appear in the expected output above.
5529  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5530  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5531  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
5532  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5533  ret void
5534}
5535
5536define void @v_shuffle_v4i32_v3i32__5_2_4_4(ptr addrspace(1) inreg %ptr) {
5537; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4:
5538; GFX900:       ; %bb.0:
5539; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5540; GFX900-NEXT:    ;;#ASMSTART
5541; GFX900-NEXT:    ; def v[2:4]
5542; GFX900-NEXT:    ;;#ASMEND
5543; GFX900-NEXT:    ;;#ASMSTART
5544; GFX900-NEXT:    ; def v[1:3]
5545; GFX900-NEXT:    ;;#ASMEND
5546; GFX900-NEXT:    v_mov_b32_e32 v5, 0
5547; GFX900-NEXT:    v_mov_b32_e32 v0, v3
5548; GFX900-NEXT:    v_mov_b32_e32 v1, v4
5549; GFX900-NEXT:    v_mov_b32_e32 v3, v2
5550; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5551; GFX900-NEXT:    s_waitcnt vmcnt(0)
5552; GFX900-NEXT:    s_setpc_b64 s[30:31]
5553;
5554; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4:
5555; GFX90A:       ; %bb.0:
5556; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5557; GFX90A-NEXT:    ;;#ASMSTART
5558; GFX90A-NEXT:    ; def v[0:2]
5559; GFX90A-NEXT:    ;;#ASMEND
5560; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
5561; GFX90A-NEXT:    ;;#ASMSTART
5562; GFX90A-NEXT:    ; def v[4:6]
5563; GFX90A-NEXT:    ;;#ASMEND
5564; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
5565; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
5566; GFX90A-NEXT:    v_mov_b32_e32 v2, v5
5567; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
5568; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
5569; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5570; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5571;
5572; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_4_4:
5573; GFX940:       ; %bb.0:
5574; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5575; GFX940-NEXT:    ;;#ASMSTART
5576; GFX940-NEXT:    ; def v[0:2]
5577; GFX940-NEXT:    ;;#ASMEND
5578; GFX940-NEXT:    v_mov_b32_e32 v7, 0
5579; GFX940-NEXT:    ;;#ASMSTART
5580; GFX940-NEXT:    ; def v[4:6]
5581; GFX940-NEXT:    ;;#ASMEND
5582; GFX940-NEXT:    v_mov_b32_e32 v1, v2
5583; GFX940-NEXT:    v_mov_b32_e32 v0, v6
5584; GFX940-NEXT:    v_mov_b32_e32 v2, v5
5585; GFX940-NEXT:    v_mov_b32_e32 v3, v5
5586; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
5587; GFX940-NEXT:    s_waitcnt vmcnt(0)
5588; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 2, 4, 4>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lane 0 reads %vec1[2], lane 1 reads %vec0[2], and lanes 2 and 3
  ; both read %vec1[1]. Both inputs are referenced, so two inline-asm defs
  ; appear in the expected output above.
5589  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5590  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5591  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
5592  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5593  ret void
5594}
5595
5596define void @v_shuffle_v4i32_v3i32__5_3_4_4(ptr addrspace(1) inreg %ptr) {
5597; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4:
5598; GFX900:       ; %bb.0:
5599; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5600; GFX900-NEXT:    ;;#ASMSTART
5601; GFX900-NEXT:    ; def v[1:3]
5602; GFX900-NEXT:    ;;#ASMEND
5603; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5604; GFX900-NEXT:    v_mov_b32_e32 v0, v3
5605; GFX900-NEXT:    v_mov_b32_e32 v3, v2
5606; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5607; GFX900-NEXT:    s_waitcnt vmcnt(0)
5608; GFX900-NEXT:    s_setpc_b64 s[30:31]
5609;
5610; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4:
5611; GFX90A:       ; %bb.0:
5612; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5613; GFX90A-NEXT:    ;;#ASMSTART
5614; GFX90A-NEXT:    ; def v[2:4]
5615; GFX90A-NEXT:    ;;#ASMEND
5616; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
5617; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
5618; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
5619; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
5620; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5621; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5622; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5623;
5624; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_4_4:
5625; GFX940:       ; %bb.0:
5626; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5627; GFX940-NEXT:    ;;#ASMSTART
5628; GFX940-NEXT:    ; def v[2:4]
5629; GFX940-NEXT:    ;;#ASMEND
5630; GFX940-NEXT:    v_mov_b32_e32 v5, 0
5631; GFX940-NEXT:    v_mov_b32_e32 v0, v4
5632; GFX940-NEXT:    v_mov_b32_e32 v1, v2
5633; GFX940-NEXT:    v_mov_b32_e32 v2, v3
5634; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
5635; GFX940-NEXT:    s_waitcnt vmcnt(0)
5636; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 3, 4, 4>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lane 0 reads %vec1[2], lane 1 reads %vec1[0], and lanes 2 and 3
  ; both read %vec1[1]. %vec0 is not referenced by the mask; the expected
  ; output above contains a single inline-asm def for every target.
5637  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5638  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5639  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
5640  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5641  ret void
5642}
5643
5644define void @v_shuffle_v4i32_v3i32__5_5_4_4(ptr addrspace(1) inreg %ptr) {
5645; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4:
5646; GFX900:       ; %bb.0:
5647; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5648; GFX900-NEXT:    ;;#ASMSTART
5649; GFX900-NEXT:    ; def v[1:3]
5650; GFX900-NEXT:    ;;#ASMEND
5651; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5652; GFX900-NEXT:    v_mov_b32_e32 v0, v3
5653; GFX900-NEXT:    v_mov_b32_e32 v1, v3
5654; GFX900-NEXT:    v_mov_b32_e32 v3, v2
5655; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5656; GFX900-NEXT:    s_waitcnt vmcnt(0)
5657; GFX900-NEXT:    s_setpc_b64 s[30:31]
5658;
5659; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4:
5660; GFX90A:       ; %bb.0:
5661; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5662; GFX90A-NEXT:    ;;#ASMSTART
5663; GFX90A-NEXT:    ; def v[2:4]
5664; GFX90A-NEXT:    ;;#ASMEND
5665; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
5666; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
5667; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
5668; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
5669; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5670; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5671; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5672;
5673; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_4:
5674; GFX940:       ; %bb.0:
5675; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5676; GFX940-NEXT:    ;;#ASMSTART
5677; GFX940-NEXT:    ; def v[2:4]
5678; GFX940-NEXT:    ;;#ASMEND
5679; GFX940-NEXT:    v_mov_b32_e32 v5, 0
5680; GFX940-NEXT:    v_mov_b32_e32 v0, v4
5681; GFX940-NEXT:    v_mov_b32_e32 v1, v4
5682; GFX940-NEXT:    v_mov_b32_e32 v2, v3
5683; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
5684; GFX940-NEXT:    s_waitcnt vmcnt(0)
5685; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 5, 4, 4>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lanes 0 and 1 both read %vec1[2] and lanes 2 and 3 both read
  ; %vec1[1]. %vec0 is not referenced by the mask; the expected output above
  ; contains a single inline-asm def for every target.
5686  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5687  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5688  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
5689  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5690  ret void
5691}
5692
5693define void @v_shuffle_v4i32_v3i32__5_5_u_4(ptr addrspace(1) inreg %ptr) {
5694; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4:
5695; GFX900:       ; %bb.0:
5696; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5697; GFX900-NEXT:    ;;#ASMSTART
5698; GFX900-NEXT:    ; def v[1:3]
5699; GFX900-NEXT:    ;;#ASMEND
5700; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5701; GFX900-NEXT:    v_mov_b32_e32 v0, v3
5702; GFX900-NEXT:    v_mov_b32_e32 v1, v3
5703; GFX900-NEXT:    v_mov_b32_e32 v3, v2
5704; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5705; GFX900-NEXT:    s_waitcnt vmcnt(0)
5706; GFX900-NEXT:    s_setpc_b64 s[30:31]
5707;
5708; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4:
5709; GFX90A:       ; %bb.0:
5710; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5711; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
5712; GFX90A-NEXT:    ;;#ASMSTART
5713; GFX90A-NEXT:    ; def v[2:4]
5714; GFX90A-NEXT:    ;;#ASMEND
5715; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
5716; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
5717; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5718; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5719; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5720;
5721; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_4:
5722; GFX940:       ; %bb.0:
5723; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5724; GFX940-NEXT:    v_mov_b32_e32 v5, 0
5725; GFX940-NEXT:    ;;#ASMSTART
5726; GFX940-NEXT:    ; def v[2:4]
5727; GFX940-NEXT:    ;;#ASMEND
5728; GFX940-NEXT:    s_nop 0
5729; GFX940-NEXT:    v_mov_b32_e32 v0, v4
5730; GFX940-NEXT:    v_mov_b32_e32 v1, v4
5731; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
5732; GFX940-NEXT:    s_waitcnt vmcnt(0)
5733; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 5, poison, 4>. Mask indices 0-2 select from %vec0 and 3-5
  ; from %vec1, so lanes 0 and 1 both read %vec1[2], lane 2 is poison, and
  ; lane 3 reads %vec1[1]. %vec0 is not referenced by the mask; the expected
  ; output above contains a single inline-asm def for every target.
5734  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5735  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5736  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
5737  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5738  ret void
5739}
5740
5741define void @v_shuffle_v4i32_v3i32__5_5_0_4(ptr addrspace(1) inreg %ptr) {
5742; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_4:
5743; GFX900:       ; %bb.0:
5744; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5745; GFX900-NEXT:    ;;#ASMSTART
5746; GFX900-NEXT:    ; def v[2:4]
5747; GFX900-NEXT:    ;;#ASMEND
5748; GFX900-NEXT:    ;;#ASMSTART
5749; GFX900-NEXT:    ; def v[3:5]
5750; GFX900-NEXT:    ;;#ASMEND
5751; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5752; GFX900-NEXT:    v_mov_b32_e32 v0, v5
5753; GFX900-NEXT:    v_mov_b32_e32 v1, v5
5754; GFX900-NEXT:    v_mov_b32_e32 v3, v4
5755; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5756; GFX900-NEXT:    s_waitcnt vmcnt(0)
5757; GFX900-NEXT:    s_setpc_b64 s[30:31]
5758;
5759; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_4:
5760; GFX90A:       ; %bb.0:
5761; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5762; GFX90A-NEXT:    ;;#ASMSTART
5763; GFX90A-NEXT:    ; def v[2:4]
5764; GFX90A-NEXT:    ;;#ASMEND
5765; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
5766; GFX90A-NEXT:    ;;#ASMSTART
5767; GFX90A-NEXT:    ; def v[4:6]
5768; GFX90A-NEXT:    ;;#ASMEND
5769; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
5770; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
5771; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
5772; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
5773; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5774; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5775;
5776; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_4:
5777; GFX940:       ; %bb.0:
5778; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5779; GFX940-NEXT:    ;;#ASMSTART
5780; GFX940-NEXT:    ; def v[2:4]
5781; GFX940-NEXT:    ;;#ASMEND
5782; GFX940-NEXT:    v_mov_b32_e32 v7, 0
5783; GFX940-NEXT:    ;;#ASMSTART
5784; GFX940-NEXT:    ; def v[4:6]
5785; GFX940-NEXT:    ;;#ASMEND
5786; GFX940-NEXT:    s_nop 0
5787; GFX940-NEXT:    v_mov_b32_e32 v0, v6
5788; GFX940-NEXT:    v_mov_b32_e32 v1, v6
5789; GFX940-NEXT:    v_mov_b32_e32 v3, v5
5790; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
5791; GFX940-NEXT:    s_waitcnt vmcnt(0)
5792; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 5, 0, 4>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lanes 0 and 1 both read %vec1[2], lane 2 reads %vec0[0], and
  ; lane 3 reads %vec1[1]. Both inputs are referenced, so two inline-asm defs
  ; appear in the expected output above.
5793  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5794  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5795  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
5796  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5797  ret void
5798}
5799
5800define void @v_shuffle_v4i32_v3i32__5_5_1_4(ptr addrspace(1) inreg %ptr) {
5801; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_4:
5802; GFX900:       ; %bb.0:
5803; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5804; GFX900-NEXT:    ;;#ASMSTART
5805; GFX900-NEXT:    ; def v[1:3]
5806; GFX900-NEXT:    ;;#ASMEND
5807; GFX900-NEXT:    ;;#ASMSTART
5808; GFX900-NEXT:    ; def v[3:5]
5809; GFX900-NEXT:    ;;#ASMEND
5810; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5811; GFX900-NEXT:    v_mov_b32_e32 v0, v5
5812; GFX900-NEXT:    v_mov_b32_e32 v1, v5
5813; GFX900-NEXT:    v_mov_b32_e32 v3, v4
5814; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5815; GFX900-NEXT:    s_waitcnt vmcnt(0)
5816; GFX900-NEXT:    s_setpc_b64 s[30:31]
5817;
5818; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_4:
5819; GFX90A:       ; %bb.0:
5820; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5821; GFX90A-NEXT:    ;;#ASMSTART
5822; GFX90A-NEXT:    ; def v[2:4]
5823; GFX90A-NEXT:    ;;#ASMEND
5824; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
5825; GFX90A-NEXT:    ;;#ASMSTART
5826; GFX90A-NEXT:    ; def v[4:6]
5827; GFX90A-NEXT:    ;;#ASMEND
5828; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
5829; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
5830; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
5831; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
5832; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
5833; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5834; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5835;
5836; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_4:
5837; GFX940:       ; %bb.0:
5838; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5839; GFX940-NEXT:    ;;#ASMSTART
5840; GFX940-NEXT:    ; def v[2:4]
5841; GFX940-NEXT:    ;;#ASMEND
5842; GFX940-NEXT:    v_mov_b32_e32 v7, 0
5843; GFX940-NEXT:    ;;#ASMSTART
5844; GFX940-NEXT:    ; def v[4:6]
5845; GFX940-NEXT:    ;;#ASMEND
5846; GFX940-NEXT:    v_mov_b32_e32 v2, v3
5847; GFX940-NEXT:    v_mov_b32_e32 v0, v6
5848; GFX940-NEXT:    v_mov_b32_e32 v1, v6
5849; GFX940-NEXT:    v_mov_b32_e32 v3, v5
5850; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
5851; GFX940-NEXT:    s_waitcnt vmcnt(0)
5852; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 5, 1, 4>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lanes 0 and 1 both read %vec1[2], lane 2 reads %vec0[1], and
  ; lane 3 reads %vec1[1]. Both inputs are referenced, so two inline-asm defs
  ; appear in the expected output above.
5853  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5854  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5855  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
5856  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5857  ret void
5858}
5859
5860define void @v_shuffle_v4i32_v3i32__5_5_2_4(ptr addrspace(1) inreg %ptr) {
5861; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_4:
5862; GFX900:       ; %bb.0:
5863; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5864; GFX900-NEXT:    ;;#ASMSTART
5865; GFX900-NEXT:    ; def v[0:2]
5866; GFX900-NEXT:    ;;#ASMEND
5867; GFX900-NEXT:    ;;#ASMSTART
5868; GFX900-NEXT:    ; def v[3:5]
5869; GFX900-NEXT:    ;;#ASMEND
5870; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5871; GFX900-NEXT:    v_mov_b32_e32 v0, v5
5872; GFX900-NEXT:    v_mov_b32_e32 v1, v5
5873; GFX900-NEXT:    v_mov_b32_e32 v3, v4
5874; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5875; GFX900-NEXT:    s_waitcnt vmcnt(0)
5876; GFX900-NEXT:    s_setpc_b64 s[30:31]
5877;
5878; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_4:
5879; GFX90A:       ; %bb.0:
5880; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5881; GFX90A-NEXT:    ;;#ASMSTART
5882; GFX90A-NEXT:    ; def v[0:2]
5883; GFX90A-NEXT:    ;;#ASMEND
5884; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
5885; GFX90A-NEXT:    ;;#ASMSTART
5886; GFX90A-NEXT:    ; def v[4:6]
5887; GFX90A-NEXT:    ;;#ASMEND
5888; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
5889; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
5890; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
5891; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
5892; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5893; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5894;
5895; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_4:
5896; GFX940:       ; %bb.0:
5897; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5898; GFX940-NEXT:    ;;#ASMSTART
5899; GFX940-NEXT:    ; def v[0:2]
5900; GFX940-NEXT:    ;;#ASMEND
5901; GFX940-NEXT:    v_mov_b32_e32 v7, 0
5902; GFX940-NEXT:    ;;#ASMSTART
5903; GFX940-NEXT:    ; def v[4:6]
5904; GFX940-NEXT:    ;;#ASMEND
5905; GFX940-NEXT:    s_nop 0
5906; GFX940-NEXT:    v_mov_b32_e32 v0, v6
5907; GFX940-NEXT:    v_mov_b32_e32 v1, v6
5908; GFX940-NEXT:    v_mov_b32_e32 v3, v5
5909; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
5910; GFX940-NEXT:    s_waitcnt vmcnt(0)
5911; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 5, 2, 4>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lanes 0 and 1 both read %vec1[2], lane 2 reads %vec0[2], and
  ; lane 3 reads %vec1[1]. Both inputs are referenced, so two inline-asm defs
  ; appear in the expected output above.
5912  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5913  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5914  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
5915  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5916  ret void
5917}
5918
5919define void @v_shuffle_v4i32_v3i32__5_5_3_4(ptr addrspace(1) inreg %ptr) {
5920; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4:
5921; GFX900:       ; %bb.0:
5922; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5923; GFX900-NEXT:    v_mov_b32_e32 v5, 0
5924; GFX900-NEXT:    ;;#ASMSTART
5925; GFX900-NEXT:    ; def v[2:4]
5926; GFX900-NEXT:    ;;#ASMEND
5927; GFX900-NEXT:    v_mov_b32_e32 v0, v4
5928; GFX900-NEXT:    v_mov_b32_e32 v1, v4
5929; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5930; GFX900-NEXT:    s_waitcnt vmcnt(0)
5931; GFX900-NEXT:    s_setpc_b64 s[30:31]
5932;
5933; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4:
5934; GFX90A:       ; %bb.0:
5935; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5936; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
5937; GFX90A-NEXT:    ;;#ASMSTART
5938; GFX90A-NEXT:    ; def v[2:4]
5939; GFX90A-NEXT:    ;;#ASMEND
5940; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
5941; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
5942; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
5943; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5944; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5945;
5946; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_4:
5947; GFX940:       ; %bb.0:
5948; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5949; GFX940-NEXT:    v_mov_b32_e32 v5, 0
5950; GFX940-NEXT:    ;;#ASMSTART
5951; GFX940-NEXT:    ; def v[2:4]
5952; GFX940-NEXT:    ;;#ASMEND
5953; GFX940-NEXT:    s_nop 0
5954; GFX940-NEXT:    v_mov_b32_e32 v0, v4
5955; GFX940-NEXT:    v_mov_b32_e32 v1, v4
5956; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
5957; GFX940-NEXT:    s_waitcnt vmcnt(0)
5958; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <5, 5, 3, 4>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lanes 0 and 1 both read %vec1[2], lane 2 reads %vec1[0], and
  ; lane 3 reads %vec1[1]. %vec0 is not referenced by the mask; the expected
  ; output above contains a single inline-asm def for every target.
5959  %vec0 = call <3 x i32> asm "; def $0", "=v"()
5960  %vec1 = call <3 x i32> asm "; def $0", "=v"()
5961  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
5962  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
5963  ret void
5964}
5965
5966define void @v_shuffle_v4i32_v3i32__u_5_5_5(ptr addrspace(1) inreg %ptr) {
5967; GFX900-LABEL: v_shuffle_v4i32_v3i32__u_5_5_5:
5968; GFX900:       ; %bb.0:
5969; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5970; GFX900-NEXT:    ;;#ASMSTART
5971; GFX900-NEXT:    ; def v[0:2]
5972; GFX900-NEXT:    ;;#ASMEND
5973; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5974; GFX900-NEXT:    v_mov_b32_e32 v1, v2
5975; GFX900-NEXT:    v_mov_b32_e32 v3, v2
5976; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5977; GFX900-NEXT:    s_waitcnt vmcnt(0)
5978; GFX900-NEXT:    s_setpc_b64 s[30:31]
5979;
5980; GFX90A-LABEL: v_shuffle_v4i32_v3i32__u_5_5_5:
5981; GFX90A:       ; %bb.0:
5982; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5983; GFX90A-NEXT:    ;;#ASMSTART
5984; GFX90A-NEXT:    ; def v[0:2]
5985; GFX90A-NEXT:    ;;#ASMEND
5986; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5987; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
5988; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
5989; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
5990; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5991; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5992;
5993; GFX940-LABEL: v_shuffle_v4i32_v3i32__u_5_5_5:
5994; GFX940:       ; %bb.0:
5995; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5996; GFX940-NEXT:    ;;#ASMSTART
5997; GFX940-NEXT:    ; def v[0:2]
5998; GFX940-NEXT:    ;;#ASMEND
5999; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6000; GFX940-NEXT:    v_mov_b32_e32 v1, v2
6001; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6002; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
6003; GFX940-NEXT:    s_waitcnt vmcnt(0)
6004; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <poison, 5, 5, 5>. Mask indices 0-2 select from %vec0 and 3-5
  ; from %vec1, so lane 0 is poison and lanes 1, 2 and 3 all read %vec1[2].
  ; %vec0 is not referenced by the mask; the expected output above contains a
  ; single inline-asm def for every target.
6005  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6006  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6007  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
6008  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6009  ret void
6010}
6011
6012define void @v_shuffle_v4i32_v3i32__0_5_5_5(ptr addrspace(1) inreg %ptr) {
6013; GFX900-LABEL: v_shuffle_v4i32_v3i32__0_5_5_5:
6014; GFX900:       ; %bb.0:
6015; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6016; GFX900-NEXT:    ;;#ASMSTART
6017; GFX900-NEXT:    ; def v[0:2]
6018; GFX900-NEXT:    ;;#ASMEND
6019; GFX900-NEXT:    ;;#ASMSTART
6020; GFX900-NEXT:    ; def v[1:3]
6021; GFX900-NEXT:    ;;#ASMEND
6022; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6023; GFX900-NEXT:    v_mov_b32_e32 v1, v3
6024; GFX900-NEXT:    v_mov_b32_e32 v2, v3
6025; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6026; GFX900-NEXT:    s_waitcnt vmcnt(0)
6027; GFX900-NEXT:    s_setpc_b64 s[30:31]
6028;
6029; GFX90A-LABEL: v_shuffle_v4i32_v3i32__0_5_5_5:
6030; GFX90A:       ; %bb.0:
6031; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6032; GFX90A-NEXT:    ;;#ASMSTART
6033; GFX90A-NEXT:    ; def v[0:2]
6034; GFX90A-NEXT:    ;;#ASMEND
6035; GFX90A-NEXT:    ;;#ASMSTART
6036; GFX90A-NEXT:    ; def v[2:4]
6037; GFX90A-NEXT:    ;;#ASMEND
6038; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
6039; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
6040; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6041; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
6042; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6043; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6044; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6045;
6046; GFX940-LABEL: v_shuffle_v4i32_v3i32__0_5_5_5:
6047; GFX940:       ; %bb.0:
6048; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6049; GFX940-NEXT:    ;;#ASMSTART
6050; GFX940-NEXT:    ; def v[0:2]
6051; GFX940-NEXT:    ;;#ASMEND
6052; GFX940-NEXT:    v_mov_b32_e32 v5, 0
6053; GFX940-NEXT:    ;;#ASMSTART
6054; GFX940-NEXT:    ; def v[2:4]
6055; GFX940-NEXT:    ;;#ASMEND
6056; GFX940-NEXT:    s_nop 0
6057; GFX940-NEXT:    v_mov_b32_e32 v1, v4
6058; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6059; GFX940-NEXT:    v_mov_b32_e32 v3, v4
6060; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
6061; GFX940-NEXT:    s_waitcnt vmcnt(0)
6062; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <0, 5, 5, 5>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lane 0 reads %vec0[0] and lanes 1, 2 and 3 all read %vec1[2].
  ; Both inputs are referenced, so two inline-asm defs appear in the expected
  ; output above.
6063  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6064  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6065  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
6066  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6067  ret void
6068}
6069
6070define void @v_shuffle_v4i32_v3i32__1_5_5_5(ptr addrspace(1) inreg %ptr) {
6071; GFX900-LABEL: v_shuffle_v4i32_v3i32__1_5_5_5:
6072; GFX900:       ; %bb.0:
6073; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6074; GFX900-NEXT:    ;;#ASMSTART
6075; GFX900-NEXT:    ; def v[2:4]
6076; GFX900-NEXT:    ;;#ASMEND
6077; GFX900-NEXT:    ;;#ASMSTART
6078; GFX900-NEXT:    ; def v[0:2]
6079; GFX900-NEXT:    ;;#ASMEND
6080; GFX900-NEXT:    v_mov_b32_e32 v5, 0
6081; GFX900-NEXT:    v_mov_b32_e32 v0, v3
6082; GFX900-NEXT:    v_mov_b32_e32 v1, v2
6083; GFX900-NEXT:    v_mov_b32_e32 v3, v2
6084; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6085; GFX900-NEXT:    s_waitcnt vmcnt(0)
6086; GFX900-NEXT:    s_setpc_b64 s[30:31]
6087;
6088; GFX90A-LABEL: v_shuffle_v4i32_v3i32__1_5_5_5:
6089; GFX90A:       ; %bb.0:
6090; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6091; GFX90A-NEXT:    ;;#ASMSTART
6092; GFX90A-NEXT:    ; def v[2:4]
6093; GFX90A-NEXT:    ;;#ASMEND
6094; GFX90A-NEXT:    ;;#ASMSTART
6095; GFX90A-NEXT:    ; def v[0:2]
6096; GFX90A-NEXT:    ;;#ASMEND
6097; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
6098; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
6099; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
6100; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
6101; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6102; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6103; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6104;
6105; GFX940-LABEL: v_shuffle_v4i32_v3i32__1_5_5_5:
6106; GFX940:       ; %bb.0:
6107; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6108; GFX940-NEXT:    ;;#ASMSTART
6109; GFX940-NEXT:    ; def v[2:4]
6110; GFX940-NEXT:    ;;#ASMEND
6111; GFX940-NEXT:    v_mov_b32_e32 v5, 0
6112; GFX940-NEXT:    ;;#ASMSTART
6113; GFX940-NEXT:    ; def v[0:2]
6114; GFX940-NEXT:    ;;#ASMEND
6115; GFX940-NEXT:    s_nop 0
6116; GFX940-NEXT:    v_mov_b32_e32 v0, v3
6117; GFX940-NEXT:    v_mov_b32_e32 v1, v2
6118; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6119; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
6120; GFX940-NEXT:    s_waitcnt vmcnt(0)
6121; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <1, 5, 5, 5>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lane 0 reads %vec0[1] and lanes 1, 2 and 3 all read %vec1[2].
  ; Both inputs are referenced, so two inline-asm defs appear in the expected
  ; output above.
6122  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6123  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6124  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
6125  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6126  ret void
6127}
6128
6129define void @v_shuffle_v4i32_v3i32__2_5_5_5(ptr addrspace(1) inreg %ptr) {
6130; GFX900-LABEL: v_shuffle_v4i32_v3i32__2_5_5_5:
6131; GFX900:       ; %bb.0:
6132; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6133; GFX900-NEXT:    ;;#ASMSTART
6134; GFX900-NEXT:    ; def v[1:3]
6135; GFX900-NEXT:    ;;#ASMEND
6136; GFX900-NEXT:    ;;#ASMSTART
6137; GFX900-NEXT:    ; def v[0:2]
6138; GFX900-NEXT:    ;;#ASMEND
6139; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6140; GFX900-NEXT:    v_mov_b32_e32 v0, v3
6141; GFX900-NEXT:    v_mov_b32_e32 v1, v2
6142; GFX900-NEXT:    v_mov_b32_e32 v3, v2
6143; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6144; GFX900-NEXT:    s_waitcnt vmcnt(0)
6145; GFX900-NEXT:    s_setpc_b64 s[30:31]
6146;
6147; GFX90A-LABEL: v_shuffle_v4i32_v3i32__2_5_5_5:
6148; GFX90A:       ; %bb.0:
6149; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6150; GFX90A-NEXT:    ;;#ASMSTART
6151; GFX90A-NEXT:    ; def v[2:4]
6152; GFX90A-NEXT:    ;;#ASMEND
6153; GFX90A-NEXT:    ;;#ASMSTART
6154; GFX90A-NEXT:    ; def v[0:2]
6155; GFX90A-NEXT:    ;;#ASMEND
6156; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
6157; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
6158; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
6159; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
6160; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6161; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6162; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6163;
6164; GFX940-LABEL: v_shuffle_v4i32_v3i32__2_5_5_5:
6165; GFX940:       ; %bb.0:
6166; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6167; GFX940-NEXT:    ;;#ASMSTART
6168; GFX940-NEXT:    ; def v[2:4]
6169; GFX940-NEXT:    ;;#ASMEND
6170; GFX940-NEXT:    v_mov_b32_e32 v5, 0
6171; GFX940-NEXT:    ;;#ASMSTART
6172; GFX940-NEXT:    ; def v[0:2]
6173; GFX940-NEXT:    ;;#ASMEND
6174; GFX940-NEXT:    s_nop 0
6175; GFX940-NEXT:    v_mov_b32_e32 v0, v4
6176; GFX940-NEXT:    v_mov_b32_e32 v1, v2
6177; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6178; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
6179; GFX940-NEXT:    s_waitcnt vmcnt(0)
6180; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <2, 5, 5, 5>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lane 0 reads %vec0[2] and lanes 1, 2 and 3 all read %vec1[2].
  ; Both inputs are referenced, so two inline-asm defs appear in the expected
  ; output above.
6181  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6182  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6183  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
6184  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6185  ret void
6186}
6187
6188define void @v_shuffle_v4i32_v3i32__3_5_5_5(ptr addrspace(1) inreg %ptr) {
6189; GFX900-LABEL: v_shuffle_v4i32_v3i32__3_5_5_5:
6190; GFX900:       ; %bb.0:
6191; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6192; GFX900-NEXT:    ;;#ASMSTART
6193; GFX900-NEXT:    ; def v[0:2]
6194; GFX900-NEXT:    ;;#ASMEND
6195; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6196; GFX900-NEXT:    v_mov_b32_e32 v1, v2
6197; GFX900-NEXT:    v_mov_b32_e32 v3, v2
6198; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6199; GFX900-NEXT:    s_waitcnt vmcnt(0)
6200; GFX900-NEXT:    s_setpc_b64 s[30:31]
6201;
6202; GFX90A-LABEL: v_shuffle_v4i32_v3i32__3_5_5_5:
6203; GFX90A:       ; %bb.0:
6204; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6205; GFX90A-NEXT:    ;;#ASMSTART
6206; GFX90A-NEXT:    ; def v[0:2]
6207; GFX90A-NEXT:    ;;#ASMEND
6208; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6209; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
6210; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
6211; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6212; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6213; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6214;
6215; GFX940-LABEL: v_shuffle_v4i32_v3i32__3_5_5_5:
6216; GFX940:       ; %bb.0:
6217; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6218; GFX940-NEXT:    ;;#ASMSTART
6219; GFX940-NEXT:    ; def v[0:2]
6220; GFX940-NEXT:    ;;#ASMEND
6221; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6222; GFX940-NEXT:    v_mov_b32_e32 v1, v2
6223; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6224; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
6225; GFX940-NEXT:    s_waitcnt vmcnt(0)
6226; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Shuffle mask <3, 5, 5, 5>. Mask indices 0-2 select from %vec0 and 3-5 from
  ; %vec1, so lane 0 reads %vec1[0] and lanes 1, 2 and 3 all read %vec1[2].
  ; %vec0 is not referenced by the mask; the expected output above contains a
  ; single inline-asm def for every target.
6227  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6228  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6229  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
6230  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6231  ret void
6232}
6233
; VGPR case, shuffle mask <4,5,5,5>. All lanes come from the second operand,
; so the stored vector is <vec1[1], vec1[2], vec1[2], vec1[2]> and %vec0 is
; never selected. The expected output has a single "; def" asm block; v0 is
; loaded from v1 before v1 itself is overwritten with v2.
6234define void @v_shuffle_v4i32_v3i32__4_5_5_5(ptr addrspace(1) inreg %ptr) {
6235; GFX900-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5:
6236; GFX900:       ; %bb.0:
6237; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6238; GFX900-NEXT:    ;;#ASMSTART
6239; GFX900-NEXT:    ; def v[0:2]
6240; GFX900-NEXT:    ;;#ASMEND
6241; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6242; GFX900-NEXT:    v_mov_b32_e32 v0, v1
6243; GFX900-NEXT:    v_mov_b32_e32 v1, v2
6244; GFX900-NEXT:    v_mov_b32_e32 v3, v2
6245; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6246; GFX900-NEXT:    s_waitcnt vmcnt(0)
6247; GFX900-NEXT:    s_setpc_b64 s[30:31]
6248;
6249; GFX90A-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5:
6250; GFX90A:       ; %bb.0:
6251; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6252; GFX90A-NEXT:    ;;#ASMSTART
6253; GFX90A-NEXT:    ; def v[0:2]
6254; GFX90A-NEXT:    ;;#ASMEND
6255; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6256; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
6257; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
6258; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
6259; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6260; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6261; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6262;
6263; GFX940-LABEL: v_shuffle_v4i32_v3i32__4_5_5_5:
6264; GFX940:       ; %bb.0:
6265; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6266; GFX940-NEXT:    ;;#ASMSTART
6267; GFX940-NEXT:    ; def v[0:2]
6268; GFX940-NEXT:    ;;#ASMEND
6269; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6270; GFX940-NEXT:    v_mov_b32_e32 v0, v1
6271; GFX940-NEXT:    v_mov_b32_e32 v1, v2
6272; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6273; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
6274; GFX940-NEXT:    s_waitcnt vmcnt(0)
6275; GFX940-NEXT:    s_setpc_b64 s[30:31]
6276  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6277  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6278  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
6279  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6280  ret void
6281}
6282
; VGPR case, shuffle mask <5,poison,5,5>. The stored vector is
; <vec1[2], poison, vec1[2], vec1[2]>; %vec0 is never selected. Because lane 1
; is poison, the expected output contains no move into v1, only copies of v2
; into v0 and v3 after a single "; def" asm block.
6283define void @v_shuffle_v4i32_v3i32__5_u_5_5(ptr addrspace(1) inreg %ptr) {
6284; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5:
6285; GFX900:       ; %bb.0:
6286; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6287; GFX900-NEXT:    ;;#ASMSTART
6288; GFX900-NEXT:    ; def v[0:2]
6289; GFX900-NEXT:    ;;#ASMEND
6290; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6291; GFX900-NEXT:    v_mov_b32_e32 v0, v2
6292; GFX900-NEXT:    v_mov_b32_e32 v3, v2
6293; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6294; GFX900-NEXT:    s_waitcnt vmcnt(0)
6295; GFX900-NEXT:    s_setpc_b64 s[30:31]
6296;
6297; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5:
6298; GFX90A:       ; %bb.0:
6299; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6300; GFX90A-NEXT:    ;;#ASMSTART
6301; GFX90A-NEXT:    ; def v[0:2]
6302; GFX90A-NEXT:    ;;#ASMEND
6303; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6304; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
6305; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
6306; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6307; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6308; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6309;
6310; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_u_5_5:
6311; GFX940:       ; %bb.0:
6312; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6313; GFX940-NEXT:    ;;#ASMSTART
6314; GFX940-NEXT:    ; def v[0:2]
6315; GFX940-NEXT:    ;;#ASMEND
6316; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6317; GFX940-NEXT:    v_mov_b32_e32 v0, v2
6318; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6319; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
6320; GFX940-NEXT:    s_waitcnt vmcnt(0)
6321; GFX940-NEXT:    s_setpc_b64 s[30:31]
6322  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6323  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6324  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
6325  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6326  ret void
6327}
6328
; VGPR case, shuffle mask <5,0,5,5>. The stored vector is
; <vec1[2], vec0[0], vec1[2], vec1[2]>, so both inputs are selected and the
; expected output has two "; def" asm blocks. The register tuples chosen for
; the two defs differ between gfx900 and the gfx90a/gfx940 runs.
6329define void @v_shuffle_v4i32_v3i32__5_0_5_5(ptr addrspace(1) inreg %ptr) {
6330; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_0_5_5:
6331; GFX900:       ; %bb.0:
6332; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6333; GFX900-NEXT:    ;;#ASMSTART
6334; GFX900-NEXT:    ; def v[1:3]
6335; GFX900-NEXT:    ;;#ASMEND
6336; GFX900-NEXT:    ;;#ASMSTART
6337; GFX900-NEXT:    ; def v[2:4]
6338; GFX900-NEXT:    ;;#ASMEND
6339; GFX900-NEXT:    v_mov_b32_e32 v5, 0
6340; GFX900-NEXT:    v_mov_b32_e32 v0, v4
6341; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6342; GFX900-NEXT:    v_mov_b32_e32 v3, v4
6343; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6344; GFX900-NEXT:    s_waitcnt vmcnt(0)
6345; GFX900-NEXT:    s_setpc_b64 s[30:31]
6346;
6347; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_0_5_5:
6348; GFX90A:       ; %bb.0:
6349; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6350; GFX90A-NEXT:    ;;#ASMSTART
6351; GFX90A-NEXT:    ; def v[0:2]
6352; GFX90A-NEXT:    ;;#ASMEND
6353; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
6354; GFX90A-NEXT:    ;;#ASMSTART
6355; GFX90A-NEXT:    ; def v[4:6]
6356; GFX90A-NEXT:    ;;#ASMEND
6357; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
6358; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
6359; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
6360; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
6361; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6362; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6363;
6364; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_0_5_5:
6365; GFX940:       ; %bb.0:
6366; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6367; GFX940-NEXT:    ;;#ASMSTART
6368; GFX940-NEXT:    ; def v[0:2]
6369; GFX940-NEXT:    ;;#ASMEND
6370; GFX940-NEXT:    v_mov_b32_e32 v7, 0
6371; GFX940-NEXT:    ;;#ASMSTART
6372; GFX940-NEXT:    ; def v[4:6]
6373; GFX940-NEXT:    ;;#ASMEND
6374; GFX940-NEXT:    v_mov_b32_e32 v0, v2
6375; GFX940-NEXT:    v_mov_b32_e32 v1, v4
6376; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6377; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
6378; GFX940-NEXT:    s_waitcnt vmcnt(0)
6379; GFX940-NEXT:    s_setpc_b64 s[30:31]
6380  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6381  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6382  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
6383  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6384  ret void
6385}
6386
; VGPR case, shuffle mask <5,1,5,5>. The stored vector is
; <vec1[2], vec0[1], vec1[2], vec1[2]>, so both inputs are selected and the
; expected output has two "; def" asm blocks. Lane 1 needs no move: the first
; def is expected at v[0:2], leaving its element 1 already in v1. The gfx940
; run additionally expects an s_nop 0 after the second asm block.
6387define void @v_shuffle_v4i32_v3i32__5_1_5_5(ptr addrspace(1) inreg %ptr) {
6388; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_1_5_5:
6389; GFX900:       ; %bb.0:
6390; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6391; GFX900-NEXT:    ;;#ASMSTART
6392; GFX900-NEXT:    ; def v[0:2]
6393; GFX900-NEXT:    ;;#ASMEND
6394; GFX900-NEXT:    ;;#ASMSTART
6395; GFX900-NEXT:    ; def v[2:4]
6396; GFX900-NEXT:    ;;#ASMEND
6397; GFX900-NEXT:    v_mov_b32_e32 v5, 0
6398; GFX900-NEXT:    v_mov_b32_e32 v0, v4
6399; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6400; GFX900-NEXT:    v_mov_b32_e32 v3, v4
6401; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6402; GFX900-NEXT:    s_waitcnt vmcnt(0)
6403; GFX900-NEXT:    s_setpc_b64 s[30:31]
6404;
6405; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_1_5_5:
6406; GFX90A:       ; %bb.0:
6407; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6408; GFX90A-NEXT:    ;;#ASMSTART
6409; GFX90A-NEXT:    ; def v[0:2]
6410; GFX90A-NEXT:    ;;#ASMEND
6411; GFX90A-NEXT:    ;;#ASMSTART
6412; GFX90A-NEXT:    ; def v[2:4]
6413; GFX90A-NEXT:    ;;#ASMEND
6414; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
6415; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
6416; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6417; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
6418; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6419; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6420; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6421;
6422; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_1_5_5:
6423; GFX940:       ; %bb.0:
6424; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6425; GFX940-NEXT:    ;;#ASMSTART
6426; GFX940-NEXT:    ; def v[0:2]
6427; GFX940-NEXT:    ;;#ASMEND
6428; GFX940-NEXT:    v_mov_b32_e32 v5, 0
6429; GFX940-NEXT:    ;;#ASMSTART
6430; GFX940-NEXT:    ; def v[2:4]
6431; GFX940-NEXT:    ;;#ASMEND
6432; GFX940-NEXT:    s_nop 0
6433; GFX940-NEXT:    v_mov_b32_e32 v0, v4
6434; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6435; GFX940-NEXT:    v_mov_b32_e32 v3, v4
6436; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
6437; GFX940-NEXT:    s_waitcnt vmcnt(0)
6438; GFX940-NEXT:    s_setpc_b64 s[30:31]
6439  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6440  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6441  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
6442  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6443  ret void
6444}
6445
; VGPR case, shuffle mask <5,2,5,5>. The stored vector is
; <vec1[2], vec0[2], vec1[2], vec1[2]>, so both inputs are selected and the
; expected output has two "; def" asm blocks. The two defs are expected in
; overlapping register tuples; only the last element of each input is read.
; The gfx940 run additionally expects an s_nop 0 after the second asm block.
6446define void @v_shuffle_v4i32_v3i32__5_2_5_5(ptr addrspace(1) inreg %ptr) {
6447; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_2_5_5:
6448; GFX900:       ; %bb.0:
6449; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6450; GFX900-NEXT:    ;;#ASMSTART
6451; GFX900-NEXT:    ; def v[1:3]
6452; GFX900-NEXT:    ;;#ASMEND
6453; GFX900-NEXT:    ;;#ASMSTART
6454; GFX900-NEXT:    ; def v[0:2]
6455; GFX900-NEXT:    ;;#ASMEND
6456; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6457; GFX900-NEXT:    v_mov_b32_e32 v0, v2
6458; GFX900-NEXT:    v_mov_b32_e32 v1, v3
6459; GFX900-NEXT:    v_mov_b32_e32 v3, v2
6460; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6461; GFX900-NEXT:    s_waitcnt vmcnt(0)
6462; GFX900-NEXT:    s_setpc_b64 s[30:31]
6463;
6464; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_2_5_5:
6465; GFX90A:       ; %bb.0:
6466; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6467; GFX90A-NEXT:    ;;#ASMSTART
6468; GFX90A-NEXT:    ; def v[2:4]
6469; GFX90A-NEXT:    ;;#ASMEND
6470; GFX90A-NEXT:    ;;#ASMSTART
6471; GFX90A-NEXT:    ; def v[0:2]
6472; GFX90A-NEXT:    ;;#ASMEND
6473; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
6474; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
6475; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
6476; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
6477; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6478; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6479; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6480;
6481; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_2_5_5:
6482; GFX940:       ; %bb.0:
6483; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6484; GFX940-NEXT:    ;;#ASMSTART
6485; GFX940-NEXT:    ; def v[2:4]
6486; GFX940-NEXT:    ;;#ASMEND
6487; GFX940-NEXT:    v_mov_b32_e32 v5, 0
6488; GFX940-NEXT:    ;;#ASMSTART
6489; GFX940-NEXT:    ; def v[0:2]
6490; GFX940-NEXT:    ;;#ASMEND
6491; GFX940-NEXT:    s_nop 0
6492; GFX940-NEXT:    v_mov_b32_e32 v0, v2
6493; GFX940-NEXT:    v_mov_b32_e32 v1, v4
6494; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6495; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
6496; GFX940-NEXT:    s_waitcnt vmcnt(0)
6497; GFX940-NEXT:    s_setpc_b64 s[30:31]
6498  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6499  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6500  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
6501  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6502  ret void
6503}
6504
; VGPR case, shuffle mask <5,3,5,5>. All lanes come from the second operand,
; so the stored vector is <vec1[2], vec1[0], vec1[2], vec1[2]> and %vec0 is
; never selected. The expected output has a single "; def" asm block. On
; gfx900 the def is expected at v[1:3], which leaves lanes 1 and 3 in place;
; the gfx90a/gfx940 runs expect the def at v[2:4] and four moves.
6505define void @v_shuffle_v4i32_v3i32__5_3_5_5(ptr addrspace(1) inreg %ptr) {
6506; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5:
6507; GFX900:       ; %bb.0:
6508; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6509; GFX900-NEXT:    ;;#ASMSTART
6510; GFX900-NEXT:    ; def v[1:3]
6511; GFX900-NEXT:    ;;#ASMEND
6512; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6513; GFX900-NEXT:    v_mov_b32_e32 v0, v3
6514; GFX900-NEXT:    v_mov_b32_e32 v2, v3
6515; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6516; GFX900-NEXT:    s_waitcnt vmcnt(0)
6517; GFX900-NEXT:    s_setpc_b64 s[30:31]
6518;
6519; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5:
6520; GFX90A:       ; %bb.0:
6521; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6522; GFX90A-NEXT:    ;;#ASMSTART
6523; GFX90A-NEXT:    ; def v[2:4]
6524; GFX90A-NEXT:    ;;#ASMEND
6525; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
6526; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
6527; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
6528; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6529; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
6530; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6531; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6532; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6533;
6534; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_3_5_5:
6535; GFX940:       ; %bb.0:
6536; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6537; GFX940-NEXT:    ;;#ASMSTART
6538; GFX940-NEXT:    ; def v[2:4]
6539; GFX940-NEXT:    ;;#ASMEND
6540; GFX940-NEXT:    v_mov_b32_e32 v5, 0
6541; GFX940-NEXT:    v_mov_b32_e32 v0, v4
6542; GFX940-NEXT:    v_mov_b32_e32 v1, v2
6543; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6544; GFX940-NEXT:    v_mov_b32_e32 v3, v4
6545; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
6546; GFX940-NEXT:    s_waitcnt vmcnt(0)
6547; GFX940-NEXT:    s_setpc_b64 s[30:31]
6548  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6549  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6550  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
6551  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6552  ret void
6553}
6554
; VGPR case, shuffle mask <5,4,5,5>. All lanes come from the second operand,
; so the stored vector is <vec1[2], vec1[1], vec1[2], vec1[2]> and %vec0 is
; never selected. With the single def expected at v[0:2], lanes 1 and 2 are
; already in place and only v0 and v3 receive copies of v2.
6555define void @v_shuffle_v4i32_v3i32__5_4_5_5(ptr addrspace(1) inreg %ptr) {
6556; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5:
6557; GFX900:       ; %bb.0:
6558; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6559; GFX900-NEXT:    ;;#ASMSTART
6560; GFX900-NEXT:    ; def v[0:2]
6561; GFX900-NEXT:    ;;#ASMEND
6562; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6563; GFX900-NEXT:    v_mov_b32_e32 v0, v2
6564; GFX900-NEXT:    v_mov_b32_e32 v3, v2
6565; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6566; GFX900-NEXT:    s_waitcnt vmcnt(0)
6567; GFX900-NEXT:    s_setpc_b64 s[30:31]
6568;
6569; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5:
6570; GFX90A:       ; %bb.0:
6571; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6572; GFX90A-NEXT:    ;;#ASMSTART
6573; GFX90A-NEXT:    ; def v[0:2]
6574; GFX90A-NEXT:    ;;#ASMEND
6575; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6576; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
6577; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
6578; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6579; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6580; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6581;
6582; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_4_5_5:
6583; GFX940:       ; %bb.0:
6584; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6585; GFX940-NEXT:    ;;#ASMSTART
6586; GFX940-NEXT:    ; def v[0:2]
6587; GFX940-NEXT:    ;;#ASMEND
6588; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6589; GFX940-NEXT:    v_mov_b32_e32 v0, v2
6590; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6591; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
6592; GFX940-NEXT:    s_waitcnt vmcnt(0)
6593; GFX940-NEXT:    s_setpc_b64 s[30:31]
6594  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6595  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6596  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
6597  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6598  ret void
6599}
6600
; VGPR case, shuffle mask <5,5,poison,5>. The stored vector is
; <vec1[2], vec1[2], poison, vec1[2]>; %vec0 is never selected. Lane 2 is
; poison, so no specific value is required in v2. On gfx900 the single def is
; expected at v[1:3], which leaves lane 3 in place; the gfx90a/gfx940 runs
; expect the def at v[0:2] and one extra move into v3.
6601define void @v_shuffle_v4i32_v3i32__5_5_u_5(ptr addrspace(1) inreg %ptr) {
6602; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5:
6603; GFX900:       ; %bb.0:
6604; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6605; GFX900-NEXT:    ;;#ASMSTART
6606; GFX900-NEXT:    ; def v[1:3]
6607; GFX900-NEXT:    ;;#ASMEND
6608; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6609; GFX900-NEXT:    v_mov_b32_e32 v0, v3
6610; GFX900-NEXT:    v_mov_b32_e32 v1, v3
6611; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6612; GFX900-NEXT:    s_waitcnt vmcnt(0)
6613; GFX900-NEXT:    s_setpc_b64 s[30:31]
6614;
6615; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5:
6616; GFX90A:       ; %bb.0:
6617; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6618; GFX90A-NEXT:    ;;#ASMSTART
6619; GFX90A-NEXT:    ; def v[0:2]
6620; GFX90A-NEXT:    ;;#ASMEND
6621; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6622; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
6623; GFX90A-NEXT:    v_mov_b32_e32 v1, v2
6624; GFX90A-NEXT:    v_mov_b32_e32 v3, v2
6625; GFX90A-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6626; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6627; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6628;
6629; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_u_5:
6630; GFX940:       ; %bb.0:
6631; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6632; GFX940-NEXT:    ;;#ASMSTART
6633; GFX940-NEXT:    ; def v[0:2]
6634; GFX940-NEXT:    ;;#ASMEND
6635; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6636; GFX940-NEXT:    v_mov_b32_e32 v0, v2
6637; GFX940-NEXT:    v_mov_b32_e32 v1, v2
6638; GFX940-NEXT:    v_mov_b32_e32 v3, v2
6639; GFX940-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
6640; GFX940-NEXT:    s_waitcnt vmcnt(0)
6641; GFX940-NEXT:    s_setpc_b64 s[30:31]
6642  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6643  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6644  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
6645  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6646  ret void
6647}
6648
; VGPR case, shuffle mask <5,5,0,5>. The stored vector is
; <vec1[2], vec1[2], vec0[0], vec1[2]>, so both inputs are selected and the
; expected output has two "; def" asm blocks. The register tuples and the
; number of moves differ between gfx900 and the gfx90a/gfx940 runs; the
; gfx940 run additionally expects an s_nop 0 after the second asm block.
6649define void @v_shuffle_v4i32_v3i32__5_5_0_5(ptr addrspace(1) inreg %ptr) {
6650; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_0_5:
6651; GFX900:       ; %bb.0:
6652; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6653; GFX900-NEXT:    ;;#ASMSTART
6654; GFX900-NEXT:    ; def v[1:3]
6655; GFX900-NEXT:    ;;#ASMEND
6656; GFX900-NEXT:    v_mov_b32_e32 v7, 0
6657; GFX900-NEXT:    ;;#ASMSTART
6658; GFX900-NEXT:    ; def v[4:6]
6659; GFX900-NEXT:    ;;#ASMEND
6660; GFX900-NEXT:    v_mov_b32_e32 v0, v3
6661; GFX900-NEXT:    v_mov_b32_e32 v1, v3
6662; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6663; GFX900-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
6664; GFX900-NEXT:    s_waitcnt vmcnt(0)
6665; GFX900-NEXT:    s_setpc_b64 s[30:31]
6666;
6667; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_0_5:
6668; GFX90A:       ; %bb.0:
6669; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6670; GFX90A-NEXT:    ;;#ASMSTART
6671; GFX90A-NEXT:    ; def v[2:4]
6672; GFX90A-NEXT:    ;;#ASMEND
6673; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
6674; GFX90A-NEXT:    ;;#ASMSTART
6675; GFX90A-NEXT:    ; def v[4:6]
6676; GFX90A-NEXT:    ;;#ASMEND
6677; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
6678; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
6679; GFX90A-NEXT:    v_mov_b32_e32 v3, v6
6680; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
6681; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6682; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6683;
6684; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_0_5:
6685; GFX940:       ; %bb.0:
6686; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6687; GFX940-NEXT:    ;;#ASMSTART
6688; GFX940-NEXT:    ; def v[2:4]
6689; GFX940-NEXT:    ;;#ASMEND
6690; GFX940-NEXT:    v_mov_b32_e32 v7, 0
6691; GFX940-NEXT:    ;;#ASMSTART
6692; GFX940-NEXT:    ; def v[4:6]
6693; GFX940-NEXT:    ;;#ASMEND
6694; GFX940-NEXT:    s_nop 0
6695; GFX940-NEXT:    v_mov_b32_e32 v0, v6
6696; GFX940-NEXT:    v_mov_b32_e32 v1, v6
6697; GFX940-NEXT:    v_mov_b32_e32 v3, v6
6698; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
6699; GFX940-NEXT:    s_waitcnt vmcnt(0)
6700; GFX940-NEXT:    s_setpc_b64 s[30:31]
6701  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6702  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6703  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
6704  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6705  ret void
6706}
6707
; VGPR case, shuffle mask <5,5,1,5>. The stored vector is
; <vec1[2], vec1[2], vec0[1], vec1[2]>, so both inputs are selected and the
; expected output has two "; def" asm blocks. On gfx900 the first def is
; expected at v[1:3], so its element 1 is already in v2; the gfx90a/gfx940
; runs expect an explicit move of v3 into v2 (at different positions in the
; move sequence).
6708define void @v_shuffle_v4i32_v3i32__5_5_1_5(ptr addrspace(1) inreg %ptr) {
6709; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_1_5:
6710; GFX900:       ; %bb.0:
6711; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6712; GFX900-NEXT:    ;;#ASMSTART
6713; GFX900-NEXT:    ; def v[1:3]
6714; GFX900-NEXT:    ;;#ASMEND
6715; GFX900-NEXT:    ;;#ASMSTART
6716; GFX900-NEXT:    ; def v[3:5]
6717; GFX900-NEXT:    ;;#ASMEND
6718; GFX900-NEXT:    v_mov_b32_e32 v6, 0
6719; GFX900-NEXT:    v_mov_b32_e32 v0, v5
6720; GFX900-NEXT:    v_mov_b32_e32 v1, v5
6721; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6722; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
6723; GFX900-NEXT:    s_waitcnt vmcnt(0)
6724; GFX900-NEXT:    s_setpc_b64 s[30:31]
6725;
6726; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_1_5:
6727; GFX90A:       ; %bb.0:
6728; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6729; GFX90A-NEXT:    ;;#ASMSTART
6730; GFX90A-NEXT:    ; def v[2:4]
6731; GFX90A-NEXT:    ;;#ASMEND
6732; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
6733; GFX90A-NEXT:    ;;#ASMSTART
6734; GFX90A-NEXT:    ; def v[4:6]
6735; GFX90A-NEXT:    ;;#ASMEND
6736; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
6737; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
6738; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
6739; GFX90A-NEXT:    v_mov_b32_e32 v3, v6
6740; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
6741; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6742; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6743;
6744; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_1_5:
6745; GFX940:       ; %bb.0:
6746; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6747; GFX940-NEXT:    ;;#ASMSTART
6748; GFX940-NEXT:    ; def v[2:4]
6749; GFX940-NEXT:    ;;#ASMEND
6750; GFX940-NEXT:    v_mov_b32_e32 v7, 0
6751; GFX940-NEXT:    ;;#ASMSTART
6752; GFX940-NEXT:    ; def v[4:6]
6753; GFX940-NEXT:    ;;#ASMEND
6754; GFX940-NEXT:    v_mov_b32_e32 v2, v3
6755; GFX940-NEXT:    v_mov_b32_e32 v0, v6
6756; GFX940-NEXT:    v_mov_b32_e32 v1, v6
6757; GFX940-NEXT:    v_mov_b32_e32 v3, v6
6758; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
6759; GFX940-NEXT:    s_waitcnt vmcnt(0)
6760; GFX940-NEXT:    s_setpc_b64 s[30:31]
6761  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6762  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6763  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
6764  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6765  ret void
6766}
6767
; VGPR case, shuffle mask <5,5,2,5>. The stored vector is
; <vec1[2], vec1[2], vec0[2], vec1[2]>, so both inputs are selected and the
; expected output has two "; def" asm blocks. Lane 2 needs no move: one def
; is expected at v[0:2], leaving its last element already in v2. The gfx940
; run additionally expects an s_nop 0 after the second asm block.
6768define void @v_shuffle_v4i32_v3i32__5_5_2_5(ptr addrspace(1) inreg %ptr) {
6769; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_2_5:
6770; GFX900:       ; %bb.0:
6771; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6772; GFX900-NEXT:    ;;#ASMSTART
6773; GFX900-NEXT:    ; def v[0:2]
6774; GFX900-NEXT:    ;;#ASMEND
6775; GFX900-NEXT:    ;;#ASMSTART
6776; GFX900-NEXT:    ; def v[3:5]
6777; GFX900-NEXT:    ;;#ASMEND
6778; GFX900-NEXT:    v_mov_b32_e32 v6, 0
6779; GFX900-NEXT:    v_mov_b32_e32 v0, v5
6780; GFX900-NEXT:    v_mov_b32_e32 v1, v5
6781; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6782; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
6783; GFX900-NEXT:    s_waitcnt vmcnt(0)
6784; GFX900-NEXT:    s_setpc_b64 s[30:31]
6785;
6786; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_2_5:
6787; GFX90A:       ; %bb.0:
6788; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6789; GFX90A-NEXT:    ;;#ASMSTART
6790; GFX90A-NEXT:    ; def v[0:2]
6791; GFX90A-NEXT:    ;;#ASMEND
6792; GFX90A-NEXT:    v_mov_b32_e32 v7, 0
6793; GFX90A-NEXT:    ;;#ASMSTART
6794; GFX90A-NEXT:    ; def v[4:6]
6795; GFX90A-NEXT:    ;;#ASMEND
6796; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
6797; GFX90A-NEXT:    v_mov_b32_e32 v1, v6
6798; GFX90A-NEXT:    v_mov_b32_e32 v3, v6
6799; GFX90A-NEXT:    global_store_dwordx4 v7, v[0:3], s[16:17]
6800; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6801; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6802;
6803; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_2_5:
6804; GFX940:       ; %bb.0:
6805; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6806; GFX940-NEXT:    ;;#ASMSTART
6807; GFX940-NEXT:    ; def v[0:2]
6808; GFX940-NEXT:    ;;#ASMEND
6809; GFX940-NEXT:    v_mov_b32_e32 v7, 0
6810; GFX940-NEXT:    ;;#ASMSTART
6811; GFX940-NEXT:    ; def v[4:6]
6812; GFX940-NEXT:    ;;#ASMEND
6813; GFX940-NEXT:    s_nop 0
6814; GFX940-NEXT:    v_mov_b32_e32 v0, v6
6815; GFX940-NEXT:    v_mov_b32_e32 v1, v6
6816; GFX940-NEXT:    v_mov_b32_e32 v3, v6
6817; GFX940-NEXT:    global_store_dwordx4 v7, v[0:3], s[0:1] sc0 sc1
6818; GFX940-NEXT:    s_waitcnt vmcnt(0)
6819; GFX940-NEXT:    s_setpc_b64 s[30:31]
6820  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6821  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6822  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
6823  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6824  ret void
6825}
6826
; VGPR case, shuffle mask <5,5,3,5>. All lanes come from the second operand,
; so the stored vector is <vec1[2], vec1[2], vec1[0], vec1[2]> and %vec0 is
; never selected. With the single def expected at v[2:4], lane 2 is already
; in place and v4 is copied into v0, v1 and v3.
6827define void @v_shuffle_v4i32_v3i32__5_5_3_5(ptr addrspace(1) inreg %ptr) {
6828; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5:
6829; GFX900:       ; %bb.0:
6830; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6831; GFX900-NEXT:    ;;#ASMSTART
6832; GFX900-NEXT:    ; def v[2:4]
6833; GFX900-NEXT:    ;;#ASMEND
6834; GFX900-NEXT:    v_mov_b32_e32 v5, 0
6835; GFX900-NEXT:    v_mov_b32_e32 v0, v4
6836; GFX900-NEXT:    v_mov_b32_e32 v1, v4
6837; GFX900-NEXT:    v_mov_b32_e32 v3, v4
6838; GFX900-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6839; GFX900-NEXT:    s_waitcnt vmcnt(0)
6840; GFX900-NEXT:    s_setpc_b64 s[30:31]
6841;
6842; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5:
6843; GFX90A:       ; %bb.0:
6844; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6845; GFX90A-NEXT:    ;;#ASMSTART
6846; GFX90A-NEXT:    ; def v[2:4]
6847; GFX90A-NEXT:    ;;#ASMEND
6848; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
6849; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
6850; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
6851; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
6852; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6853; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6854; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6855;
6856; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_3_5:
6857; GFX940:       ; %bb.0:
6858; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6859; GFX940-NEXT:    ;;#ASMSTART
6860; GFX940-NEXT:    ; def v[2:4]
6861; GFX940-NEXT:    ;;#ASMEND
6862; GFX940-NEXT:    v_mov_b32_e32 v5, 0
6863; GFX940-NEXT:    v_mov_b32_e32 v0, v4
6864; GFX940-NEXT:    v_mov_b32_e32 v1, v4
6865; GFX940-NEXT:    v_mov_b32_e32 v3, v4
6866; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
6867; GFX940-NEXT:    s_waitcnt vmcnt(0)
6868; GFX940-NEXT:    s_setpc_b64 s[30:31]
6869  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6870  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6871  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
6872  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6873  ret void
6874}
6875
; VGPR case, shuffle mask <5,5,4,5>. All lanes come from the second operand,
; so the stored vector is <vec1[2], vec1[2], vec1[1], vec1[2]> and %vec0 is
; never selected. On gfx900 the single def is expected at v[1:3], which
; leaves lanes 2 and 3 in place; the gfx90a/gfx940 runs expect the def at
; v[2:4] and four moves.
6876define void @v_shuffle_v4i32_v3i32__5_5_4_5(ptr addrspace(1) inreg %ptr) {
6877; GFX900-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5:
6878; GFX900:       ; %bb.0:
6879; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6880; GFX900-NEXT:    ;;#ASMSTART
6881; GFX900-NEXT:    ; def v[1:3]
6882; GFX900-NEXT:    ;;#ASMEND
6883; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6884; GFX900-NEXT:    v_mov_b32_e32 v0, v3
6885; GFX900-NEXT:    v_mov_b32_e32 v1, v3
6886; GFX900-NEXT:    global_store_dwordx4 v4, v[0:3], s[16:17]
6887; GFX900-NEXT:    s_waitcnt vmcnt(0)
6888; GFX900-NEXT:    s_setpc_b64 s[30:31]
6889;
6890; GFX90A-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5:
6891; GFX90A:       ; %bb.0:
6892; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6893; GFX90A-NEXT:    ;;#ASMSTART
6894; GFX90A-NEXT:    ; def v[2:4]
6895; GFX90A-NEXT:    ;;#ASMEND
6896; GFX90A-NEXT:    v_mov_b32_e32 v5, 0
6897; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
6898; GFX90A-NEXT:    v_mov_b32_e32 v1, v4
6899; GFX90A-NEXT:    v_mov_b32_e32 v2, v3
6900; GFX90A-NEXT:    v_mov_b32_e32 v3, v4
6901; GFX90A-NEXT:    global_store_dwordx4 v5, v[0:3], s[16:17]
6902; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6903; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6904;
6905; GFX940-LABEL: v_shuffle_v4i32_v3i32__5_5_4_5:
6906; GFX940:       ; %bb.0:
6907; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6908; GFX940-NEXT:    ;;#ASMSTART
6909; GFX940-NEXT:    ; def v[2:4]
6910; GFX940-NEXT:    ;;#ASMEND
6911; GFX940-NEXT:    v_mov_b32_e32 v5, 0
6912; GFX940-NEXT:    v_mov_b32_e32 v0, v4
6913; GFX940-NEXT:    v_mov_b32_e32 v1, v4
6914; GFX940-NEXT:    v_mov_b32_e32 v2, v3
6915; GFX940-NEXT:    v_mov_b32_e32 v3, v4
6916; GFX940-NEXT:    global_store_dwordx4 v5, v[0:3], s[0:1] sc0 sc1
6917; GFX940-NEXT:    s_waitcnt vmcnt(0)
6918; GFX940-NEXT:    s_setpc_b64 s[30:31]
6919  %vec0 = call <3 x i32> asm "; def $0", "=v"()
6920  %vec1 = call <3 x i32> asm "; def $0", "=v"()
6921  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
6922  store <4 x i32> %shuf, ptr addrspace(1) %ptr, align 16
6923  ret void
6924}
6925
; SGPR case ("=s" input constraint; the result is consumed by an asm "use"
; pinned to s[8:11] instead of being stored). The shuffle mask is entirely
; poison, so no lane of %vec0 is selected: the expected output, shared by all
; three runs, contains only the "; use" asm block and no "; def".
6926define void @s_shuffle_v4i32_v3i32__u_u_u_u() {
6927; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_u_u_u:
6928; GFX9:       ; %bb.0:
6929; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6930; GFX9-NEXT:    ;;#ASMSTART
6931; GFX9-NEXT:    ; use s[8:11]
6932; GFX9-NEXT:    ;;#ASMEND
6933; GFX9-NEXT:    s_setpc_b64 s[30:31]
6934  %vec0 = call <3 x i32> asm "; def $0", "=s"()
6935  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> poison
6936  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
6937  ret void
6938}
6939
; SGPR case, shuffle mask <0,poison,poison,poison>. Only lane 0 is defined
; and it is vec0[0]. The def is expected directly in s[8:10], so its element
; 0 already sits in s8 of the s[8:11] tuple required by the "use" asm and no
; scalar moves are expected. The gfx940 run additionally expects an s_nop 0
; between the two asm blocks.
6940define void @s_shuffle_v4i32_v3i32__0_u_u_u() {
6941; GFX900-LABEL: s_shuffle_v4i32_v3i32__0_u_u_u:
6942; GFX900:       ; %bb.0:
6943; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6944; GFX900-NEXT:    ;;#ASMSTART
6945; GFX900-NEXT:    ; def s[8:10]
6946; GFX900-NEXT:    ;;#ASMEND
6947; GFX900-NEXT:    ;;#ASMSTART
6948; GFX900-NEXT:    ; use s[8:11]
6949; GFX900-NEXT:    ;;#ASMEND
6950; GFX900-NEXT:    s_setpc_b64 s[30:31]
6951;
6952; GFX90A-LABEL: s_shuffle_v4i32_v3i32__0_u_u_u:
6953; GFX90A:       ; %bb.0:
6954; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6955; GFX90A-NEXT:    ;;#ASMSTART
6956; GFX90A-NEXT:    ; def s[8:10]
6957; GFX90A-NEXT:    ;;#ASMEND
6958; GFX90A-NEXT:    ;;#ASMSTART
6959; GFX90A-NEXT:    ; use s[8:11]
6960; GFX90A-NEXT:    ;;#ASMEND
6961; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6962;
6963; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_u_u_u:
6964; GFX940:       ; %bb.0:
6965; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6966; GFX940-NEXT:    ;;#ASMSTART
6967; GFX940-NEXT:    ; def s[8:10]
6968; GFX940-NEXT:    ;;#ASMEND
6969; GFX940-NEXT:    s_nop 0
6970; GFX940-NEXT:    ;;#ASMSTART
6971; GFX940-NEXT:    ; use s[8:11]
6972; GFX940-NEXT:    ;;#ASMEND
6973; GFX940-NEXT:    s_setpc_b64 s[30:31]
6974  %vec0 = call <3 x i32> asm "; def $0", "=s"()
6975  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
6976  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
6977  ret void
6978}
6979
; Result lanes: [%vec0[1], poison, poison, poison]. The checks expect a single
; s_mov_b32 copying the second register of the def into s8 before the use of
; s[8:11].
6980define void @s_shuffle_v4i32_v3i32__1_u_u_u() {
6981; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u:
6982; GFX900:       ; %bb.0:
6983; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6984; GFX900-NEXT:    ;;#ASMSTART
6985; GFX900-NEXT:    ; def s[4:6]
6986; GFX900-NEXT:    ;;#ASMEND
6987; GFX900-NEXT:    s_mov_b32 s8, s5
6988; GFX900-NEXT:    ;;#ASMSTART
6989; GFX900-NEXT:    ; use s[8:11]
6990; GFX900-NEXT:    ;;#ASMEND
6991; GFX900-NEXT:    s_setpc_b64 s[30:31]
6992;
6993; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u:
6994; GFX90A:       ; %bb.0:
6995; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6996; GFX90A-NEXT:    ;;#ASMSTART
6997; GFX90A-NEXT:    ; def s[4:6]
6998; GFX90A-NEXT:    ;;#ASMEND
6999; GFX90A-NEXT:    s_mov_b32 s8, s5
7000; GFX90A-NEXT:    ;;#ASMSTART
7001; GFX90A-NEXT:    ; use s[8:11]
7002; GFX90A-NEXT:    ;;#ASMEND
7003; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7004;
7005; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_u_u_u:
7006; GFX940:       ; %bb.0:
7007; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7008; GFX940-NEXT:    ;;#ASMSTART
7009; GFX940-NEXT:    ; def s[0:2]
7010; GFX940-NEXT:    ;;#ASMEND
7011; GFX940-NEXT:    s_mov_b32 s8, s1
7012; GFX940-NEXT:    ;;#ASMSTART
7013; GFX940-NEXT:    ; use s[8:11]
7014; GFX940-NEXT:    ;;#ASMEND
7015; GFX940-NEXT:    s_setpc_b64 s[30:31]
7016  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7017  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
7018  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7019  ret void
7020}
7021
; Result lanes: [%vec0[2], poison, poison, poison]. The checks expect a single
; s_mov_b32 copying the third register of the def into s8 before the use of
; s[8:11].
7022define void @s_shuffle_v4i32_v3i32__2_u_u_u() {
7023; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u:
7024; GFX900:       ; %bb.0:
7025; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7026; GFX900-NEXT:    ;;#ASMSTART
7027; GFX900-NEXT:    ; def s[4:6]
7028; GFX900-NEXT:    ;;#ASMEND
7029; GFX900-NEXT:    s_mov_b32 s8, s6
7030; GFX900-NEXT:    ;;#ASMSTART
7031; GFX900-NEXT:    ; use s[8:11]
7032; GFX900-NEXT:    ;;#ASMEND
7033; GFX900-NEXT:    s_setpc_b64 s[30:31]
7034;
7035; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u:
7036; GFX90A:       ; %bb.0:
7037; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7038; GFX90A-NEXT:    ;;#ASMSTART
7039; GFX90A-NEXT:    ; def s[4:6]
7040; GFX90A-NEXT:    ;;#ASMEND
7041; GFX90A-NEXT:    s_mov_b32 s8, s6
7042; GFX90A-NEXT:    ;;#ASMSTART
7043; GFX90A-NEXT:    ; use s[8:11]
7044; GFX90A-NEXT:    ;;#ASMEND
7045; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7046;
7047; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_u_u_u:
7048; GFX940:       ; %bb.0:
7049; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7050; GFX940-NEXT:    ;;#ASMSTART
7051; GFX940-NEXT:    ; def s[0:2]
7052; GFX940-NEXT:    ;;#ASMEND
7053; GFX940-NEXT:    s_mov_b32 s8, s2
7054; GFX940-NEXT:    ;;#ASMSTART
7055; GFX940-NEXT:    ; use s[8:11]
7056; GFX940-NEXT:    ;;#ASMEND
7057; GFX940-NEXT:    s_setpc_b64 s[30:31]
7058  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7059  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
7060  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7061  ret void
7062}
7063
; Mask index 3 refers to element 0 of the second shuffle operand, which is
; poison in this function, so no lane of %vec0 is selected. The expected
; output has no def and no copies, only the use of s[8:11].
7064define void @s_shuffle_v4i32_v3i32__3_u_u_u() {
7065; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_u_u_u:
7066; GFX9:       ; %bb.0:
7067; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7068; GFX9-NEXT:    ;;#ASMSTART
7069; GFX9-NEXT:    ; use s[8:11]
7070; GFX9-NEXT:    ;;#ASMEND
7071; GFX9-NEXT:    s_setpc_b64 s[30:31]
7072  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7073  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
7074  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7075  ret void
7076}
7077
; Result lanes: [%vec1[1], poison, poison, poison]; %vec0 is not referenced by
; the mask. The checks show a single def and one s_mov_b32 from its second
; register into s8.
7078define void @s_shuffle_v4i32_v3i32__4_u_u_u() {
7079; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u:
7080; GFX900:       ; %bb.0:
7081; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7082; GFX900-NEXT:    ;;#ASMSTART
7083; GFX900-NEXT:    ; def s[4:6]
7084; GFX900-NEXT:    ;;#ASMEND
7085; GFX900-NEXT:    s_mov_b32 s8, s5
7086; GFX900-NEXT:    ;;#ASMSTART
7087; GFX900-NEXT:    ; use s[8:11]
7088; GFX900-NEXT:    ;;#ASMEND
7089; GFX900-NEXT:    s_setpc_b64 s[30:31]
7090;
7091; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u:
7092; GFX90A:       ; %bb.0:
7093; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7094; GFX90A-NEXT:    ;;#ASMSTART
7095; GFX90A-NEXT:    ; def s[4:6]
7096; GFX90A-NEXT:    ;;#ASMEND
7097; GFX90A-NEXT:    s_mov_b32 s8, s5
7098; GFX90A-NEXT:    ;;#ASMSTART
7099; GFX90A-NEXT:    ; use s[8:11]
7100; GFX90A-NEXT:    ;;#ASMEND
7101; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7102;
7103; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_u_u_u:
7104; GFX940:       ; %bb.0:
7105; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7106; GFX940-NEXT:    ;;#ASMSTART
7107; GFX940-NEXT:    ; def s[0:2]
7108; GFX940-NEXT:    ;;#ASMEND
7109; GFX940-NEXT:    s_mov_b32 s8, s1
7110; GFX940-NEXT:    ;;#ASMSTART
7111; GFX940-NEXT:    ; use s[8:11]
7112; GFX940-NEXT:    ;;#ASMEND
7113; GFX940-NEXT:    s_setpc_b64 s[30:31]
7114  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7115  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7116  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
7117  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7118  ret void
7119}
7120
; Result lanes: [%vec1[2], poison, poison, poison]; %vec0 is not referenced by
; the mask. The checks show a single def and one s_mov_b32 from its third
; register into s8.
7121define void @s_shuffle_v4i32_v3i32__5_u_u_u() {
7122; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u:
7123; GFX900:       ; %bb.0:
7124; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7125; GFX900-NEXT:    ;;#ASMSTART
7126; GFX900-NEXT:    ; def s[4:6]
7127; GFX900-NEXT:    ;;#ASMEND
7128; GFX900-NEXT:    s_mov_b32 s8, s6
7129; GFX900-NEXT:    ;;#ASMSTART
7130; GFX900-NEXT:    ; use s[8:11]
7131; GFX900-NEXT:    ;;#ASMEND
7132; GFX900-NEXT:    s_setpc_b64 s[30:31]
7133;
7134; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u:
7135; GFX90A:       ; %bb.0:
7136; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7137; GFX90A-NEXT:    ;;#ASMSTART
7138; GFX90A-NEXT:    ; def s[4:6]
7139; GFX90A-NEXT:    ;;#ASMEND
7140; GFX90A-NEXT:    s_mov_b32 s8, s6
7141; GFX90A-NEXT:    ;;#ASMSTART
7142; GFX90A-NEXT:    ; use s[8:11]
7143; GFX90A-NEXT:    ;;#ASMEND
7144; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7145;
7146; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_u_u:
7147; GFX940:       ; %bb.0:
7148; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7149; GFX940-NEXT:    ;;#ASMSTART
7150; GFX940-NEXT:    ; def s[0:2]
7151; GFX940-NEXT:    ;;#ASMEND
7152; GFX940-NEXT:    s_mov_b32 s8, s2
7153; GFX940-NEXT:    ;;#ASMSTART
7154; GFX940-NEXT:    ; use s[8:11]
7155; GFX940-NEXT:    ;;#ASMEND
7156; GFX940-NEXT:    s_setpc_b64 s[30:31]
7157  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7158  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7159  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
7160  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7161  ret void
7162}
7163
; Result lanes: [%vec1[2], %vec0[0], poison, poison]. Both inputs are
; referenced by the mask, so the checks show two defs followed by s_mov_b32
; copies into s8 and s9 before the use of s[8:11].
7164define void @s_shuffle_v4i32_v3i32__5_0_u_u() {
7165; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u:
7166; GFX900:       ; %bb.0:
7167; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7168; GFX900-NEXT:    ;;#ASMSTART
7169; GFX900-NEXT:    ; def s[8:10]
7170; GFX900-NEXT:    ;;#ASMEND
7171; GFX900-NEXT:    ;;#ASMSTART
7172; GFX900-NEXT:    ; def s[4:6]
7173; GFX900-NEXT:    ;;#ASMEND
7174; GFX900-NEXT:    s_mov_b32 s8, s10
7175; GFX900-NEXT:    s_mov_b32 s9, s4
7176; GFX900-NEXT:    ;;#ASMSTART
7177; GFX900-NEXT:    ; use s[8:11]
7178; GFX900-NEXT:    ;;#ASMEND
7179; GFX900-NEXT:    s_setpc_b64 s[30:31]
7180;
7181; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u:
7182; GFX90A:       ; %bb.0:
7183; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7184; GFX90A-NEXT:    ;;#ASMSTART
7185; GFX90A-NEXT:    ; def s[8:10]
7186; GFX90A-NEXT:    ;;#ASMEND
7187; GFX90A-NEXT:    ;;#ASMSTART
7188; GFX90A-NEXT:    ; def s[4:6]
7189; GFX90A-NEXT:    ;;#ASMEND
7190; GFX90A-NEXT:    s_mov_b32 s8, s10
7191; GFX90A-NEXT:    s_mov_b32 s9, s4
7192; GFX90A-NEXT:    ;;#ASMSTART
7193; GFX90A-NEXT:    ; use s[8:11]
7194; GFX90A-NEXT:    ;;#ASMEND
7195; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7196;
7197; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_u_u:
7198; GFX940:       ; %bb.0:
7199; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7200; GFX940-NEXT:    ;;#ASMSTART
7201; GFX940-NEXT:    ; def s[0:2]
7202; GFX940-NEXT:    ;;#ASMEND
7203; GFX940-NEXT:    ;;#ASMSTART
7204; GFX940-NEXT:    ; def s[4:6]
7205; GFX940-NEXT:    ;;#ASMEND
7206; GFX940-NEXT:    s_mov_b32 s8, s6
7207; GFX940-NEXT:    s_mov_b32 s9, s0
7208; GFX940-NEXT:    ;;#ASMSTART
7209; GFX940-NEXT:    ; use s[8:11]
7210; GFX940-NEXT:    ;;#ASMEND
7211; GFX940-NEXT:    s_setpc_b64 s[30:31]
7212  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7213  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7214  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
7215  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7216  ret void
7217}
7218
; Result lanes: [%vec1[2], %vec0[1], poison, poison]. The checks show two
; defs, one of them placed in s[8:10], and only a copy into s8; s9 is left as
; produced by the s[8:10] def, so lane 1 needs no move.
7219define void @s_shuffle_v4i32_v3i32__5_1_u_u() {
7220; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_u_u:
7221; GFX900:       ; %bb.0:
7222; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7223; GFX900-NEXT:    ;;#ASMSTART
7224; GFX900-NEXT:    ; def s[8:10]
7225; GFX900-NEXT:    ;;#ASMEND
7226; GFX900-NEXT:    ;;#ASMSTART
7227; GFX900-NEXT:    ; def s[4:6]
7228; GFX900-NEXT:    ;;#ASMEND
7229; GFX900-NEXT:    s_mov_b32 s8, s6
7230; GFX900-NEXT:    ;;#ASMSTART
7231; GFX900-NEXT:    ; use s[8:11]
7232; GFX900-NEXT:    ;;#ASMEND
7233; GFX900-NEXT:    s_setpc_b64 s[30:31]
7234;
7235; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_u_u:
7236; GFX90A:       ; %bb.0:
7237; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7238; GFX90A-NEXT:    ;;#ASMSTART
7239; GFX90A-NEXT:    ; def s[8:10]
7240; GFX90A-NEXT:    ;;#ASMEND
7241; GFX90A-NEXT:    ;;#ASMSTART
7242; GFX90A-NEXT:    ; def s[4:6]
7243; GFX90A-NEXT:    ;;#ASMEND
7244; GFX90A-NEXT:    s_mov_b32 s8, s6
7245; GFX90A-NEXT:    ;;#ASMSTART
7246; GFX90A-NEXT:    ; use s[8:11]
7247; GFX90A-NEXT:    ;;#ASMEND
7248; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7249;
7250; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_u_u:
7251; GFX940:       ; %bb.0:
7252; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7253; GFX940-NEXT:    ;;#ASMSTART
7254; GFX940-NEXT:    ; def s[8:10]
7255; GFX940-NEXT:    ;;#ASMEND
7256; GFX940-NEXT:    ;;#ASMSTART
7257; GFX940-NEXT:    ; def s[0:2]
7258; GFX940-NEXT:    ;;#ASMEND
7259; GFX940-NEXT:    s_mov_b32 s8, s2
7260; GFX940-NEXT:    ;;#ASMSTART
7261; GFX940-NEXT:    ; use s[8:11]
7262; GFX940-NEXT:    ;;#ASMEND
7263; GFX940-NEXT:    s_setpc_b64 s[30:31]
7264  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7265  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7266  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
7267  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7268  ret void
7269}
7270
; Result lanes: [%vec1[2], %vec0[2], poison, poison]. Both inputs are
; referenced by the mask; the checks show two defs and s_mov_b32 copies of
; each def's third register into s8 and s9.
7271define void @s_shuffle_v4i32_v3i32__5_2_u_u() {
7272; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u:
7273; GFX900:       ; %bb.0:
7274; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7275; GFX900-NEXT:    ;;#ASMSTART
7276; GFX900-NEXT:    ; def s[8:10]
7277; GFX900-NEXT:    ;;#ASMEND
7278; GFX900-NEXT:    ;;#ASMSTART
7279; GFX900-NEXT:    ; def s[4:6]
7280; GFX900-NEXT:    ;;#ASMEND
7281; GFX900-NEXT:    s_mov_b32 s8, s10
7282; GFX900-NEXT:    s_mov_b32 s9, s6
7283; GFX900-NEXT:    ;;#ASMSTART
7284; GFX900-NEXT:    ; use s[8:11]
7285; GFX900-NEXT:    ;;#ASMEND
7286; GFX900-NEXT:    s_setpc_b64 s[30:31]
7287;
7288; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u:
7289; GFX90A:       ; %bb.0:
7290; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7291; GFX90A-NEXT:    ;;#ASMSTART
7292; GFX90A-NEXT:    ; def s[8:10]
7293; GFX90A-NEXT:    ;;#ASMEND
7294; GFX90A-NEXT:    ;;#ASMSTART
7295; GFX90A-NEXT:    ; def s[4:6]
7296; GFX90A-NEXT:    ;;#ASMEND
7297; GFX90A-NEXT:    s_mov_b32 s8, s10
7298; GFX90A-NEXT:    s_mov_b32 s9, s6
7299; GFX90A-NEXT:    ;;#ASMSTART
7300; GFX90A-NEXT:    ; use s[8:11]
7301; GFX90A-NEXT:    ;;#ASMEND
7302; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7303;
7304; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_u_u:
7305; GFX940:       ; %bb.0:
7306; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7307; GFX940-NEXT:    ;;#ASMSTART
7308; GFX940-NEXT:    ; def s[0:2]
7309; GFX940-NEXT:    ;;#ASMEND
7310; GFX940-NEXT:    ;;#ASMSTART
7311; GFX940-NEXT:    ; def s[4:6]
7312; GFX940-NEXT:    ;;#ASMEND
7313; GFX940-NEXT:    s_mov_b32 s8, s6
7314; GFX940-NEXT:    s_mov_b32 s9, s2
7315; GFX940-NEXT:    ;;#ASMSTART
7316; GFX940-NEXT:    ; use s[8:11]
7317; GFX940-NEXT:    ;;#ASMEND
7318; GFX940-NEXT:    s_setpc_b64 s[30:31]
7319  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7320  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7321  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
7322  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7323  ret void
7324}
7325
; Result lanes: [%vec1[2], %vec1[0], poison, poison]; only %vec1 is referenced
; by the mask. The checks show a single def, with its third register copied to
; s8 and its first register copied to s9.
7326define void @s_shuffle_v4i32_v3i32__5_3_u_u() {
7327; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u:
7328; GFX900:       ; %bb.0:
7329; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7330; GFX900-NEXT:    ;;#ASMSTART
7331; GFX900-NEXT:    ; def s[4:6]
7332; GFX900-NEXT:    ;;#ASMEND
7333; GFX900-NEXT:    s_mov_b32 s8, s6
7334; GFX900-NEXT:    s_mov_b32 s9, s4
7335; GFX900-NEXT:    ;;#ASMSTART
7336; GFX900-NEXT:    ; use s[8:11]
7337; GFX900-NEXT:    ;;#ASMEND
7338; GFX900-NEXT:    s_setpc_b64 s[30:31]
7339;
7340; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u:
7341; GFX90A:       ; %bb.0:
7342; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7343; GFX90A-NEXT:    ;;#ASMSTART
7344; GFX90A-NEXT:    ; def s[4:6]
7345; GFX90A-NEXT:    ;;#ASMEND
7346; GFX90A-NEXT:    s_mov_b32 s8, s6
7347; GFX90A-NEXT:    s_mov_b32 s9, s4
7348; GFX90A-NEXT:    ;;#ASMSTART
7349; GFX90A-NEXT:    ; use s[8:11]
7350; GFX90A-NEXT:    ;;#ASMEND
7351; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7352;
7353; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_u_u:
7354; GFX940:       ; %bb.0:
7355; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7356; GFX940-NEXT:    ;;#ASMSTART
7357; GFX940-NEXT:    ; def s[0:2]
7358; GFX940-NEXT:    ;;#ASMEND
7359; GFX940-NEXT:    s_mov_b32 s8, s2
7360; GFX940-NEXT:    s_mov_b32 s9, s0
7361; GFX940-NEXT:    ;;#ASMSTART
7362; GFX940-NEXT:    ; use s[8:11]
7363; GFX940-NEXT:    ;;#ASMEND
7364; GFX940-NEXT:    s_setpc_b64 s[30:31]
7365  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7366  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7367  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
7368  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7369  ret void
7370}
7371
; Result lanes: [%vec1[2], %vec1[1], poison, poison]; only %vec1 is referenced
; by the mask. The checks show a single def in s[8:10] and only s8 being
; overwritten from s10; s9 is left as defined.
7372define void @s_shuffle_v4i32_v3i32__5_4_u_u() {
7373; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_4_u_u:
7374; GFX9:       ; %bb.0:
7375; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7376; GFX9-NEXT:    ;;#ASMSTART
7377; GFX9-NEXT:    ; def s[8:10]
7378; GFX9-NEXT:    ;;#ASMEND
7379; GFX9-NEXT:    s_mov_b32 s8, s10
7380; GFX9-NEXT:    ;;#ASMSTART
7381; GFX9-NEXT:    ; use s[8:11]
7382; GFX9-NEXT:    ;;#ASMEND
7383; GFX9-NEXT:    s_setpc_b64 s[30:31]
7384  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7385  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7386  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
7387  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7388  ret void
7389}
7390
; Result lanes: [%vec1[2], %vec1[2], poison, poison]; only %vec1 is referenced
; by the mask. The checks show a single def whose third register is copied
; into both s8 and s9.
7391define void @s_shuffle_v4i32_v3i32__5_5_u_u() {
7392; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u:
7393; GFX900:       ; %bb.0:
7394; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7395; GFX900-NEXT:    ;;#ASMSTART
7396; GFX900-NEXT:    ; def s[4:6]
7397; GFX900-NEXT:    ;;#ASMEND
7398; GFX900-NEXT:    s_mov_b32 s8, s6
7399; GFX900-NEXT:    s_mov_b32 s9, s6
7400; GFX900-NEXT:    ;;#ASMSTART
7401; GFX900-NEXT:    ; use s[8:11]
7402; GFX900-NEXT:    ;;#ASMEND
7403; GFX900-NEXT:    s_setpc_b64 s[30:31]
7404;
7405; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u:
7406; GFX90A:       ; %bb.0:
7407; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7408; GFX90A-NEXT:    ;;#ASMSTART
7409; GFX90A-NEXT:    ; def s[4:6]
7410; GFX90A-NEXT:    ;;#ASMEND
7411; GFX90A-NEXT:    s_mov_b32 s8, s6
7412; GFX90A-NEXT:    s_mov_b32 s9, s6
7413; GFX90A-NEXT:    ;;#ASMSTART
7414; GFX90A-NEXT:    ; use s[8:11]
7415; GFX90A-NEXT:    ;;#ASMEND
7416; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7417;
7418; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_u:
7419; GFX940:       ; %bb.0:
7420; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7421; GFX940-NEXT:    ;;#ASMSTART
7422; GFX940-NEXT:    ; def s[0:2]
7423; GFX940-NEXT:    ;;#ASMEND
7424; GFX940-NEXT:    s_mov_b32 s8, s2
7425; GFX940-NEXT:    s_mov_b32 s9, s2
7426; GFX940-NEXT:    ;;#ASMSTART
7427; GFX940-NEXT:    ; use s[8:11]
7428; GFX940-NEXT:    ;;#ASMEND
7429; GFX940-NEXT:    s_setpc_b64 s[30:31]
7430  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7431  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7432  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
7433  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7434  ret void
7435}
7436
; Result lanes: [%vec1[2], %vec1[2], %vec0[0], poison]. Both inputs are
; referenced by the mask; the checks show two defs followed by three
; s_mov_b32 copies into s8, s9 and s10.
7437define void @s_shuffle_v4i32_v3i32__5_5_0_u() {
7438; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u:
7439; GFX900:       ; %bb.0:
7440; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7441; GFX900-NEXT:    ;;#ASMSTART
7442; GFX900-NEXT:    ; def s[8:10]
7443; GFX900-NEXT:    ;;#ASMEND
7444; GFX900-NEXT:    ;;#ASMSTART
7445; GFX900-NEXT:    ; def s[4:6]
7446; GFX900-NEXT:    ;;#ASMEND
7447; GFX900-NEXT:    s_mov_b32 s8, s10
7448; GFX900-NEXT:    s_mov_b32 s9, s10
7449; GFX900-NEXT:    s_mov_b32 s10, s4
7450; GFX900-NEXT:    ;;#ASMSTART
7451; GFX900-NEXT:    ; use s[8:11]
7452; GFX900-NEXT:    ;;#ASMEND
7453; GFX900-NEXT:    s_setpc_b64 s[30:31]
7454;
7455; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u:
7456; GFX90A:       ; %bb.0:
7457; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7458; GFX90A-NEXT:    ;;#ASMSTART
7459; GFX90A-NEXT:    ; def s[8:10]
7460; GFX90A-NEXT:    ;;#ASMEND
7461; GFX90A-NEXT:    ;;#ASMSTART
7462; GFX90A-NEXT:    ; def s[4:6]
7463; GFX90A-NEXT:    ;;#ASMEND
7464; GFX90A-NEXT:    s_mov_b32 s8, s10
7465; GFX90A-NEXT:    s_mov_b32 s9, s10
7466; GFX90A-NEXT:    s_mov_b32 s10, s4
7467; GFX90A-NEXT:    ;;#ASMSTART
7468; GFX90A-NEXT:    ; use s[8:11]
7469; GFX90A-NEXT:    ;;#ASMEND
7470; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7471;
7472; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_u:
7473; GFX940:       ; %bb.0:
7474; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7475; GFX940-NEXT:    ;;#ASMSTART
7476; GFX940-NEXT:    ; def s[0:2]
7477; GFX940-NEXT:    ;;#ASMEND
7478; GFX940-NEXT:    ;;#ASMSTART
7479; GFX940-NEXT:    ; def s[4:6]
7480; GFX940-NEXT:    ;;#ASMEND
7481; GFX940-NEXT:    s_mov_b32 s8, s6
7482; GFX940-NEXT:    s_mov_b32 s9, s6
7483; GFX940-NEXT:    s_mov_b32 s10, s0
7484; GFX940-NEXT:    ;;#ASMSTART
7485; GFX940-NEXT:    ; use s[8:11]
7486; GFX940-NEXT:    ;;#ASMEND
7487; GFX940-NEXT:    s_setpc_b64 s[30:31]
7488  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7489  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7490  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
7491  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7492  ret void
7493}
7494
; Result lanes: [%vec1[2], %vec1[2], %vec0[1], poison]. Both inputs are
; referenced by the mask; the checks show two defs followed by three
; s_mov_b32 copies into s8, s9 and s10.
7495define void @s_shuffle_v4i32_v3i32__5_5_1_u() {
7496; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u:
7497; GFX900:       ; %bb.0:
7498; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7499; GFX900-NEXT:    ;;#ASMSTART
7500; GFX900-NEXT:    ; def s[8:10]
7501; GFX900-NEXT:    ;;#ASMEND
7502; GFX900-NEXT:    ;;#ASMSTART
7503; GFX900-NEXT:    ; def s[4:6]
7504; GFX900-NEXT:    ;;#ASMEND
7505; GFX900-NEXT:    s_mov_b32 s8, s10
7506; GFX900-NEXT:    s_mov_b32 s9, s10
7507; GFX900-NEXT:    s_mov_b32 s10, s5
7508; GFX900-NEXT:    ;;#ASMSTART
7509; GFX900-NEXT:    ; use s[8:11]
7510; GFX900-NEXT:    ;;#ASMEND
7511; GFX900-NEXT:    s_setpc_b64 s[30:31]
7512;
7513; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u:
7514; GFX90A:       ; %bb.0:
7515; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7516; GFX90A-NEXT:    ;;#ASMSTART
7517; GFX90A-NEXT:    ; def s[8:10]
7518; GFX90A-NEXT:    ;;#ASMEND
7519; GFX90A-NEXT:    ;;#ASMSTART
7520; GFX90A-NEXT:    ; def s[4:6]
7521; GFX90A-NEXT:    ;;#ASMEND
7522; GFX90A-NEXT:    s_mov_b32 s8, s10
7523; GFX90A-NEXT:    s_mov_b32 s9, s10
7524; GFX90A-NEXT:    s_mov_b32 s10, s5
7525; GFX90A-NEXT:    ;;#ASMSTART
7526; GFX90A-NEXT:    ; use s[8:11]
7527; GFX90A-NEXT:    ;;#ASMEND
7528; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7529;
7530; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_u:
7531; GFX940:       ; %bb.0:
7532; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7533; GFX940-NEXT:    ;;#ASMSTART
7534; GFX940-NEXT:    ; def s[0:2]
7535; GFX940-NEXT:    ;;#ASMEND
7536; GFX940-NEXT:    ;;#ASMSTART
7537; GFX940-NEXT:    ; def s[4:6]
7538; GFX940-NEXT:    ;;#ASMEND
7539; GFX940-NEXT:    s_mov_b32 s8, s6
7540; GFX940-NEXT:    s_mov_b32 s9, s6
7541; GFX940-NEXT:    s_mov_b32 s10, s1
7542; GFX940-NEXT:    ;;#ASMSTART
7543; GFX940-NEXT:    ; use s[8:11]
7544; GFX940-NEXT:    ;;#ASMEND
7545; GFX940-NEXT:    s_setpc_b64 s[30:31]
7546  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7547  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7548  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
7549  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7550  ret void
7551}
7552
; Result lanes: [%vec1[2], %vec1[2], %vec0[2], poison]. The checks show two
; defs, one of them placed in s[8:10], and copies only into s8 and s9; s10 is
; left as produced by the s[8:10] def, so lane 2 needs no move.
7553define void @s_shuffle_v4i32_v3i32__5_5_2_u() {
7554; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_u:
7555; GFX900:       ; %bb.0:
7556; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7557; GFX900-NEXT:    ;;#ASMSTART
7558; GFX900-NEXT:    ; def s[8:10]
7559; GFX900-NEXT:    ;;#ASMEND
7560; GFX900-NEXT:    ;;#ASMSTART
7561; GFX900-NEXT:    ; def s[4:6]
7562; GFX900-NEXT:    ;;#ASMEND
7563; GFX900-NEXT:    s_mov_b32 s8, s6
7564; GFX900-NEXT:    s_mov_b32 s9, s6
7565; GFX900-NEXT:    ;;#ASMSTART
7566; GFX900-NEXT:    ; use s[8:11]
7567; GFX900-NEXT:    ;;#ASMEND
7568; GFX900-NEXT:    s_setpc_b64 s[30:31]
7569;
7570; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_u:
7571; GFX90A:       ; %bb.0:
7572; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7573; GFX90A-NEXT:    ;;#ASMSTART
7574; GFX90A-NEXT:    ; def s[8:10]
7575; GFX90A-NEXT:    ;;#ASMEND
7576; GFX90A-NEXT:    ;;#ASMSTART
7577; GFX90A-NEXT:    ; def s[4:6]
7578; GFX90A-NEXT:    ;;#ASMEND
7579; GFX90A-NEXT:    s_mov_b32 s8, s6
7580; GFX90A-NEXT:    s_mov_b32 s9, s6
7581; GFX90A-NEXT:    ;;#ASMSTART
7582; GFX90A-NEXT:    ; use s[8:11]
7583; GFX90A-NEXT:    ;;#ASMEND
7584; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7585;
7586; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_u:
7587; GFX940:       ; %bb.0:
7588; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7589; GFX940-NEXT:    ;;#ASMSTART
7590; GFX940-NEXT:    ; def s[8:10]
7591; GFX940-NEXT:    ;;#ASMEND
7592; GFX940-NEXT:    ;;#ASMSTART
7593; GFX940-NEXT:    ; def s[0:2]
7594; GFX940-NEXT:    ;;#ASMEND
7595; GFX940-NEXT:    s_mov_b32 s8, s2
7596; GFX940-NEXT:    s_mov_b32 s9, s2
7597; GFX940-NEXT:    ;;#ASMSTART
7598; GFX940-NEXT:    ; use s[8:11]
7599; GFX940-NEXT:    ;;#ASMEND
7600; GFX940-NEXT:    s_setpc_b64 s[30:31]
7601  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7602  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7603  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
7604  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7605  ret void
7606}
7607
; Result lanes: [%vec1[2], %vec1[2], %vec1[0], poison]; only %vec1 is
; referenced by the mask. The checks show a single def, with its third
; register copied to s8 and s9 and its first register copied to s10.
7608define void @s_shuffle_v4i32_v3i32__5_5_3_u() {
7609; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u:
7610; GFX900:       ; %bb.0:
7611; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7612; GFX900-NEXT:    ;;#ASMSTART
7613; GFX900-NEXT:    ; def s[4:6]
7614; GFX900-NEXT:    ;;#ASMEND
7615; GFX900-NEXT:    s_mov_b32 s8, s6
7616; GFX900-NEXT:    s_mov_b32 s9, s6
7617; GFX900-NEXT:    s_mov_b32 s10, s4
7618; GFX900-NEXT:    ;;#ASMSTART
7619; GFX900-NEXT:    ; use s[8:11]
7620; GFX900-NEXT:    ;;#ASMEND
7621; GFX900-NEXT:    s_setpc_b64 s[30:31]
7622;
7623; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u:
7624; GFX90A:       ; %bb.0:
7625; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7626; GFX90A-NEXT:    ;;#ASMSTART
7627; GFX90A-NEXT:    ; def s[4:6]
7628; GFX90A-NEXT:    ;;#ASMEND
7629; GFX90A-NEXT:    s_mov_b32 s8, s6
7630; GFX90A-NEXT:    s_mov_b32 s9, s6
7631; GFX90A-NEXT:    s_mov_b32 s10, s4
7632; GFX90A-NEXT:    ;;#ASMSTART
7633; GFX90A-NEXT:    ; use s[8:11]
7634; GFX90A-NEXT:    ;;#ASMEND
7635; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7636;
7637; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_u:
7638; GFX940:       ; %bb.0:
7639; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7640; GFX940-NEXT:    ;;#ASMSTART
7641; GFX940-NEXT:    ; def s[0:2]
7642; GFX940-NEXT:    ;;#ASMEND
7643; GFX940-NEXT:    s_mov_b32 s8, s2
7644; GFX940-NEXT:    s_mov_b32 s9, s2
7645; GFX940-NEXT:    s_mov_b32 s10, s0
7646; GFX940-NEXT:    ;;#ASMSTART
7647; GFX940-NEXT:    ; use s[8:11]
7648; GFX940-NEXT:    ;;#ASMEND
7649; GFX940-NEXT:    s_setpc_b64 s[30:31]
7650  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7651  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7652  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
7653  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7654  ret void
7655}
7656
; Result lanes: [%vec1[2], %vec1[2], %vec1[1], poison]; only %vec1 is
; referenced by the mask. The checks show a single def, with its third
; register copied to s8 and s9 and its second register copied to s10.
7657define void @s_shuffle_v4i32_v3i32__5_5_4_u() {
7658; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u:
7659; GFX900:       ; %bb.0:
7660; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7661; GFX900-NEXT:    ;;#ASMSTART
7662; GFX900-NEXT:    ; def s[4:6]
7663; GFX900-NEXT:    ;;#ASMEND
7664; GFX900-NEXT:    s_mov_b32 s8, s6
7665; GFX900-NEXT:    s_mov_b32 s9, s6
7666; GFX900-NEXT:    s_mov_b32 s10, s5
7667; GFX900-NEXT:    ;;#ASMSTART
7668; GFX900-NEXT:    ; use s[8:11]
7669; GFX900-NEXT:    ;;#ASMEND
7670; GFX900-NEXT:    s_setpc_b64 s[30:31]
7671;
7672; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u:
7673; GFX90A:       ; %bb.0:
7674; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7675; GFX90A-NEXT:    ;;#ASMSTART
7676; GFX90A-NEXT:    ; def s[4:6]
7677; GFX90A-NEXT:    ;;#ASMEND
7678; GFX90A-NEXT:    s_mov_b32 s8, s6
7679; GFX90A-NEXT:    s_mov_b32 s9, s6
7680; GFX90A-NEXT:    s_mov_b32 s10, s5
7681; GFX90A-NEXT:    ;;#ASMSTART
7682; GFX90A-NEXT:    ; use s[8:11]
7683; GFX90A-NEXT:    ;;#ASMEND
7684; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7685;
7686; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_u:
7687; GFX940:       ; %bb.0:
7688; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7689; GFX940-NEXT:    ;;#ASMSTART
7690; GFX940-NEXT:    ; def s[0:2]
7691; GFX940-NEXT:    ;;#ASMEND
7692; GFX940-NEXT:    s_mov_b32 s8, s2
7693; GFX940-NEXT:    s_mov_b32 s9, s2
7694; GFX940-NEXT:    s_mov_b32 s10, s1
7695; GFX940-NEXT:    ;;#ASMSTART
7696; GFX940-NEXT:    ; use s[8:11]
7697; GFX940-NEXT:    ;;#ASMEND
7698; GFX940-NEXT:    s_setpc_b64 s[30:31]
7699  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7700  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7701  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
7702  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7703  ret void
7704}
7705
; Result lanes: [%vec1[2], %vec1[2], %vec1[2], poison]; only %vec1 is
; referenced by the mask. The checks show a single def in s[8:10], with s10
; copied into s8 and s9 and s10 itself left as defined.
7706define void @s_shuffle_v4i32_v3i32__5_5_5_u() {
7707; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_5_5_u:
7708; GFX9:       ; %bb.0:
7709; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7710; GFX9-NEXT:    ;;#ASMSTART
7711; GFX9-NEXT:    ; def s[8:10]
7712; GFX9-NEXT:    ;;#ASMEND
7713; GFX9-NEXT:    s_mov_b32 s8, s10
7714; GFX9-NEXT:    s_mov_b32 s9, s10
7715; GFX9-NEXT:    ;;#ASMSTART
7716; GFX9-NEXT:    ; use s[8:11]
7717; GFX9-NEXT:    ;;#ASMEND
7718; GFX9-NEXT:    s_setpc_b64 s[30:31]
7719  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7720  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7721  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
7722  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7723  ret void
7724}
7725
; Result lanes: [%vec1[2], %vec1[2], %vec1[2], %vec0[0]]; all four lanes are
; defined. The checks show two defs, one of them in s[8:10]; s10 is copied
; into s8 and s9, and the first register of the other def is copied into s11.
7726define void @s_shuffle_v4i32_v3i32__5_5_5_0() {
7727; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_0:
7728; GFX900:       ; %bb.0:
7729; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7730; GFX900-NEXT:    ;;#ASMSTART
7731; GFX900-NEXT:    ; def s[8:10]
7732; GFX900-NEXT:    ;;#ASMEND
7733; GFX900-NEXT:    ;;#ASMSTART
7734; GFX900-NEXT:    ; def s[4:6]
7735; GFX900-NEXT:    ;;#ASMEND
7736; GFX900-NEXT:    s_mov_b32 s8, s10
7737; GFX900-NEXT:    s_mov_b32 s9, s10
7738; GFX900-NEXT:    s_mov_b32 s11, s4
7739; GFX900-NEXT:    ;;#ASMSTART
7740; GFX900-NEXT:    ; use s[8:11]
7741; GFX900-NEXT:    ;;#ASMEND
7742; GFX900-NEXT:    s_setpc_b64 s[30:31]
7743;
7744; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_0:
7745; GFX90A:       ; %bb.0:
7746; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7747; GFX90A-NEXT:    ;;#ASMSTART
7748; GFX90A-NEXT:    ; def s[8:10]
7749; GFX90A-NEXT:    ;;#ASMEND
7750; GFX90A-NEXT:    ;;#ASMSTART
7751; GFX90A-NEXT:    ; def s[4:6]
7752; GFX90A-NEXT:    ;;#ASMEND
7753; GFX90A-NEXT:    s_mov_b32 s8, s10
7754; GFX90A-NEXT:    s_mov_b32 s9, s10
7755; GFX90A-NEXT:    s_mov_b32 s11, s4
7756; GFX90A-NEXT:    ;;#ASMSTART
7757; GFX90A-NEXT:    ; use s[8:11]
7758; GFX90A-NEXT:    ;;#ASMEND
7759; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7760;
7761; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_0:
7762; GFX940:       ; %bb.0:
7763; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7764; GFX940-NEXT:    ;;#ASMSTART
7765; GFX940-NEXT:    ; def s[8:10]
7766; GFX940-NEXT:    ;;#ASMEND
7767; GFX940-NEXT:    ;;#ASMSTART
7768; GFX940-NEXT:    ; def s[0:2]
7769; GFX940-NEXT:    ;;#ASMEND
7770; GFX940-NEXT:    s_mov_b32 s8, s10
7771; GFX940-NEXT:    s_mov_b32 s9, s10
7772; GFX940-NEXT:    s_mov_b32 s11, s0
7773; GFX940-NEXT:    ;;#ASMSTART
7774; GFX940-NEXT:    ; use s[8:11]
7775; GFX940-NEXT:    ;;#ASMEND
7776; GFX940-NEXT:    s_setpc_b64 s[30:31]
7777  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7778  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7779  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0>
7780  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7781  ret void
7782}
7783
; Result lanes: [%vec1[2], %vec1[2], %vec1[2], %vec0[1]]; all four lanes are
; defined. The checks show two defs, one of them in s[8:10]; s10 is copied
; into s8 and s9, and the second register of the other def is copied into s11.
7784define void @s_shuffle_v4i32_v3i32__5_5_5_1() {
7785; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_1:
7786; GFX900:       ; %bb.0:
7787; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7788; GFX900-NEXT:    ;;#ASMSTART
7789; GFX900-NEXT:    ; def s[8:10]
7790; GFX900-NEXT:    ;;#ASMEND
7791; GFX900-NEXT:    ;;#ASMSTART
7792; GFX900-NEXT:    ; def s[4:6]
7793; GFX900-NEXT:    ;;#ASMEND
7794; GFX900-NEXT:    s_mov_b32 s8, s10
7795; GFX900-NEXT:    s_mov_b32 s9, s10
7796; GFX900-NEXT:    s_mov_b32 s11, s5
7797; GFX900-NEXT:    ;;#ASMSTART
7798; GFX900-NEXT:    ; use s[8:11]
7799; GFX900-NEXT:    ;;#ASMEND
7800; GFX900-NEXT:    s_setpc_b64 s[30:31]
7801;
7802; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_1:
7803; GFX90A:       ; %bb.0:
7804; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7805; GFX90A-NEXT:    ;;#ASMSTART
7806; GFX90A-NEXT:    ; def s[8:10]
7807; GFX90A-NEXT:    ;;#ASMEND
7808; GFX90A-NEXT:    ;;#ASMSTART
7809; GFX90A-NEXT:    ; def s[4:6]
7810; GFX90A-NEXT:    ;;#ASMEND
7811; GFX90A-NEXT:    s_mov_b32 s8, s10
7812; GFX90A-NEXT:    s_mov_b32 s9, s10
7813; GFX90A-NEXT:    s_mov_b32 s11, s5
7814; GFX90A-NEXT:    ;;#ASMSTART
7815; GFX90A-NEXT:    ; use s[8:11]
7816; GFX90A-NEXT:    ;;#ASMEND
7817; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7818;
7819; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_1:
7820; GFX940:       ; %bb.0:
7821; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7822; GFX940-NEXT:    ;;#ASMSTART
7823; GFX940-NEXT:    ; def s[8:10]
7824; GFX940-NEXT:    ;;#ASMEND
7825; GFX940-NEXT:    ;;#ASMSTART
7826; GFX940-NEXT:    ; def s[0:2]
7827; GFX940-NEXT:    ;;#ASMEND
7828; GFX940-NEXT:    s_mov_b32 s8, s10
7829; GFX940-NEXT:    s_mov_b32 s9, s10
7830; GFX940-NEXT:    s_mov_b32 s11, s1
7831; GFX940-NEXT:    ;;#ASMSTART
7832; GFX940-NEXT:    ; use s[8:11]
7833; GFX940-NEXT:    ;;#ASMEND
7834; GFX940-NEXT:    s_setpc_b64 s[30:31]
7835  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7836  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7837  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
7838  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7839  ret void
7840}
7841
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> result using
; mask <5, 5, 5, 2>: result lanes 0-2 take element 2 of %vec1 and lane 3 takes
; element 2 of %vec0 (mask indices 0-2 address %vec0, 3-5 address %vec1).
; Both inputs come from "=s" inline asm; the result is consumed by an asm use
; that is constrained to s[8:11].
7842define void @s_shuffle_v4i32_v3i32__5_5_5_2() {
7843; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_2:
7844; GFX900:       ; %bb.0:
7845; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7846; GFX900-NEXT:    ;;#ASMSTART
7847; GFX900-NEXT:    ; def s[8:10]
7848; GFX900-NEXT:    ;;#ASMEND
7849; GFX900-NEXT:    ;;#ASMSTART
7850; GFX900-NEXT:    ; def s[4:6]
7851; GFX900-NEXT:    ;;#ASMEND
7852; GFX900-NEXT:    s_mov_b32 s8, s10
7853; GFX900-NEXT:    s_mov_b32 s9, s10
7854; GFX900-NEXT:    s_mov_b32 s11, s6
7855; GFX900-NEXT:    ;;#ASMSTART
7856; GFX900-NEXT:    ; use s[8:11]
7857; GFX900-NEXT:    ;;#ASMEND
7858; GFX900-NEXT:    s_setpc_b64 s[30:31]
7859;
7860; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_2:
7861; GFX90A:       ; %bb.0:
7862; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7863; GFX90A-NEXT:    ;;#ASMSTART
7864; GFX90A-NEXT:    ; def s[8:10]
7865; GFX90A-NEXT:    ;;#ASMEND
7866; GFX90A-NEXT:    ;;#ASMSTART
7867; GFX90A-NEXT:    ; def s[4:6]
7868; GFX90A-NEXT:    ;;#ASMEND
7869; GFX90A-NEXT:    s_mov_b32 s8, s10
7870; GFX90A-NEXT:    s_mov_b32 s9, s10
7871; GFX90A-NEXT:    s_mov_b32 s11, s6
7872; GFX90A-NEXT:    ;;#ASMSTART
7873; GFX90A-NEXT:    ; use s[8:11]
7874; GFX90A-NEXT:    ;;#ASMEND
7875; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7876;
7877; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_2:
7878; GFX940:       ; %bb.0:
7879; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7880; GFX940-NEXT:    ;;#ASMSTART
7881; GFX940-NEXT:    ; def s[8:10]
7882; GFX940-NEXT:    ;;#ASMEND
7883; GFX940-NEXT:    ;;#ASMSTART
7884; GFX940-NEXT:    ; def s[0:2]
7885; GFX940-NEXT:    ;;#ASMEND
7886; GFX940-NEXT:    s_mov_b32 s8, s10
7887; GFX940-NEXT:    s_mov_b32 s9, s10
7888; GFX940-NEXT:    s_mov_b32 s11, s2
7889; GFX940-NEXT:    ;;#ASMSTART
7890; GFX940-NEXT:    ; use s[8:11]
7891; GFX940-NEXT:    ;;#ASMEND
7892; GFX940-NEXT:    s_setpc_b64 s[30:31]
7893  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7894  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7895  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
7896  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7897  ret void
7898}
7899
; Scalar-register shuffle with mask <5, 5, 5, 3>: every mask index is in the
; 3-5 range, so all four result lanes come from %vec1 (lanes 0-2 take its
; element 2, lane 3 takes its element 0). %vec0 is never selected, and the
; expected output contains only a single asm def. The result is consumed by an
; asm use that is constrained to s[8:11].
7900define void @s_shuffle_v4i32_v3i32__5_5_5_3() {
7901; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_3:
7902; GFX900:       ; %bb.0:
7903; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7904; GFX900-NEXT:    ;;#ASMSTART
7905; GFX900-NEXT:    ; def s[4:6]
7906; GFX900-NEXT:    ;;#ASMEND
7907; GFX900-NEXT:    s_mov_b32 s8, s6
7908; GFX900-NEXT:    s_mov_b32 s9, s6
7909; GFX900-NEXT:    s_mov_b32 s10, s6
7910; GFX900-NEXT:    s_mov_b32 s11, s4
7911; GFX900-NEXT:    ;;#ASMSTART
7912; GFX900-NEXT:    ; use s[8:11]
7913; GFX900-NEXT:    ;;#ASMEND
7914; GFX900-NEXT:    s_setpc_b64 s[30:31]
7915;
7916; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_3:
7917; GFX90A:       ; %bb.0:
7918; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7919; GFX90A-NEXT:    ;;#ASMSTART
7920; GFX90A-NEXT:    ; def s[4:6]
7921; GFX90A-NEXT:    ;;#ASMEND
7922; GFX90A-NEXT:    s_mov_b32 s8, s6
7923; GFX90A-NEXT:    s_mov_b32 s9, s6
7924; GFX90A-NEXT:    s_mov_b32 s10, s6
7925; GFX90A-NEXT:    s_mov_b32 s11, s4
7926; GFX90A-NEXT:    ;;#ASMSTART
7927; GFX90A-NEXT:    ; use s[8:11]
7928; GFX90A-NEXT:    ;;#ASMEND
7929; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7930;
7931; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_3:
7932; GFX940:       ; %bb.0:
7933; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7934; GFX940-NEXT:    ;;#ASMSTART
7935; GFX940-NEXT:    ; def s[0:2]
7936; GFX940-NEXT:    ;;#ASMEND
7937; GFX940-NEXT:    s_mov_b32 s8, s2
7938; GFX940-NEXT:    s_mov_b32 s9, s2
7939; GFX940-NEXT:    s_mov_b32 s10, s2
7940; GFX940-NEXT:    s_mov_b32 s11, s0
7941; GFX940-NEXT:    ;;#ASMSTART
7942; GFX940-NEXT:    ; use s[8:11]
7943; GFX940-NEXT:    ;;#ASMEND
7944; GFX940-NEXT:    s_setpc_b64 s[30:31]
7945  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7946  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7947  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
7948  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
7949  ret void
7950}
7951
; Scalar-register shuffle with mask <5, 5, 5, 4>: every mask index is in the
; 3-5 range, so all four result lanes come from %vec1 (lanes 0-2 take its
; element 2, lane 3 takes its element 1). %vec0 is never selected, and the
; expected output contains only a single asm def. The result is consumed by an
; asm use that is constrained to s[8:11].
7952define void @s_shuffle_v4i32_v3i32__5_5_5_4() {
7953; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_5_4:
7954; GFX900:       ; %bb.0:
7955; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7956; GFX900-NEXT:    ;;#ASMSTART
7957; GFX900-NEXT:    ; def s[4:6]
7958; GFX900-NEXT:    ;;#ASMEND
7959; GFX900-NEXT:    s_mov_b32 s8, s6
7960; GFX900-NEXT:    s_mov_b32 s9, s6
7961; GFX900-NEXT:    s_mov_b32 s10, s6
7962; GFX900-NEXT:    s_mov_b32 s11, s5
7963; GFX900-NEXT:    ;;#ASMSTART
7964; GFX900-NEXT:    ; use s[8:11]
7965; GFX900-NEXT:    ;;#ASMEND
7966; GFX900-NEXT:    s_setpc_b64 s[30:31]
7967;
7968; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_5_4:
7969; GFX90A:       ; %bb.0:
7970; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7971; GFX90A-NEXT:    ;;#ASMSTART
7972; GFX90A-NEXT:    ; def s[4:6]
7973; GFX90A-NEXT:    ;;#ASMEND
7974; GFX90A-NEXT:    s_mov_b32 s8, s6
7975; GFX90A-NEXT:    s_mov_b32 s9, s6
7976; GFX90A-NEXT:    s_mov_b32 s10, s6
7977; GFX90A-NEXT:    s_mov_b32 s11, s5
7978; GFX90A-NEXT:    ;;#ASMSTART
7979; GFX90A-NEXT:    ; use s[8:11]
7980; GFX90A-NEXT:    ;;#ASMEND
7981; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7982;
7983; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_5_4:
7984; GFX940:       ; %bb.0:
7985; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7986; GFX940-NEXT:    ;;#ASMSTART
7987; GFX940-NEXT:    ; def s[0:2]
7988; GFX940-NEXT:    ;;#ASMEND
7989; GFX940-NEXT:    s_mov_b32 s8, s2
7990; GFX940-NEXT:    s_mov_b32 s9, s2
7991; GFX940-NEXT:    s_mov_b32 s10, s2
7992; GFX940-NEXT:    s_mov_b32 s11, s1
7993; GFX940-NEXT:    ;;#ASMSTART
7994; GFX940-NEXT:    ; use s[8:11]
7995; GFX940-NEXT:    ;;#ASMEND
7996; GFX940-NEXT:    s_setpc_b64 s[30:31]
7997  %vec0 = call <3 x i32> asm "; def $0", "=s"()
7998  %vec1 = call <3 x i32> asm "; def $0", "=s"()
7999  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
8000  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8001  ret void
8002}
8003
; Scalar-register shuffle with mask <5, 5, 5, 5>: a splat of element 2 of
; %vec1 into all four result lanes. %vec0 is never selected, and the expected
; output contains only a single asm def. One shared check prefix is used here,
; so the expected code is the same for every RUN line that enables it. The
; result is consumed by an asm use that is constrained to s[8:11].
8004define void @s_shuffle_v4i32_v3i32__5_5_5_5() {
8005; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_5_5_5:
8006; GFX9:       ; %bb.0:
8007; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8008; GFX9-NEXT:    ;;#ASMSTART
8009; GFX9-NEXT:    ; def s[8:10]
8010; GFX9-NEXT:    ;;#ASMEND
8011; GFX9-NEXT:    s_mov_b32 s8, s10
8012; GFX9-NEXT:    s_mov_b32 s9, s10
8013; GFX9-NEXT:    s_mov_b32 s11, s10
8014; GFX9-NEXT:    ;;#ASMSTART
8015; GFX9-NEXT:    ; use s[8:11]
8016; GFX9-NEXT:    ;;#ASMEND
8017; GFX9-NEXT:    s_setpc_b64 s[30:31]
8018  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8019  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8020  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
8021  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8022  ret void
8023}
8024
; Scalar-register shuffle of a single <3 x i32> input (second operand is
; poison) with mask <poison, 0, 0, 0>: result lane 0 is left undefined and
; lanes 1-3 take element 0 of %vec0. The expected output accordingly writes
; only s9, s10 and s11. The result is consumed by an asm use that is
; constrained to s[8:11].
8025define void @s_shuffle_v4i32_v3i32__u_0_0_0() {
8026; GFX900-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0:
8027; GFX900:       ; %bb.0:
8028; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8029; GFX900-NEXT:    ;;#ASMSTART
8030; GFX900-NEXT:    ; def s[4:6]
8031; GFX900-NEXT:    ;;#ASMEND
8032; GFX900-NEXT:    s_mov_b32 s9, s4
8033; GFX900-NEXT:    s_mov_b32 s10, s4
8034; GFX900-NEXT:    s_mov_b32 s11, s4
8035; GFX900-NEXT:    ;;#ASMSTART
8036; GFX900-NEXT:    ; use s[8:11]
8037; GFX900-NEXT:    ;;#ASMEND
8038; GFX900-NEXT:    s_setpc_b64 s[30:31]
8039;
8040; GFX90A-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0:
8041; GFX90A:       ; %bb.0:
8042; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8043; GFX90A-NEXT:    ;;#ASMSTART
8044; GFX90A-NEXT:    ; def s[4:6]
8045; GFX90A-NEXT:    ;;#ASMEND
8046; GFX90A-NEXT:    s_mov_b32 s9, s4
8047; GFX90A-NEXT:    s_mov_b32 s10, s4
8048; GFX90A-NEXT:    s_mov_b32 s11, s4
8049; GFX90A-NEXT:    ;;#ASMSTART
8050; GFX90A-NEXT:    ; use s[8:11]
8051; GFX90A-NEXT:    ;;#ASMEND
8052; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8053;
8054; GFX940-LABEL: s_shuffle_v4i32_v3i32__u_0_0_0:
8055; GFX940:       ; %bb.0:
8056; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8057; GFX940-NEXT:    ;;#ASMSTART
8058; GFX940-NEXT:    ; def s[0:2]
8059; GFX940-NEXT:    ;;#ASMEND
8060; GFX940-NEXT:    s_mov_b32 s9, s0
8061; GFX940-NEXT:    s_mov_b32 s10, s0
8062; GFX940-NEXT:    s_mov_b32 s11, s0
8063; GFX940-NEXT:    ;;#ASMSTART
8064; GFX940-NEXT:    ; use s[8:11]
8065; GFX940-NEXT:    ;;#ASMEND
8066; GFX940-NEXT:    s_setpc_b64 s[30:31]
8067  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8068  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
8069  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8070  ret void
8071}
8072
; Scalar-register shuffle of a single <3 x i32> input (second operand is
; poison) with an all-zero mask (zeroinitializer): a splat of element 0 of
; %vec0 into all four result lanes. One shared check prefix is used here, so
; the expected code is the same for every RUN line that enables it. The result
; is consumed by an asm use that is constrained to s[8:11].
8073define void @s_shuffle_v4i32_v3i32__0_0_0_0() {
8074; GFX9-LABEL: s_shuffle_v4i32_v3i32__0_0_0_0:
8075; GFX9:       ; %bb.0:
8076; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8077; GFX9-NEXT:    ;;#ASMSTART
8078; GFX9-NEXT:    ; def s[8:10]
8079; GFX9-NEXT:    ;;#ASMEND
8080; GFX9-NEXT:    s_mov_b32 s9, s8
8081; GFX9-NEXT:    s_mov_b32 s10, s8
8082; GFX9-NEXT:    s_mov_b32 s11, s8
8083; GFX9-NEXT:    ;;#ASMSTART
8084; GFX9-NEXT:    ; use s[8:11]
8085; GFX9-NEXT:    ;;#ASMEND
8086; GFX9-NEXT:    s_setpc_b64 s[30:31]
8087  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8088  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> zeroinitializer
8089  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8090  ret void
8091}
8092
; Scalar-register shuffle of a single <3 x i32> input (second operand is
; poison) with mask <1, 0, 0, 0>: result lane 0 takes element 1 of %vec0 and
; lanes 1-3 take element 0 of %vec0. The result is consumed by an asm use that
; is constrained to s[8:11].
8093define void @s_shuffle_v4i32_v3i32__1_0_0_0() {
8094; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_0_0_0:
8095; GFX900:       ; %bb.0:
8096; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8097; GFX900-NEXT:    ;;#ASMSTART
8098; GFX900-NEXT:    ; def s[4:6]
8099; GFX900-NEXT:    ;;#ASMEND
8100; GFX900-NEXT:    s_mov_b32 s8, s5
8101; GFX900-NEXT:    s_mov_b32 s9, s4
8102; GFX900-NEXT:    s_mov_b32 s10, s4
8103; GFX900-NEXT:    s_mov_b32 s11, s4
8104; GFX900-NEXT:    ;;#ASMSTART
8105; GFX900-NEXT:    ; use s[8:11]
8106; GFX900-NEXT:    ;;#ASMEND
8107; GFX900-NEXT:    s_setpc_b64 s[30:31]
8108;
8109; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_0_0_0:
8110; GFX90A:       ; %bb.0:
8111; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8112; GFX90A-NEXT:    ;;#ASMSTART
8113; GFX90A-NEXT:    ; def s[4:6]
8114; GFX90A-NEXT:    ;;#ASMEND
8115; GFX90A-NEXT:    s_mov_b32 s8, s5
8116; GFX90A-NEXT:    s_mov_b32 s9, s4
8117; GFX90A-NEXT:    s_mov_b32 s10, s4
8118; GFX90A-NEXT:    s_mov_b32 s11, s4
8119; GFX90A-NEXT:    ;;#ASMSTART
8120; GFX90A-NEXT:    ; use s[8:11]
8121; GFX90A-NEXT:    ;;#ASMEND
8122; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8123;
8124; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_0_0_0:
8125; GFX940:       ; %bb.0:
8126; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8127; GFX940-NEXT:    ;;#ASMSTART
8128; GFX940-NEXT:    ; def s[0:2]
8129; GFX940-NEXT:    ;;#ASMEND
8130; GFX940-NEXT:    s_mov_b32 s8, s1
8131; GFX940-NEXT:    s_mov_b32 s9, s0
8132; GFX940-NEXT:    s_mov_b32 s10, s0
8133; GFX940-NEXT:    s_mov_b32 s11, s0
8134; GFX940-NEXT:    ;;#ASMSTART
8135; GFX940-NEXT:    ; use s[8:11]
8136; GFX940-NEXT:    ;;#ASMEND
8137; GFX940-NEXT:    s_setpc_b64 s[30:31]
8138  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8139  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
8140  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8141  ret void
8142}
8143
; Scalar-register shuffle of a single <3 x i32> input (second operand is
; poison) with mask <2, 0, 0, 0>: result lane 0 takes element 2 of %vec0 and
; lanes 1-3 take element 0 of %vec0. The result is consumed by an asm use that
; is constrained to s[8:11].
8144define void @s_shuffle_v4i32_v3i32__2_0_0_0() {
8145; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_0_0_0:
8146; GFX900:       ; %bb.0:
8147; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8148; GFX900-NEXT:    ;;#ASMSTART
8149; GFX900-NEXT:    ; def s[4:6]
8150; GFX900-NEXT:    ;;#ASMEND
8151; GFX900-NEXT:    s_mov_b32 s8, s6
8152; GFX900-NEXT:    s_mov_b32 s9, s4
8153; GFX900-NEXT:    s_mov_b32 s10, s4
8154; GFX900-NEXT:    s_mov_b32 s11, s4
8155; GFX900-NEXT:    ;;#ASMSTART
8156; GFX900-NEXT:    ; use s[8:11]
8157; GFX900-NEXT:    ;;#ASMEND
8158; GFX900-NEXT:    s_setpc_b64 s[30:31]
8159;
8160; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_0_0_0:
8161; GFX90A:       ; %bb.0:
8162; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8163; GFX90A-NEXT:    ;;#ASMSTART
8164; GFX90A-NEXT:    ; def s[4:6]
8165; GFX90A-NEXT:    ;;#ASMEND
8166; GFX90A-NEXT:    s_mov_b32 s8, s6
8167; GFX90A-NEXT:    s_mov_b32 s9, s4
8168; GFX90A-NEXT:    s_mov_b32 s10, s4
8169; GFX90A-NEXT:    s_mov_b32 s11, s4
8170; GFX90A-NEXT:    ;;#ASMSTART
8171; GFX90A-NEXT:    ; use s[8:11]
8172; GFX90A-NEXT:    ;;#ASMEND
8173; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8174;
8175; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_0_0_0:
8176; GFX940:       ; %bb.0:
8177; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8178; GFX940-NEXT:    ;;#ASMSTART
8179; GFX940-NEXT:    ; def s[0:2]
8180; GFX940-NEXT:    ;;#ASMEND
8181; GFX940-NEXT:    s_mov_b32 s8, s2
8182; GFX940-NEXT:    s_mov_b32 s9, s0
8183; GFX940-NEXT:    s_mov_b32 s10, s0
8184; GFX940-NEXT:    s_mov_b32 s11, s0
8185; GFX940-NEXT:    ;;#ASMSTART
8186; GFX940-NEXT:    ; use s[8:11]
8187; GFX940-NEXT:    ;;#ASMEND
8188; GFX940-NEXT:    s_setpc_b64 s[30:31]
8189  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8190  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
8191  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8192  ret void
8193}
8194
; Scalar-register shuffle of a single <3 x i32> input (second operand is
; poison) with mask <3, 0, 0, 0>: index 3 addresses element 0 of the poison
; second operand, so result lane 0 carries no defined value; lanes 1-3 take
; element 0 of %vec0. The expected output accordingly writes only s9, s10 and
; s11. The result is consumed by an asm use that is constrained to s[8:11].
8195define void @s_shuffle_v4i32_v3i32__3_0_0_0() {
8196; GFX900-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0:
8197; GFX900:       ; %bb.0:
8198; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8199; GFX900-NEXT:    ;;#ASMSTART
8200; GFX900-NEXT:    ; def s[4:6]
8201; GFX900-NEXT:    ;;#ASMEND
8202; GFX900-NEXT:    s_mov_b32 s9, s4
8203; GFX900-NEXT:    s_mov_b32 s10, s4
8204; GFX900-NEXT:    s_mov_b32 s11, s4
8205; GFX900-NEXT:    ;;#ASMSTART
8206; GFX900-NEXT:    ; use s[8:11]
8207; GFX900-NEXT:    ;;#ASMEND
8208; GFX900-NEXT:    s_setpc_b64 s[30:31]
8209;
8210; GFX90A-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0:
8211; GFX90A:       ; %bb.0:
8212; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8213; GFX90A-NEXT:    ;;#ASMSTART
8214; GFX90A-NEXT:    ; def s[4:6]
8215; GFX90A-NEXT:    ;;#ASMEND
8216; GFX90A-NEXT:    s_mov_b32 s9, s4
8217; GFX90A-NEXT:    s_mov_b32 s10, s4
8218; GFX90A-NEXT:    s_mov_b32 s11, s4
8219; GFX90A-NEXT:    ;;#ASMSTART
8220; GFX90A-NEXT:    ; use s[8:11]
8221; GFX90A-NEXT:    ;;#ASMEND
8222; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8223;
8224; GFX940-LABEL: s_shuffle_v4i32_v3i32__3_0_0_0:
8225; GFX940:       ; %bb.0:
8226; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8227; GFX940-NEXT:    ;;#ASMSTART
8228; GFX940-NEXT:    ; def s[0:2]
8229; GFX940-NEXT:    ;;#ASMEND
8230; GFX940-NEXT:    s_mov_b32 s9, s0
8231; GFX940-NEXT:    s_mov_b32 s10, s0
8232; GFX940-NEXT:    s_mov_b32 s11, s0
8233; GFX940-NEXT:    ;;#ASMSTART
8234; GFX940-NEXT:    ; use s[8:11]
8235; GFX940-NEXT:    ;;#ASMEND
8236; GFX940-NEXT:    s_setpc_b64 s[30:31]
8237  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8238  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
8239  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8240  ret void
8241}
8242
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> result using
; mask <4, 0, 0, 0>: result lane 0 takes element 1 of %vec1 and lanes 1-3 take
; element 0 of %vec0 (mask indices 0-2 address %vec0, 3-5 address %vec1).
; Both inputs come from "=s" inline asm; the result is consumed by an asm use
; that is constrained to s[8:11].
8243define void @s_shuffle_v4i32_v3i32__4_0_0_0() {
8244; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_0_0_0:
8245; GFX900:       ; %bb.0:
8246; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8247; GFX900-NEXT:    ;;#ASMSTART
8248; GFX900-NEXT:    ; def s[8:10]
8249; GFX900-NEXT:    ;;#ASMEND
8250; GFX900-NEXT:    ;;#ASMSTART
8251; GFX900-NEXT:    ; def s[4:6]
8252; GFX900-NEXT:    ;;#ASMEND
8253; GFX900-NEXT:    s_mov_b32 s8, s9
8254; GFX900-NEXT:    s_mov_b32 s9, s4
8255; GFX900-NEXT:    s_mov_b32 s10, s4
8256; GFX900-NEXT:    s_mov_b32 s11, s4
8257; GFX900-NEXT:    ;;#ASMSTART
8258; GFX900-NEXT:    ; use s[8:11]
8259; GFX900-NEXT:    ;;#ASMEND
8260; GFX900-NEXT:    s_setpc_b64 s[30:31]
8261;
8262; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_0_0_0:
8263; GFX90A:       ; %bb.0:
8264; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8265; GFX90A-NEXT:    ;;#ASMSTART
8266; GFX90A-NEXT:    ; def s[8:10]
8267; GFX90A-NEXT:    ;;#ASMEND
8268; GFX90A-NEXT:    ;;#ASMSTART
8269; GFX90A-NEXT:    ; def s[4:6]
8270; GFX90A-NEXT:    ;;#ASMEND
8271; GFX90A-NEXT:    s_mov_b32 s8, s9
8272; GFX90A-NEXT:    s_mov_b32 s9, s4
8273; GFX90A-NEXT:    s_mov_b32 s10, s4
8274; GFX90A-NEXT:    s_mov_b32 s11, s4
8275; GFX90A-NEXT:    ;;#ASMSTART
8276; GFX90A-NEXT:    ; use s[8:11]
8277; GFX90A-NEXT:    ;;#ASMEND
8278; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8279;
8280; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_0_0_0:
8281; GFX940:       ; %bb.0:
8282; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8283; GFX940-NEXT:    ;;#ASMSTART
8284; GFX940-NEXT:    ; def s[0:2]
8285; GFX940-NEXT:    ;;#ASMEND
8286; GFX940-NEXT:    ;;#ASMSTART
8287; GFX940-NEXT:    ; def s[4:6]
8288; GFX940-NEXT:    ;;#ASMEND
8289; GFX940-NEXT:    s_mov_b32 s8, s5
8290; GFX940-NEXT:    s_mov_b32 s9, s0
8291; GFX940-NEXT:    s_mov_b32 s10, s0
8292; GFX940-NEXT:    s_mov_b32 s11, s0
8293; GFX940-NEXT:    ;;#ASMSTART
8294; GFX940-NEXT:    ; use s[8:11]
8295; GFX940-NEXT:    ;;#ASMEND
8296; GFX940-NEXT:    s_setpc_b64 s[30:31]
8297  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8298  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8299  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
8300  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8301  ret void
8302}
8303
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> result using
; mask <5, 0, 0, 0>: result lane 0 takes element 2 of %vec1 and lanes 1-3 take
; element 0 of %vec0 (mask indices 0-2 address %vec0, 3-5 address %vec1).
; Both inputs come from "=s" inline asm; the result is consumed by an asm use
; that is constrained to s[8:11].
8304define void @s_shuffle_v4i32_v3i32__5_0_0_0() {
8305; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_0_0:
8306; GFX900:       ; %bb.0:
8307; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8308; GFX900-NEXT:    ;;#ASMSTART
8309; GFX900-NEXT:    ; def s[8:10]
8310; GFX900-NEXT:    ;;#ASMEND
8311; GFX900-NEXT:    ;;#ASMSTART
8312; GFX900-NEXT:    ; def s[4:6]
8313; GFX900-NEXT:    ;;#ASMEND
8314; GFX900-NEXT:    s_mov_b32 s8, s10
8315; GFX900-NEXT:    s_mov_b32 s9, s4
8316; GFX900-NEXT:    s_mov_b32 s10, s4
8317; GFX900-NEXT:    s_mov_b32 s11, s4
8318; GFX900-NEXT:    ;;#ASMSTART
8319; GFX900-NEXT:    ; use s[8:11]
8320; GFX900-NEXT:    ;;#ASMEND
8321; GFX900-NEXT:    s_setpc_b64 s[30:31]
8322;
8323; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_0_0:
8324; GFX90A:       ; %bb.0:
8325; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8326; GFX90A-NEXT:    ;;#ASMSTART
8327; GFX90A-NEXT:    ; def s[8:10]
8328; GFX90A-NEXT:    ;;#ASMEND
8329; GFX90A-NEXT:    ;;#ASMSTART
8330; GFX90A-NEXT:    ; def s[4:6]
8331; GFX90A-NEXT:    ;;#ASMEND
8332; GFX90A-NEXT:    s_mov_b32 s8, s10
8333; GFX90A-NEXT:    s_mov_b32 s9, s4
8334; GFX90A-NEXT:    s_mov_b32 s10, s4
8335; GFX90A-NEXT:    s_mov_b32 s11, s4
8336; GFX90A-NEXT:    ;;#ASMSTART
8337; GFX90A-NEXT:    ; use s[8:11]
8338; GFX90A-NEXT:    ;;#ASMEND
8339; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8340;
8341; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_0_0:
8342; GFX940:       ; %bb.0:
8343; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8344; GFX940-NEXT:    ;;#ASMSTART
8345; GFX940-NEXT:    ; def s[0:2]
8346; GFX940-NEXT:    ;;#ASMEND
8347; GFX940-NEXT:    ;;#ASMSTART
8348; GFX940-NEXT:    ; def s[4:6]
8349; GFX940-NEXT:    ;;#ASMEND
8350; GFX940-NEXT:    s_mov_b32 s8, s6
8351; GFX940-NEXT:    s_mov_b32 s9, s0
8352; GFX940-NEXT:    s_mov_b32 s10, s0
8353; GFX940-NEXT:    s_mov_b32 s11, s0
8354; GFX940-NEXT:    ;;#ASMSTART
8355; GFX940-NEXT:    ; use s[8:11]
8356; GFX940-NEXT:    ;;#ASMEND
8357; GFX940-NEXT:    s_setpc_b64 s[30:31]
8358  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8359  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8360  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
8361  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8362  ret void
8363}
8364
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> result using
; mask <5, poison, 0, 0>: result lane 0 takes element 2 of %vec1, lane 1 is
; left undefined, and lanes 2-3 take element 0 of %vec0. The expected output
; accordingly contains no move into s9. The result is consumed by an asm use
; that is constrained to s[8:11].
8365define void @s_shuffle_v4i32_v3i32__5_u_0_0() {
8366; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0:
8367; GFX900:       ; %bb.0:
8368; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8369; GFX900-NEXT:    ;;#ASMSTART
8370; GFX900-NEXT:    ; def s[8:10]
8371; GFX900-NEXT:    ;;#ASMEND
8372; GFX900-NEXT:    ;;#ASMSTART
8373; GFX900-NEXT:    ; def s[4:6]
8374; GFX900-NEXT:    ;;#ASMEND
8375; GFX900-NEXT:    s_mov_b32 s8, s10
8376; GFX900-NEXT:    s_mov_b32 s10, s4
8377; GFX900-NEXT:    s_mov_b32 s11, s4
8378; GFX900-NEXT:    ;;#ASMSTART
8379; GFX900-NEXT:    ; use s[8:11]
8380; GFX900-NEXT:    ;;#ASMEND
8381; GFX900-NEXT:    s_setpc_b64 s[30:31]
8382;
8383; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0:
8384; GFX90A:       ; %bb.0:
8385; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8386; GFX90A-NEXT:    ;;#ASMSTART
8387; GFX90A-NEXT:    ; def s[8:10]
8388; GFX90A-NEXT:    ;;#ASMEND
8389; GFX90A-NEXT:    ;;#ASMSTART
8390; GFX90A-NEXT:    ; def s[4:6]
8391; GFX90A-NEXT:    ;;#ASMEND
8392; GFX90A-NEXT:    s_mov_b32 s8, s10
8393; GFX90A-NEXT:    s_mov_b32 s10, s4
8394; GFX90A-NEXT:    s_mov_b32 s11, s4
8395; GFX90A-NEXT:    ;;#ASMSTART
8396; GFX90A-NEXT:    ; use s[8:11]
8397; GFX90A-NEXT:    ;;#ASMEND
8398; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8399;
8400; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_0_0:
8401; GFX940:       ; %bb.0:
8402; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8403; GFX940-NEXT:    ;;#ASMSTART
8404; GFX940-NEXT:    ; def s[0:2]
8405; GFX940-NEXT:    ;;#ASMEND
8406; GFX940-NEXT:    ;;#ASMSTART
8407; GFX940-NEXT:    ; def s[4:6]
8408; GFX940-NEXT:    ;;#ASMEND
8409; GFX940-NEXT:    s_mov_b32 s8, s6
8410; GFX940-NEXT:    s_mov_b32 s10, s0
8411; GFX940-NEXT:    s_mov_b32 s11, s0
8412; GFX940-NEXT:    ;;#ASMSTART
8413; GFX940-NEXT:    ; use s[8:11]
8414; GFX940-NEXT:    ;;#ASMEND
8415; GFX940-NEXT:    s_setpc_b64 s[30:31]
8416  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8417  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8418  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
8419  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8420  ret void
8421}
8422
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> result using
; mask <5, 1, 0, 0>: result lane 0 takes element 2 of %vec1, lane 1 takes
; element 1 of %vec0, and lanes 2-3 take element 0 of %vec0 (mask indices 0-2
; address %vec0, 3-5 address %vec1). Both inputs come from "=s" inline asm; the
; result is consumed by an asm use that is constrained to s[8:11].
8423define void @s_shuffle_v4i32_v3i32__5_1_0_0() {
8424; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_0_0:
8425; GFX900:       ; %bb.0:
8426; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8427; GFX900-NEXT:    ;;#ASMSTART
8428; GFX900-NEXT:    ; def s[8:10]
8429; GFX900-NEXT:    ;;#ASMEND
8430; GFX900-NEXT:    ;;#ASMSTART
8431; GFX900-NEXT:    ; def s[4:6]
8432; GFX900-NEXT:    ;;#ASMEND
8433; GFX900-NEXT:    s_mov_b32 s8, s10
8434; GFX900-NEXT:    s_mov_b32 s9, s5
8435; GFX900-NEXT:    s_mov_b32 s10, s4
8436; GFX900-NEXT:    s_mov_b32 s11, s4
8437; GFX900-NEXT:    ;;#ASMSTART
8438; GFX900-NEXT:    ; use s[8:11]
8439; GFX900-NEXT:    ;;#ASMEND
8440; GFX900-NEXT:    s_setpc_b64 s[30:31]
8441;
8442; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_0_0:
8443; GFX90A:       ; %bb.0:
8444; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8445; GFX90A-NEXT:    ;;#ASMSTART
8446; GFX90A-NEXT:    ; def s[8:10]
8447; GFX90A-NEXT:    ;;#ASMEND
8448; GFX90A-NEXT:    ;;#ASMSTART
8449; GFX90A-NEXT:    ; def s[4:6]
8450; GFX90A-NEXT:    ;;#ASMEND
8451; GFX90A-NEXT:    s_mov_b32 s8, s10
8452; GFX90A-NEXT:    s_mov_b32 s9, s5
8453; GFX90A-NEXT:    s_mov_b32 s10, s4
8454; GFX90A-NEXT:    s_mov_b32 s11, s4
8455; GFX90A-NEXT:    ;;#ASMSTART
8456; GFX90A-NEXT:    ; use s[8:11]
8457; GFX90A-NEXT:    ;;#ASMEND
8458; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8459;
8460; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_0_0:
8461; GFX940:       ; %bb.0:
8462; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8463; GFX940-NEXT:    ;;#ASMSTART
8464; GFX940-NEXT:    ; def s[0:2]
8465; GFX940-NEXT:    ;;#ASMEND
8466; GFX940-NEXT:    ;;#ASMSTART
8467; GFX940-NEXT:    ; def s[4:6]
8468; GFX940-NEXT:    ;;#ASMEND
8469; GFX940-NEXT:    s_mov_b32 s8, s6
8470; GFX940-NEXT:    s_mov_b32 s9, s1
8471; GFX940-NEXT:    s_mov_b32 s10, s0
8472; GFX940-NEXT:    s_mov_b32 s11, s0
8473; GFX940-NEXT:    ;;#ASMSTART
8474; GFX940-NEXT:    ; use s[8:11]
8475; GFX940-NEXT:    ;;#ASMEND
8476; GFX940-NEXT:    s_setpc_b64 s[30:31]
8477  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8478  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8479  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
8480  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8481  ret void
8482}
8483
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> result using
; mask <5, 2, 0, 0>: result lane 0 takes element 2 of %vec1, lane 1 takes
; element 2 of %vec0, and lanes 2-3 take element 0 of %vec0 (mask indices 0-2
; address %vec0, 3-5 address %vec1). Both inputs come from "=s" inline asm; the
; result is consumed by an asm use that is constrained to s[8:11].
8484define void @s_shuffle_v4i32_v3i32__5_2_0_0() {
8485; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_0_0:
8486; GFX900:       ; %bb.0:
8487; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8488; GFX900-NEXT:    ;;#ASMSTART
8489; GFX900-NEXT:    ; def s[8:10]
8490; GFX900-NEXT:    ;;#ASMEND
8491; GFX900-NEXT:    ;;#ASMSTART
8492; GFX900-NEXT:    ; def s[4:6]
8493; GFX900-NEXT:    ;;#ASMEND
8494; GFX900-NEXT:    s_mov_b32 s8, s10
8495; GFX900-NEXT:    s_mov_b32 s9, s6
8496; GFX900-NEXT:    s_mov_b32 s10, s4
8497; GFX900-NEXT:    s_mov_b32 s11, s4
8498; GFX900-NEXT:    ;;#ASMSTART
8499; GFX900-NEXT:    ; use s[8:11]
8500; GFX900-NEXT:    ;;#ASMEND
8501; GFX900-NEXT:    s_setpc_b64 s[30:31]
8502;
8503; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_0_0:
8504; GFX90A:       ; %bb.0:
8505; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8506; GFX90A-NEXT:    ;;#ASMSTART
8507; GFX90A-NEXT:    ; def s[8:10]
8508; GFX90A-NEXT:    ;;#ASMEND
8509; GFX90A-NEXT:    ;;#ASMSTART
8510; GFX90A-NEXT:    ; def s[4:6]
8511; GFX90A-NEXT:    ;;#ASMEND
8512; GFX90A-NEXT:    s_mov_b32 s8, s10
8513; GFX90A-NEXT:    s_mov_b32 s9, s6
8514; GFX90A-NEXT:    s_mov_b32 s10, s4
8515; GFX90A-NEXT:    s_mov_b32 s11, s4
8516; GFX90A-NEXT:    ;;#ASMSTART
8517; GFX90A-NEXT:    ; use s[8:11]
8518; GFX90A-NEXT:    ;;#ASMEND
8519; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8520;
8521; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_0_0:
8522; GFX940:       ; %bb.0:
8523; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8524; GFX940-NEXT:    ;;#ASMSTART
8525; GFX940-NEXT:    ; def s[0:2]
8526; GFX940-NEXT:    ;;#ASMEND
8527; GFX940-NEXT:    ;;#ASMSTART
8528; GFX940-NEXT:    ; def s[4:6]
8529; GFX940-NEXT:    ;;#ASMEND
8530; GFX940-NEXT:    s_mov_b32 s8, s6
8531; GFX940-NEXT:    s_mov_b32 s9, s2
8532; GFX940-NEXT:    s_mov_b32 s10, s0
8533; GFX940-NEXT:    s_mov_b32 s11, s0
8534; GFX940-NEXT:    ;;#ASMSTART
8535; GFX940-NEXT:    ; use s[8:11]
8536; GFX940-NEXT:    ;;#ASMEND
8537; GFX940-NEXT:    s_setpc_b64 s[30:31]
8538  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8539  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8540  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
8541  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8542  ret void
8543}
8544
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> result using
; mask <5, 3, 0, 0>: result lane 0 takes element 2 of %vec1, lane 1 takes
; element 0 of %vec1, and lanes 2-3 take element 0 of %vec0 (mask indices 0-2
; address %vec0, 3-5 address %vec1). Both inputs come from "=s" inline asm; the
; result is consumed by an asm use that is constrained to s[8:11].
8545define void @s_shuffle_v4i32_v3i32__5_3_0_0() {
8546; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_0_0:
8547; GFX900:       ; %bb.0:
8548; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8549; GFX900-NEXT:    ;;#ASMSTART
8550; GFX900-NEXT:    ; def s[4:6]
8551; GFX900-NEXT:    ;;#ASMEND
8552; GFX900-NEXT:    ;;#ASMSTART
8553; GFX900-NEXT:    ; def s[12:14]
8554; GFX900-NEXT:    ;;#ASMEND
8555; GFX900-NEXT:    s_mov_b32 s8, s14
8556; GFX900-NEXT:    s_mov_b32 s9, s12
8557; GFX900-NEXT:    s_mov_b32 s10, s4
8558; GFX900-NEXT:    s_mov_b32 s11, s4
8559; GFX900-NEXT:    ;;#ASMSTART
8560; GFX900-NEXT:    ; use s[8:11]
8561; GFX900-NEXT:    ;;#ASMEND
8562; GFX900-NEXT:    s_setpc_b64 s[30:31]
8563;
8564; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_0_0:
8565; GFX90A:       ; %bb.0:
8566; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8567; GFX90A-NEXT:    ;;#ASMSTART
8568; GFX90A-NEXT:    ; def s[4:6]
8569; GFX90A-NEXT:    ;;#ASMEND
8570; GFX90A-NEXT:    ;;#ASMSTART
8571; GFX90A-NEXT:    ; def s[12:14]
8572; GFX90A-NEXT:    ;;#ASMEND
8573; GFX90A-NEXT:    s_mov_b32 s8, s14
8574; GFX90A-NEXT:    s_mov_b32 s9, s12
8575; GFX90A-NEXT:    s_mov_b32 s10, s4
8576; GFX90A-NEXT:    s_mov_b32 s11, s4
8577; GFX90A-NEXT:    ;;#ASMSTART
8578; GFX90A-NEXT:    ; use s[8:11]
8579; GFX90A-NEXT:    ;;#ASMEND
8580; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8581;
8582; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_0_0:
8583; GFX940:       ; %bb.0:
8584; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8585; GFX940-NEXT:    ;;#ASMSTART
8586; GFX940-NEXT:    ; def s[0:2]
8587; GFX940-NEXT:    ;;#ASMEND
8588; GFX940-NEXT:    ;;#ASMSTART
8589; GFX940-NEXT:    ; def s[4:6]
8590; GFX940-NEXT:    ;;#ASMEND
8591; GFX940-NEXT:    s_mov_b32 s8, s6
8592; GFX940-NEXT:    s_mov_b32 s9, s4
8593; GFX940-NEXT:    s_mov_b32 s10, s0
8594; GFX940-NEXT:    s_mov_b32 s11, s0
8595; GFX940-NEXT:    ;;#ASMSTART
8596; GFX940-NEXT:    ; use s[8:11]
8597; GFX940-NEXT:    ;;#ASMEND
8598; GFX940-NEXT:    s_setpc_b64 s[30:31]
8599  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8600  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8601  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
8602  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8603  ret void
8604}
8605
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> result using
; mask <5, 4, 0, 0>: result lane 0 takes element 2 of %vec1, lane 1 takes
; element 1 of %vec1, and lanes 2-3 take element 0 of %vec0 (mask indices 0-2
; address %vec0, 3-5 address %vec1). The expected output contains no move into
; s9; one of the asm defs is placed directly in s[8:10]. The result is consumed
; by an asm use that is constrained to s[8:11].
8606define void @s_shuffle_v4i32_v3i32__5_4_0_0() {
8607; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_4_0_0:
8608; GFX900:       ; %bb.0:
8609; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8610; GFX900-NEXT:    ;;#ASMSTART
8611; GFX900-NEXT:    ; def s[8:10]
8612; GFX900-NEXT:    ;;#ASMEND
8613; GFX900-NEXT:    ;;#ASMSTART
8614; GFX900-NEXT:    ; def s[4:6]
8615; GFX900-NEXT:    ;;#ASMEND
8616; GFX900-NEXT:    s_mov_b32 s8, s10
8617; GFX900-NEXT:    s_mov_b32 s10, s4
8618; GFX900-NEXT:    s_mov_b32 s11, s4
8619; GFX900-NEXT:    ;;#ASMSTART
8620; GFX900-NEXT:    ; use s[8:11]
8621; GFX900-NEXT:    ;;#ASMEND
8622; GFX900-NEXT:    s_setpc_b64 s[30:31]
8623;
8624; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_4_0_0:
8625; GFX90A:       ; %bb.0:
8626; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8627; GFX90A-NEXT:    ;;#ASMSTART
8628; GFX90A-NEXT:    ; def s[8:10]
8629; GFX90A-NEXT:    ;;#ASMEND
8630; GFX90A-NEXT:    ;;#ASMSTART
8631; GFX90A-NEXT:    ; def s[4:6]
8632; GFX90A-NEXT:    ;;#ASMEND
8633; GFX90A-NEXT:    s_mov_b32 s8, s10
8634; GFX90A-NEXT:    s_mov_b32 s10, s4
8635; GFX90A-NEXT:    s_mov_b32 s11, s4
8636; GFX90A-NEXT:    ;;#ASMSTART
8637; GFX90A-NEXT:    ; use s[8:11]
8638; GFX90A-NEXT:    ;;#ASMEND
8639; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8640;
8641; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_0_0:
8642; GFX940:       ; %bb.0:
8643; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8644; GFX940-NEXT:    ;;#ASMSTART
8645; GFX940-NEXT:    ; def s[8:10]
8646; GFX940-NEXT:    ;;#ASMEND
8647; GFX940-NEXT:    ;;#ASMSTART
8648; GFX940-NEXT:    ; def s[0:2]
8649; GFX940-NEXT:    ;;#ASMEND
8650; GFX940-NEXT:    s_mov_b32 s8, s10
8651; GFX940-NEXT:    s_mov_b32 s10, s0
8652; GFX940-NEXT:    s_mov_b32 s11, s0
8653; GFX940-NEXT:    ;;#ASMSTART
8654; GFX940-NEXT:    ; use s[8:11]
8655; GFX940-NEXT:    ;;#ASMEND
8656; GFX940-NEXT:    s_setpc_b64 s[30:31]
8657  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8658  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8659  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
8660  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8661  ret void
8662}
8663
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 5, 0, 0>: result = <%vec1[2], %vec1[2], %vec0[0], %vec0[0]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. All four result lanes are
; written with an explicit s_mov_b32 in the expected output.
8664define void @s_shuffle_v4i32_v3i32__5_5_0_0() {
8665; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_0:
8666; GFX900:       ; %bb.0:
8667; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8668; GFX900-NEXT:    ;;#ASMSTART
8669; GFX900-NEXT:    ; def s[8:10]
8670; GFX900-NEXT:    ;;#ASMEND
8671; GFX900-NEXT:    ;;#ASMSTART
8672; GFX900-NEXT:    ; def s[4:6]
8673; GFX900-NEXT:    ;;#ASMEND
8674; GFX900-NEXT:    s_mov_b32 s8, s10
8675; GFX900-NEXT:    s_mov_b32 s9, s10
8676; GFX900-NEXT:    s_mov_b32 s10, s4
8677; GFX900-NEXT:    s_mov_b32 s11, s4
8678; GFX900-NEXT:    ;;#ASMSTART
8679; GFX900-NEXT:    ; use s[8:11]
8680; GFX900-NEXT:    ;;#ASMEND
8681; GFX900-NEXT:    s_setpc_b64 s[30:31]
8682;
8683; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_0:
8684; GFX90A:       ; %bb.0:
8685; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8686; GFX90A-NEXT:    ;;#ASMSTART
8687; GFX90A-NEXT:    ; def s[8:10]
8688; GFX90A-NEXT:    ;;#ASMEND
8689; GFX90A-NEXT:    ;;#ASMSTART
8690; GFX90A-NEXT:    ; def s[4:6]
8691; GFX90A-NEXT:    ;;#ASMEND
8692; GFX90A-NEXT:    s_mov_b32 s8, s10
8693; GFX90A-NEXT:    s_mov_b32 s9, s10
8694; GFX90A-NEXT:    s_mov_b32 s10, s4
8695; GFX90A-NEXT:    s_mov_b32 s11, s4
8696; GFX90A-NEXT:    ;;#ASMSTART
8697; GFX90A-NEXT:    ; use s[8:11]
8698; GFX90A-NEXT:    ;;#ASMEND
8699; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8700;
8701; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_0:
8702; GFX940:       ; %bb.0:
8703; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8704; GFX940-NEXT:    ;;#ASMSTART
8705; GFX940-NEXT:    ; def s[0:2]
8706; GFX940-NEXT:    ;;#ASMEND
8707; GFX940-NEXT:    ;;#ASMSTART
8708; GFX940-NEXT:    ; def s[4:6]
8709; GFX940-NEXT:    ;;#ASMEND
8710; GFX940-NEXT:    s_mov_b32 s8, s6
8711; GFX940-NEXT:    s_mov_b32 s9, s6
8712; GFX940-NEXT:    s_mov_b32 s10, s0
8713; GFX940-NEXT:    s_mov_b32 s11, s0
8714; GFX940-NEXT:    ;;#ASMSTART
8715; GFX940-NEXT:    ; use s[8:11]
8716; GFX940-NEXT:    ;;#ASMEND
8717; GFX940-NEXT:    s_setpc_b64 s[30:31]
8718  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8719  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8720  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
8721  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8722  ret void
8723}
8724
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 5, poison, 0>: result = <%vec1[2], %vec1[2], poison, %vec0[0]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. Result element 2 is
; poison, and the expected output contains no copy into s10.
8725define void @s_shuffle_v4i32_v3i32__5_5_u_0() {
8726; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0:
8727; GFX900:       ; %bb.0:
8728; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8729; GFX900-NEXT:    ;;#ASMSTART
8730; GFX900-NEXT:    ; def s[8:10]
8731; GFX900-NEXT:    ;;#ASMEND
8732; GFX900-NEXT:    ;;#ASMSTART
8733; GFX900-NEXT:    ; def s[4:6]
8734; GFX900-NEXT:    ;;#ASMEND
8735; GFX900-NEXT:    s_mov_b32 s8, s10
8736; GFX900-NEXT:    s_mov_b32 s9, s10
8737; GFX900-NEXT:    s_mov_b32 s11, s4
8738; GFX900-NEXT:    ;;#ASMSTART
8739; GFX900-NEXT:    ; use s[8:11]
8740; GFX900-NEXT:    ;;#ASMEND
8741; GFX900-NEXT:    s_setpc_b64 s[30:31]
8742;
8743; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0:
8744; GFX90A:       ; %bb.0:
8745; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8746; GFX90A-NEXT:    ;;#ASMSTART
8747; GFX90A-NEXT:    ; def s[8:10]
8748; GFX90A-NEXT:    ;;#ASMEND
8749; GFX90A-NEXT:    ;;#ASMSTART
8750; GFX90A-NEXT:    ; def s[4:6]
8751; GFX90A-NEXT:    ;;#ASMEND
8752; GFX90A-NEXT:    s_mov_b32 s8, s10
8753; GFX90A-NEXT:    s_mov_b32 s9, s10
8754; GFX90A-NEXT:    s_mov_b32 s11, s4
8755; GFX90A-NEXT:    ;;#ASMSTART
8756; GFX90A-NEXT:    ; use s[8:11]
8757; GFX90A-NEXT:    ;;#ASMEND
8758; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8759;
8760; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_0:
8761; GFX940:       ; %bb.0:
8762; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8763; GFX940-NEXT:    ;;#ASMSTART
8764; GFX940-NEXT:    ; def s[0:2]
8765; GFX940-NEXT:    ;;#ASMEND
8766; GFX940-NEXT:    ;;#ASMSTART
8767; GFX940-NEXT:    ; def s[4:6]
8768; GFX940-NEXT:    ;;#ASMEND
8769; GFX940-NEXT:    s_mov_b32 s8, s6
8770; GFX940-NEXT:    s_mov_b32 s9, s6
8771; GFX940-NEXT:    s_mov_b32 s11, s0
8772; GFX940-NEXT:    ;;#ASMSTART
8773; GFX940-NEXT:    ; use s[8:11]
8774; GFX940-NEXT:    ;;#ASMEND
8775; GFX940-NEXT:    s_setpc_b64 s[30:31]
8776  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8777  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8778  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
8779  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8780  ret void
8781}
8782
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 5, 1, 0>: result = <%vec1[2], %vec1[2], %vec0[1], %vec0[0]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. All four result lanes are
; written with an explicit s_mov_b32 in the expected output.
8783define void @s_shuffle_v4i32_v3i32__5_5_1_0() {
8784; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_0:
8785; GFX900:       ; %bb.0:
8786; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8787; GFX900-NEXT:    ;;#ASMSTART
8788; GFX900-NEXT:    ; def s[8:10]
8789; GFX900-NEXT:    ;;#ASMEND
8790; GFX900-NEXT:    ;;#ASMSTART
8791; GFX900-NEXT:    ; def s[4:6]
8792; GFX900-NEXT:    ;;#ASMEND
8793; GFX900-NEXT:    s_mov_b32 s8, s10
8794; GFX900-NEXT:    s_mov_b32 s9, s10
8795; GFX900-NEXT:    s_mov_b32 s10, s5
8796; GFX900-NEXT:    s_mov_b32 s11, s4
8797; GFX900-NEXT:    ;;#ASMSTART
8798; GFX900-NEXT:    ; use s[8:11]
8799; GFX900-NEXT:    ;;#ASMEND
8800; GFX900-NEXT:    s_setpc_b64 s[30:31]
8801;
8802; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_0:
8803; GFX90A:       ; %bb.0:
8804; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8805; GFX90A-NEXT:    ;;#ASMSTART
8806; GFX90A-NEXT:    ; def s[8:10]
8807; GFX90A-NEXT:    ;;#ASMEND
8808; GFX90A-NEXT:    ;;#ASMSTART
8809; GFX90A-NEXT:    ; def s[4:6]
8810; GFX90A-NEXT:    ;;#ASMEND
8811; GFX90A-NEXT:    s_mov_b32 s8, s10
8812; GFX90A-NEXT:    s_mov_b32 s9, s10
8813; GFX90A-NEXT:    s_mov_b32 s10, s5
8814; GFX90A-NEXT:    s_mov_b32 s11, s4
8815; GFX90A-NEXT:    ;;#ASMSTART
8816; GFX90A-NEXT:    ; use s[8:11]
8817; GFX90A-NEXT:    ;;#ASMEND
8818; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8819;
8820; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_0:
8821; GFX940:       ; %bb.0:
8822; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8823; GFX940-NEXT:    ;;#ASMSTART
8824; GFX940-NEXT:    ; def s[0:2]
8825; GFX940-NEXT:    ;;#ASMEND
8826; GFX940-NEXT:    ;;#ASMSTART
8827; GFX940-NEXT:    ; def s[4:6]
8828; GFX940-NEXT:    ;;#ASMEND
8829; GFX940-NEXT:    s_mov_b32 s8, s6
8830; GFX940-NEXT:    s_mov_b32 s9, s6
8831; GFX940-NEXT:    s_mov_b32 s10, s1
8832; GFX940-NEXT:    s_mov_b32 s11, s0
8833; GFX940-NEXT:    ;;#ASMSTART
8834; GFX940-NEXT:    ; use s[8:11]
8835; GFX940-NEXT:    ;;#ASMEND
8836; GFX940-NEXT:    s_setpc_b64 s[30:31]
8837  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8838  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8839  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
8840  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8841  ret void
8842}
8843
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 5, 2, 0>: result = <%vec1[2], %vec1[2], %vec0[2], %vec0[0]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. All four result lanes are
; written with an explicit s_mov_b32 in the expected output.
8844define void @s_shuffle_v4i32_v3i32__5_5_2_0() {
8845; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_0:
8846; GFX900:       ; %bb.0:
8847; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8848; GFX900-NEXT:    ;;#ASMSTART
8849; GFX900-NEXT:    ; def s[8:10]
8850; GFX900-NEXT:    ;;#ASMEND
8851; GFX900-NEXT:    ;;#ASMSTART
8852; GFX900-NEXT:    ; def s[4:6]
8853; GFX900-NEXT:    ;;#ASMEND
8854; GFX900-NEXT:    s_mov_b32 s8, s10
8855; GFX900-NEXT:    s_mov_b32 s9, s10
8856; GFX900-NEXT:    s_mov_b32 s10, s6
8857; GFX900-NEXT:    s_mov_b32 s11, s4
8858; GFX900-NEXT:    ;;#ASMSTART
8859; GFX900-NEXT:    ; use s[8:11]
8860; GFX900-NEXT:    ;;#ASMEND
8861; GFX900-NEXT:    s_setpc_b64 s[30:31]
8862;
8863; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_0:
8864; GFX90A:       ; %bb.0:
8865; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8866; GFX90A-NEXT:    ;;#ASMSTART
8867; GFX90A-NEXT:    ; def s[8:10]
8868; GFX90A-NEXT:    ;;#ASMEND
8869; GFX90A-NEXT:    ;;#ASMSTART
8870; GFX90A-NEXT:    ; def s[4:6]
8871; GFX90A-NEXT:    ;;#ASMEND
8872; GFX90A-NEXT:    s_mov_b32 s8, s10
8873; GFX90A-NEXT:    s_mov_b32 s9, s10
8874; GFX90A-NEXT:    s_mov_b32 s10, s6
8875; GFX90A-NEXT:    s_mov_b32 s11, s4
8876; GFX90A-NEXT:    ;;#ASMSTART
8877; GFX90A-NEXT:    ; use s[8:11]
8878; GFX90A-NEXT:    ;;#ASMEND
8879; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8880;
8881; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_0:
8882; GFX940:       ; %bb.0:
8883; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8884; GFX940-NEXT:    ;;#ASMSTART
8885; GFX940-NEXT:    ; def s[0:2]
8886; GFX940-NEXT:    ;;#ASMEND
8887; GFX940-NEXT:    ;;#ASMSTART
8888; GFX940-NEXT:    ; def s[4:6]
8889; GFX940-NEXT:    ;;#ASMEND
8890; GFX940-NEXT:    s_mov_b32 s8, s6
8891; GFX940-NEXT:    s_mov_b32 s9, s6
8892; GFX940-NEXT:    s_mov_b32 s10, s2
8893; GFX940-NEXT:    s_mov_b32 s11, s0
8894; GFX940-NEXT:    ;;#ASMSTART
8895; GFX940-NEXT:    ; use s[8:11]
8896; GFX940-NEXT:    ;;#ASMEND
8897; GFX940-NEXT:    s_setpc_b64 s[30:31]
8898  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8899  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8900  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
8901  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8902  ret void
8903}
8904
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 5, 3, 0>: result = <%vec1[2], %vec1[2], %vec1[0], %vec0[0]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. In the expected output
; neither asm def overlaps s[8:11]; all four lanes are copied in.
8905define void @s_shuffle_v4i32_v3i32__5_5_3_0() {
8906; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_0:
8907; GFX900:       ; %bb.0:
8908; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8909; GFX900-NEXT:    ;;#ASMSTART
8910; GFX900-NEXT:    ; def s[4:6]
8911; GFX900-NEXT:    ;;#ASMEND
8912; GFX900-NEXT:    ;;#ASMSTART
8913; GFX900-NEXT:    ; def s[12:14]
8914; GFX900-NEXT:    ;;#ASMEND
8915; GFX900-NEXT:    s_mov_b32 s8, s14
8916; GFX900-NEXT:    s_mov_b32 s9, s14
8917; GFX900-NEXT:    s_mov_b32 s10, s12
8918; GFX900-NEXT:    s_mov_b32 s11, s4
8919; GFX900-NEXT:    ;;#ASMSTART
8920; GFX900-NEXT:    ; use s[8:11]
8921; GFX900-NEXT:    ;;#ASMEND
8922; GFX900-NEXT:    s_setpc_b64 s[30:31]
8923;
8924; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_0:
8925; GFX90A:       ; %bb.0:
8926; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8927; GFX90A-NEXT:    ;;#ASMSTART
8928; GFX90A-NEXT:    ; def s[4:6]
8929; GFX90A-NEXT:    ;;#ASMEND
8930; GFX90A-NEXT:    ;;#ASMSTART
8931; GFX90A-NEXT:    ; def s[12:14]
8932; GFX90A-NEXT:    ;;#ASMEND
8933; GFX90A-NEXT:    s_mov_b32 s8, s14
8934; GFX90A-NEXT:    s_mov_b32 s9, s14
8935; GFX90A-NEXT:    s_mov_b32 s10, s12
8936; GFX90A-NEXT:    s_mov_b32 s11, s4
8937; GFX90A-NEXT:    ;;#ASMSTART
8938; GFX90A-NEXT:    ; use s[8:11]
8939; GFX90A-NEXT:    ;;#ASMEND
8940; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8941;
8942; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_0:
8943; GFX940:       ; %bb.0:
8944; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8945; GFX940-NEXT:    ;;#ASMSTART
8946; GFX940-NEXT:    ; def s[0:2]
8947; GFX940-NEXT:    ;;#ASMEND
8948; GFX940-NEXT:    ;;#ASMSTART
8949; GFX940-NEXT:    ; def s[4:6]
8950; GFX940-NEXT:    ;;#ASMEND
8951; GFX940-NEXT:    s_mov_b32 s8, s6
8952; GFX940-NEXT:    s_mov_b32 s9, s6
8953; GFX940-NEXT:    s_mov_b32 s10, s4
8954; GFX940-NEXT:    s_mov_b32 s11, s0
8955; GFX940-NEXT:    ;;#ASMSTART
8956; GFX940-NEXT:    ; use s[8:11]
8957; GFX940-NEXT:    ;;#ASMEND
8958; GFX940-NEXT:    s_setpc_b64 s[30:31]
8959  %vec0 = call <3 x i32> asm "; def $0", "=s"()
8960  %vec1 = call <3 x i32> asm "; def $0", "=s"()
8961  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
8962  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
8963  ret void
8964}
8965
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 5, 4, 0>: result = <%vec1[2], %vec1[2], %vec1[1], %vec0[0]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. In the expected output
; neither asm def overlaps s[8:11]; all four lanes are copied in.
8966define void @s_shuffle_v4i32_v3i32__5_5_4_0() {
8967; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_0:
8968; GFX900:       ; %bb.0:
8969; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8970; GFX900-NEXT:    ;;#ASMSTART
8971; GFX900-NEXT:    ; def s[4:6]
8972; GFX900-NEXT:    ;;#ASMEND
8973; GFX900-NEXT:    ;;#ASMSTART
8974; GFX900-NEXT:    ; def s[12:14]
8975; GFX900-NEXT:    ;;#ASMEND
8976; GFX900-NEXT:    s_mov_b32 s8, s14
8977; GFX900-NEXT:    s_mov_b32 s9, s14
8978; GFX900-NEXT:    s_mov_b32 s10, s13
8979; GFX900-NEXT:    s_mov_b32 s11, s4
8980; GFX900-NEXT:    ;;#ASMSTART
8981; GFX900-NEXT:    ; use s[8:11]
8982; GFX900-NEXT:    ;;#ASMEND
8983; GFX900-NEXT:    s_setpc_b64 s[30:31]
8984;
8985; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_0:
8986; GFX90A:       ; %bb.0:
8987; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8988; GFX90A-NEXT:    ;;#ASMSTART
8989; GFX90A-NEXT:    ; def s[4:6]
8990; GFX90A-NEXT:    ;;#ASMEND
8991; GFX90A-NEXT:    ;;#ASMSTART
8992; GFX90A-NEXT:    ; def s[12:14]
8993; GFX90A-NEXT:    ;;#ASMEND
8994; GFX90A-NEXT:    s_mov_b32 s8, s14
8995; GFX90A-NEXT:    s_mov_b32 s9, s14
8996; GFX90A-NEXT:    s_mov_b32 s10, s13
8997; GFX90A-NEXT:    s_mov_b32 s11, s4
8998; GFX90A-NEXT:    ;;#ASMSTART
8999; GFX90A-NEXT:    ; use s[8:11]
9000; GFX90A-NEXT:    ;;#ASMEND
9001; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9002;
9003; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_0:
9004; GFX940:       ; %bb.0:
9005; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9006; GFX940-NEXT:    ;;#ASMSTART
9007; GFX940-NEXT:    ; def s[0:2]
9008; GFX940-NEXT:    ;;#ASMEND
9009; GFX940-NEXT:    ;;#ASMSTART
9010; GFX940-NEXT:    ; def s[4:6]
9011; GFX940-NEXT:    ;;#ASMEND
9012; GFX940-NEXT:    s_mov_b32 s8, s6
9013; GFX940-NEXT:    s_mov_b32 s9, s6
9014; GFX940-NEXT:    s_mov_b32 s10, s5
9015; GFX940-NEXT:    s_mov_b32 s11, s0
9016; GFX940-NEXT:    ;;#ASMSTART
9017; GFX940-NEXT:    ; use s[8:11]
9018; GFX940-NEXT:    ;;#ASMEND
9019; GFX940-NEXT:    s_setpc_b64 s[30:31]
9020  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9021  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9022  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
9023  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9024  ret void
9025}
9026
; Scalar-register shuffle of a single <3 x i32> input (second operand is
; poison) with mask <poison, 1, 1, 1>: result = <poison, %vec0[1], %vec0[1],
; %vec0[1]>. The input comes from inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. The expected output is
; shared by all three targets; only s10 and s11 are written, both from s9.
9027define void @s_shuffle_v4i32_v3i32__u_1_1_1() {
9028; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_1_1_1:
9029; GFX9:       ; %bb.0:
9030; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9031; GFX9-NEXT:    ;;#ASMSTART
9032; GFX9-NEXT:    ; def s[8:10]
9033; GFX9-NEXT:    ;;#ASMEND
9034; GFX9-NEXT:    s_mov_b32 s10, s9
9035; GFX9-NEXT:    s_mov_b32 s11, s9
9036; GFX9-NEXT:    ;;#ASMSTART
9037; GFX9-NEXT:    ; use s[8:11]
9038; GFX9-NEXT:    ;;#ASMEND
9039; GFX9-NEXT:    s_setpc_b64 s[30:31]
9040  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9041  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
9042  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9043  ret void
9044}
9045
; Scalar-register shuffle of a single <3 x i32> input (second operand is
; poison) with mask <0, 1, 1, 1>: result = <%vec0[0], %vec0[1], %vec0[1],
; %vec0[1]>. The input comes from inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. The expected output is
; shared by all three targets; the input is defined directly in s[8:10], so
; only s10 and s11 are rewritten (both from s9).
9046define void @s_shuffle_v4i32_v3i32__0_1_1_1() {
9047; GFX9-LABEL: s_shuffle_v4i32_v3i32__0_1_1_1:
9048; GFX9:       ; %bb.0:
9049; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9050; GFX9-NEXT:    ;;#ASMSTART
9051; GFX9-NEXT:    ; def s[8:10]
9052; GFX9-NEXT:    ;;#ASMEND
9053; GFX9-NEXT:    s_mov_b32 s10, s9
9054; GFX9-NEXT:    s_mov_b32 s11, s9
9055; GFX9-NEXT:    ;;#ASMSTART
9056; GFX9-NEXT:    ; use s[8:11]
9057; GFX9-NEXT:    ;;#ASMEND
9058; GFX9-NEXT:    s_setpc_b64 s[30:31]
9059  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9060  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
9061  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9062  ret void
9063}
9064
; Scalar-register splat: a single <3 x i32> input (second operand is poison)
; shuffled with mask <1, 1, 1, 1>, so every result element is %vec0[1].
; The input comes from inline asm with the "=s" constraint and the result is
; consumed by inline asm tied to s[8:11]. The expected output is shared by
; all three targets; s8, s10 and s11 are each copied from s9.
9065define void @s_shuffle_v4i32_v3i32__1_1_1_1() {
9066; GFX9-LABEL: s_shuffle_v4i32_v3i32__1_1_1_1:
9067; GFX9:       ; %bb.0:
9068; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9069; GFX9-NEXT:    ;;#ASMSTART
9070; GFX9-NEXT:    ; def s[8:10]
9071; GFX9-NEXT:    ;;#ASMEND
9072; GFX9-NEXT:    s_mov_b32 s8, s9
9073; GFX9-NEXT:    s_mov_b32 s10, s9
9074; GFX9-NEXT:    s_mov_b32 s11, s9
9075; GFX9-NEXT:    ;;#ASMSTART
9076; GFX9-NEXT:    ; use s[8:11]
9077; GFX9-NEXT:    ;;#ASMEND
9078; GFX9-NEXT:    s_setpc_b64 s[30:31]
9079  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9080  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
9081  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9082  ret void
9083}
9084
; Scalar-register shuffle of a single <3 x i32> input (second operand is
; poison) with mask <2, 1, 1, 1>: result = <%vec0[2], %vec0[1], %vec0[1],
; %vec0[1]>. The input comes from inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. The expected output is
; shared by all three targets; s8 is copied from s10 before s10 is
; overwritten from s9.
9085define void @s_shuffle_v4i32_v3i32__2_1_1_1() {
9086; GFX9-LABEL: s_shuffle_v4i32_v3i32__2_1_1_1:
9087; GFX9:       ; %bb.0:
9088; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9089; GFX9-NEXT:    ;;#ASMSTART
9090; GFX9-NEXT:    ; def s[8:10]
9091; GFX9-NEXT:    ;;#ASMEND
9092; GFX9-NEXT:    s_mov_b32 s8, s10
9093; GFX9-NEXT:    s_mov_b32 s10, s9
9094; GFX9-NEXT:    s_mov_b32 s11, s9
9095; GFX9-NEXT:    ;;#ASMSTART
9096; GFX9-NEXT:    ; use s[8:11]
9097; GFX9-NEXT:    ;;#ASMEND
9098; GFX9-NEXT:    s_setpc_b64 s[30:31]
9099  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9100  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
9101  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9102  ret void
9103}
9104
; Scalar-register shuffle of a single <3 x i32> input with mask <3, 1, 1, 1>.
; Index 3 selects element 0 of the second operand, which is poison here, so
; result element 0 carries no defined value; elements 1-3 are %vec0[1].
; The input comes from inline asm with the "=s" constraint and the result is
; consumed by inline asm tied to s[8:11]. The expected output is shared by
; all three targets and contains no copy into s8.
9105define void @s_shuffle_v4i32_v3i32__3_1_1_1() {
9106; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_1_1_1:
9107; GFX9:       ; %bb.0:
9108; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9109; GFX9-NEXT:    ;;#ASMSTART
9110; GFX9-NEXT:    ; def s[8:10]
9111; GFX9-NEXT:    ;;#ASMEND
9112; GFX9-NEXT:    s_mov_b32 s10, s9
9113; GFX9-NEXT:    s_mov_b32 s11, s9
9114; GFX9-NEXT:    ;;#ASMSTART
9115; GFX9-NEXT:    ; use s[8:11]
9116; GFX9-NEXT:    ;;#ASMEND
9117; GFX9-NEXT:    s_setpc_b64 s[30:31]
9118  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9119  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
9120  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9121  ret void
9122}
9123
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <4, 1, 1, 1>: result = <%vec1[1], %vec0[1], %vec0[1], %vec0[1]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. The expected output on
; every target contains no copy into s9.
9124define void @s_shuffle_v4i32_v3i32__4_1_1_1() {
9125; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_1_1_1:
9126; GFX900:       ; %bb.0:
9127; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9128; GFX900-NEXT:    ;;#ASMSTART
9129; GFX900-NEXT:    ; def s[8:10]
9130; GFX900-NEXT:    ;;#ASMEND
9131; GFX900-NEXT:    ;;#ASMSTART
9132; GFX900-NEXT:    ; def s[4:6]
9133; GFX900-NEXT:    ;;#ASMEND
9134; GFX900-NEXT:    s_mov_b32 s8, s5
9135; GFX900-NEXT:    s_mov_b32 s10, s9
9136; GFX900-NEXT:    s_mov_b32 s11, s9
9137; GFX900-NEXT:    ;;#ASMSTART
9138; GFX900-NEXT:    ; use s[8:11]
9139; GFX900-NEXT:    ;;#ASMEND
9140; GFX900-NEXT:    s_setpc_b64 s[30:31]
9141;
9142; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_1_1_1:
9143; GFX90A:       ; %bb.0:
9144; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9145; GFX90A-NEXT:    ;;#ASMSTART
9146; GFX90A-NEXT:    ; def s[8:10]
9147; GFX90A-NEXT:    ;;#ASMEND
9148; GFX90A-NEXT:    ;;#ASMSTART
9149; GFX90A-NEXT:    ; def s[4:6]
9150; GFX90A-NEXT:    ;;#ASMEND
9151; GFX90A-NEXT:    s_mov_b32 s8, s5
9152; GFX90A-NEXT:    s_mov_b32 s10, s9
9153; GFX90A-NEXT:    s_mov_b32 s11, s9
9154; GFX90A-NEXT:    ;;#ASMSTART
9155; GFX90A-NEXT:    ; use s[8:11]
9156; GFX90A-NEXT:    ;;#ASMEND
9157; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9158;
9159; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_1_1_1:
9160; GFX940:       ; %bb.0:
9161; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9162; GFX940-NEXT:    ;;#ASMSTART
9163; GFX940-NEXT:    ; def s[8:10]
9164; GFX940-NEXT:    ;;#ASMEND
9165; GFX940-NEXT:    ;;#ASMSTART
9166; GFX940-NEXT:    ; def s[0:2]
9167; GFX940-NEXT:    ;;#ASMEND
9168; GFX940-NEXT:    s_mov_b32 s8, s1
9169; GFX940-NEXT:    s_mov_b32 s10, s9
9170; GFX940-NEXT:    s_mov_b32 s11, s9
9171; GFX940-NEXT:    ;;#ASMSTART
9172; GFX940-NEXT:    ; use s[8:11]
9173; GFX940-NEXT:    ;;#ASMEND
9174; GFX940-NEXT:    s_setpc_b64 s[30:31]
9175  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9176  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9177  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
9178  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9179  ret void
9180}
9181
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 1, 1, 1>: result = <%vec1[2], %vec0[1], %vec0[1], %vec0[1]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. The expected output on
; every target contains no copy into s9.
9182define void @s_shuffle_v4i32_v3i32__5_1_1_1() {
9183; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_1_1:
9184; GFX900:       ; %bb.0:
9185; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9186; GFX900-NEXT:    ;;#ASMSTART
9187; GFX900-NEXT:    ; def s[8:10]
9188; GFX900-NEXT:    ;;#ASMEND
9189; GFX900-NEXT:    ;;#ASMSTART
9190; GFX900-NEXT:    ; def s[4:6]
9191; GFX900-NEXT:    ;;#ASMEND
9192; GFX900-NEXT:    s_mov_b32 s8, s6
9193; GFX900-NEXT:    s_mov_b32 s10, s9
9194; GFX900-NEXT:    s_mov_b32 s11, s9
9195; GFX900-NEXT:    ;;#ASMSTART
9196; GFX900-NEXT:    ; use s[8:11]
9197; GFX900-NEXT:    ;;#ASMEND
9198; GFX900-NEXT:    s_setpc_b64 s[30:31]
9199;
9200; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_1_1:
9201; GFX90A:       ; %bb.0:
9202; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9203; GFX90A-NEXT:    ;;#ASMSTART
9204; GFX90A-NEXT:    ; def s[8:10]
9205; GFX90A-NEXT:    ;;#ASMEND
9206; GFX90A-NEXT:    ;;#ASMSTART
9207; GFX90A-NEXT:    ; def s[4:6]
9208; GFX90A-NEXT:    ;;#ASMEND
9209; GFX90A-NEXT:    s_mov_b32 s8, s6
9210; GFX90A-NEXT:    s_mov_b32 s10, s9
9211; GFX90A-NEXT:    s_mov_b32 s11, s9
9212; GFX90A-NEXT:    ;;#ASMSTART
9213; GFX90A-NEXT:    ; use s[8:11]
9214; GFX90A-NEXT:    ;;#ASMEND
9215; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9216;
9217; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_1_1:
9218; GFX940:       ; %bb.0:
9219; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9220; GFX940-NEXT:    ;;#ASMSTART
9221; GFX940-NEXT:    ; def s[8:10]
9222; GFX940-NEXT:    ;;#ASMEND
9223; GFX940-NEXT:    ;;#ASMSTART
9224; GFX940-NEXT:    ; def s[0:2]
9225; GFX940-NEXT:    ;;#ASMEND
9226; GFX940-NEXT:    s_mov_b32 s8, s2
9227; GFX940-NEXT:    s_mov_b32 s10, s9
9228; GFX940-NEXT:    s_mov_b32 s11, s9
9229; GFX940-NEXT:    ;;#ASMSTART
9230; GFX940-NEXT:    ; use s[8:11]
9231; GFX940-NEXT:    ;;#ASMEND
9232; GFX940-NEXT:    s_setpc_b64 s[30:31]
9233  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9234  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9235  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
9236  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9237  ret void
9238}
9239
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, poison, 1, 1>: result = <%vec1[2], poison, %vec0[1], %vec0[1]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. Result element 1 is
; poison, and the expected output contains no copy into s9.
9240define void @s_shuffle_v4i32_v3i32__5_u_1_1() {
9241; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1:
9242; GFX900:       ; %bb.0:
9243; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9244; GFX900-NEXT:    ;;#ASMSTART
9245; GFX900-NEXT:    ; def s[8:10]
9246; GFX900-NEXT:    ;;#ASMEND
9247; GFX900-NEXT:    ;;#ASMSTART
9248; GFX900-NEXT:    ; def s[4:6]
9249; GFX900-NEXT:    ;;#ASMEND
9250; GFX900-NEXT:    s_mov_b32 s8, s10
9251; GFX900-NEXT:    s_mov_b32 s10, s5
9252; GFX900-NEXT:    s_mov_b32 s11, s5
9253; GFX900-NEXT:    ;;#ASMSTART
9254; GFX900-NEXT:    ; use s[8:11]
9255; GFX900-NEXT:    ;;#ASMEND
9256; GFX900-NEXT:    s_setpc_b64 s[30:31]
9257;
9258; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1:
9259; GFX90A:       ; %bb.0:
9260; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9261; GFX90A-NEXT:    ;;#ASMSTART
9262; GFX90A-NEXT:    ; def s[8:10]
9263; GFX90A-NEXT:    ;;#ASMEND
9264; GFX90A-NEXT:    ;;#ASMSTART
9265; GFX90A-NEXT:    ; def s[4:6]
9266; GFX90A-NEXT:    ;;#ASMEND
9267; GFX90A-NEXT:    s_mov_b32 s8, s10
9268; GFX90A-NEXT:    s_mov_b32 s10, s5
9269; GFX90A-NEXT:    s_mov_b32 s11, s5
9270; GFX90A-NEXT:    ;;#ASMSTART
9271; GFX90A-NEXT:    ; use s[8:11]
9272; GFX90A-NEXT:    ;;#ASMEND
9273; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9274;
9275; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_1_1:
9276; GFX940:       ; %bb.0:
9277; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9278; GFX940-NEXT:    ;;#ASMSTART
9279; GFX940-NEXT:    ; def s[0:2]
9280; GFX940-NEXT:    ;;#ASMEND
9281; GFX940-NEXT:    ;;#ASMSTART
9282; GFX940-NEXT:    ; def s[4:6]
9283; GFX940-NEXT:    ;;#ASMEND
9284; GFX940-NEXT:    s_mov_b32 s8, s6
9285; GFX940-NEXT:    s_mov_b32 s10, s1
9286; GFX940-NEXT:    s_mov_b32 s11, s1
9287; GFX940-NEXT:    ;;#ASMSTART
9288; GFX940-NEXT:    ; use s[8:11]
9289; GFX940-NEXT:    ;;#ASMEND
9290; GFX940-NEXT:    s_setpc_b64 s[30:31]
9291  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9292  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9293  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
9294  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9295  ret void
9296}
9297
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 0, 1, 1>: result = <%vec1[2], %vec0[0], %vec0[1], %vec0[1]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. All four result lanes are
; written with an explicit s_mov_b32 in the expected output.
9298define void @s_shuffle_v4i32_v3i32__5_0_1_1() {
9299; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_1_1:
9300; GFX900:       ; %bb.0:
9301; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9302; GFX900-NEXT:    ;;#ASMSTART
9303; GFX900-NEXT:    ; def s[8:10]
9304; GFX900-NEXT:    ;;#ASMEND
9305; GFX900-NEXT:    ;;#ASMSTART
9306; GFX900-NEXT:    ; def s[4:6]
9307; GFX900-NEXT:    ;;#ASMEND
9308; GFX900-NEXT:    s_mov_b32 s8, s10
9309; GFX900-NEXT:    s_mov_b32 s9, s4
9310; GFX900-NEXT:    s_mov_b32 s10, s5
9311; GFX900-NEXT:    s_mov_b32 s11, s5
9312; GFX900-NEXT:    ;;#ASMSTART
9313; GFX900-NEXT:    ; use s[8:11]
9314; GFX900-NEXT:    ;;#ASMEND
9315; GFX900-NEXT:    s_setpc_b64 s[30:31]
9316;
9317; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_1_1:
9318; GFX90A:       ; %bb.0:
9319; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9320; GFX90A-NEXT:    ;;#ASMSTART
9321; GFX90A-NEXT:    ; def s[8:10]
9322; GFX90A-NEXT:    ;;#ASMEND
9323; GFX90A-NEXT:    ;;#ASMSTART
9324; GFX90A-NEXT:    ; def s[4:6]
9325; GFX90A-NEXT:    ;;#ASMEND
9326; GFX90A-NEXT:    s_mov_b32 s8, s10
9327; GFX90A-NEXT:    s_mov_b32 s9, s4
9328; GFX90A-NEXT:    s_mov_b32 s10, s5
9329; GFX90A-NEXT:    s_mov_b32 s11, s5
9330; GFX90A-NEXT:    ;;#ASMSTART
9331; GFX90A-NEXT:    ; use s[8:11]
9332; GFX90A-NEXT:    ;;#ASMEND
9333; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9334;
9335; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_1_1:
9336; GFX940:       ; %bb.0:
9337; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9338; GFX940-NEXT:    ;;#ASMSTART
9339; GFX940-NEXT:    ; def s[0:2]
9340; GFX940-NEXT:    ;;#ASMEND
9341; GFX940-NEXT:    ;;#ASMSTART
9342; GFX940-NEXT:    ; def s[4:6]
9343; GFX940-NEXT:    ;;#ASMEND
9344; GFX940-NEXT:    s_mov_b32 s8, s6
9345; GFX940-NEXT:    s_mov_b32 s9, s0
9346; GFX940-NEXT:    s_mov_b32 s10, s1
9347; GFX940-NEXT:    s_mov_b32 s11, s1
9348; GFX940-NEXT:    ;;#ASMSTART
9349; GFX940-NEXT:    ; use s[8:11]
9350; GFX940-NEXT:    ;;#ASMEND
9351; GFX940-NEXT:    s_setpc_b64 s[30:31]
9352  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9353  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9354  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
9355  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9356  ret void
9357}
9358
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 2, 1, 1>: result = <%vec1[2], %vec0[2], %vec0[1], %vec0[1]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. All four result lanes are
; written with an explicit s_mov_b32 in the expected output.
9359define void @s_shuffle_v4i32_v3i32__5_2_1_1() {
9360; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_1_1:
9361; GFX900:       ; %bb.0:
9362; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9363; GFX900-NEXT:    ;;#ASMSTART
9364; GFX900-NEXT:    ; def s[8:10]
9365; GFX900-NEXT:    ;;#ASMEND
9366; GFX900-NEXT:    ;;#ASMSTART
9367; GFX900-NEXT:    ; def s[4:6]
9368; GFX900-NEXT:    ;;#ASMEND
9369; GFX900-NEXT:    s_mov_b32 s8, s10
9370; GFX900-NEXT:    s_mov_b32 s9, s6
9371; GFX900-NEXT:    s_mov_b32 s10, s5
9372; GFX900-NEXT:    s_mov_b32 s11, s5
9373; GFX900-NEXT:    ;;#ASMSTART
9374; GFX900-NEXT:    ; use s[8:11]
9375; GFX900-NEXT:    ;;#ASMEND
9376; GFX900-NEXT:    s_setpc_b64 s[30:31]
9377;
9378; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_1_1:
9379; GFX90A:       ; %bb.0:
9380; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9381; GFX90A-NEXT:    ;;#ASMSTART
9382; GFX90A-NEXT:    ; def s[8:10]
9383; GFX90A-NEXT:    ;;#ASMEND
9384; GFX90A-NEXT:    ;;#ASMSTART
9385; GFX90A-NEXT:    ; def s[4:6]
9386; GFX90A-NEXT:    ;;#ASMEND
9387; GFX90A-NEXT:    s_mov_b32 s8, s10
9388; GFX90A-NEXT:    s_mov_b32 s9, s6
9389; GFX90A-NEXT:    s_mov_b32 s10, s5
9390; GFX90A-NEXT:    s_mov_b32 s11, s5
9391; GFX90A-NEXT:    ;;#ASMSTART
9392; GFX90A-NEXT:    ; use s[8:11]
9393; GFX90A-NEXT:    ;;#ASMEND
9394; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9395;
9396; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_1_1:
9397; GFX940:       ; %bb.0:
9398; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9399; GFX940-NEXT:    ;;#ASMSTART
9400; GFX940-NEXT:    ; def s[0:2]
9401; GFX940-NEXT:    ;;#ASMEND
9402; GFX940-NEXT:    ;;#ASMSTART
9403; GFX940-NEXT:    ; def s[4:6]
9404; GFX940-NEXT:    ;;#ASMEND
9405; GFX940-NEXT:    s_mov_b32 s8, s6
9406; GFX940-NEXT:    s_mov_b32 s9, s2
9407; GFX940-NEXT:    s_mov_b32 s10, s1
9408; GFX940-NEXT:    s_mov_b32 s11, s1
9409; GFX940-NEXT:    ;;#ASMSTART
9410; GFX940-NEXT:    ; use s[8:11]
9411; GFX940-NEXT:    ;;#ASMEND
9412; GFX940-NEXT:    s_setpc_b64 s[30:31]
9413  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9414  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9415  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
9416  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9417  ret void
9418}
9419
; Scalar-register shuffle of two <3 x i32> inputs into a <4 x i32> with mask
; <5, 3, 1, 1>: result = <%vec1[2], %vec1[0], %vec0[1], %vec0[1]>.
; Both inputs are produced by inline asm with the "=s" constraint and the
; result is consumed by inline asm tied to s[8:11]. In the expected output
; neither asm def overlaps s[8:11]; all four lanes are copied in.
9420define void @s_shuffle_v4i32_v3i32__5_3_1_1() {
9421; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_1_1:
9422; GFX900:       ; %bb.0:
9423; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9424; GFX900-NEXT:    ;;#ASMSTART
9425; GFX900-NEXT:    ; def s[4:6]
9426; GFX900-NEXT:    ;;#ASMEND
9427; GFX900-NEXT:    ;;#ASMSTART
9428; GFX900-NEXT:    ; def s[12:14]
9429; GFX900-NEXT:    ;;#ASMEND
9430; GFX900-NEXT:    s_mov_b32 s8, s14
9431; GFX900-NEXT:    s_mov_b32 s9, s12
9432; GFX900-NEXT:    s_mov_b32 s10, s5
9433; GFX900-NEXT:    s_mov_b32 s11, s5
9434; GFX900-NEXT:    ;;#ASMSTART
9435; GFX900-NEXT:    ; use s[8:11]
9436; GFX900-NEXT:    ;;#ASMEND
9437; GFX900-NEXT:    s_setpc_b64 s[30:31]
9438;
9439; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_1_1:
9440; GFX90A:       ; %bb.0:
9441; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9442; GFX90A-NEXT:    ;;#ASMSTART
9443; GFX90A-NEXT:    ; def s[4:6]
9444; GFX90A-NEXT:    ;;#ASMEND
9445; GFX90A-NEXT:    ;;#ASMSTART
9446; GFX90A-NEXT:    ; def s[12:14]
9447; GFX90A-NEXT:    ;;#ASMEND
9448; GFX90A-NEXT:    s_mov_b32 s8, s14
9449; GFX90A-NEXT:    s_mov_b32 s9, s12
9450; GFX90A-NEXT:    s_mov_b32 s10, s5
9451; GFX90A-NEXT:    s_mov_b32 s11, s5
9452; GFX90A-NEXT:    ;;#ASMSTART
9453; GFX90A-NEXT:    ; use s[8:11]
9454; GFX90A-NEXT:    ;;#ASMEND
9455; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9456;
9457; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_1_1:
9458; GFX940:       ; %bb.0:
9459; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9460; GFX940-NEXT:    ;;#ASMSTART
9461; GFX940-NEXT:    ; def s[0:2]
9462; GFX940-NEXT:    ;;#ASMEND
9463; GFX940-NEXT:    ;;#ASMSTART
9464; GFX940-NEXT:    ; def s[4:6]
9465; GFX940-NEXT:    ;;#ASMEND
9466; GFX940-NEXT:    s_mov_b32 s8, s6
9467; GFX940-NEXT:    s_mov_b32 s9, s4
9468; GFX940-NEXT:    s_mov_b32 s10, s1
9469; GFX940-NEXT:    s_mov_b32 s11, s1
9470; GFX940-NEXT:    ;;#ASMSTART
9471; GFX940-NEXT:    ; use s[8:11]
9472; GFX940-NEXT:    ;;#ASMEND
9473; GFX940-NEXT:    s_setpc_b64 s[30:31]
9474  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9475  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9476  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
9477  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9478  ret void
9479}
9480
; SGPR shuffle with mask <5, 4, 1, 1>: lanes 0 and 1 take elements 2 and 1 of
; %vec1, lanes 2 and 3 both take element 1 of %vec0.
; The autogenerated checks below expect three s_mov_b32 copies (into s8, s10
; and s11) for every run line; no copy into s9 is expected.
9481define void @s_shuffle_v4i32_v3i32__5_4_1_1() {
9482; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_4_1_1:
9483; GFX900:       ; %bb.0:
9484; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9485; GFX900-NEXT:    ;;#ASMSTART
9486; GFX900-NEXT:    ; def s[8:10]
9487; GFX900-NEXT:    ;;#ASMEND
9488; GFX900-NEXT:    ;;#ASMSTART
9489; GFX900-NEXT:    ; def s[4:6]
9490; GFX900-NEXT:    ;;#ASMEND
9491; GFX900-NEXT:    s_mov_b32 s8, s10
9492; GFX900-NEXT:    s_mov_b32 s10, s5
9493; GFX900-NEXT:    s_mov_b32 s11, s5
9494; GFX900-NEXT:    ;;#ASMSTART
9495; GFX900-NEXT:    ; use s[8:11]
9496; GFX900-NEXT:    ;;#ASMEND
9497; GFX900-NEXT:    s_setpc_b64 s[30:31]
9498;
9499; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_4_1_1:
9500; GFX90A:       ; %bb.0:
9501; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9502; GFX90A-NEXT:    ;;#ASMSTART
9503; GFX90A-NEXT:    ; def s[8:10]
9504; GFX90A-NEXT:    ;;#ASMEND
9505; GFX90A-NEXT:    ;;#ASMSTART
9506; GFX90A-NEXT:    ; def s[4:6]
9507; GFX90A-NEXT:    ;;#ASMEND
9508; GFX90A-NEXT:    s_mov_b32 s8, s10
9509; GFX90A-NEXT:    s_mov_b32 s10, s5
9510; GFX90A-NEXT:    s_mov_b32 s11, s5
9511; GFX90A-NEXT:    ;;#ASMSTART
9512; GFX90A-NEXT:    ; use s[8:11]
9513; GFX90A-NEXT:    ;;#ASMEND
9514; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9515;
9516; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_1_1:
9517; GFX940:       ; %bb.0:
9518; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9519; GFX940-NEXT:    ;;#ASMSTART
9520; GFX940-NEXT:    ; def s[8:10]
9521; GFX940-NEXT:    ;;#ASMEND
9522; GFX940-NEXT:    ;;#ASMSTART
9523; GFX940-NEXT:    ; def s[0:2]
9524; GFX940-NEXT:    ;;#ASMEND
9525; GFX940-NEXT:    s_mov_b32 s8, s10
9526; GFX940-NEXT:    s_mov_b32 s10, s1
9527; GFX940-NEXT:    s_mov_b32 s11, s1
9528; GFX940-NEXT:    ;;#ASMSTART
9529; GFX940-NEXT:    ; use s[8:11]
9530; GFX940-NEXT:    ;;#ASMEND
9531; GFX940-NEXT:    s_setpc_b64 s[30:31]
9532  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9533  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9534  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9535  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9536  ret void
9537}
9538
; SGPR shuffle with mask <5, 5, 1, 1>: lanes 0 and 1 both take element 2 of
; %vec1, lanes 2 and 3 both take element 1 of %vec0.
; The autogenerated checks below expect one s_mov_b32 per result lane
; (s8 through s11) for every run line.
9539define void @s_shuffle_v4i32_v3i32__5_5_1_1() {
9540; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_1:
9541; GFX900:       ; %bb.0:
9542; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9543; GFX900-NEXT:    ;;#ASMSTART
9544; GFX900-NEXT:    ; def s[8:10]
9545; GFX900-NEXT:    ;;#ASMEND
9546; GFX900-NEXT:    ;;#ASMSTART
9547; GFX900-NEXT:    ; def s[4:6]
9548; GFX900-NEXT:    ;;#ASMEND
9549; GFX900-NEXT:    s_mov_b32 s8, s10
9550; GFX900-NEXT:    s_mov_b32 s9, s10
9551; GFX900-NEXT:    s_mov_b32 s10, s5
9552; GFX900-NEXT:    s_mov_b32 s11, s5
9553; GFX900-NEXT:    ;;#ASMSTART
9554; GFX900-NEXT:    ; use s[8:11]
9555; GFX900-NEXT:    ;;#ASMEND
9556; GFX900-NEXT:    s_setpc_b64 s[30:31]
9557;
9558; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_1:
9559; GFX90A:       ; %bb.0:
9560; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9561; GFX90A-NEXT:    ;;#ASMSTART
9562; GFX90A-NEXT:    ; def s[8:10]
9563; GFX90A-NEXT:    ;;#ASMEND
9564; GFX90A-NEXT:    ;;#ASMSTART
9565; GFX90A-NEXT:    ; def s[4:6]
9566; GFX90A-NEXT:    ;;#ASMEND
9567; GFX90A-NEXT:    s_mov_b32 s8, s10
9568; GFX90A-NEXT:    s_mov_b32 s9, s10
9569; GFX90A-NEXT:    s_mov_b32 s10, s5
9570; GFX90A-NEXT:    s_mov_b32 s11, s5
9571; GFX90A-NEXT:    ;;#ASMSTART
9572; GFX90A-NEXT:    ; use s[8:11]
9573; GFX90A-NEXT:    ;;#ASMEND
9574; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9575;
9576; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_1:
9577; GFX940:       ; %bb.0:
9578; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9579; GFX940-NEXT:    ;;#ASMSTART
9580; GFX940-NEXT:    ; def s[0:2]
9581; GFX940-NEXT:    ;;#ASMEND
9582; GFX940-NEXT:    ;;#ASMSTART
9583; GFX940-NEXT:    ; def s[4:6]
9584; GFX940-NEXT:    ;;#ASMEND
9585; GFX940-NEXT:    s_mov_b32 s8, s6
9586; GFX940-NEXT:    s_mov_b32 s9, s6
9587; GFX940-NEXT:    s_mov_b32 s10, s1
9588; GFX940-NEXT:    s_mov_b32 s11, s1
9589; GFX940-NEXT:    ;;#ASMSTART
9590; GFX940-NEXT:    ; use s[8:11]
9591; GFX940-NEXT:    ;;#ASMEND
9592; GFX940-NEXT:    s_setpc_b64 s[30:31]
9593  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9594  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9595  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9596  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9597  ret void
9598}
9599
; SGPR shuffle with mask <5, 5, poison, 1>: lanes 0 and 1 both take element 2
; of %vec1, lane 2 is poison, lane 3 takes element 1 of %vec0.
; The autogenerated checks below expect copies into s8, s9 and s11 only; no
; s_mov_b32 into s10 is expected for the poison lane.
9600define void @s_shuffle_v4i32_v3i32__5_5_u_1() {
9601; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1:
9602; GFX900:       ; %bb.0:
9603; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9604; GFX900-NEXT:    ;;#ASMSTART
9605; GFX900-NEXT:    ; def s[8:10]
9606; GFX900-NEXT:    ;;#ASMEND
9607; GFX900-NEXT:    ;;#ASMSTART
9608; GFX900-NEXT:    ; def s[4:6]
9609; GFX900-NEXT:    ;;#ASMEND
9610; GFX900-NEXT:    s_mov_b32 s8, s10
9611; GFX900-NEXT:    s_mov_b32 s9, s10
9612; GFX900-NEXT:    s_mov_b32 s11, s5
9613; GFX900-NEXT:    ;;#ASMSTART
9614; GFX900-NEXT:    ; use s[8:11]
9615; GFX900-NEXT:    ;;#ASMEND
9616; GFX900-NEXT:    s_setpc_b64 s[30:31]
9617;
9618; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1:
9619; GFX90A:       ; %bb.0:
9620; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9621; GFX90A-NEXT:    ;;#ASMSTART
9622; GFX90A-NEXT:    ; def s[8:10]
9623; GFX90A-NEXT:    ;;#ASMEND
9624; GFX90A-NEXT:    ;;#ASMSTART
9625; GFX90A-NEXT:    ; def s[4:6]
9626; GFX90A-NEXT:    ;;#ASMEND
9627; GFX90A-NEXT:    s_mov_b32 s8, s10
9628; GFX90A-NEXT:    s_mov_b32 s9, s10
9629; GFX90A-NEXT:    s_mov_b32 s11, s5
9630; GFX90A-NEXT:    ;;#ASMSTART
9631; GFX90A-NEXT:    ; use s[8:11]
9632; GFX90A-NEXT:    ;;#ASMEND
9633; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9634;
9635; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_1:
9636; GFX940:       ; %bb.0:
9637; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9638; GFX940-NEXT:    ;;#ASMSTART
9639; GFX940-NEXT:    ; def s[0:2]
9640; GFX940-NEXT:    ;;#ASMEND
9641; GFX940-NEXT:    ;;#ASMSTART
9642; GFX940-NEXT:    ; def s[4:6]
9643; GFX940-NEXT:    ;;#ASMEND
9644; GFX940-NEXT:    s_mov_b32 s8, s6
9645; GFX940-NEXT:    s_mov_b32 s9, s6
9646; GFX940-NEXT:    s_mov_b32 s11, s1
9647; GFX940-NEXT:    ;;#ASMSTART
9648; GFX940-NEXT:    ; use s[8:11]
9649; GFX940-NEXT:    ;;#ASMEND
9650; GFX940-NEXT:    s_setpc_b64 s[30:31]
9651  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9652  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9653  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9654  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9655  ret void
9656}
9657
; SGPR shuffle with mask <5, 5, 0, 1>: lanes 0 and 1 both take element 2 of
; %vec1, lanes 2 and 3 take elements 0 and 1 of %vec0.
; The autogenerated checks below expect one s_mov_b32 per result lane
; (s8 through s11) for every run line.
9658define void @s_shuffle_v4i32_v3i32__5_5_0_1() {
9659; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_1:
9660; GFX900:       ; %bb.0:
9661; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9662; GFX900-NEXT:    ;;#ASMSTART
9663; GFX900-NEXT:    ; def s[8:10]
9664; GFX900-NEXT:    ;;#ASMEND
9665; GFX900-NEXT:    ;;#ASMSTART
9666; GFX900-NEXT:    ; def s[4:6]
9667; GFX900-NEXT:    ;;#ASMEND
9668; GFX900-NEXT:    s_mov_b32 s8, s10
9669; GFX900-NEXT:    s_mov_b32 s9, s10
9670; GFX900-NEXT:    s_mov_b32 s10, s4
9671; GFX900-NEXT:    s_mov_b32 s11, s5
9672; GFX900-NEXT:    ;;#ASMSTART
9673; GFX900-NEXT:    ; use s[8:11]
9674; GFX900-NEXT:    ;;#ASMEND
9675; GFX900-NEXT:    s_setpc_b64 s[30:31]
9676;
9677; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_1:
9678; GFX90A:       ; %bb.0:
9679; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9680; GFX90A-NEXT:    ;;#ASMSTART
9681; GFX90A-NEXT:    ; def s[8:10]
9682; GFX90A-NEXT:    ;;#ASMEND
9683; GFX90A-NEXT:    ;;#ASMSTART
9684; GFX90A-NEXT:    ; def s[4:6]
9685; GFX90A-NEXT:    ;;#ASMEND
9686; GFX90A-NEXT:    s_mov_b32 s8, s10
9687; GFX90A-NEXT:    s_mov_b32 s9, s10
9688; GFX90A-NEXT:    s_mov_b32 s10, s4
9689; GFX90A-NEXT:    s_mov_b32 s11, s5
9690; GFX90A-NEXT:    ;;#ASMSTART
9691; GFX90A-NEXT:    ; use s[8:11]
9692; GFX90A-NEXT:    ;;#ASMEND
9693; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9694;
9695; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_1:
9696; GFX940:       ; %bb.0:
9697; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9698; GFX940-NEXT:    ;;#ASMSTART
9699; GFX940-NEXT:    ; def s[0:2]
9700; GFX940-NEXT:    ;;#ASMEND
9701; GFX940-NEXT:    ;;#ASMSTART
9702; GFX940-NEXT:    ; def s[4:6]
9703; GFX940-NEXT:    ;;#ASMEND
9704; GFX940-NEXT:    s_mov_b32 s8, s6
9705; GFX940-NEXT:    s_mov_b32 s9, s6
9706; GFX940-NEXT:    s_mov_b32 s10, s0
9707; GFX940-NEXT:    s_mov_b32 s11, s1
9708; GFX940-NEXT:    ;;#ASMSTART
9709; GFX940-NEXT:    ; use s[8:11]
9710; GFX940-NEXT:    ;;#ASMEND
9711; GFX940-NEXT:    s_setpc_b64 s[30:31]
9712  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9713  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9714  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9715  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9716  ret void
9717}
9718
; SGPR shuffle with mask <5, 5, 2, 1>: lanes 0 and 1 both take element 2 of
; %vec1, lanes 2 and 3 take elements 2 and 1 of %vec0.
; The autogenerated checks below expect one s_mov_b32 per result lane
; (s8 through s11) for every run line.
9719define void @s_shuffle_v4i32_v3i32__5_5_2_1() {
9720; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_1:
9721; GFX900:       ; %bb.0:
9722; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9723; GFX900-NEXT:    ;;#ASMSTART
9724; GFX900-NEXT:    ; def s[8:10]
9725; GFX900-NEXT:    ;;#ASMEND
9726; GFX900-NEXT:    ;;#ASMSTART
9727; GFX900-NEXT:    ; def s[4:6]
9728; GFX900-NEXT:    ;;#ASMEND
9729; GFX900-NEXT:    s_mov_b32 s8, s10
9730; GFX900-NEXT:    s_mov_b32 s9, s10
9731; GFX900-NEXT:    s_mov_b32 s10, s6
9732; GFX900-NEXT:    s_mov_b32 s11, s5
9733; GFX900-NEXT:    ;;#ASMSTART
9734; GFX900-NEXT:    ; use s[8:11]
9735; GFX900-NEXT:    ;;#ASMEND
9736; GFX900-NEXT:    s_setpc_b64 s[30:31]
9737;
9738; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_1:
9739; GFX90A:       ; %bb.0:
9740; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9741; GFX90A-NEXT:    ;;#ASMSTART
9742; GFX90A-NEXT:    ; def s[8:10]
9743; GFX90A-NEXT:    ;;#ASMEND
9744; GFX90A-NEXT:    ;;#ASMSTART
9745; GFX90A-NEXT:    ; def s[4:6]
9746; GFX90A-NEXT:    ;;#ASMEND
9747; GFX90A-NEXT:    s_mov_b32 s8, s10
9748; GFX90A-NEXT:    s_mov_b32 s9, s10
9749; GFX90A-NEXT:    s_mov_b32 s10, s6
9750; GFX90A-NEXT:    s_mov_b32 s11, s5
9751; GFX90A-NEXT:    ;;#ASMSTART
9752; GFX90A-NEXT:    ; use s[8:11]
9753; GFX90A-NEXT:    ;;#ASMEND
9754; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9755;
9756; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_1:
9757; GFX940:       ; %bb.0:
9758; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9759; GFX940-NEXT:    ;;#ASMSTART
9760; GFX940-NEXT:    ; def s[0:2]
9761; GFX940-NEXT:    ;;#ASMEND
9762; GFX940-NEXT:    ;;#ASMSTART
9763; GFX940-NEXT:    ; def s[4:6]
9764; GFX940-NEXT:    ;;#ASMEND
9765; GFX940-NEXT:    s_mov_b32 s8, s6
9766; GFX940-NEXT:    s_mov_b32 s9, s6
9767; GFX940-NEXT:    s_mov_b32 s10, s2
9768; GFX940-NEXT:    s_mov_b32 s11, s1
9769; GFX940-NEXT:    ;;#ASMSTART
9770; GFX940-NEXT:    ; use s[8:11]
9771; GFX940-NEXT:    ;;#ASMEND
9772; GFX940-NEXT:    s_setpc_b64 s[30:31]
9773  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9774  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9775  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9776  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9777  ret void
9778}
9779
; SGPR shuffle with mask <5, 5, 3, 1>: lanes 0 and 1 both take element 2 of
; %vec1, lane 2 takes element 0 of %vec1, lane 3 takes element 1 of %vec0.
; The autogenerated checks below expect one s_mov_b32 per result lane
; (s8 through s11) for every run line.
9780define void @s_shuffle_v4i32_v3i32__5_5_3_1() {
9781; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_1:
9782; GFX900:       ; %bb.0:
9783; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9784; GFX900-NEXT:    ;;#ASMSTART
9785; GFX900-NEXT:    ; def s[4:6]
9786; GFX900-NEXT:    ;;#ASMEND
9787; GFX900-NEXT:    ;;#ASMSTART
9788; GFX900-NEXT:    ; def s[12:14]
9789; GFX900-NEXT:    ;;#ASMEND
9790; GFX900-NEXT:    s_mov_b32 s8, s14
9791; GFX900-NEXT:    s_mov_b32 s9, s14
9792; GFX900-NEXT:    s_mov_b32 s10, s12
9793; GFX900-NEXT:    s_mov_b32 s11, s5
9794; GFX900-NEXT:    ;;#ASMSTART
9795; GFX900-NEXT:    ; use s[8:11]
9796; GFX900-NEXT:    ;;#ASMEND
9797; GFX900-NEXT:    s_setpc_b64 s[30:31]
9798;
9799; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_1:
9800; GFX90A:       ; %bb.0:
9801; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9802; GFX90A-NEXT:    ;;#ASMSTART
9803; GFX90A-NEXT:    ; def s[4:6]
9804; GFX90A-NEXT:    ;;#ASMEND
9805; GFX90A-NEXT:    ;;#ASMSTART
9806; GFX90A-NEXT:    ; def s[12:14]
9807; GFX90A-NEXT:    ;;#ASMEND
9808; GFX90A-NEXT:    s_mov_b32 s8, s14
9809; GFX90A-NEXT:    s_mov_b32 s9, s14
9810; GFX90A-NEXT:    s_mov_b32 s10, s12
9811; GFX90A-NEXT:    s_mov_b32 s11, s5
9812; GFX90A-NEXT:    ;;#ASMSTART
9813; GFX90A-NEXT:    ; use s[8:11]
9814; GFX90A-NEXT:    ;;#ASMEND
9815; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9816;
9817; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_1:
9818; GFX940:       ; %bb.0:
9819; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9820; GFX940-NEXT:    ;;#ASMSTART
9821; GFX940-NEXT:    ; def s[0:2]
9822; GFX940-NEXT:    ;;#ASMEND
9823; GFX940-NEXT:    ;;#ASMSTART
9824; GFX940-NEXT:    ; def s[4:6]
9825; GFX940-NEXT:    ;;#ASMEND
9826; GFX940-NEXT:    s_mov_b32 s8, s6
9827; GFX940-NEXT:    s_mov_b32 s9, s6
9828; GFX940-NEXT:    s_mov_b32 s10, s4
9829; GFX940-NEXT:    s_mov_b32 s11, s1
9830; GFX940-NEXT:    ;;#ASMSTART
9831; GFX940-NEXT:    ; use s[8:11]
9832; GFX940-NEXT:    ;;#ASMEND
9833; GFX940-NEXT:    s_setpc_b64 s[30:31]
9834  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9835  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9836  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9837  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9838  ret void
9839}
9840
; SGPR shuffle with mask <5, 5, 4, 1>: lanes 0 and 1 both take element 2 of
; %vec1, lane 2 takes element 1 of %vec1, lane 3 takes element 1 of %vec0.
; The autogenerated checks below expect one s_mov_b32 per result lane
; (s8 through s11) for every run line.
9841define void @s_shuffle_v4i32_v3i32__5_5_4_1() {
9842; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_1:
9843; GFX900:       ; %bb.0:
9844; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9845; GFX900-NEXT:    ;;#ASMSTART
9846; GFX900-NEXT:    ; def s[4:6]
9847; GFX900-NEXT:    ;;#ASMEND
9848; GFX900-NEXT:    ;;#ASMSTART
9849; GFX900-NEXT:    ; def s[12:14]
9850; GFX900-NEXT:    ;;#ASMEND
9851; GFX900-NEXT:    s_mov_b32 s8, s14
9852; GFX900-NEXT:    s_mov_b32 s9, s14
9853; GFX900-NEXT:    s_mov_b32 s10, s13
9854; GFX900-NEXT:    s_mov_b32 s11, s5
9855; GFX900-NEXT:    ;;#ASMSTART
9856; GFX900-NEXT:    ; use s[8:11]
9857; GFX900-NEXT:    ;;#ASMEND
9858; GFX900-NEXT:    s_setpc_b64 s[30:31]
9859;
9860; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_1:
9861; GFX90A:       ; %bb.0:
9862; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9863; GFX90A-NEXT:    ;;#ASMSTART
9864; GFX90A-NEXT:    ; def s[4:6]
9865; GFX90A-NEXT:    ;;#ASMEND
9866; GFX90A-NEXT:    ;;#ASMSTART
9867; GFX90A-NEXT:    ; def s[12:14]
9868; GFX90A-NEXT:    ;;#ASMEND
9869; GFX90A-NEXT:    s_mov_b32 s8, s14
9870; GFX90A-NEXT:    s_mov_b32 s9, s14
9871; GFX90A-NEXT:    s_mov_b32 s10, s13
9872; GFX90A-NEXT:    s_mov_b32 s11, s5
9873; GFX90A-NEXT:    ;;#ASMSTART
9874; GFX90A-NEXT:    ; use s[8:11]
9875; GFX90A-NEXT:    ;;#ASMEND
9876; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9877;
9878; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_1:
9879; GFX940:       ; %bb.0:
9880; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9881; GFX940-NEXT:    ;;#ASMSTART
9882; GFX940-NEXT:    ; def s[0:2]
9883; GFX940-NEXT:    ;;#ASMEND
9884; GFX940-NEXT:    ;;#ASMSTART
9885; GFX940-NEXT:    ; def s[4:6]
9886; GFX940-NEXT:    ;;#ASMEND
9887; GFX940-NEXT:    s_mov_b32 s8, s6
9888; GFX940-NEXT:    s_mov_b32 s9, s6
9889; GFX940-NEXT:    s_mov_b32 s10, s5
9890; GFX940-NEXT:    s_mov_b32 s11, s1
9891; GFX940-NEXT:    ;;#ASMSTART
9892; GFX940-NEXT:    ; use s[8:11]
9893; GFX940-NEXT:    ;;#ASMEND
9894; GFX940-NEXT:    s_setpc_b64 s[30:31]
9895  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9896  %vec1 = call <3 x i32> asm "; def $0", "=s"()
9897  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9898  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9899  ret void
9900}
9901
; Single-input SGPR shuffle with mask <poison, 2, 2, 2>: lane 0 is poison and
; lanes 1-3 all take element 2 of %vec0 (the second operand is poison).
; All run lines share one check prefix here. The autogenerated checks expect
; the def in s[8:10] and only two copies from s10, into s9 and s11.
9902define void @s_shuffle_v4i32_v3i32__u_2_2_2() {
9903; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_2_2_2:
9904; GFX9:       ; %bb.0:
9905; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9906; GFX9-NEXT:    ;;#ASMSTART
9907; GFX9-NEXT:    ; def s[8:10]
9908; GFX9-NEXT:    ;;#ASMEND
9909; GFX9-NEXT:    s_mov_b32 s9, s10
9910; GFX9-NEXT:    s_mov_b32 s11, s10
9911; GFX9-NEXT:    ;;#ASMSTART
9912; GFX9-NEXT:    ; use s[8:11]
9913; GFX9-NEXT:    ;;#ASMEND
9914; GFX9-NEXT:    s_setpc_b64 s[30:31]
9915  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9916  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9917  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9918  ret void
9919}
9920
; Single-input SGPR shuffle with mask <0, 2, 2, 2>: lane 0 takes element 0 of
; %vec0 and lanes 1-3 all take element 2 (the second operand is poison).
; All run lines share one check prefix here. The autogenerated checks expect
; the def in s[8:10] and only two copies from s10, into s9 and s11; no
; s_mov_b32 into s8 is expected.
9921define void @s_shuffle_v4i32_v3i32__0_2_2_2() {
9922; GFX9-LABEL: s_shuffle_v4i32_v3i32__0_2_2_2:
9923; GFX9:       ; %bb.0:
9924; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9925; GFX9-NEXT:    ;;#ASMSTART
9926; GFX9-NEXT:    ; def s[8:10]
9927; GFX9-NEXT:    ;;#ASMEND
9928; GFX9-NEXT:    s_mov_b32 s9, s10
9929; GFX9-NEXT:    s_mov_b32 s11, s10
9930; GFX9-NEXT:    ;;#ASMSTART
9931; GFX9-NEXT:    ; use s[8:11]
9932; GFX9-NEXT:    ;;#ASMEND
9933; GFX9-NEXT:    s_setpc_b64 s[30:31]
9934  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9935  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9936  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9937  ret void
9938}
9939
; Single-input SGPR shuffle with mask <1, 2, 2, 2>: lane 0 takes element 1 of
; %vec0 and lanes 1-3 all take element 2 (the second operand is poison).
; All run lines share one check prefix here. The autogenerated checks expect
; s8 to be written from s9 before s9 itself is overwritten from s10.
9940define void @s_shuffle_v4i32_v3i32__1_2_2_2() {
9941; GFX9-LABEL: s_shuffle_v4i32_v3i32__1_2_2_2:
9942; GFX9:       ; %bb.0:
9943; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9944; GFX9-NEXT:    ;;#ASMSTART
9945; GFX9-NEXT:    ; def s[8:10]
9946; GFX9-NEXT:    ;;#ASMEND
9947; GFX9-NEXT:    s_mov_b32 s8, s9
9948; GFX9-NEXT:    s_mov_b32 s9, s10
9949; GFX9-NEXT:    s_mov_b32 s11, s10
9950; GFX9-NEXT:    ;;#ASMSTART
9951; GFX9-NEXT:    ; use s[8:11]
9952; GFX9-NEXT:    ;;#ASMEND
9953; GFX9-NEXT:    s_setpc_b64 s[30:31]
9954  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9955  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9956  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9957  ret void
9958}
9959
; Single-input SGPR shuffle with mask <2, 2, 2, 2>: every result lane takes
; element 2 of %vec0 (the second operand is poison).
; All run lines share one check prefix here. The autogenerated checks expect
; the def in s[8:10] and copies from s10 into s8, s9 and s11.
9960define void @s_shuffle_v4i32_v3i32__2_2_2_2() {
9961; GFX9-LABEL: s_shuffle_v4i32_v3i32__2_2_2_2:
9962; GFX9:       ; %bb.0:
9963; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9964; GFX9-NEXT:    ;;#ASMSTART
9965; GFX9-NEXT:    ; def s[8:10]
9966; GFX9-NEXT:    ;;#ASMEND
9967; GFX9-NEXT:    s_mov_b32 s8, s10
9968; GFX9-NEXT:    s_mov_b32 s9, s10
9969; GFX9-NEXT:    s_mov_b32 s11, s10
9970; GFX9-NEXT:    ;;#ASMSTART
9971; GFX9-NEXT:    ; use s[8:11]
9972; GFX9-NEXT:    ;;#ASMEND
9973; GFX9-NEXT:    s_setpc_b64 s[30:31]
9974  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9975  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9976  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9977  ret void
9978}
9979
; SGPR shuffle with mask <3, 2, 2, 2> where the second operand is poison:
; index 3 selects element 0 of that poison operand, and lanes 1-3 all take
; element 2 of %vec0.
; All run lines share one check prefix here. The autogenerated checks expect
; the def in s[8:10] and only two copies from s10, into s9 and s11; no
; s_mov_b32 into s8 is expected.
9980define void @s_shuffle_v4i32_v3i32__3_2_2_2() {
9981; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_2_2_2:
9982; GFX9:       ; %bb.0:
9983; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9984; GFX9-NEXT:    ;;#ASMSTART
9985; GFX9-NEXT:    ; def s[8:10]
9986; GFX9-NEXT:    ;;#ASMEND
9987; GFX9-NEXT:    s_mov_b32 s9, s10
9988; GFX9-NEXT:    s_mov_b32 s11, s10
9989; GFX9-NEXT:    ;;#ASMSTART
9990; GFX9-NEXT:    ; use s[8:11]
9991; GFX9-NEXT:    ;;#ASMEND
9992; GFX9-NEXT:    s_setpc_b64 s[30:31]
9993  %vec0 = call <3 x i32> asm "; def $0", "=s"()
9994  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
9995  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
9996  ret void
9997}
9998
; SGPR shuffle with mask <4, 2, 2, 2>: lane 0 takes element 1 of %vec1 and
; lanes 1-3 all take element 2 of %vec0.
; The autogenerated checks below expect three s_mov_b32 copies (into s8, s9
; and s11) for every run line; no copy into s10 is expected.
9999define void @s_shuffle_v4i32_v3i32__4_2_2_2() {
10000; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_2_2_2:
10001; GFX900:       ; %bb.0:
10002; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10003; GFX900-NEXT:    ;;#ASMSTART
10004; GFX900-NEXT:    ; def s[8:10]
10005; GFX900-NEXT:    ;;#ASMEND
10006; GFX900-NEXT:    ;;#ASMSTART
10007; GFX900-NEXT:    ; def s[4:6]
10008; GFX900-NEXT:    ;;#ASMEND
10009; GFX900-NEXT:    s_mov_b32 s8, s5
10010; GFX900-NEXT:    s_mov_b32 s9, s10
10011; GFX900-NEXT:    s_mov_b32 s11, s10
10012; GFX900-NEXT:    ;;#ASMSTART
10013; GFX900-NEXT:    ; use s[8:11]
10014; GFX900-NEXT:    ;;#ASMEND
10015; GFX900-NEXT:    s_setpc_b64 s[30:31]
10016;
10017; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_2_2_2:
10018; GFX90A:       ; %bb.0:
10019; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10020; GFX90A-NEXT:    ;;#ASMSTART
10021; GFX90A-NEXT:    ; def s[8:10]
10022; GFX90A-NEXT:    ;;#ASMEND
10023; GFX90A-NEXT:    ;;#ASMSTART
10024; GFX90A-NEXT:    ; def s[4:6]
10025; GFX90A-NEXT:    ;;#ASMEND
10026; GFX90A-NEXT:    s_mov_b32 s8, s5
10027; GFX90A-NEXT:    s_mov_b32 s9, s10
10028; GFX90A-NEXT:    s_mov_b32 s11, s10
10029; GFX90A-NEXT:    ;;#ASMSTART
10030; GFX90A-NEXT:    ; use s[8:11]
10031; GFX90A-NEXT:    ;;#ASMEND
10032; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10033;
10034; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_2_2_2:
10035; GFX940:       ; %bb.0:
10036; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10037; GFX940-NEXT:    ;;#ASMSTART
10038; GFX940-NEXT:    ; def s[8:10]
10039; GFX940-NEXT:    ;;#ASMEND
10040; GFX940-NEXT:    ;;#ASMSTART
10041; GFX940-NEXT:    ; def s[0:2]
10042; GFX940-NEXT:    ;;#ASMEND
10043; GFX940-NEXT:    s_mov_b32 s8, s1
10044; GFX940-NEXT:    s_mov_b32 s9, s10
10045; GFX940-NEXT:    s_mov_b32 s11, s10
10046; GFX940-NEXT:    ;;#ASMSTART
10047; GFX940-NEXT:    ; use s[8:11]
10048; GFX940-NEXT:    ;;#ASMEND
10049; GFX940-NEXT:    s_setpc_b64 s[30:31]
10050  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10051  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10052  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
10053  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10054  ret void
10055}
10056
; SGPR shuffle with mask <5, 2, 2, 2>: lane 0 takes element 2 of %vec1 and
; lanes 1-3 all take element 2 of %vec0.
; The autogenerated checks below expect three s_mov_b32 copies (into s8, s9
; and s11) for every run line; no copy into s10 is expected.
10057define void @s_shuffle_v4i32_v3i32__5_2_2_2() {
10058; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_2_2:
10059; GFX900:       ; %bb.0:
10060; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10061; GFX900-NEXT:    ;;#ASMSTART
10062; GFX900-NEXT:    ; def s[8:10]
10063; GFX900-NEXT:    ;;#ASMEND
10064; GFX900-NEXT:    ;;#ASMSTART
10065; GFX900-NEXT:    ; def s[4:6]
10066; GFX900-NEXT:    ;;#ASMEND
10067; GFX900-NEXT:    s_mov_b32 s8, s6
10068; GFX900-NEXT:    s_mov_b32 s9, s10
10069; GFX900-NEXT:    s_mov_b32 s11, s10
10070; GFX900-NEXT:    ;;#ASMSTART
10071; GFX900-NEXT:    ; use s[8:11]
10072; GFX900-NEXT:    ;;#ASMEND
10073; GFX900-NEXT:    s_setpc_b64 s[30:31]
10074;
10075; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_2_2:
10076; GFX90A:       ; %bb.0:
10077; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10078; GFX90A-NEXT:    ;;#ASMSTART
10079; GFX90A-NEXT:    ; def s[8:10]
10080; GFX90A-NEXT:    ;;#ASMEND
10081; GFX90A-NEXT:    ;;#ASMSTART
10082; GFX90A-NEXT:    ; def s[4:6]
10083; GFX90A-NEXT:    ;;#ASMEND
10084; GFX90A-NEXT:    s_mov_b32 s8, s6
10085; GFX90A-NEXT:    s_mov_b32 s9, s10
10086; GFX90A-NEXT:    s_mov_b32 s11, s10
10087; GFX90A-NEXT:    ;;#ASMSTART
10088; GFX90A-NEXT:    ; use s[8:11]
10089; GFX90A-NEXT:    ;;#ASMEND
10090; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10091;
10092; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_2_2:
10093; GFX940:       ; %bb.0:
10094; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10095; GFX940-NEXT:    ;;#ASMSTART
10096; GFX940-NEXT:    ; def s[8:10]
10097; GFX940-NEXT:    ;;#ASMEND
10098; GFX940-NEXT:    ;;#ASMSTART
10099; GFX940-NEXT:    ; def s[0:2]
10100; GFX940-NEXT:    ;;#ASMEND
10101; GFX940-NEXT:    s_mov_b32 s8, s2
10102; GFX940-NEXT:    s_mov_b32 s9, s10
10103; GFX940-NEXT:    s_mov_b32 s11, s10
10104; GFX940-NEXT:    ;;#ASMSTART
10105; GFX940-NEXT:    ; use s[8:11]
10106; GFX940-NEXT:    ;;#ASMEND
10107; GFX940-NEXT:    s_setpc_b64 s[30:31]
10108  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10109  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10110  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
10111  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10112  ret void
10113}
10114
; SGPR shuffle with mask <5, poison, 2, 2>: lane 0 takes element 2 of %vec1,
; lane 1 is poison, lanes 2 and 3 both take element 2 of %vec0.
; The autogenerated checks below expect only two s_mov_b32 copies (into s8
; and s11) for every run line.
10115define void @s_shuffle_v4i32_v3i32__5_u_2_2() {
10116; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_2_2:
10117; GFX900:       ; %bb.0:
10118; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10119; GFX900-NEXT:    ;;#ASMSTART
10120; GFX900-NEXT:    ; def s[8:10]
10121; GFX900-NEXT:    ;;#ASMEND
10122; GFX900-NEXT:    ;;#ASMSTART
10123; GFX900-NEXT:    ; def s[4:6]
10124; GFX900-NEXT:    ;;#ASMEND
10125; GFX900-NEXT:    s_mov_b32 s8, s6
10126; GFX900-NEXT:    s_mov_b32 s11, s10
10127; GFX900-NEXT:    ;;#ASMSTART
10128; GFX900-NEXT:    ; use s[8:11]
10129; GFX900-NEXT:    ;;#ASMEND
10130; GFX900-NEXT:    s_setpc_b64 s[30:31]
10131;
10132; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_2_2:
10133; GFX90A:       ; %bb.0:
10134; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10135; GFX90A-NEXT:    ;;#ASMSTART
10136; GFX90A-NEXT:    ; def s[8:10]
10137; GFX90A-NEXT:    ;;#ASMEND
10138; GFX90A-NEXT:    ;;#ASMSTART
10139; GFX90A-NEXT:    ; def s[4:6]
10140; GFX90A-NEXT:    ;;#ASMEND
10141; GFX90A-NEXT:    s_mov_b32 s8, s6
10142; GFX90A-NEXT:    s_mov_b32 s11, s10
10143; GFX90A-NEXT:    ;;#ASMSTART
10144; GFX90A-NEXT:    ; use s[8:11]
10145; GFX90A-NEXT:    ;;#ASMEND
10146; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10147;
10148; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_2_2:
10149; GFX940:       ; %bb.0:
10150; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10151; GFX940-NEXT:    ;;#ASMSTART
10152; GFX940-NEXT:    ; def s[8:10]
10153; GFX940-NEXT:    ;;#ASMEND
10154; GFX940-NEXT:    ;;#ASMSTART
10155; GFX940-NEXT:    ; def s[0:2]
10156; GFX940-NEXT:    ;;#ASMEND
10157; GFX940-NEXT:    s_mov_b32 s8, s2
10158; GFX940-NEXT:    s_mov_b32 s11, s10
10159; GFX940-NEXT:    ;;#ASMSTART
10160; GFX940-NEXT:    ; use s[8:11]
10161; GFX940-NEXT:    ;;#ASMEND
10162; GFX940-NEXT:    s_setpc_b64 s[30:31]
10163  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10164  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10165  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
10166  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10167  ret void
10168}
10169
; SGPR shuffle with mask <5, 0, 2, 2>: lane 0 takes element 2 of %vec1,
; lane 1 takes element 0 of %vec0, lanes 2 and 3 both take element 2 of %vec0.
; The autogenerated checks below expect one s_mov_b32 per result lane
; (s8 through s11) for every run line.
10170define void @s_shuffle_v4i32_v3i32__5_0_2_2() {
10171; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_2_2:
10172; GFX900:       ; %bb.0:
10173; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10174; GFX900-NEXT:    ;;#ASMSTART
10175; GFX900-NEXT:    ; def s[8:10]
10176; GFX900-NEXT:    ;;#ASMEND
10177; GFX900-NEXT:    ;;#ASMSTART
10178; GFX900-NEXT:    ; def s[4:6]
10179; GFX900-NEXT:    ;;#ASMEND
10180; GFX900-NEXT:    s_mov_b32 s8, s10
10181; GFX900-NEXT:    s_mov_b32 s9, s4
10182; GFX900-NEXT:    s_mov_b32 s10, s6
10183; GFX900-NEXT:    s_mov_b32 s11, s6
10184; GFX900-NEXT:    ;;#ASMSTART
10185; GFX900-NEXT:    ; use s[8:11]
10186; GFX900-NEXT:    ;;#ASMEND
10187; GFX900-NEXT:    s_setpc_b64 s[30:31]
10188;
10189; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_2_2:
10190; GFX90A:       ; %bb.0:
10191; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10192; GFX90A-NEXT:    ;;#ASMSTART
10193; GFX90A-NEXT:    ; def s[8:10]
10194; GFX90A-NEXT:    ;;#ASMEND
10195; GFX90A-NEXT:    ;;#ASMSTART
10196; GFX90A-NEXT:    ; def s[4:6]
10197; GFX90A-NEXT:    ;;#ASMEND
10198; GFX90A-NEXT:    s_mov_b32 s8, s10
10199; GFX90A-NEXT:    s_mov_b32 s9, s4
10200; GFX90A-NEXT:    s_mov_b32 s10, s6
10201; GFX90A-NEXT:    s_mov_b32 s11, s6
10202; GFX90A-NEXT:    ;;#ASMSTART
10203; GFX90A-NEXT:    ; use s[8:11]
10204; GFX90A-NEXT:    ;;#ASMEND
10205; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10206;
10207; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_2_2:
10208; GFX940:       ; %bb.0:
10209; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10210; GFX940-NEXT:    ;;#ASMSTART
10211; GFX940-NEXT:    ; def s[0:2]
10212; GFX940-NEXT:    ;;#ASMEND
10213; GFX940-NEXT:    ;;#ASMSTART
10214; GFX940-NEXT:    ; def s[4:6]
10215; GFX940-NEXT:    ;;#ASMEND
10216; GFX940-NEXT:    s_mov_b32 s8, s6
10217; GFX940-NEXT:    s_mov_b32 s9, s0
10218; GFX940-NEXT:    s_mov_b32 s10, s2
10219; GFX940-NEXT:    s_mov_b32 s11, s2
10220; GFX940-NEXT:    ;;#ASMSTART
10221; GFX940-NEXT:    ; use s[8:11]
10222; GFX940-NEXT:    ;;#ASMEND
10223; GFX940-NEXT:    s_setpc_b64 s[30:31]
10224  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10225  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10226  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
10227  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10228  ret void
10229}
10230
; SGPR shuffle with mask <5, 1, 2, 2>: lane 0 takes element 2 of %vec1,
; lane 1 takes element 1 of %vec0, lanes 2 and 3 both take element 2 of %vec0.
; The autogenerated checks below expect only two s_mov_b32 copies (into s8
; and s11) for every run line; no copies into s9 or s10 are expected.
10231define void @s_shuffle_v4i32_v3i32__5_1_2_2() {
10232; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_2_2:
10233; GFX900:       ; %bb.0:
10234; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10235; GFX900-NEXT:    ;;#ASMSTART
10236; GFX900-NEXT:    ; def s[8:10]
10237; GFX900-NEXT:    ;;#ASMEND
10238; GFX900-NEXT:    ;;#ASMSTART
10239; GFX900-NEXT:    ; def s[4:6]
10240; GFX900-NEXT:    ;;#ASMEND
10241; GFX900-NEXT:    s_mov_b32 s8, s6
10242; GFX900-NEXT:    s_mov_b32 s11, s10
10243; GFX900-NEXT:    ;;#ASMSTART
10244; GFX900-NEXT:    ; use s[8:11]
10245; GFX900-NEXT:    ;;#ASMEND
10246; GFX900-NEXT:    s_setpc_b64 s[30:31]
10247;
10248; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_2_2:
10249; GFX90A:       ; %bb.0:
10250; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10251; GFX90A-NEXT:    ;;#ASMSTART
10252; GFX90A-NEXT:    ; def s[8:10]
10253; GFX90A-NEXT:    ;;#ASMEND
10254; GFX90A-NEXT:    ;;#ASMSTART
10255; GFX90A-NEXT:    ; def s[4:6]
10256; GFX90A-NEXT:    ;;#ASMEND
10257; GFX90A-NEXT:    s_mov_b32 s8, s6
10258; GFX90A-NEXT:    s_mov_b32 s11, s10
10259; GFX90A-NEXT:    ;;#ASMSTART
10260; GFX90A-NEXT:    ; use s[8:11]
10261; GFX90A-NEXT:    ;;#ASMEND
10262; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10263;
10264; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_2_2:
10265; GFX940:       ; %bb.0:
10266; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10267; GFX940-NEXT:    ;;#ASMSTART
10268; GFX940-NEXT:    ; def s[8:10]
10269; GFX940-NEXT:    ;;#ASMEND
10270; GFX940-NEXT:    ;;#ASMSTART
10271; GFX940-NEXT:    ; def s[0:2]
10272; GFX940-NEXT:    ;;#ASMEND
10273; GFX940-NEXT:    s_mov_b32 s8, s2
10274; GFX940-NEXT:    s_mov_b32 s11, s10
10275; GFX940-NEXT:    ;;#ASMSTART
10276; GFX940-NEXT:    ; use s[8:11]
10277; GFX940-NEXT:    ;;#ASMEND
10278; GFX940-NEXT:    s_setpc_b64 s[30:31]
10279  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10280  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10281  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
10282  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10283  ret void
10284}
10285
; SGPR shuffle with mask <5, 3, 2, 2>: lanes 0 and 1 take elements 2 and 0 of
; %vec1, lanes 2 and 3 both take element 2 of %vec0.
; The autogenerated checks below expect three s_mov_b32 copies (into s8, s9
; and s11) for every run line; no copy into s10 is expected.
10286define void @s_shuffle_v4i32_v3i32__5_3_2_2() {
10287; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_2_2:
10288; GFX900:       ; %bb.0:
10289; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10290; GFX900-NEXT:    ;;#ASMSTART
10291; GFX900-NEXT:    ; def s[8:10]
10292; GFX900-NEXT:    ;;#ASMEND
10293; GFX900-NEXT:    ;;#ASMSTART
10294; GFX900-NEXT:    ; def s[4:6]
10295; GFX900-NEXT:    ;;#ASMEND
10296; GFX900-NEXT:    s_mov_b32 s8, s6
10297; GFX900-NEXT:    s_mov_b32 s9, s4
10298; GFX900-NEXT:    s_mov_b32 s11, s10
10299; GFX900-NEXT:    ;;#ASMSTART
10300; GFX900-NEXT:    ; use s[8:11]
10301; GFX900-NEXT:    ;;#ASMEND
10302; GFX900-NEXT:    s_setpc_b64 s[30:31]
10303;
10304; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_2_2:
10305; GFX90A:       ; %bb.0:
10306; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10307; GFX90A-NEXT:    ;;#ASMSTART
10308; GFX90A-NEXT:    ; def s[8:10]
10309; GFX90A-NEXT:    ;;#ASMEND
10310; GFX90A-NEXT:    ;;#ASMSTART
10311; GFX90A-NEXT:    ; def s[4:6]
10312; GFX90A-NEXT:    ;;#ASMEND
10313; GFX90A-NEXT:    s_mov_b32 s8, s6
10314; GFX90A-NEXT:    s_mov_b32 s9, s4
10315; GFX90A-NEXT:    s_mov_b32 s11, s10
10316; GFX90A-NEXT:    ;;#ASMSTART
10317; GFX90A-NEXT:    ; use s[8:11]
10318; GFX90A-NEXT:    ;;#ASMEND
10319; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10320;
10321; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_2_2:
10322; GFX940:       ; %bb.0:
10323; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10324; GFX940-NEXT:    ;;#ASMSTART
10325; GFX940-NEXT:    ; def s[8:10]
10326; GFX940-NEXT:    ;;#ASMEND
10327; GFX940-NEXT:    ;;#ASMSTART
10328; GFX940-NEXT:    ; def s[0:2]
10329; GFX940-NEXT:    ;;#ASMEND
10330; GFX940-NEXT:    s_mov_b32 s8, s2
10331; GFX940-NEXT:    s_mov_b32 s9, s0
10332; GFX940-NEXT:    s_mov_b32 s11, s10
10333; GFX940-NEXT:    ;;#ASMSTART
10334; GFX940-NEXT:    ; use s[8:11]
10335; GFX940-NEXT:    ;;#ASMEND
10336; GFX940-NEXT:    s_setpc_b64 s[30:31]
10337  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10338  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10339  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
; The "{s[8:11]}" constraint pins the shuffled vector to a fixed SGPR quad.
10340  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10341  ret void
10342}
10343
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, 4, 2, 2>. Mask indices 0-2
; select lanes of %vec0 and 3-5 select lanes of %vec1. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10344define void @s_shuffle_v4i32_v3i32__5_4_2_2() {
10345; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_4_2_2:
10346; GFX900:       ; %bb.0:
10347; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10348; GFX900-NEXT:    ;;#ASMSTART
10349; GFX900-NEXT:    ; def s[8:10]
10350; GFX900-NEXT:    ;;#ASMEND
10351; GFX900-NEXT:    ;;#ASMSTART
10352; GFX900-NEXT:    ; def s[4:6]
10353; GFX900-NEXT:    ;;#ASMEND
10354; GFX900-NEXT:    s_mov_b32 s8, s10
10355; GFX900-NEXT:    s_mov_b32 s10, s6
10356; GFX900-NEXT:    s_mov_b32 s11, s6
10357; GFX900-NEXT:    ;;#ASMSTART
10358; GFX900-NEXT:    ; use s[8:11]
10359; GFX900-NEXT:    ;;#ASMEND
10360; GFX900-NEXT:    s_setpc_b64 s[30:31]
10361;
10362; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_4_2_2:
10363; GFX90A:       ; %bb.0:
10364; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10365; GFX90A-NEXT:    ;;#ASMSTART
10366; GFX90A-NEXT:    ; def s[8:10]
10367; GFX90A-NEXT:    ;;#ASMEND
10368; GFX90A-NEXT:    ;;#ASMSTART
10369; GFX90A-NEXT:    ; def s[4:6]
10370; GFX90A-NEXT:    ;;#ASMEND
10371; GFX90A-NEXT:    s_mov_b32 s8, s10
10372; GFX90A-NEXT:    s_mov_b32 s10, s6
10373; GFX90A-NEXT:    s_mov_b32 s11, s6
10374; GFX90A-NEXT:    ;;#ASMSTART
10375; GFX90A-NEXT:    ; use s[8:11]
10376; GFX90A-NEXT:    ;;#ASMEND
10377; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10378;
10379; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_2_2:
10380; GFX940:       ; %bb.0:
10381; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10382; GFX940-NEXT:    ;;#ASMSTART
10383; GFX940-NEXT:    ; def s[8:10]
10384; GFX940-NEXT:    ;;#ASMEND
10385; GFX940-NEXT:    ;;#ASMSTART
10386; GFX940-NEXT:    ; def s[0:2]
10387; GFX940-NEXT:    ;;#ASMEND
10388; GFX940-NEXT:    s_mov_b32 s8, s10
10389; GFX940-NEXT:    s_mov_b32 s10, s2
10390; GFX940-NEXT:    s_mov_b32 s11, s2
10391; GFX940-NEXT:    ;;#ASMSTART
10392; GFX940-NEXT:    ; use s[8:11]
10393; GFX940-NEXT:    ;;#ASMEND
10394; GFX940-NEXT:    s_setpc_b64 s[30:31]
10395  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10396  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10397  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
10398  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10399  ret void
10400}
10401
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, 5, 2, 2>. Mask indices 0-2
; select lanes of %vec0 and 3-5 select lanes of %vec1. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10402define void @s_shuffle_v4i32_v3i32__5_5_2_2() {
10403; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_2:
10404; GFX900:       ; %bb.0:
10405; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10406; GFX900-NEXT:    ;;#ASMSTART
10407; GFX900-NEXT:    ; def s[8:10]
10408; GFX900-NEXT:    ;;#ASMEND
10409; GFX900-NEXT:    ;;#ASMSTART
10410; GFX900-NEXT:    ; def s[4:6]
10411; GFX900-NEXT:    ;;#ASMEND
10412; GFX900-NEXT:    s_mov_b32 s8, s6
10413; GFX900-NEXT:    s_mov_b32 s9, s6
10414; GFX900-NEXT:    s_mov_b32 s11, s10
10415; GFX900-NEXT:    ;;#ASMSTART
10416; GFX900-NEXT:    ; use s[8:11]
10417; GFX900-NEXT:    ;;#ASMEND
10418; GFX900-NEXT:    s_setpc_b64 s[30:31]
10419;
10420; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_2:
10421; GFX90A:       ; %bb.0:
10422; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10423; GFX90A-NEXT:    ;;#ASMSTART
10424; GFX90A-NEXT:    ; def s[8:10]
10425; GFX90A-NEXT:    ;;#ASMEND
10426; GFX90A-NEXT:    ;;#ASMSTART
10427; GFX90A-NEXT:    ; def s[4:6]
10428; GFX90A-NEXT:    ;;#ASMEND
10429; GFX90A-NEXT:    s_mov_b32 s8, s6
10430; GFX90A-NEXT:    s_mov_b32 s9, s6
10431; GFX90A-NEXT:    s_mov_b32 s11, s10
10432; GFX90A-NEXT:    ;;#ASMSTART
10433; GFX90A-NEXT:    ; use s[8:11]
10434; GFX90A-NEXT:    ;;#ASMEND
10435; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10436;
10437; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_2:
10438; GFX940:       ; %bb.0:
10439; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10440; GFX940-NEXT:    ;;#ASMSTART
10441; GFX940-NEXT:    ; def s[8:10]
10442; GFX940-NEXT:    ;;#ASMEND
10443; GFX940-NEXT:    ;;#ASMSTART
10444; GFX940-NEXT:    ; def s[0:2]
10445; GFX940-NEXT:    ;;#ASMEND
10446; GFX940-NEXT:    s_mov_b32 s8, s2
10447; GFX940-NEXT:    s_mov_b32 s9, s2
10448; GFX940-NEXT:    s_mov_b32 s11, s10
10449; GFX940-NEXT:    ;;#ASMSTART
10450; GFX940-NEXT:    ; use s[8:11]
10451; GFX940-NEXT:    ;;#ASMEND
10452; GFX940-NEXT:    s_setpc_b64 s[30:31]
10453  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10454  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10455  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
10456  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10457  ret void
10458}
10459
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, 5, poison, 2>. Mask indices
; 0-2 select lanes of %vec0 and 3-5 select lanes of %vec1; result lane 2 is
; poison, and the checks expect no move into s10. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10460define void @s_shuffle_v4i32_v3i32__5_5_u_2() {
10461; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2:
10462; GFX900:       ; %bb.0:
10463; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10464; GFX900-NEXT:    ;;#ASMSTART
10465; GFX900-NEXT:    ; def s[8:10]
10466; GFX900-NEXT:    ;;#ASMEND
10467; GFX900-NEXT:    ;;#ASMSTART
10468; GFX900-NEXT:    ; def s[4:6]
10469; GFX900-NEXT:    ;;#ASMEND
10470; GFX900-NEXT:    s_mov_b32 s8, s10
10471; GFX900-NEXT:    s_mov_b32 s9, s10
10472; GFX900-NEXT:    s_mov_b32 s11, s6
10473; GFX900-NEXT:    ;;#ASMSTART
10474; GFX900-NEXT:    ; use s[8:11]
10475; GFX900-NEXT:    ;;#ASMEND
10476; GFX900-NEXT:    s_setpc_b64 s[30:31]
10477;
10478; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2:
10479; GFX90A:       ; %bb.0:
10480; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10481; GFX90A-NEXT:    ;;#ASMSTART
10482; GFX90A-NEXT:    ; def s[8:10]
10483; GFX90A-NEXT:    ;;#ASMEND
10484; GFX90A-NEXT:    ;;#ASMSTART
10485; GFX90A-NEXT:    ; def s[4:6]
10486; GFX90A-NEXT:    ;;#ASMEND
10487; GFX90A-NEXT:    s_mov_b32 s8, s10
10488; GFX90A-NEXT:    s_mov_b32 s9, s10
10489; GFX90A-NEXT:    s_mov_b32 s11, s6
10490; GFX90A-NEXT:    ;;#ASMSTART
10491; GFX90A-NEXT:    ; use s[8:11]
10492; GFX90A-NEXT:    ;;#ASMEND
10493; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10494;
10495; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_2:
10496; GFX940:       ; %bb.0:
10497; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10498; GFX940-NEXT:    ;;#ASMSTART
10499; GFX940-NEXT:    ; def s[0:2]
10500; GFX940-NEXT:    ;;#ASMEND
10501; GFX940-NEXT:    ;;#ASMSTART
10502; GFX940-NEXT:    ; def s[4:6]
10503; GFX940-NEXT:    ;;#ASMEND
10504; GFX940-NEXT:    s_mov_b32 s8, s6
10505; GFX940-NEXT:    s_mov_b32 s9, s6
10506; GFX940-NEXT:    s_mov_b32 s11, s2
10507; GFX940-NEXT:    ;;#ASMSTART
10508; GFX940-NEXT:    ; use s[8:11]
10509; GFX940-NEXT:    ;;#ASMEND
10510; GFX940-NEXT:    s_setpc_b64 s[30:31]
10511  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10512  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10513  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
10514  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10515  ret void
10516}
10517
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, 5, 0, 2>. Mask indices 0-2
; select lanes of %vec0 and 3-5 select lanes of %vec1. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10518define void @s_shuffle_v4i32_v3i32__5_5_0_2() {
10519; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_2:
10520; GFX900:       ; %bb.0:
10521; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10522; GFX900-NEXT:    ;;#ASMSTART
10523; GFX900-NEXT:    ; def s[8:10]
10524; GFX900-NEXT:    ;;#ASMEND
10525; GFX900-NEXT:    ;;#ASMSTART
10526; GFX900-NEXT:    ; def s[4:6]
10527; GFX900-NEXT:    ;;#ASMEND
10528; GFX900-NEXT:    s_mov_b32 s8, s10
10529; GFX900-NEXT:    s_mov_b32 s9, s10
10530; GFX900-NEXT:    s_mov_b32 s10, s4
10531; GFX900-NEXT:    s_mov_b32 s11, s6
10532; GFX900-NEXT:    ;;#ASMSTART
10533; GFX900-NEXT:    ; use s[8:11]
10534; GFX900-NEXT:    ;;#ASMEND
10535; GFX900-NEXT:    s_setpc_b64 s[30:31]
10536;
10537; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_2:
10538; GFX90A:       ; %bb.0:
10539; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10540; GFX90A-NEXT:    ;;#ASMSTART
10541; GFX90A-NEXT:    ; def s[8:10]
10542; GFX90A-NEXT:    ;;#ASMEND
10543; GFX90A-NEXT:    ;;#ASMSTART
10544; GFX90A-NEXT:    ; def s[4:6]
10545; GFX90A-NEXT:    ;;#ASMEND
10546; GFX90A-NEXT:    s_mov_b32 s8, s10
10547; GFX90A-NEXT:    s_mov_b32 s9, s10
10548; GFX90A-NEXT:    s_mov_b32 s10, s4
10549; GFX90A-NEXT:    s_mov_b32 s11, s6
10550; GFX90A-NEXT:    ;;#ASMSTART
10551; GFX90A-NEXT:    ; use s[8:11]
10552; GFX90A-NEXT:    ;;#ASMEND
10553; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10554;
10555; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_2:
10556; GFX940:       ; %bb.0:
10557; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10558; GFX940-NEXT:    ;;#ASMSTART
10559; GFX940-NEXT:    ; def s[0:2]
10560; GFX940-NEXT:    ;;#ASMEND
10561; GFX940-NEXT:    ;;#ASMSTART
10562; GFX940-NEXT:    ; def s[4:6]
10563; GFX940-NEXT:    ;;#ASMEND
10564; GFX940-NEXT:    s_mov_b32 s8, s6
10565; GFX940-NEXT:    s_mov_b32 s9, s6
10566; GFX940-NEXT:    s_mov_b32 s10, s0
10567; GFX940-NEXT:    s_mov_b32 s11, s2
10568; GFX940-NEXT:    ;;#ASMSTART
10569; GFX940-NEXT:    ; use s[8:11]
10570; GFX940-NEXT:    ;;#ASMEND
10571; GFX940-NEXT:    s_setpc_b64 s[30:31]
10572  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10573  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10574  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
10575  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10576  ret void
10577}
10578
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, 5, 1, 2>. Mask indices 0-2
; select lanes of %vec0 and 3-5 select lanes of %vec1. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10579define void @s_shuffle_v4i32_v3i32__5_5_1_2() {
10580; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_2:
10581; GFX900:       ; %bb.0:
10582; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10583; GFX900-NEXT:    ;;#ASMSTART
10584; GFX900-NEXT:    ; def s[8:10]
10585; GFX900-NEXT:    ;;#ASMEND
10586; GFX900-NEXT:    ;;#ASMSTART
10587; GFX900-NEXT:    ; def s[4:6]
10588; GFX900-NEXT:    ;;#ASMEND
10589; GFX900-NEXT:    s_mov_b32 s8, s10
10590; GFX900-NEXT:    s_mov_b32 s9, s10
10591; GFX900-NEXT:    s_mov_b32 s10, s5
10592; GFX900-NEXT:    s_mov_b32 s11, s6
10593; GFX900-NEXT:    ;;#ASMSTART
10594; GFX900-NEXT:    ; use s[8:11]
10595; GFX900-NEXT:    ;;#ASMEND
10596; GFX900-NEXT:    s_setpc_b64 s[30:31]
10597;
10598; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_2:
10599; GFX90A:       ; %bb.0:
10600; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10601; GFX90A-NEXT:    ;;#ASMSTART
10602; GFX90A-NEXT:    ; def s[8:10]
10603; GFX90A-NEXT:    ;;#ASMEND
10604; GFX90A-NEXT:    ;;#ASMSTART
10605; GFX90A-NEXT:    ; def s[4:6]
10606; GFX90A-NEXT:    ;;#ASMEND
10607; GFX90A-NEXT:    s_mov_b32 s8, s10
10608; GFX90A-NEXT:    s_mov_b32 s9, s10
10609; GFX90A-NEXT:    s_mov_b32 s10, s5
10610; GFX90A-NEXT:    s_mov_b32 s11, s6
10611; GFX90A-NEXT:    ;;#ASMSTART
10612; GFX90A-NEXT:    ; use s[8:11]
10613; GFX90A-NEXT:    ;;#ASMEND
10614; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10615;
10616; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_2:
10617; GFX940:       ; %bb.0:
10618; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10619; GFX940-NEXT:    ;;#ASMSTART
10620; GFX940-NEXT:    ; def s[0:2]
10621; GFX940-NEXT:    ;;#ASMEND
10622; GFX940-NEXT:    ;;#ASMSTART
10623; GFX940-NEXT:    ; def s[4:6]
10624; GFX940-NEXT:    ;;#ASMEND
10625; GFX940-NEXT:    s_mov_b32 s8, s6
10626; GFX940-NEXT:    s_mov_b32 s9, s6
10627; GFX940-NEXT:    s_mov_b32 s10, s1
10628; GFX940-NEXT:    s_mov_b32 s11, s2
10629; GFX940-NEXT:    ;;#ASMSTART
10630; GFX940-NEXT:    ; use s[8:11]
10631; GFX940-NEXT:    ;;#ASMEND
10632; GFX940-NEXT:    s_setpc_b64 s[30:31]
10633  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10634  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10635  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
10636  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10637  ret void
10638}
10639
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, 5, 3, 2>. Mask indices 0-2
; select lanes of %vec0 and 3-5 select lanes of %vec1. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10640define void @s_shuffle_v4i32_v3i32__5_5_3_2() {
10641; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_2:
10642; GFX900:       ; %bb.0:
10643; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10644; GFX900-NEXT:    ;;#ASMSTART
10645; GFX900-NEXT:    ; def s[4:6]
10646; GFX900-NEXT:    ;;#ASMEND
10647; GFX900-NEXT:    ;;#ASMSTART
10648; GFX900-NEXT:    ; def s[12:14]
10649; GFX900-NEXT:    ;;#ASMEND
10650; GFX900-NEXT:    s_mov_b32 s8, s14
10651; GFX900-NEXT:    s_mov_b32 s9, s14
10652; GFX900-NEXT:    s_mov_b32 s10, s12
10653; GFX900-NEXT:    s_mov_b32 s11, s6
10654; GFX900-NEXT:    ;;#ASMSTART
10655; GFX900-NEXT:    ; use s[8:11]
10656; GFX900-NEXT:    ;;#ASMEND
10657; GFX900-NEXT:    s_setpc_b64 s[30:31]
10658;
10659; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_2:
10660; GFX90A:       ; %bb.0:
10661; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10662; GFX90A-NEXT:    ;;#ASMSTART
10663; GFX90A-NEXT:    ; def s[4:6]
10664; GFX90A-NEXT:    ;;#ASMEND
10665; GFX90A-NEXT:    ;;#ASMSTART
10666; GFX90A-NEXT:    ; def s[12:14]
10667; GFX90A-NEXT:    ;;#ASMEND
10668; GFX90A-NEXT:    s_mov_b32 s8, s14
10669; GFX90A-NEXT:    s_mov_b32 s9, s14
10670; GFX90A-NEXT:    s_mov_b32 s10, s12
10671; GFX90A-NEXT:    s_mov_b32 s11, s6
10672; GFX90A-NEXT:    ;;#ASMSTART
10673; GFX90A-NEXT:    ; use s[8:11]
10674; GFX90A-NEXT:    ;;#ASMEND
10675; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10676;
10677; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_2:
10678; GFX940:       ; %bb.0:
10679; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10680; GFX940-NEXT:    ;;#ASMSTART
10681; GFX940-NEXT:    ; def s[0:2]
10682; GFX940-NEXT:    ;;#ASMEND
10683; GFX940-NEXT:    ;;#ASMSTART
10684; GFX940-NEXT:    ; def s[4:6]
10685; GFX940-NEXT:    ;;#ASMEND
10686; GFX940-NEXT:    s_mov_b32 s8, s6
10687; GFX940-NEXT:    s_mov_b32 s9, s6
10688; GFX940-NEXT:    s_mov_b32 s10, s4
10689; GFX940-NEXT:    s_mov_b32 s11, s2
10690; GFX940-NEXT:    ;;#ASMSTART
10691; GFX940-NEXT:    ; use s[8:11]
10692; GFX940-NEXT:    ;;#ASMEND
10693; GFX940-NEXT:    s_setpc_b64 s[30:31]
10694  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10695  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10696  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
10697  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10698  ret void
10699}
10700
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, 5, 4, 2>. Mask indices 0-2
; select lanes of %vec0 and 3-5 select lanes of %vec1. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10701define void @s_shuffle_v4i32_v3i32__5_5_4_2() {
10702; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_2:
10703; GFX900:       ; %bb.0:
10704; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10705; GFX900-NEXT:    ;;#ASMSTART
10706; GFX900-NEXT:    ; def s[4:6]
10707; GFX900-NEXT:    ;;#ASMEND
10708; GFX900-NEXT:    ;;#ASMSTART
10709; GFX900-NEXT:    ; def s[12:14]
10710; GFX900-NEXT:    ;;#ASMEND
10711; GFX900-NEXT:    s_mov_b32 s8, s14
10712; GFX900-NEXT:    s_mov_b32 s9, s14
10713; GFX900-NEXT:    s_mov_b32 s10, s13
10714; GFX900-NEXT:    s_mov_b32 s11, s6
10715; GFX900-NEXT:    ;;#ASMSTART
10716; GFX900-NEXT:    ; use s[8:11]
10717; GFX900-NEXT:    ;;#ASMEND
10718; GFX900-NEXT:    s_setpc_b64 s[30:31]
10719;
10720; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_2:
10721; GFX90A:       ; %bb.0:
10722; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10723; GFX90A-NEXT:    ;;#ASMSTART
10724; GFX90A-NEXT:    ; def s[4:6]
10725; GFX90A-NEXT:    ;;#ASMEND
10726; GFX90A-NEXT:    ;;#ASMSTART
10727; GFX90A-NEXT:    ; def s[12:14]
10728; GFX90A-NEXT:    ;;#ASMEND
10729; GFX90A-NEXT:    s_mov_b32 s8, s14
10730; GFX90A-NEXT:    s_mov_b32 s9, s14
10731; GFX90A-NEXT:    s_mov_b32 s10, s13
10732; GFX90A-NEXT:    s_mov_b32 s11, s6
10733; GFX90A-NEXT:    ;;#ASMSTART
10734; GFX90A-NEXT:    ; use s[8:11]
10735; GFX90A-NEXT:    ;;#ASMEND
10736; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10737;
10738; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_2:
10739; GFX940:       ; %bb.0:
10740; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10741; GFX940-NEXT:    ;;#ASMSTART
10742; GFX940-NEXT:    ; def s[0:2]
10743; GFX940-NEXT:    ;;#ASMEND
10744; GFX940-NEXT:    ;;#ASMSTART
10745; GFX940-NEXT:    ; def s[4:6]
10746; GFX940-NEXT:    ;;#ASMEND
10747; GFX940-NEXT:    s_mov_b32 s8, s6
10748; GFX940-NEXT:    s_mov_b32 s9, s6
10749; GFX940-NEXT:    s_mov_b32 s10, s5
10750; GFX940-NEXT:    s_mov_b32 s11, s2
10751; GFX940-NEXT:    ;;#ASMSTART
10752; GFX940-NEXT:    ; use s[8:11]
10753; GFX940-NEXT:    ;;#ASMEND
10754; GFX940-NEXT:    s_setpc_b64 s[30:31]
10755  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10756  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10757  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
10758  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10759  ret void
10760}
10761
; Scalar-register test: shuffles %vec0 (a <3 x i32> from inline asm with an "=s"
; constraint) against a poison second operand with mask <poison, 3, 3, 3>.
; Index 3 refers to lane 0 of the poison operand, so no result lane reads
; %vec0; the shared GFX9 checks expect only the "use s[8:11]" asm, with no
; "def" and no moves.
; The check lines are autogenerated (see the NOTE at the top of the file).
10762define void @s_shuffle_v4i32_v3i32__u_3_3_3() {
10763; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_3_3_3:
10764; GFX9:       ; %bb.0:
10765; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10766; GFX9-NEXT:    ;;#ASMSTART
10767; GFX9-NEXT:    ; use s[8:11]
10768; GFX9-NEXT:    ;;#ASMEND
10769; GFX9-NEXT:    s_setpc_b64 s[30:31]
10770  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10771  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
10772  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10773  ret void
10774}
10775
; Scalar-register test: shuffles %vec0 (a <3 x i32> from inline asm with an "=s"
; constraint) against a poison second operand with mask <0, 3, 3, 3>. Result
; lane 0 is lane 0 of %vec0; index 3 refers to the poison operand. The checks
; expect the "def" to land in s[8:10] with no moves before the "use s[8:11]"
; asm (GFX940 additionally expects an s_nop 0 between the two asm blocks).
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10776define void @s_shuffle_v4i32_v3i32__0_3_3_3() {
10777; GFX900-LABEL: s_shuffle_v4i32_v3i32__0_3_3_3:
10778; GFX900:       ; %bb.0:
10779; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10780; GFX900-NEXT:    ;;#ASMSTART
10781; GFX900-NEXT:    ; def s[8:10]
10782; GFX900-NEXT:    ;;#ASMEND
10783; GFX900-NEXT:    ;;#ASMSTART
10784; GFX900-NEXT:    ; use s[8:11]
10785; GFX900-NEXT:    ;;#ASMEND
10786; GFX900-NEXT:    s_setpc_b64 s[30:31]
10787;
10788; GFX90A-LABEL: s_shuffle_v4i32_v3i32__0_3_3_3:
10789; GFX90A:       ; %bb.0:
10790; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10791; GFX90A-NEXT:    ;;#ASMSTART
10792; GFX90A-NEXT:    ; def s[8:10]
10793; GFX90A-NEXT:    ;;#ASMEND
10794; GFX90A-NEXT:    ;;#ASMSTART
10795; GFX90A-NEXT:    ; use s[8:11]
10796; GFX90A-NEXT:    ;;#ASMEND
10797; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10798;
10799; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_3_3_3:
10800; GFX940:       ; %bb.0:
10801; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10802; GFX940-NEXT:    ;;#ASMSTART
10803; GFX940-NEXT:    ; def s[8:10]
10804; GFX940-NEXT:    ;;#ASMEND
10805; GFX940-NEXT:    s_nop 0
10806; GFX940-NEXT:    ;;#ASMSTART
10807; GFX940-NEXT:    ; use s[8:11]
10808; GFX940-NEXT:    ;;#ASMEND
10809; GFX940-NEXT:    s_setpc_b64 s[30:31]
10810  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10811  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
10812  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10813  ret void
10814}
10815
; Scalar-register test: shuffles %vec0 (a <3 x i32> from inline asm with an "=s"
; constraint) against a poison second operand with mask <1, 3, 3, 3>. Result
; lane 0 is lane 1 of %vec0; index 3 refers to the poison operand. The checks
; expect a single move into s8 before the "use s[8:11]" asm.
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10816define void @s_shuffle_v4i32_v3i32__1_3_3_3() {
10817; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3:
10818; GFX900:       ; %bb.0:
10819; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10820; GFX900-NEXT:    ;;#ASMSTART
10821; GFX900-NEXT:    ; def s[4:6]
10822; GFX900-NEXT:    ;;#ASMEND
10823; GFX900-NEXT:    s_mov_b32 s8, s5
10824; GFX900-NEXT:    ;;#ASMSTART
10825; GFX900-NEXT:    ; use s[8:11]
10826; GFX900-NEXT:    ;;#ASMEND
10827; GFX900-NEXT:    s_setpc_b64 s[30:31]
10828;
10829; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3:
10830; GFX90A:       ; %bb.0:
10831; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10832; GFX90A-NEXT:    ;;#ASMSTART
10833; GFX90A-NEXT:    ; def s[4:6]
10834; GFX90A-NEXT:    ;;#ASMEND
10835; GFX90A-NEXT:    s_mov_b32 s8, s5
10836; GFX90A-NEXT:    ;;#ASMSTART
10837; GFX90A-NEXT:    ; use s[8:11]
10838; GFX90A-NEXT:    ;;#ASMEND
10839; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10840;
10841; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_3_3_3:
10842; GFX940:       ; %bb.0:
10843; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10844; GFX940-NEXT:    ;;#ASMSTART
10845; GFX940-NEXT:    ; def s[0:2]
10846; GFX940-NEXT:    ;;#ASMEND
10847; GFX940-NEXT:    s_mov_b32 s8, s1
10848; GFX940-NEXT:    ;;#ASMSTART
10849; GFX940-NEXT:    ; use s[8:11]
10850; GFX940-NEXT:    ;;#ASMEND
10851; GFX940-NEXT:    s_setpc_b64 s[30:31]
10852  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10853  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
10854  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10855  ret void
10856}
10857
; Scalar-register test: shuffles %vec0 (a <3 x i32> from inline asm with an "=s"
; constraint) against a poison second operand with mask <2, 3, 3, 3>. Result
; lane 0 is lane 2 of %vec0; index 3 refers to the poison operand. The checks
; expect a single move into s8 before the "use s[8:11]" asm.
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10858define void @s_shuffle_v4i32_v3i32__2_3_3_3() {
10859; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3:
10860; GFX900:       ; %bb.0:
10861; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10862; GFX900-NEXT:    ;;#ASMSTART
10863; GFX900-NEXT:    ; def s[4:6]
10864; GFX900-NEXT:    ;;#ASMEND
10865; GFX900-NEXT:    s_mov_b32 s8, s6
10866; GFX900-NEXT:    ;;#ASMSTART
10867; GFX900-NEXT:    ; use s[8:11]
10868; GFX900-NEXT:    ;;#ASMEND
10869; GFX900-NEXT:    s_setpc_b64 s[30:31]
10870;
10871; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3:
10872; GFX90A:       ; %bb.0:
10873; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10874; GFX90A-NEXT:    ;;#ASMSTART
10875; GFX90A-NEXT:    ; def s[4:6]
10876; GFX90A-NEXT:    ;;#ASMEND
10877; GFX90A-NEXT:    s_mov_b32 s8, s6
10878; GFX90A-NEXT:    ;;#ASMSTART
10879; GFX90A-NEXT:    ; use s[8:11]
10880; GFX90A-NEXT:    ;;#ASMEND
10881; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10882;
10883; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_3_3_3:
10884; GFX940:       ; %bb.0:
10885; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10886; GFX940-NEXT:    ;;#ASMSTART
10887; GFX940-NEXT:    ; def s[0:2]
10888; GFX940-NEXT:    ;;#ASMEND
10889; GFX940-NEXT:    s_mov_b32 s8, s2
10890; GFX940-NEXT:    ;;#ASMSTART
10891; GFX940-NEXT:    ; use s[8:11]
10892; GFX940-NEXT:    ;;#ASMEND
10893; GFX940-NEXT:    s_setpc_b64 s[30:31]
10894  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10895  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
10896  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10897  ret void
10898}
10899
; Scalar-register test: shuffles %vec0 (a <3 x i32> from inline asm with an "=s"
; constraint) against a poison second operand with mask <3, 3, 3, 3>. Every
; index refers to lane 0 of the poison operand, so no result lane reads %vec0;
; the shared GFX9 checks expect only the "use s[8:11]" asm, with no "def" and
; no moves.
; The check lines are autogenerated (see the NOTE at the top of the file).
10900define void @s_shuffle_v4i32_v3i32__3_3_3_3() {
10901; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_3_3_3:
10902; GFX9:       ; %bb.0:
10903; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10904; GFX9-NEXT:    ;;#ASMSTART
10905; GFX9-NEXT:    ; use s[8:11]
10906; GFX9-NEXT:    ;;#ASMEND
10907; GFX9-NEXT:    s_setpc_b64 s[30:31]
10908  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10909  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
10910  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10911  ret void
10912}
10913
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <4, 3, 3, 3>. Mask indices 3-5
; select lanes of %vec1, so every result lane comes from %vec1 and %vec0 is
; never read; the checks expect a single "def" asm. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10914define void @s_shuffle_v4i32_v3i32__4_3_3_3() {
10915; GFX900-LABEL: s_shuffle_v4i32_v3i32__4_3_3_3:
10916; GFX900:       ; %bb.0:
10917; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10918; GFX900-NEXT:    ;;#ASMSTART
10919; GFX900-NEXT:    ; def s[4:6]
10920; GFX900-NEXT:    ;;#ASMEND
10921; GFX900-NEXT:    s_mov_b32 s8, s5
10922; GFX900-NEXT:    s_mov_b32 s9, s4
10923; GFX900-NEXT:    s_mov_b32 s10, s4
10924; GFX900-NEXT:    s_mov_b32 s11, s4
10925; GFX900-NEXT:    ;;#ASMSTART
10926; GFX900-NEXT:    ; use s[8:11]
10927; GFX900-NEXT:    ;;#ASMEND
10928; GFX900-NEXT:    s_setpc_b64 s[30:31]
10929;
10930; GFX90A-LABEL: s_shuffle_v4i32_v3i32__4_3_3_3:
10931; GFX90A:       ; %bb.0:
10932; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10933; GFX90A-NEXT:    ;;#ASMSTART
10934; GFX90A-NEXT:    ; def s[4:6]
10935; GFX90A-NEXT:    ;;#ASMEND
10936; GFX90A-NEXT:    s_mov_b32 s8, s5
10937; GFX90A-NEXT:    s_mov_b32 s9, s4
10938; GFX90A-NEXT:    s_mov_b32 s10, s4
10939; GFX90A-NEXT:    s_mov_b32 s11, s4
10940; GFX90A-NEXT:    ;;#ASMSTART
10941; GFX90A-NEXT:    ; use s[8:11]
10942; GFX90A-NEXT:    ;;#ASMEND
10943; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10944;
10945; GFX940-LABEL: s_shuffle_v4i32_v3i32__4_3_3_3:
10946; GFX940:       ; %bb.0:
10947; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10948; GFX940-NEXT:    ;;#ASMSTART
10949; GFX940-NEXT:    ; def s[0:2]
10950; GFX940-NEXT:    ;;#ASMEND
10951; GFX940-NEXT:    s_mov_b32 s8, s1
10952; GFX940-NEXT:    s_mov_b32 s9, s0
10953; GFX940-NEXT:    s_mov_b32 s10, s0
10954; GFX940-NEXT:    s_mov_b32 s11, s0
10955; GFX940-NEXT:    ;;#ASMSTART
10956; GFX940-NEXT:    ; use s[8:11]
10957; GFX940-NEXT:    ;;#ASMEND
10958; GFX940-NEXT:    s_setpc_b64 s[30:31]
10959  %vec0 = call <3 x i32> asm "; def $0", "=s"()
10960  %vec1 = call <3 x i32> asm "; def $0", "=s"()
10961  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
10962  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
10963  ret void
10964}
10965
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, 3, 3, 3>. Mask indices 3-5
; select lanes of %vec1, so every result lane comes from %vec1 and %vec0 is
; never read; the checks expect a single "def" asm. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
10966define void @s_shuffle_v4i32_v3i32__5_3_3_3() {
10967; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_3_3:
10968; GFX900:       ; %bb.0:
10969; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10970; GFX900-NEXT:    ;;#ASMSTART
10971; GFX900-NEXT:    ; def s[4:6]
10972; GFX900-NEXT:    ;;#ASMEND
10973; GFX900-NEXT:    s_mov_b32 s8, s6
10974; GFX900-NEXT:    s_mov_b32 s9, s4
10975; GFX900-NEXT:    s_mov_b32 s10, s4
10976; GFX900-NEXT:    s_mov_b32 s11, s4
10977; GFX900-NEXT:    ;;#ASMSTART
10978; GFX900-NEXT:    ; use s[8:11]
10979; GFX900-NEXT:    ;;#ASMEND
10980; GFX900-NEXT:    s_setpc_b64 s[30:31]
10981;
10982; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_3_3:
10983; GFX90A:       ; %bb.0:
10984; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10985; GFX90A-NEXT:    ;;#ASMSTART
10986; GFX90A-NEXT:    ; def s[4:6]
10987; GFX90A-NEXT:    ;;#ASMEND
10988; GFX90A-NEXT:    s_mov_b32 s8, s6
10989; GFX90A-NEXT:    s_mov_b32 s9, s4
10990; GFX90A-NEXT:    s_mov_b32 s10, s4
10991; GFX90A-NEXT:    s_mov_b32 s11, s4
10992; GFX90A-NEXT:    ;;#ASMSTART
10993; GFX90A-NEXT:    ; use s[8:11]
10994; GFX90A-NEXT:    ;;#ASMEND
10995; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10996;
10997; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_3_3:
10998; GFX940:       ; %bb.0:
10999; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11000; GFX940-NEXT:    ;;#ASMSTART
11001; GFX940-NEXT:    ; def s[0:2]
11002; GFX940-NEXT:    ;;#ASMEND
11003; GFX940-NEXT:    s_mov_b32 s8, s2
11004; GFX940-NEXT:    s_mov_b32 s9, s0
11005; GFX940-NEXT:    s_mov_b32 s10, s0
11006; GFX940-NEXT:    s_mov_b32 s11, s0
11007; GFX940-NEXT:    ;;#ASMSTART
11008; GFX940-NEXT:    ; use s[8:11]
11009; GFX940-NEXT:    ;;#ASMEND
11010; GFX940-NEXT:    s_setpc_b64 s[30:31]
11011  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11012  %vec1 = call <3 x i32> asm "; def $0", "=s"()
11013  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
11014  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11015  ret void
11016}
11017
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, poison, 3, 3>. Mask indices
; 3-5 select lanes of %vec1, so every defined result lane comes from %vec1 and
; %vec0 is never read; result lane 1 is poison. The checks expect a single
; "def" asm and no move into s9. The <4 x i32> result is consumed by inline asm
; pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
11018define void @s_shuffle_v4i32_v3i32__5_u_3_3() {
11019; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3:
11020; GFX900:       ; %bb.0:
11021; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11022; GFX900-NEXT:    ;;#ASMSTART
11023; GFX900-NEXT:    ; def s[4:6]
11024; GFX900-NEXT:    ;;#ASMEND
11025; GFX900-NEXT:    s_mov_b32 s8, s6
11026; GFX900-NEXT:    s_mov_b32 s10, s4
11027; GFX900-NEXT:    s_mov_b32 s11, s4
11028; GFX900-NEXT:    ;;#ASMSTART
11029; GFX900-NEXT:    ; use s[8:11]
11030; GFX900-NEXT:    ;;#ASMEND
11031; GFX900-NEXT:    s_setpc_b64 s[30:31]
11032;
11033; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3:
11034; GFX90A:       ; %bb.0:
11035; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11036; GFX90A-NEXT:    ;;#ASMSTART
11037; GFX90A-NEXT:    ; def s[4:6]
11038; GFX90A-NEXT:    ;;#ASMEND
11039; GFX90A-NEXT:    s_mov_b32 s8, s6
11040; GFX90A-NEXT:    s_mov_b32 s10, s4
11041; GFX90A-NEXT:    s_mov_b32 s11, s4
11042; GFX90A-NEXT:    ;;#ASMSTART
11043; GFX90A-NEXT:    ; use s[8:11]
11044; GFX90A-NEXT:    ;;#ASMEND
11045; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11046;
11047; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_3_3:
11048; GFX940:       ; %bb.0:
11049; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11050; GFX940-NEXT:    ;;#ASMSTART
11051; GFX940-NEXT:    ; def s[0:2]
11052; GFX940-NEXT:    ;;#ASMEND
11053; GFX940-NEXT:    s_mov_b32 s8, s2
11054; GFX940-NEXT:    s_mov_b32 s10, s0
11055; GFX940-NEXT:    s_mov_b32 s11, s0
11056; GFX940-NEXT:    ;;#ASMSTART
11057; GFX940-NEXT:    ; use s[8:11]
11058; GFX940-NEXT:    ;;#ASMEND
11059; GFX940-NEXT:    s_setpc_b64 s[30:31]
11060  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11061  %vec1 = call <3 x i32> asm "; def $0", "=s"()
11062  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
11063  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11064  ret void
11065}
11066
; Scalar-register test: shuffles %vec0 and %vec1 (each a <3 x i32> defined by
; inline asm with an "=s" constraint) with mask <5, 0, 3, 3>. Mask indices 0-2
; select lanes of %vec0 and 3-5 select lanes of %vec1. The <4 x i32> result is
; consumed by inline asm pinned to s[8:11].
; The GFX* check lines are autogenerated (see the NOTE at the top of the file).
11067define void @s_shuffle_v4i32_v3i32__5_0_3_3() {
11068; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_3_3:
11069; GFX900:       ; %bb.0:
11070; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11071; GFX900-NEXT:    ;;#ASMSTART
11072; GFX900-NEXT:    ; def s[4:6]
11073; GFX900-NEXT:    ;;#ASMEND
11074; GFX900-NEXT:    ;;#ASMSTART
11075; GFX900-NEXT:    ; def s[12:14]
11076; GFX900-NEXT:    ;;#ASMEND
11077; GFX900-NEXT:    s_mov_b32 s8, s14
11078; GFX900-NEXT:    s_mov_b32 s9, s4
11079; GFX900-NEXT:    s_mov_b32 s10, s12
11080; GFX900-NEXT:    s_mov_b32 s11, s12
11081; GFX900-NEXT:    ;;#ASMSTART
11082; GFX900-NEXT:    ; use s[8:11]
11083; GFX900-NEXT:    ;;#ASMEND
11084; GFX900-NEXT:    s_setpc_b64 s[30:31]
11085;
11086; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_3_3:
11087; GFX90A:       ; %bb.0:
11088; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11089; GFX90A-NEXT:    ;;#ASMSTART
11090; GFX90A-NEXT:    ; def s[4:6]
11091; GFX90A-NEXT:    ;;#ASMEND
11092; GFX90A-NEXT:    ;;#ASMSTART
11093; GFX90A-NEXT:    ; def s[12:14]
11094; GFX90A-NEXT:    ;;#ASMEND
11095; GFX90A-NEXT:    s_mov_b32 s8, s14
11096; GFX90A-NEXT:    s_mov_b32 s9, s4
11097; GFX90A-NEXT:    s_mov_b32 s10, s12
11098; GFX90A-NEXT:    s_mov_b32 s11, s12
11099; GFX90A-NEXT:    ;;#ASMSTART
11100; GFX90A-NEXT:    ; use s[8:11]
11101; GFX90A-NEXT:    ;;#ASMEND
11102; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11103;
11104; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_3_3:
11105; GFX940:       ; %bb.0:
11106; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11107; GFX940-NEXT:    ;;#ASMSTART
11108; GFX940-NEXT:    ; def s[0:2]
11109; GFX940-NEXT:    ;;#ASMEND
11110; GFX940-NEXT:    ;;#ASMSTART
11111; GFX940-NEXT:    ; def s[4:6]
11112; GFX940-NEXT:    ;;#ASMEND
11113; GFX940-NEXT:    s_mov_b32 s8, s6
11114; GFX940-NEXT:    s_mov_b32 s9, s0
11115; GFX940-NEXT:    s_mov_b32 s10, s4
11116; GFX940-NEXT:    s_mov_b32 s11, s4
11117; GFX940-NEXT:    ;;#ASMSTART
11118; GFX940-NEXT:    ; use s[8:11]
11119; GFX940-NEXT:    ;;#ASMEND
11120; GFX940-NEXT:    s_setpc_b64 s[30:31]
11121  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11122  %vec1 = call <3 x i32> asm "; def $0", "=s"()
11123  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
11124  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11125  ret void
11126}
11127
; Scalar-register shuffle with mask <5, 1, 3, 3>:
; result = <%vec1[2], %vec0[1], %vec1[0], %vec1[0]>.
; The expected code has no s_mov into s9: one def is already placed in s[8:10].
11128define void @s_shuffle_v4i32_v3i32__5_1_3_3() {
11129; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_3_3:
11130; GFX900:       ; %bb.0:
11131; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11132; GFX900-NEXT:    ;;#ASMSTART
11133; GFX900-NEXT:    ; def s[8:10]
11134; GFX900-NEXT:    ;;#ASMEND
11135; GFX900-NEXT:    ;;#ASMSTART
11136; GFX900-NEXT:    ; def s[4:6]
11137; GFX900-NEXT:    ;;#ASMEND
11138; GFX900-NEXT:    s_mov_b32 s8, s6
11139; GFX900-NEXT:    s_mov_b32 s10, s4
11140; GFX900-NEXT:    s_mov_b32 s11, s4
11141; GFX900-NEXT:    ;;#ASMSTART
11142; GFX900-NEXT:    ; use s[8:11]
11143; GFX900-NEXT:    ;;#ASMEND
11144; GFX900-NEXT:    s_setpc_b64 s[30:31]
11145;
11146; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_3_3:
11147; GFX90A:       ; %bb.0:
11148; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11149; GFX90A-NEXT:    ;;#ASMSTART
11150; GFX90A-NEXT:    ; def s[8:10]
11151; GFX90A-NEXT:    ;;#ASMEND
11152; GFX90A-NEXT:    ;;#ASMSTART
11153; GFX90A-NEXT:    ; def s[4:6]
11154; GFX90A-NEXT:    ;;#ASMEND
11155; GFX90A-NEXT:    s_mov_b32 s8, s6
11156; GFX90A-NEXT:    s_mov_b32 s10, s4
11157; GFX90A-NEXT:    s_mov_b32 s11, s4
11158; GFX90A-NEXT:    ;;#ASMSTART
11159; GFX90A-NEXT:    ; use s[8:11]
11160; GFX90A-NEXT:    ;;#ASMEND
11161; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11162;
11163; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_3_3:
11164; GFX940:       ; %bb.0:
11165; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11166; GFX940-NEXT:    ;;#ASMSTART
11167; GFX940-NEXT:    ; def s[8:10]
11168; GFX940-NEXT:    ;;#ASMEND
11169; GFX940-NEXT:    ;;#ASMSTART
11170; GFX940-NEXT:    ; def s[0:2]
11171; GFX940-NEXT:    ;;#ASMEND
11172; GFX940-NEXT:    s_mov_b32 s8, s2
11173; GFX940-NEXT:    s_mov_b32 s10, s0
11174; GFX940-NEXT:    s_mov_b32 s11, s0
11175; GFX940-NEXT:    ;;#ASMSTART
11176; GFX940-NEXT:    ; use s[8:11]
11177; GFX940-NEXT:    ;;#ASMEND
11178; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
11179  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11180  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11181  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11182  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11183  ret void
11184}
11185
; Scalar-register shuffle with mask <5, 2, 3, 3>:
; result = <%vec1[2], %vec0[2], %vec1[0], %vec1[0]>.
; The expected code writes all four lanes of s[8:11] with s_mov_b32.
11186define void @s_shuffle_v4i32_v3i32__5_2_3_3() {
11187; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_3_3:
11188; GFX900:       ; %bb.0:
11189; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11190; GFX900-NEXT:    ;;#ASMSTART
11191; GFX900-NEXT:    ; def s[4:6]
11192; GFX900-NEXT:    ;;#ASMEND
11193; GFX900-NEXT:    ;;#ASMSTART
11194; GFX900-NEXT:    ; def s[12:14]
11195; GFX900-NEXT:    ;;#ASMEND
11196; GFX900-NEXT:    s_mov_b32 s8, s14
11197; GFX900-NEXT:    s_mov_b32 s9, s6
11198; GFX900-NEXT:    s_mov_b32 s10, s12
11199; GFX900-NEXT:    s_mov_b32 s11, s12
11200; GFX900-NEXT:    ;;#ASMSTART
11201; GFX900-NEXT:    ; use s[8:11]
11202; GFX900-NEXT:    ;;#ASMEND
11203; GFX900-NEXT:    s_setpc_b64 s[30:31]
11204;
11205; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_3_3:
11206; GFX90A:       ; %bb.0:
11207; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11208; GFX90A-NEXT:    ;;#ASMSTART
11209; GFX90A-NEXT:    ; def s[4:6]
11210; GFX90A-NEXT:    ;;#ASMEND
11211; GFX90A-NEXT:    ;;#ASMSTART
11212; GFX90A-NEXT:    ; def s[12:14]
11213; GFX90A-NEXT:    ;;#ASMEND
11214; GFX90A-NEXT:    s_mov_b32 s8, s14
11215; GFX90A-NEXT:    s_mov_b32 s9, s6
11216; GFX90A-NEXT:    s_mov_b32 s10, s12
11217; GFX90A-NEXT:    s_mov_b32 s11, s12
11218; GFX90A-NEXT:    ;;#ASMSTART
11219; GFX90A-NEXT:    ; use s[8:11]
11220; GFX90A-NEXT:    ;;#ASMEND
11221; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11222;
11223; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_3_3:
11224; GFX940:       ; %bb.0:
11225; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11226; GFX940-NEXT:    ;;#ASMSTART
11227; GFX940-NEXT:    ; def s[0:2]
11228; GFX940-NEXT:    ;;#ASMEND
11229; GFX940-NEXT:    ;;#ASMSTART
11230; GFX940-NEXT:    ; def s[4:6]
11231; GFX940-NEXT:    ;;#ASMEND
11232; GFX940-NEXT:    s_mov_b32 s8, s6
11233; GFX940-NEXT:    s_mov_b32 s9, s2
11234; GFX940-NEXT:    s_mov_b32 s10, s4
11235; GFX940-NEXT:    s_mov_b32 s11, s4
11236; GFX940-NEXT:    ;;#ASMSTART
11237; GFX940-NEXT:    ; use s[8:11]
11238; GFX940-NEXT:    ;;#ASMEND
11239; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
11240  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11241  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11242  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11243  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11244  ret void
11245}
11246
; Scalar-register shuffle with mask <5, 4, 3, 3>:
; result = <%vec1[2], %vec1[1], %vec1[0], %vec1[0]>.
; Every mask index is >= 3, so only %vec1 is read; the expected code shows a
; single "; def".
11247define void @s_shuffle_v4i32_v3i32__5_4_3_3() {
11248; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_4_3_3:
11249; GFX900:       ; %bb.0:
11250; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11251; GFX900-NEXT:    ;;#ASMSTART
11252; GFX900-NEXT:    ; def s[4:6]
11253; GFX900-NEXT:    ;;#ASMEND
11254; GFX900-NEXT:    s_mov_b32 s8, s6
11255; GFX900-NEXT:    s_mov_b32 s9, s5
11256; GFX900-NEXT:    s_mov_b32 s10, s4
11257; GFX900-NEXT:    s_mov_b32 s11, s4
11258; GFX900-NEXT:    ;;#ASMSTART
11259; GFX900-NEXT:    ; use s[8:11]
11260; GFX900-NEXT:    ;;#ASMEND
11261; GFX900-NEXT:    s_setpc_b64 s[30:31]
11262;
11263; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_4_3_3:
11264; GFX90A:       ; %bb.0:
11265; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11266; GFX90A-NEXT:    ;;#ASMSTART
11267; GFX90A-NEXT:    ; def s[4:6]
11268; GFX90A-NEXT:    ;;#ASMEND
11269; GFX90A-NEXT:    s_mov_b32 s8, s6
11270; GFX90A-NEXT:    s_mov_b32 s9, s5
11271; GFX90A-NEXT:    s_mov_b32 s10, s4
11272; GFX90A-NEXT:    s_mov_b32 s11, s4
11273; GFX90A-NEXT:    ;;#ASMSTART
11274; GFX90A-NEXT:    ; use s[8:11]
11275; GFX90A-NEXT:    ;;#ASMEND
11276; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11277;
11278; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_4_3_3:
11279; GFX940:       ; %bb.0:
11280; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11281; GFX940-NEXT:    ;;#ASMSTART
11282; GFX940-NEXT:    ; def s[0:2]
11283; GFX940-NEXT:    ;;#ASMEND
11284; GFX940-NEXT:    s_mov_b32 s8, s2
11285; GFX940-NEXT:    s_mov_b32 s9, s1
11286; GFX940-NEXT:    s_mov_b32 s10, s0
11287; GFX940-NEXT:    s_mov_b32 s11, s0
11288; GFX940-NEXT:    ;;#ASMSTART
11289; GFX940-NEXT:    ; use s[8:11]
11290; GFX940-NEXT:    ;;#ASMEND
11291; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): %vec0 is unused by the mask; its asm is not marked
  ; sideeffect, so it is presumably dropped as dead code.
11292  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11293  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11294  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11295  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11296  ret void
11297}
11298
; Scalar-register shuffle with mask <5, 5, 3, 3>:
; result = <%vec1[2], %vec1[2], %vec1[0], %vec1[0]>.
; Every mask index is >= 3, so only %vec1 is read; the expected code shows a
; single "; def".
11299define void @s_shuffle_v4i32_v3i32__5_5_3_3() {
11300; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_3:
11301; GFX900:       ; %bb.0:
11302; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11303; GFX900-NEXT:    ;;#ASMSTART
11304; GFX900-NEXT:    ; def s[4:6]
11305; GFX900-NEXT:    ;;#ASMEND
11306; GFX900-NEXT:    s_mov_b32 s8, s6
11307; GFX900-NEXT:    s_mov_b32 s9, s6
11308; GFX900-NEXT:    s_mov_b32 s10, s4
11309; GFX900-NEXT:    s_mov_b32 s11, s4
11310; GFX900-NEXT:    ;;#ASMSTART
11311; GFX900-NEXT:    ; use s[8:11]
11312; GFX900-NEXT:    ;;#ASMEND
11313; GFX900-NEXT:    s_setpc_b64 s[30:31]
11314;
11315; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_3:
11316; GFX90A:       ; %bb.0:
11317; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11318; GFX90A-NEXT:    ;;#ASMSTART
11319; GFX90A-NEXT:    ; def s[4:6]
11320; GFX90A-NEXT:    ;;#ASMEND
11321; GFX90A-NEXT:    s_mov_b32 s8, s6
11322; GFX90A-NEXT:    s_mov_b32 s9, s6
11323; GFX90A-NEXT:    s_mov_b32 s10, s4
11324; GFX90A-NEXT:    s_mov_b32 s11, s4
11325; GFX90A-NEXT:    ;;#ASMSTART
11326; GFX90A-NEXT:    ; use s[8:11]
11327; GFX90A-NEXT:    ;;#ASMEND
11328; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11329;
11330; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_3:
11331; GFX940:       ; %bb.0:
11332; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11333; GFX940-NEXT:    ;;#ASMSTART
11334; GFX940-NEXT:    ; def s[0:2]
11335; GFX940-NEXT:    ;;#ASMEND
11336; GFX940-NEXT:    s_mov_b32 s8, s2
11337; GFX940-NEXT:    s_mov_b32 s9, s2
11338; GFX940-NEXT:    s_mov_b32 s10, s0
11339; GFX940-NEXT:    s_mov_b32 s11, s0
11340; GFX940-NEXT:    ;;#ASMSTART
11341; GFX940-NEXT:    ; use s[8:11]
11342; GFX940-NEXT:    ;;#ASMEND
11343; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): %vec0 is unused by the mask; its asm is not marked
  ; sideeffect, so it is presumably dropped as dead code.
11344  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11345  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11346  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11347  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11348  ret void
11349}
11350
; Scalar-register shuffle with mask <5, 5, poison, 3>:
; result = <%vec1[2], %vec1[2], poison, %vec1[0]>.
; Lane 2 is poison and the expected code contains no write to s10.
11351define void @s_shuffle_v4i32_v3i32__5_5_u_3() {
11352; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3:
11353; GFX900:       ; %bb.0:
11354; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11355; GFX900-NEXT:    ;;#ASMSTART
11356; GFX900-NEXT:    ; def s[4:6]
11357; GFX900-NEXT:    ;;#ASMEND
11358; GFX900-NEXT:    s_mov_b32 s8, s6
11359; GFX900-NEXT:    s_mov_b32 s9, s6
11360; GFX900-NEXT:    s_mov_b32 s11, s4
11361; GFX900-NEXT:    ;;#ASMSTART
11362; GFX900-NEXT:    ; use s[8:11]
11363; GFX900-NEXT:    ;;#ASMEND
11364; GFX900-NEXT:    s_setpc_b64 s[30:31]
11365;
11366; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3:
11367; GFX90A:       ; %bb.0:
11368; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11369; GFX90A-NEXT:    ;;#ASMSTART
11370; GFX90A-NEXT:    ; def s[4:6]
11371; GFX90A-NEXT:    ;;#ASMEND
11372; GFX90A-NEXT:    s_mov_b32 s8, s6
11373; GFX90A-NEXT:    s_mov_b32 s9, s6
11374; GFX90A-NEXT:    s_mov_b32 s11, s4
11375; GFX90A-NEXT:    ;;#ASMSTART
11376; GFX90A-NEXT:    ; use s[8:11]
11377; GFX90A-NEXT:    ;;#ASMEND
11378; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11379;
11380; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_3:
11381; GFX940:       ; %bb.0:
11382; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11383; GFX940-NEXT:    ;;#ASMSTART
11384; GFX940-NEXT:    ; def s[0:2]
11385; GFX940-NEXT:    ;;#ASMEND
11386; GFX940-NEXT:    s_mov_b32 s8, s2
11387; GFX940-NEXT:    s_mov_b32 s9, s2
11388; GFX940-NEXT:    s_mov_b32 s11, s0
11389; GFX940-NEXT:    ;;#ASMSTART
11390; GFX940-NEXT:    ; use s[8:11]
11391; GFX940-NEXT:    ;;#ASMEND
11392; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): %vec0 is unused by the mask; its asm is not marked
  ; sideeffect, so it is presumably dropped as dead code.
11393  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11394  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11395  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11396  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11397  ret void
11398}
11399
; Scalar-register shuffle with mask <5, 5, 0, 3>:
; result = <%vec1[2], %vec1[2], %vec0[0], %vec1[0]>.
; The expected code writes all four lanes of s[8:11] with s_mov_b32.
11400define void @s_shuffle_v4i32_v3i32__5_5_0_3() {
11401; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_3:
11402; GFX900:       ; %bb.0:
11403; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11404; GFX900-NEXT:    ;;#ASMSTART
11405; GFX900-NEXT:    ; def s[4:6]
11406; GFX900-NEXT:    ;;#ASMEND
11407; GFX900-NEXT:    ;;#ASMSTART
11408; GFX900-NEXT:    ; def s[12:14]
11409; GFX900-NEXT:    ;;#ASMEND
11410; GFX900-NEXT:    s_mov_b32 s8, s14
11411; GFX900-NEXT:    s_mov_b32 s9, s14
11412; GFX900-NEXT:    s_mov_b32 s10, s4
11413; GFX900-NEXT:    s_mov_b32 s11, s12
11414; GFX900-NEXT:    ;;#ASMSTART
11415; GFX900-NEXT:    ; use s[8:11]
11416; GFX900-NEXT:    ;;#ASMEND
11417; GFX900-NEXT:    s_setpc_b64 s[30:31]
11418;
11419; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_3:
11420; GFX90A:       ; %bb.0:
11421; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11422; GFX90A-NEXT:    ;;#ASMSTART
11423; GFX90A-NEXT:    ; def s[4:6]
11424; GFX90A-NEXT:    ;;#ASMEND
11425; GFX90A-NEXT:    ;;#ASMSTART
11426; GFX90A-NEXT:    ; def s[12:14]
11427; GFX90A-NEXT:    ;;#ASMEND
11428; GFX90A-NEXT:    s_mov_b32 s8, s14
11429; GFX90A-NEXT:    s_mov_b32 s9, s14
11430; GFX90A-NEXT:    s_mov_b32 s10, s4
11431; GFX90A-NEXT:    s_mov_b32 s11, s12
11432; GFX90A-NEXT:    ;;#ASMSTART
11433; GFX90A-NEXT:    ; use s[8:11]
11434; GFX90A-NEXT:    ;;#ASMEND
11435; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11436;
11437; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_3:
11438; GFX940:       ; %bb.0:
11439; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11440; GFX940-NEXT:    ;;#ASMSTART
11441; GFX940-NEXT:    ; def s[0:2]
11442; GFX940-NEXT:    ;;#ASMEND
11443; GFX940-NEXT:    ;;#ASMSTART
11444; GFX940-NEXT:    ; def s[4:6]
11445; GFX940-NEXT:    ;;#ASMEND
11446; GFX940-NEXT:    s_mov_b32 s8, s6
11447; GFX940-NEXT:    s_mov_b32 s9, s6
11448; GFX940-NEXT:    s_mov_b32 s10, s0
11449; GFX940-NEXT:    s_mov_b32 s11, s4
11450; GFX940-NEXT:    ;;#ASMSTART
11451; GFX940-NEXT:    ; use s[8:11]
11452; GFX940-NEXT:    ;;#ASMEND
11453; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
11454  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11455  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11456  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11457  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11458  ret void
11459}
11460
; Scalar-register shuffle with mask <5, 5, 1, 3>:
; result = <%vec1[2], %vec1[2], %vec0[1], %vec1[0]>.
; The expected code writes all four lanes of s[8:11] with s_mov_b32.
11461define void @s_shuffle_v4i32_v3i32__5_5_1_3() {
11462; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_3:
11463; GFX900:       ; %bb.0:
11464; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11465; GFX900-NEXT:    ;;#ASMSTART
11466; GFX900-NEXT:    ; def s[4:6]
11467; GFX900-NEXT:    ;;#ASMEND
11468; GFX900-NEXT:    ;;#ASMSTART
11469; GFX900-NEXT:    ; def s[12:14]
11470; GFX900-NEXT:    ;;#ASMEND
11471; GFX900-NEXT:    s_mov_b32 s8, s14
11472; GFX900-NEXT:    s_mov_b32 s9, s14
11473; GFX900-NEXT:    s_mov_b32 s10, s5
11474; GFX900-NEXT:    s_mov_b32 s11, s12
11475; GFX900-NEXT:    ;;#ASMSTART
11476; GFX900-NEXT:    ; use s[8:11]
11477; GFX900-NEXT:    ;;#ASMEND
11478; GFX900-NEXT:    s_setpc_b64 s[30:31]
11479;
11480; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_3:
11481; GFX90A:       ; %bb.0:
11482; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11483; GFX90A-NEXT:    ;;#ASMSTART
11484; GFX90A-NEXT:    ; def s[4:6]
11485; GFX90A-NEXT:    ;;#ASMEND
11486; GFX90A-NEXT:    ;;#ASMSTART
11487; GFX90A-NEXT:    ; def s[12:14]
11488; GFX90A-NEXT:    ;;#ASMEND
11489; GFX90A-NEXT:    s_mov_b32 s8, s14
11490; GFX90A-NEXT:    s_mov_b32 s9, s14
11491; GFX90A-NEXT:    s_mov_b32 s10, s5
11492; GFX90A-NEXT:    s_mov_b32 s11, s12
11493; GFX90A-NEXT:    ;;#ASMSTART
11494; GFX90A-NEXT:    ; use s[8:11]
11495; GFX90A-NEXT:    ;;#ASMEND
11496; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11497;
11498; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_3:
11499; GFX940:       ; %bb.0:
11500; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11501; GFX940-NEXT:    ;;#ASMSTART
11502; GFX940-NEXT:    ; def s[0:2]
11503; GFX940-NEXT:    ;;#ASMEND
11504; GFX940-NEXT:    ;;#ASMSTART
11505; GFX940-NEXT:    ; def s[4:6]
11506; GFX940-NEXT:    ;;#ASMEND
11507; GFX940-NEXT:    s_mov_b32 s8, s6
11508; GFX940-NEXT:    s_mov_b32 s9, s6
11509; GFX940-NEXT:    s_mov_b32 s10, s1
11510; GFX940-NEXT:    s_mov_b32 s11, s4
11511; GFX940-NEXT:    ;;#ASMSTART
11512; GFX940-NEXT:    ; use s[8:11]
11513; GFX940-NEXT:    ;;#ASMEND
11514; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
11515  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11516  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11517  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11518  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11519  ret void
11520}
11521
; Scalar-register shuffle with mask <5, 5, 2, 3>:
; result = <%vec1[2], %vec1[2], %vec0[2], %vec1[0]>.
; The expected code has no s_mov into s10: one def is already placed in s[8:10].
11522define void @s_shuffle_v4i32_v3i32__5_5_2_3() {
11523; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_3:
11524; GFX900:       ; %bb.0:
11525; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11526; GFX900-NEXT:    ;;#ASMSTART
11527; GFX900-NEXT:    ; def s[8:10]
11528; GFX900-NEXT:    ;;#ASMEND
11529; GFX900-NEXT:    ;;#ASMSTART
11530; GFX900-NEXT:    ; def s[4:6]
11531; GFX900-NEXT:    ;;#ASMEND
11532; GFX900-NEXT:    s_mov_b32 s8, s6
11533; GFX900-NEXT:    s_mov_b32 s9, s6
11534; GFX900-NEXT:    s_mov_b32 s11, s4
11535; GFX900-NEXT:    ;;#ASMSTART
11536; GFX900-NEXT:    ; use s[8:11]
11537; GFX900-NEXT:    ;;#ASMEND
11538; GFX900-NEXT:    s_setpc_b64 s[30:31]
11539;
11540; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_3:
11541; GFX90A:       ; %bb.0:
11542; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11543; GFX90A-NEXT:    ;;#ASMSTART
11544; GFX90A-NEXT:    ; def s[8:10]
11545; GFX90A-NEXT:    ;;#ASMEND
11546; GFX90A-NEXT:    ;;#ASMSTART
11547; GFX90A-NEXT:    ; def s[4:6]
11548; GFX90A-NEXT:    ;;#ASMEND
11549; GFX90A-NEXT:    s_mov_b32 s8, s6
11550; GFX90A-NEXT:    s_mov_b32 s9, s6
11551; GFX90A-NEXT:    s_mov_b32 s11, s4
11552; GFX90A-NEXT:    ;;#ASMSTART
11553; GFX90A-NEXT:    ; use s[8:11]
11554; GFX90A-NEXT:    ;;#ASMEND
11555; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11556;
11557; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_3:
11558; GFX940:       ; %bb.0:
11559; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11560; GFX940-NEXT:    ;;#ASMSTART
11561; GFX940-NEXT:    ; def s[8:10]
11562; GFX940-NEXT:    ;;#ASMEND
11563; GFX940-NEXT:    ;;#ASMSTART
11564; GFX940-NEXT:    ; def s[0:2]
11565; GFX940-NEXT:    ;;#ASMEND
11566; GFX940-NEXT:    s_mov_b32 s8, s2
11567; GFX940-NEXT:    s_mov_b32 s9, s2
11568; GFX940-NEXT:    s_mov_b32 s11, s0
11569; GFX940-NEXT:    ;;#ASMSTART
11570; GFX940-NEXT:    ; use s[8:11]
11571; GFX940-NEXT:    ;;#ASMEND
11572; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
11573  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11574  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11575  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11576  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11577  ret void
11578}
11579
; Scalar-register shuffle with mask <5, 5, 4, 3>:
; result = <%vec1[2], %vec1[2], %vec1[1], %vec1[0]>.
; Every mask index is >= 3, so only %vec1 is read; the expected code shows a
; single "; def".
11580define void @s_shuffle_v4i32_v3i32__5_5_4_3() {
11581; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_3:
11582; GFX900:       ; %bb.0:
11583; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11584; GFX900-NEXT:    ;;#ASMSTART
11585; GFX900-NEXT:    ; def s[4:6]
11586; GFX900-NEXT:    ;;#ASMEND
11587; GFX900-NEXT:    s_mov_b32 s8, s6
11588; GFX900-NEXT:    s_mov_b32 s9, s6
11589; GFX900-NEXT:    s_mov_b32 s10, s5
11590; GFX900-NEXT:    s_mov_b32 s11, s4
11591; GFX900-NEXT:    ;;#ASMSTART
11592; GFX900-NEXT:    ; use s[8:11]
11593; GFX900-NEXT:    ;;#ASMEND
11594; GFX900-NEXT:    s_setpc_b64 s[30:31]
11595;
11596; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_3:
11597; GFX90A:       ; %bb.0:
11598; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11599; GFX90A-NEXT:    ;;#ASMSTART
11600; GFX90A-NEXT:    ; def s[4:6]
11601; GFX90A-NEXT:    ;;#ASMEND
11602; GFX90A-NEXT:    s_mov_b32 s8, s6
11603; GFX90A-NEXT:    s_mov_b32 s9, s6
11604; GFX90A-NEXT:    s_mov_b32 s10, s5
11605; GFX90A-NEXT:    s_mov_b32 s11, s4
11606; GFX90A-NEXT:    ;;#ASMSTART
11607; GFX90A-NEXT:    ; use s[8:11]
11608; GFX90A-NEXT:    ;;#ASMEND
11609; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11610;
11611; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_3:
11612; GFX940:       ; %bb.0:
11613; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11614; GFX940-NEXT:    ;;#ASMSTART
11615; GFX940-NEXT:    ; def s[0:2]
11616; GFX940-NEXT:    ;;#ASMEND
11617; GFX940-NEXT:    s_mov_b32 s8, s2
11618; GFX940-NEXT:    s_mov_b32 s9, s2
11619; GFX940-NEXT:    s_mov_b32 s10, s1
11620; GFX940-NEXT:    s_mov_b32 s11, s0
11621; GFX940-NEXT:    ;;#ASMSTART
11622; GFX940-NEXT:    ; use s[8:11]
11623; GFX940-NEXT:    ;;#ASMEND
11624; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): %vec0 is unused by the mask; its asm is not marked
  ; sideeffect, so it is presumably dropped as dead code.
11625  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11626  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11627  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11628  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11629  ret void
11630}
11631
; Scalar-register shuffle with mask <poison, 4, 4, 4>:
; result = <poison, %vec1[1], %vec1[1], %vec1[1]>.
; All three RUN configurations share one expected sequence (common GFX9 prefix).
; The single def lands in s[8:10], so only s10 and s11 are written from s9.
11632define void @s_shuffle_v4i32_v3i32__u_4_4_4() {
11633; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_4_4_4:
11634; GFX9:       ; %bb.0:
11635; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11636; GFX9-NEXT:    ;;#ASMSTART
11637; GFX9-NEXT:    ; def s[8:10]
11638; GFX9-NEXT:    ;;#ASMEND
11639; GFX9-NEXT:    s_mov_b32 s10, s9
11640; GFX9-NEXT:    s_mov_b32 s11, s9
11641; GFX9-NEXT:    ;;#ASMSTART
11642; GFX9-NEXT:    ; use s[8:11]
11643; GFX9-NEXT:    ;;#ASMEND
11644; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): %vec0 is unused by the mask; its asm is not marked
  ; sideeffect, so it is presumably dropped as dead code.
11645  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11646  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11647  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11648  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11649  ret void
11650}
11651
; Scalar-register shuffle with mask <0, 4, 4, 4>:
; result = <%vec0[0], %vec1[1], %vec1[1], %vec1[1]>.
; The expected code has no s_mov into s8: one def is already placed in s[8:10].
11652define void @s_shuffle_v4i32_v3i32__0_4_4_4() {
11653; GFX900-LABEL: s_shuffle_v4i32_v3i32__0_4_4_4:
11654; GFX900:       ; %bb.0:
11655; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11656; GFX900-NEXT:    ;;#ASMSTART
11657; GFX900-NEXT:    ; def s[8:10]
11658; GFX900-NEXT:    ;;#ASMEND
11659; GFX900-NEXT:    ;;#ASMSTART
11660; GFX900-NEXT:    ; def s[4:6]
11661; GFX900-NEXT:    ;;#ASMEND
11662; GFX900-NEXT:    s_mov_b32 s9, s5
11663; GFX900-NEXT:    s_mov_b32 s10, s5
11664; GFX900-NEXT:    s_mov_b32 s11, s5
11665; GFX900-NEXT:    ;;#ASMSTART
11666; GFX900-NEXT:    ; use s[8:11]
11667; GFX900-NEXT:    ;;#ASMEND
11668; GFX900-NEXT:    s_setpc_b64 s[30:31]
11669;
11670; GFX90A-LABEL: s_shuffle_v4i32_v3i32__0_4_4_4:
11671; GFX90A:       ; %bb.0:
11672; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11673; GFX90A-NEXT:    ;;#ASMSTART
11674; GFX90A-NEXT:    ; def s[8:10]
11675; GFX90A-NEXT:    ;;#ASMEND
11676; GFX90A-NEXT:    ;;#ASMSTART
11677; GFX90A-NEXT:    ; def s[4:6]
11678; GFX90A-NEXT:    ;;#ASMEND
11679; GFX90A-NEXT:    s_mov_b32 s9, s5
11680; GFX90A-NEXT:    s_mov_b32 s10, s5
11681; GFX90A-NEXT:    s_mov_b32 s11, s5
11682; GFX90A-NEXT:    ;;#ASMSTART
11683; GFX90A-NEXT:    ; use s[8:11]
11684; GFX90A-NEXT:    ;;#ASMEND
11685; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11686;
11687; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_4_4_4:
11688; GFX940:       ; %bb.0:
11689; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11690; GFX940-NEXT:    ;;#ASMSTART
11691; GFX940-NEXT:    ; def s[8:10]
11692; GFX940-NEXT:    ;;#ASMEND
11693; GFX940-NEXT:    ;;#ASMSTART
11694; GFX940-NEXT:    ; def s[0:2]
11695; GFX940-NEXT:    ;;#ASMEND
11696; GFX940-NEXT:    s_mov_b32 s9, s1
11697; GFX940-NEXT:    s_mov_b32 s10, s1
11698; GFX940-NEXT:    s_mov_b32 s11, s1
11699; GFX940-NEXT:    ;;#ASMSTART
11700; GFX940-NEXT:    ; use s[8:11]
11701; GFX940-NEXT:    ;;#ASMEND
11702; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
11703  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11704  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11705  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11706  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11707  ret void
11708}
11709
; Scalar-register shuffle with mask <1, 4, 4, 4>:
; result = <%vec0[1], %vec1[1], %vec1[1], %vec1[1]>.
; The expected code has no s_mov into s9: one def is already placed in s[8:10],
; and s9 is then copied into s10 and s11.
11710define void @s_shuffle_v4i32_v3i32__1_4_4_4() {
11711; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_4_4_4:
11712; GFX900:       ; %bb.0:
11713; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11714; GFX900-NEXT:    ;;#ASMSTART
11715; GFX900-NEXT:    ; def s[8:10]
11716; GFX900-NEXT:    ;;#ASMEND
11717; GFX900-NEXT:    ;;#ASMSTART
11718; GFX900-NEXT:    ; def s[4:6]
11719; GFX900-NEXT:    ;;#ASMEND
11720; GFX900-NEXT:    s_mov_b32 s8, s5
11721; GFX900-NEXT:    s_mov_b32 s10, s9
11722; GFX900-NEXT:    s_mov_b32 s11, s9
11723; GFX900-NEXT:    ;;#ASMSTART
11724; GFX900-NEXT:    ; use s[8:11]
11725; GFX900-NEXT:    ;;#ASMEND
11726; GFX900-NEXT:    s_setpc_b64 s[30:31]
11727;
11728; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_4_4_4:
11729; GFX90A:       ; %bb.0:
11730; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11731; GFX90A-NEXT:    ;;#ASMSTART
11732; GFX90A-NEXT:    ; def s[8:10]
11733; GFX90A-NEXT:    ;;#ASMEND
11734; GFX90A-NEXT:    ;;#ASMSTART
11735; GFX90A-NEXT:    ; def s[4:6]
11736; GFX90A-NEXT:    ;;#ASMEND
11737; GFX90A-NEXT:    s_mov_b32 s8, s5
11738; GFX90A-NEXT:    s_mov_b32 s10, s9
11739; GFX90A-NEXT:    s_mov_b32 s11, s9
11740; GFX90A-NEXT:    ;;#ASMSTART
11741; GFX90A-NEXT:    ; use s[8:11]
11742; GFX90A-NEXT:    ;;#ASMEND
11743; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11744;
11745; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_4_4_4:
11746; GFX940:       ; %bb.0:
11747; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11748; GFX940-NEXT:    ;;#ASMSTART
11749; GFX940-NEXT:    ; def s[8:10]
11750; GFX940-NEXT:    ;;#ASMEND
11751; GFX940-NEXT:    ;;#ASMSTART
11752; GFX940-NEXT:    ; def s[0:2]
11753; GFX940-NEXT:    ;;#ASMEND
11754; GFX940-NEXT:    s_mov_b32 s8, s1
11755; GFX940-NEXT:    s_mov_b32 s10, s9
11756; GFX940-NEXT:    s_mov_b32 s11, s9
11757; GFX940-NEXT:    ;;#ASMSTART
11758; GFX940-NEXT:    ; use s[8:11]
11759; GFX940-NEXT:    ;;#ASMEND
11760; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
11761  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11762  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11763  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11764  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11765  ret void
11766}
11767
; Scalar-register shuffle with mask <2, 4, 4, 4>:
; result = <%vec0[2], %vec1[1], %vec1[1], %vec1[1]>.
; The expected code has no s_mov into s9: one def is already placed in s[8:10],
; and s9 is then copied into s10 and s11.
11768define void @s_shuffle_v4i32_v3i32__2_4_4_4() {
11769; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_4_4_4:
11770; GFX900:       ; %bb.0:
11771; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11772; GFX900-NEXT:    ;;#ASMSTART
11773; GFX900-NEXT:    ; def s[8:10]
11774; GFX900-NEXT:    ;;#ASMEND
11775; GFX900-NEXT:    ;;#ASMSTART
11776; GFX900-NEXT:    ; def s[4:6]
11777; GFX900-NEXT:    ;;#ASMEND
11778; GFX900-NEXT:    s_mov_b32 s8, s6
11779; GFX900-NEXT:    s_mov_b32 s10, s9
11780; GFX900-NEXT:    s_mov_b32 s11, s9
11781; GFX900-NEXT:    ;;#ASMSTART
11782; GFX900-NEXT:    ; use s[8:11]
11783; GFX900-NEXT:    ;;#ASMEND
11784; GFX900-NEXT:    s_setpc_b64 s[30:31]
11785;
11786; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_4_4_4:
11787; GFX90A:       ; %bb.0:
11788; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11789; GFX90A-NEXT:    ;;#ASMSTART
11790; GFX90A-NEXT:    ; def s[8:10]
11791; GFX90A-NEXT:    ;;#ASMEND
11792; GFX90A-NEXT:    ;;#ASMSTART
11793; GFX90A-NEXT:    ; def s[4:6]
11794; GFX90A-NEXT:    ;;#ASMEND
11795; GFX90A-NEXT:    s_mov_b32 s8, s6
11796; GFX90A-NEXT:    s_mov_b32 s10, s9
11797; GFX90A-NEXT:    s_mov_b32 s11, s9
11798; GFX90A-NEXT:    ;;#ASMSTART
11799; GFX90A-NEXT:    ; use s[8:11]
11800; GFX90A-NEXT:    ;;#ASMEND
11801; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11802;
11803; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_4_4_4:
11804; GFX940:       ; %bb.0:
11805; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11806; GFX940-NEXT:    ;;#ASMSTART
11807; GFX940-NEXT:    ; def s[8:10]
11808; GFX940-NEXT:    ;;#ASMEND
11809; GFX940-NEXT:    ;;#ASMSTART
11810; GFX940-NEXT:    ; def s[0:2]
11811; GFX940-NEXT:    ;;#ASMEND
11812; GFX940-NEXT:    s_mov_b32 s8, s2
11813; GFX940-NEXT:    s_mov_b32 s10, s9
11814; GFX940-NEXT:    s_mov_b32 s11, s9
11815; GFX940-NEXT:    ;;#ASMSTART
11816; GFX940-NEXT:    ; use s[8:11]
11817; GFX940-NEXT:    ;;#ASMEND
11818; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
11819  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11820  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11821  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11822  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11823  ret void
11824}
11825
; Scalar-register shuffle with mask <3, 4, 4, 4>:
; result = <%vec1[0], %vec1[1], %vec1[1], %vec1[1]>.
; All three RUN configurations share one expected sequence (common GFX9 prefix).
; The single def lands in s[8:10], so lanes 0 and 1 need no move; only s10 and
; s11 are written from s9.
11826define void @s_shuffle_v4i32_v3i32__3_4_4_4() {
11827; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_4_4_4:
11828; GFX9:       ; %bb.0:
11829; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11830; GFX9-NEXT:    ;;#ASMSTART
11831; GFX9-NEXT:    ; def s[8:10]
11832; GFX9-NEXT:    ;;#ASMEND
11833; GFX9-NEXT:    s_mov_b32 s10, s9
11834; GFX9-NEXT:    s_mov_b32 s11, s9
11835; GFX9-NEXT:    ;;#ASMSTART
11836; GFX9-NEXT:    ; use s[8:11]
11837; GFX9-NEXT:    ;;#ASMEND
11838; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): %vec0 is unused by the mask; its asm is not marked
  ; sideeffect, so it is presumably dropped as dead code.
11839  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11840  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11841  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11842  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11843  ret void
11844}
11845
; Scalar-register shuffle with mask <4, 4, 4, 4>: every lane is %vec1[1].
; All three RUN configurations share one expected sequence (common GFX9 prefix).
; The single def lands in s[8:10]; s9 is copied into s8, s10 and s11.
11846define void @s_shuffle_v4i32_v3i32__4_4_4_4() {
11847; GFX9-LABEL: s_shuffle_v4i32_v3i32__4_4_4_4:
11848; GFX9:       ; %bb.0:
11849; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11850; GFX9-NEXT:    ;;#ASMSTART
11851; GFX9-NEXT:    ; def s[8:10]
11852; GFX9-NEXT:    ;;#ASMEND
11853; GFX9-NEXT:    s_mov_b32 s8, s9
11854; GFX9-NEXT:    s_mov_b32 s10, s9
11855; GFX9-NEXT:    s_mov_b32 s11, s9
11856; GFX9-NEXT:    ;;#ASMSTART
11857; GFX9-NEXT:    ; use s[8:11]
11858; GFX9-NEXT:    ;;#ASMEND
11859; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): %vec0 is unused by the mask; its asm is not marked
  ; sideeffect, so it is presumably dropped as dead code.
11860  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11861  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11862  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11863  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11864  ret void
11865}
11866
; Scalar-register shuffle with mask <5, 4, 4, 4>:
; result = <%vec1[2], %vec1[1], %vec1[1], %vec1[1]>.
; All three RUN configurations share one expected sequence (common GFX9 prefix).
; The single def lands in s[8:10]; s10 is copied to s8 before s10 is
; overwritten with s9.
11867define void @s_shuffle_v4i32_v3i32__5_4_4_4() {
11868; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_4_4_4:
11869; GFX9:       ; %bb.0:
11870; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11871; GFX9-NEXT:    ;;#ASMSTART
11872; GFX9-NEXT:    ; def s[8:10]
11873; GFX9-NEXT:    ;;#ASMEND
11874; GFX9-NEXT:    s_mov_b32 s8, s10
11875; GFX9-NEXT:    s_mov_b32 s10, s9
11876; GFX9-NEXT:    s_mov_b32 s11, s9
11877; GFX9-NEXT:    ;;#ASMSTART
11878; GFX9-NEXT:    ; use s[8:11]
11879; GFX9-NEXT:    ;;#ASMEND
11880; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): %vec0 is unused by the mask; its asm is not marked
  ; sideeffect, so it is presumably dropped as dead code.
11881  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11882  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11883  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11884  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11885  ret void
11886}
11887
; Scalar-register shuffle with mask <5, poison, 4, 4>:
; result = <%vec1[2], poison, %vec1[1], %vec1[1]>.
; Lane 1 is poison and the expected code contains no write to s9.
11888define void @s_shuffle_v4i32_v3i32__5_u_4_4() {
11889; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4:
11890; GFX900:       ; %bb.0:
11891; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11892; GFX900-NEXT:    ;;#ASMSTART
11893; GFX900-NEXT:    ; def s[4:6]
11894; GFX900-NEXT:    ;;#ASMEND
11895; GFX900-NEXT:    s_mov_b32 s8, s6
11896; GFX900-NEXT:    s_mov_b32 s10, s5
11897; GFX900-NEXT:    s_mov_b32 s11, s5
11898; GFX900-NEXT:    ;;#ASMSTART
11899; GFX900-NEXT:    ; use s[8:11]
11900; GFX900-NEXT:    ;;#ASMEND
11901; GFX900-NEXT:    s_setpc_b64 s[30:31]
11902;
11903; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4:
11904; GFX90A:       ; %bb.0:
11905; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11906; GFX90A-NEXT:    ;;#ASMSTART
11907; GFX90A-NEXT:    ; def s[4:6]
11908; GFX90A-NEXT:    ;;#ASMEND
11909; GFX90A-NEXT:    s_mov_b32 s8, s6
11910; GFX90A-NEXT:    s_mov_b32 s10, s5
11911; GFX90A-NEXT:    s_mov_b32 s11, s5
11912; GFX90A-NEXT:    ;;#ASMSTART
11913; GFX90A-NEXT:    ; use s[8:11]
11914; GFX90A-NEXT:    ;;#ASMEND
11915; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11916;
11917; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_u_4_4:
11918; GFX940:       ; %bb.0:
11919; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11920; GFX940-NEXT:    ;;#ASMSTART
11921; GFX940-NEXT:    ; def s[0:2]
11922; GFX940-NEXT:    ;;#ASMEND
11923; GFX940-NEXT:    s_mov_b32 s8, s2
11924; GFX940-NEXT:    s_mov_b32 s10, s1
11925; GFX940-NEXT:    s_mov_b32 s11, s1
11926; GFX940-NEXT:    ;;#ASMSTART
11927; GFX940-NEXT:    ; use s[8:11]
11928; GFX940-NEXT:    ;;#ASMEND
11929; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; NOTE(review): %vec0 is unused by the mask; its asm is not marked
  ; sideeffect, so it is presumably dropped as dead code.
11930  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11931  %vec1 = call <3 x i32> asm "; def $0", "=s"()
  ; Mask indices 0-2 select from %vec0, 3-5 from %vec1.
11932  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
  ; The "{s[8:11]}" constraint pins the shuffled value to s[8:11].
11933  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11934  ret void
11935}
11936
; 's'-constraint (scalar register) case, mask <5, 0, 4, 4>.
; Lane 0 = %vec1[2], lane 1 = %vec0[0], lanes 2 and 3 = %vec1[1].
; Both inputs are read, so the expected output shows two "; def" blocks and
; four copies into s[8:11], the tuple required by the "; use" constraint.
11937define void @s_shuffle_v4i32_v3i32__5_0_4_4() {
11938; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_4_4:
11939; GFX900:       ; %bb.0:
11940; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11941; GFX900-NEXT:    ;;#ASMSTART
11942; GFX900-NEXT:    ; def s[4:6]
11943; GFX900-NEXT:    ;;#ASMEND
11944; GFX900-NEXT:    ;;#ASMSTART
11945; GFX900-NEXT:    ; def s[12:14]
11946; GFX900-NEXT:    ;;#ASMEND
11947; GFX900-NEXT:    s_mov_b32 s8, s14
11948; GFX900-NEXT:    s_mov_b32 s9, s4
11949; GFX900-NEXT:    s_mov_b32 s10, s13
11950; GFX900-NEXT:    s_mov_b32 s11, s13
11951; GFX900-NEXT:    ;;#ASMSTART
11952; GFX900-NEXT:    ; use s[8:11]
11953; GFX900-NEXT:    ;;#ASMEND
11954; GFX900-NEXT:    s_setpc_b64 s[30:31]
11955;
11956; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_4_4:
11957; GFX90A:       ; %bb.0:
11958; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11959; GFX90A-NEXT:    ;;#ASMSTART
11960; GFX90A-NEXT:    ; def s[4:6]
11961; GFX90A-NEXT:    ;;#ASMEND
11962; GFX90A-NEXT:    ;;#ASMSTART
11963; GFX90A-NEXT:    ; def s[12:14]
11964; GFX90A-NEXT:    ;;#ASMEND
11965; GFX90A-NEXT:    s_mov_b32 s8, s14
11966; GFX90A-NEXT:    s_mov_b32 s9, s4
11967; GFX90A-NEXT:    s_mov_b32 s10, s13
11968; GFX90A-NEXT:    s_mov_b32 s11, s13
11969; GFX90A-NEXT:    ;;#ASMSTART
11970; GFX90A-NEXT:    ; use s[8:11]
11971; GFX90A-NEXT:    ;;#ASMEND
11972; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11973;
11974; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_4_4:
11975; GFX940:       ; %bb.0:
11976; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11977; GFX940-NEXT:    ;;#ASMSTART
11978; GFX940-NEXT:    ; def s[0:2]
11979; GFX940-NEXT:    ;;#ASMEND
11980; GFX940-NEXT:    ;;#ASMSTART
11981; GFX940-NEXT:    ; def s[4:6]
11982; GFX940-NEXT:    ;;#ASMEND
11983; GFX940-NEXT:    s_mov_b32 s8, s6
11984; GFX940-NEXT:    s_mov_b32 s9, s0
11985; GFX940-NEXT:    s_mov_b32 s10, s5
11986; GFX940-NEXT:    s_mov_b32 s11, s5
11987; GFX940-NEXT:    ;;#ASMSTART
11988; GFX940-NEXT:    ; use s[8:11]
11989; GFX940-NEXT:    ;;#ASMEND
11990; GFX940-NEXT:    s_setpc_b64 s[30:31]
11991  %vec0 = call <3 x i32> asm "; def $0", "=s"()
11992  %vec1 = call <3 x i32> asm "; def $0", "=s"()
11993  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
11994  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
11995  ret void
11996}
11997
; 's'-constraint (scalar register) case, mask <5, 1, 4, 4>.
; Lane 0 = %vec1[2], lane 1 = %vec0[1], lanes 2 and 3 = %vec1[1].
; In the expected output one input is defined directly in s[8:10], so the
; lane-1 value is already in s9 and only three copies are emitted.
11998define void @s_shuffle_v4i32_v3i32__5_1_4_4() {
11999; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_4_4:
12000; GFX900:       ; %bb.0:
12001; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12002; GFX900-NEXT:    ;;#ASMSTART
12003; GFX900-NEXT:    ; def s[8:10]
12004; GFX900-NEXT:    ;;#ASMEND
12005; GFX900-NEXT:    ;;#ASMSTART
12006; GFX900-NEXT:    ; def s[4:6]
12007; GFX900-NEXT:    ;;#ASMEND
12008; GFX900-NEXT:    s_mov_b32 s8, s6
12009; GFX900-NEXT:    s_mov_b32 s10, s5
12010; GFX900-NEXT:    s_mov_b32 s11, s5
12011; GFX900-NEXT:    ;;#ASMSTART
12012; GFX900-NEXT:    ; use s[8:11]
12013; GFX900-NEXT:    ;;#ASMEND
12014; GFX900-NEXT:    s_setpc_b64 s[30:31]
12015;
12016; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_4_4:
12017; GFX90A:       ; %bb.0:
12018; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12019; GFX90A-NEXT:    ;;#ASMSTART
12020; GFX90A-NEXT:    ; def s[8:10]
12021; GFX90A-NEXT:    ;;#ASMEND
12022; GFX90A-NEXT:    ;;#ASMSTART
12023; GFX90A-NEXT:    ; def s[4:6]
12024; GFX90A-NEXT:    ;;#ASMEND
12025; GFX90A-NEXT:    s_mov_b32 s8, s6
12026; GFX90A-NEXT:    s_mov_b32 s10, s5
12027; GFX90A-NEXT:    s_mov_b32 s11, s5
12028; GFX90A-NEXT:    ;;#ASMSTART
12029; GFX90A-NEXT:    ; use s[8:11]
12030; GFX90A-NEXT:    ;;#ASMEND
12031; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12032;
12033; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_4_4:
12034; GFX940:       ; %bb.0:
12035; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12036; GFX940-NEXT:    ;;#ASMSTART
12037; GFX940-NEXT:    ; def s[8:10]
12038; GFX940-NEXT:    ;;#ASMEND
12039; GFX940-NEXT:    ;;#ASMSTART
12040; GFX940-NEXT:    ; def s[0:2]
12041; GFX940-NEXT:    ;;#ASMEND
12042; GFX940-NEXT:    s_mov_b32 s8, s2
12043; GFX940-NEXT:    s_mov_b32 s10, s1
12044; GFX940-NEXT:    s_mov_b32 s11, s1
12045; GFX940-NEXT:    ;;#ASMSTART
12046; GFX940-NEXT:    ; use s[8:11]
12047; GFX940-NEXT:    ;;#ASMEND
12048; GFX940-NEXT:    s_setpc_b64 s[30:31]
12049  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12050  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12051  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
12052  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12053  ret void
12054}
12055
; 's'-constraint (scalar register) case, mask <5, 2, 4, 4>.
; Lane 0 = %vec1[2], lane 1 = %vec0[2], lanes 2 and 3 = %vec1[1].
; Both inputs are read, so the expected output shows two "; def" blocks and
; four copies into s[8:11], the tuple required by the "; use" constraint.
12056define void @s_shuffle_v4i32_v3i32__5_2_4_4() {
12057; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_4_4:
12058; GFX900:       ; %bb.0:
12059; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12060; GFX900-NEXT:    ;;#ASMSTART
12061; GFX900-NEXT:    ; def s[4:6]
12062; GFX900-NEXT:    ;;#ASMEND
12063; GFX900-NEXT:    ;;#ASMSTART
12064; GFX900-NEXT:    ; def s[12:14]
12065; GFX900-NEXT:    ;;#ASMEND
12066; GFX900-NEXT:    s_mov_b32 s8, s14
12067; GFX900-NEXT:    s_mov_b32 s9, s6
12068; GFX900-NEXT:    s_mov_b32 s10, s13
12069; GFX900-NEXT:    s_mov_b32 s11, s13
12070; GFX900-NEXT:    ;;#ASMSTART
12071; GFX900-NEXT:    ; use s[8:11]
12072; GFX900-NEXT:    ;;#ASMEND
12073; GFX900-NEXT:    s_setpc_b64 s[30:31]
12074;
12075; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_4_4:
12076; GFX90A:       ; %bb.0:
12077; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12078; GFX90A-NEXT:    ;;#ASMSTART
12079; GFX90A-NEXT:    ; def s[4:6]
12080; GFX90A-NEXT:    ;;#ASMEND
12081; GFX90A-NEXT:    ;;#ASMSTART
12082; GFX90A-NEXT:    ; def s[12:14]
12083; GFX90A-NEXT:    ;;#ASMEND
12084; GFX90A-NEXT:    s_mov_b32 s8, s14
12085; GFX90A-NEXT:    s_mov_b32 s9, s6
12086; GFX90A-NEXT:    s_mov_b32 s10, s13
12087; GFX90A-NEXT:    s_mov_b32 s11, s13
12088; GFX90A-NEXT:    ;;#ASMSTART
12089; GFX90A-NEXT:    ; use s[8:11]
12090; GFX90A-NEXT:    ;;#ASMEND
12091; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12092;
12093; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_4_4:
12094; GFX940:       ; %bb.0:
12095; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12096; GFX940-NEXT:    ;;#ASMSTART
12097; GFX940-NEXT:    ; def s[0:2]
12098; GFX940-NEXT:    ;;#ASMEND
12099; GFX940-NEXT:    ;;#ASMSTART
12100; GFX940-NEXT:    ; def s[4:6]
12101; GFX940-NEXT:    ;;#ASMEND
12102; GFX940-NEXT:    s_mov_b32 s8, s6
12103; GFX940-NEXT:    s_mov_b32 s9, s2
12104; GFX940-NEXT:    s_mov_b32 s10, s5
12105; GFX940-NEXT:    s_mov_b32 s11, s5
12106; GFX940-NEXT:    ;;#ASMSTART
12107; GFX940-NEXT:    ; use s[8:11]
12108; GFX940-NEXT:    ;;#ASMEND
12109; GFX940-NEXT:    s_setpc_b64 s[30:31]
12110  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12111  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12112  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
12113  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12114  ret void
12115}
12116
; 's'-constraint (scalar register) case, mask <5, 3, 4, 4>.
; Lane 0 = %vec1[2], lane 1 = %vec1[0], lanes 2 and 3 = %vec1[1].
; Every lane selects from %vec1 (indices 3-5); no lane selects from %vec0,
; and only one "; def" appears in the expected output.
12117define void @s_shuffle_v4i32_v3i32__5_3_4_4() {
12118; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_4_4:
12119; GFX900:       ; %bb.0:
12120; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12121; GFX900-NEXT:    ;;#ASMSTART
12122; GFX900-NEXT:    ; def s[4:6]
12123; GFX900-NEXT:    ;;#ASMEND
12124; GFX900-NEXT:    s_mov_b32 s8, s6
12125; GFX900-NEXT:    s_mov_b32 s9, s4
12126; GFX900-NEXT:    s_mov_b32 s10, s5
12127; GFX900-NEXT:    s_mov_b32 s11, s5
12128; GFX900-NEXT:    ;;#ASMSTART
12129; GFX900-NEXT:    ; use s[8:11]
12130; GFX900-NEXT:    ;;#ASMEND
12131; GFX900-NEXT:    s_setpc_b64 s[30:31]
12132;
12133; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_4_4:
12134; GFX90A:       ; %bb.0:
12135; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12136; GFX90A-NEXT:    ;;#ASMSTART
12137; GFX90A-NEXT:    ; def s[4:6]
12138; GFX90A-NEXT:    ;;#ASMEND
12139; GFX90A-NEXT:    s_mov_b32 s8, s6
12140; GFX90A-NEXT:    s_mov_b32 s9, s4
12141; GFX90A-NEXT:    s_mov_b32 s10, s5
12142; GFX90A-NEXT:    s_mov_b32 s11, s5
12143; GFX90A-NEXT:    ;;#ASMSTART
12144; GFX90A-NEXT:    ; use s[8:11]
12145; GFX90A-NEXT:    ;;#ASMEND
12146; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12147;
12148; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_4_4:
12149; GFX940:       ; %bb.0:
12150; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12151; GFX940-NEXT:    ;;#ASMSTART
12152; GFX940-NEXT:    ; def s[0:2]
12153; GFX940-NEXT:    ;;#ASMEND
12154; GFX940-NEXT:    s_mov_b32 s8, s2
12155; GFX940-NEXT:    s_mov_b32 s9, s0
12156; GFX940-NEXT:    s_mov_b32 s10, s1
12157; GFX940-NEXT:    s_mov_b32 s11, s1
12158; GFX940-NEXT:    ;;#ASMSTART
12159; GFX940-NEXT:    ; use s[8:11]
12160; GFX940-NEXT:    ;;#ASMEND
12161; GFX940-NEXT:    s_setpc_b64 s[30:31]
12162  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12163  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12164  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
12165  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12166  ret void
12167}
12168
; 's'-constraint (scalar register) case, mask <5, 5, 4, 4>.
; Lanes 0 and 1 = %vec1[2], lanes 2 and 3 = %vec1[1].
; No lane selects from %vec0, and only one "; def" appears in the expected
; output; each source element is copied twice into s[8:11].
12169define void @s_shuffle_v4i32_v3i32__5_5_4_4() {
12170; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_4:
12171; GFX900:       ; %bb.0:
12172; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12173; GFX900-NEXT:    ;;#ASMSTART
12174; GFX900-NEXT:    ; def s[4:6]
12175; GFX900-NEXT:    ;;#ASMEND
12176; GFX900-NEXT:    s_mov_b32 s8, s6
12177; GFX900-NEXT:    s_mov_b32 s9, s6
12178; GFX900-NEXT:    s_mov_b32 s10, s5
12179; GFX900-NEXT:    s_mov_b32 s11, s5
12180; GFX900-NEXT:    ;;#ASMSTART
12181; GFX900-NEXT:    ; use s[8:11]
12182; GFX900-NEXT:    ;;#ASMEND
12183; GFX900-NEXT:    s_setpc_b64 s[30:31]
12184;
12185; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_4:
12186; GFX90A:       ; %bb.0:
12187; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12188; GFX90A-NEXT:    ;;#ASMSTART
12189; GFX90A-NEXT:    ; def s[4:6]
12190; GFX90A-NEXT:    ;;#ASMEND
12191; GFX90A-NEXT:    s_mov_b32 s8, s6
12192; GFX90A-NEXT:    s_mov_b32 s9, s6
12193; GFX90A-NEXT:    s_mov_b32 s10, s5
12194; GFX90A-NEXT:    s_mov_b32 s11, s5
12195; GFX90A-NEXT:    ;;#ASMSTART
12196; GFX90A-NEXT:    ; use s[8:11]
12197; GFX90A-NEXT:    ;;#ASMEND
12198; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12199;
12200; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_4:
12201; GFX940:       ; %bb.0:
12202; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12203; GFX940-NEXT:    ;;#ASMSTART
12204; GFX940-NEXT:    ; def s[0:2]
12205; GFX940-NEXT:    ;;#ASMEND
12206; GFX940-NEXT:    s_mov_b32 s8, s2
12207; GFX940-NEXT:    s_mov_b32 s9, s2
12208; GFX940-NEXT:    s_mov_b32 s10, s1
12209; GFX940-NEXT:    s_mov_b32 s11, s1
12210; GFX940-NEXT:    ;;#ASMSTART
12211; GFX940-NEXT:    ; use s[8:11]
12212; GFX940-NEXT:    ;;#ASMEND
12213; GFX940-NEXT:    s_setpc_b64 s[30:31]
12214  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12215  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12216  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
12217  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12218  ret void
12219}
12220
; 's'-constraint (scalar register) case, mask <5, 5, poison, 4>.
; Lanes 0 and 1 = %vec1[2], lane 3 = %vec1[1]; lane 2 is unspecified, so the
; expected output has no copy into s10. No lane selects from %vec0, and only
; one "; def" appears in the expected output.
12221define void @s_shuffle_v4i32_v3i32__5_5_u_4() {
12222; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4:
12223; GFX900:       ; %bb.0:
12224; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12225; GFX900-NEXT:    ;;#ASMSTART
12226; GFX900-NEXT:    ; def s[4:6]
12227; GFX900-NEXT:    ;;#ASMEND
12228; GFX900-NEXT:    s_mov_b32 s8, s6
12229; GFX900-NEXT:    s_mov_b32 s9, s6
12230; GFX900-NEXT:    s_mov_b32 s11, s5
12231; GFX900-NEXT:    ;;#ASMSTART
12232; GFX900-NEXT:    ; use s[8:11]
12233; GFX900-NEXT:    ;;#ASMEND
12234; GFX900-NEXT:    s_setpc_b64 s[30:31]
12235;
12236; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4:
12237; GFX90A:       ; %bb.0:
12238; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12239; GFX90A-NEXT:    ;;#ASMSTART
12240; GFX90A-NEXT:    ; def s[4:6]
12241; GFX90A-NEXT:    ;;#ASMEND
12242; GFX90A-NEXT:    s_mov_b32 s8, s6
12243; GFX90A-NEXT:    s_mov_b32 s9, s6
12244; GFX90A-NEXT:    s_mov_b32 s11, s5
12245; GFX90A-NEXT:    ;;#ASMSTART
12246; GFX90A-NEXT:    ; use s[8:11]
12247; GFX90A-NEXT:    ;;#ASMEND
12248; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12249;
12250; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_4:
12251; GFX940:       ; %bb.0:
12252; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12253; GFX940-NEXT:    ;;#ASMSTART
12254; GFX940-NEXT:    ; def s[0:2]
12255; GFX940-NEXT:    ;;#ASMEND
12256; GFX940-NEXT:    s_mov_b32 s8, s2
12257; GFX940-NEXT:    s_mov_b32 s9, s2
12258; GFX940-NEXT:    s_mov_b32 s11, s1
12259; GFX940-NEXT:    ;;#ASMSTART
12260; GFX940-NEXT:    ; use s[8:11]
12261; GFX940-NEXT:    ;;#ASMEND
12262; GFX940-NEXT:    s_setpc_b64 s[30:31]
12263  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12264  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12265  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
12266  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12267  ret void
12268}
12269
; 's'-constraint (scalar register) case, mask <5, 5, 0, 4>.
; Lanes 0 and 1 = %vec1[2], lane 2 = %vec0[0], lane 3 = %vec1[1].
; Both inputs are read, so the expected output shows two "; def" blocks and
; four copies into s[8:11], the tuple required by the "; use" constraint.
12270define void @s_shuffle_v4i32_v3i32__5_5_0_4() {
12271; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_4:
12272; GFX900:       ; %bb.0:
12273; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12274; GFX900-NEXT:    ;;#ASMSTART
12275; GFX900-NEXT:    ; def s[4:6]
12276; GFX900-NEXT:    ;;#ASMEND
12277; GFX900-NEXT:    ;;#ASMSTART
12278; GFX900-NEXT:    ; def s[12:14]
12279; GFX900-NEXT:    ;;#ASMEND
12280; GFX900-NEXT:    s_mov_b32 s8, s14
12281; GFX900-NEXT:    s_mov_b32 s9, s14
12282; GFX900-NEXT:    s_mov_b32 s10, s4
12283; GFX900-NEXT:    s_mov_b32 s11, s13
12284; GFX900-NEXT:    ;;#ASMSTART
12285; GFX900-NEXT:    ; use s[8:11]
12286; GFX900-NEXT:    ;;#ASMEND
12287; GFX900-NEXT:    s_setpc_b64 s[30:31]
12288;
12289; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_4:
12290; GFX90A:       ; %bb.0:
12291; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12292; GFX90A-NEXT:    ;;#ASMSTART
12293; GFX90A-NEXT:    ; def s[4:6]
12294; GFX90A-NEXT:    ;;#ASMEND
12295; GFX90A-NEXT:    ;;#ASMSTART
12296; GFX90A-NEXT:    ; def s[12:14]
12297; GFX90A-NEXT:    ;;#ASMEND
12298; GFX90A-NEXT:    s_mov_b32 s8, s14
12299; GFX90A-NEXT:    s_mov_b32 s9, s14
12300; GFX90A-NEXT:    s_mov_b32 s10, s4
12301; GFX90A-NEXT:    s_mov_b32 s11, s13
12302; GFX90A-NEXT:    ;;#ASMSTART
12303; GFX90A-NEXT:    ; use s[8:11]
12304; GFX90A-NEXT:    ;;#ASMEND
12305; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12306;
12307; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_4:
12308; GFX940:       ; %bb.0:
12309; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12310; GFX940-NEXT:    ;;#ASMSTART
12311; GFX940-NEXT:    ; def s[0:2]
12312; GFX940-NEXT:    ;;#ASMEND
12313; GFX940-NEXT:    ;;#ASMSTART
12314; GFX940-NEXT:    ; def s[4:6]
12315; GFX940-NEXT:    ;;#ASMEND
12316; GFX940-NEXT:    s_mov_b32 s8, s6
12317; GFX940-NEXT:    s_mov_b32 s9, s6
12318; GFX940-NEXT:    s_mov_b32 s10, s0
12319; GFX940-NEXT:    s_mov_b32 s11, s5
12320; GFX940-NEXT:    ;;#ASMSTART
12321; GFX940-NEXT:    ; use s[8:11]
12322; GFX940-NEXT:    ;;#ASMEND
12323; GFX940-NEXT:    s_setpc_b64 s[30:31]
12324  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12325  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12326  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
12327  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12328  ret void
12329}
12330
; 's'-constraint (scalar register) case, mask <5, 5, 1, 4>.
; Lanes 0 and 1 = %vec1[2], lane 2 = %vec0[1], lane 3 = %vec1[1].
; Both inputs are read, so the expected output shows two "; def" blocks and
; four copies into s[8:11], the tuple required by the "; use" constraint.
12331define void @s_shuffle_v4i32_v3i32__5_5_1_4() {
12332; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_4:
12333; GFX900:       ; %bb.0:
12334; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12335; GFX900-NEXT:    ;;#ASMSTART
12336; GFX900-NEXT:    ; def s[4:6]
12337; GFX900-NEXT:    ;;#ASMEND
12338; GFX900-NEXT:    ;;#ASMSTART
12339; GFX900-NEXT:    ; def s[12:14]
12340; GFX900-NEXT:    ;;#ASMEND
12341; GFX900-NEXT:    s_mov_b32 s8, s14
12342; GFX900-NEXT:    s_mov_b32 s9, s14
12343; GFX900-NEXT:    s_mov_b32 s10, s5
12344; GFX900-NEXT:    s_mov_b32 s11, s13
12345; GFX900-NEXT:    ;;#ASMSTART
12346; GFX900-NEXT:    ; use s[8:11]
12347; GFX900-NEXT:    ;;#ASMEND
12348; GFX900-NEXT:    s_setpc_b64 s[30:31]
12349;
12350; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_4:
12351; GFX90A:       ; %bb.0:
12352; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12353; GFX90A-NEXT:    ;;#ASMSTART
12354; GFX90A-NEXT:    ; def s[4:6]
12355; GFX90A-NEXT:    ;;#ASMEND
12356; GFX90A-NEXT:    ;;#ASMSTART
12357; GFX90A-NEXT:    ; def s[12:14]
12358; GFX90A-NEXT:    ;;#ASMEND
12359; GFX90A-NEXT:    s_mov_b32 s8, s14
12360; GFX90A-NEXT:    s_mov_b32 s9, s14
12361; GFX90A-NEXT:    s_mov_b32 s10, s5
12362; GFX90A-NEXT:    s_mov_b32 s11, s13
12363; GFX90A-NEXT:    ;;#ASMSTART
12364; GFX90A-NEXT:    ; use s[8:11]
12365; GFX90A-NEXT:    ;;#ASMEND
12366; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12367;
12368; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_4:
12369; GFX940:       ; %bb.0:
12370; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12371; GFX940-NEXT:    ;;#ASMSTART
12372; GFX940-NEXT:    ; def s[0:2]
12373; GFX940-NEXT:    ;;#ASMEND
12374; GFX940-NEXT:    ;;#ASMSTART
12375; GFX940-NEXT:    ; def s[4:6]
12376; GFX940-NEXT:    ;;#ASMEND
12377; GFX940-NEXT:    s_mov_b32 s8, s6
12378; GFX940-NEXT:    s_mov_b32 s9, s6
12379; GFX940-NEXT:    s_mov_b32 s10, s1
12380; GFX940-NEXT:    s_mov_b32 s11, s5
12381; GFX940-NEXT:    ;;#ASMSTART
12382; GFX940-NEXT:    ; use s[8:11]
12383; GFX940-NEXT:    ;;#ASMEND
12384; GFX940-NEXT:    s_setpc_b64 s[30:31]
12385  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12386  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12387  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
12388  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12389  ret void
12390}
12391
; 's'-constraint (scalar register) case, mask <5, 5, 2, 4>.
; Lanes 0 and 1 = %vec1[2], lane 2 = %vec0[2], lane 3 = %vec1[1].
; In the expected output one input is defined directly in s[8:10], so the
; lane-2 value is already in s10 and only three copies are emitted.
12392define void @s_shuffle_v4i32_v3i32__5_5_2_4() {
12393; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_4:
12394; GFX900:       ; %bb.0:
12395; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12396; GFX900-NEXT:    ;;#ASMSTART
12397; GFX900-NEXT:    ; def s[8:10]
12398; GFX900-NEXT:    ;;#ASMEND
12399; GFX900-NEXT:    ;;#ASMSTART
12400; GFX900-NEXT:    ; def s[4:6]
12401; GFX900-NEXT:    ;;#ASMEND
12402; GFX900-NEXT:    s_mov_b32 s8, s6
12403; GFX900-NEXT:    s_mov_b32 s9, s6
12404; GFX900-NEXT:    s_mov_b32 s11, s5
12405; GFX900-NEXT:    ;;#ASMSTART
12406; GFX900-NEXT:    ; use s[8:11]
12407; GFX900-NEXT:    ;;#ASMEND
12408; GFX900-NEXT:    s_setpc_b64 s[30:31]
12409;
12410; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_4:
12411; GFX90A:       ; %bb.0:
12412; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12413; GFX90A-NEXT:    ;;#ASMSTART
12414; GFX90A-NEXT:    ; def s[8:10]
12415; GFX90A-NEXT:    ;;#ASMEND
12416; GFX90A-NEXT:    ;;#ASMSTART
12417; GFX90A-NEXT:    ; def s[4:6]
12418; GFX90A-NEXT:    ;;#ASMEND
12419; GFX90A-NEXT:    s_mov_b32 s8, s6
12420; GFX90A-NEXT:    s_mov_b32 s9, s6
12421; GFX90A-NEXT:    s_mov_b32 s11, s5
12422; GFX90A-NEXT:    ;;#ASMSTART
12423; GFX90A-NEXT:    ; use s[8:11]
12424; GFX90A-NEXT:    ;;#ASMEND
12425; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12426;
12427; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_4:
12428; GFX940:       ; %bb.0:
12429; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12430; GFX940-NEXT:    ;;#ASMSTART
12431; GFX940-NEXT:    ; def s[8:10]
12432; GFX940-NEXT:    ;;#ASMEND
12433; GFX940-NEXT:    ;;#ASMSTART
12434; GFX940-NEXT:    ; def s[0:2]
12435; GFX940-NEXT:    ;;#ASMEND
12436; GFX940-NEXT:    s_mov_b32 s8, s2
12437; GFX940-NEXT:    s_mov_b32 s9, s2
12438; GFX940-NEXT:    s_mov_b32 s11, s1
12439; GFX940-NEXT:    ;;#ASMSTART
12440; GFX940-NEXT:    ; use s[8:11]
12441; GFX940-NEXT:    ;;#ASMEND
12442; GFX940-NEXT:    s_setpc_b64 s[30:31]
12443  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12444  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12445  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
12446  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12447  ret void
12448}
12449
; 's'-constraint (scalar register) case, mask <5, 5, 3, 4>.
; Lanes 0 and 1 = %vec1[2], lane 2 = %vec1[0], lane 3 = %vec1[1].
; Every lane selects from %vec1 (indices 3-5); no lane selects from %vec0,
; and only one "; def" appears in the expected output.
12450define void @s_shuffle_v4i32_v3i32__5_5_3_4() {
12451; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_4:
12452; GFX900:       ; %bb.0:
12453; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12454; GFX900-NEXT:    ;;#ASMSTART
12455; GFX900-NEXT:    ; def s[4:6]
12456; GFX900-NEXT:    ;;#ASMEND
12457; GFX900-NEXT:    s_mov_b32 s8, s6
12458; GFX900-NEXT:    s_mov_b32 s9, s6
12459; GFX900-NEXT:    s_mov_b32 s10, s4
12460; GFX900-NEXT:    s_mov_b32 s11, s5
12461; GFX900-NEXT:    ;;#ASMSTART
12462; GFX900-NEXT:    ; use s[8:11]
12463; GFX900-NEXT:    ;;#ASMEND
12464; GFX900-NEXT:    s_setpc_b64 s[30:31]
12465;
12466; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_4:
12467; GFX90A:       ; %bb.0:
12468; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12469; GFX90A-NEXT:    ;;#ASMSTART
12470; GFX90A-NEXT:    ; def s[4:6]
12471; GFX90A-NEXT:    ;;#ASMEND
12472; GFX90A-NEXT:    s_mov_b32 s8, s6
12473; GFX90A-NEXT:    s_mov_b32 s9, s6
12474; GFX90A-NEXT:    s_mov_b32 s10, s4
12475; GFX90A-NEXT:    s_mov_b32 s11, s5
12476; GFX90A-NEXT:    ;;#ASMSTART
12477; GFX90A-NEXT:    ; use s[8:11]
12478; GFX90A-NEXT:    ;;#ASMEND
12479; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12480;
12481; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_4:
12482; GFX940:       ; %bb.0:
12483; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12484; GFX940-NEXT:    ;;#ASMSTART
12485; GFX940-NEXT:    ; def s[0:2]
12486; GFX940-NEXT:    ;;#ASMEND
12487; GFX940-NEXT:    s_mov_b32 s8, s2
12488; GFX940-NEXT:    s_mov_b32 s9, s2
12489; GFX940-NEXT:    s_mov_b32 s10, s0
12490; GFX940-NEXT:    s_mov_b32 s11, s1
12491; GFX940-NEXT:    ;;#ASMSTART
12492; GFX940-NEXT:    ; use s[8:11]
12493; GFX940-NEXT:    ;;#ASMEND
12494; GFX940-NEXT:    s_setpc_b64 s[30:31]
12495  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12496  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12497  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
12498  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12499  ret void
12500}
12501
; 's'-constraint (scalar register) case, mask <poison, 5, 5, 5>.
; Lanes 1-3 = %vec1[2]; lane 0 is unspecified. No lane selects from %vec0.
; The expected output defines the single input directly in s[8:10], so s10
; already holds the value and only s9 and s11 are written. All three run
; lines share one set of expected output here.
12502define void @s_shuffle_v4i32_v3i32__u_5_5_5() {
12503; GFX9-LABEL: s_shuffle_v4i32_v3i32__u_5_5_5:
12504; GFX9:       ; %bb.0:
12505; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12506; GFX9-NEXT:    ;;#ASMSTART
12507; GFX9-NEXT:    ; def s[8:10]
12508; GFX9-NEXT:    ;;#ASMEND
12509; GFX9-NEXT:    s_mov_b32 s9, s10
12510; GFX9-NEXT:    s_mov_b32 s11, s10
12511; GFX9-NEXT:    ;;#ASMSTART
12512; GFX9-NEXT:    ; use s[8:11]
12513; GFX9-NEXT:    ;;#ASMEND
12514; GFX9-NEXT:    s_setpc_b64 s[30:31]
12515  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12516  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12517  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
12518  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12519  ret void
12520}
12521
; 's'-constraint (scalar register) case, mask <0, 5, 5, 5>.
; Lane 0 = %vec0[0], lanes 1-3 = %vec1[2].
; In the expected output one input is defined directly in s[8:10], so the
; lane-0 value is already in s8; the other input's last element is copied
; into s9, s10 and s11.
12522define void @s_shuffle_v4i32_v3i32__0_5_5_5() {
12523; GFX900-LABEL: s_shuffle_v4i32_v3i32__0_5_5_5:
12524; GFX900:       ; %bb.0:
12525; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12526; GFX900-NEXT:    ;;#ASMSTART
12527; GFX900-NEXT:    ; def s[8:10]
12528; GFX900-NEXT:    ;;#ASMEND
12529; GFX900-NEXT:    ;;#ASMSTART
12530; GFX900-NEXT:    ; def s[4:6]
12531; GFX900-NEXT:    ;;#ASMEND
12532; GFX900-NEXT:    s_mov_b32 s9, s6
12533; GFX900-NEXT:    s_mov_b32 s10, s6
12534; GFX900-NEXT:    s_mov_b32 s11, s6
12535; GFX900-NEXT:    ;;#ASMSTART
12536; GFX900-NEXT:    ; use s[8:11]
12537; GFX900-NEXT:    ;;#ASMEND
12538; GFX900-NEXT:    s_setpc_b64 s[30:31]
12539;
12540; GFX90A-LABEL: s_shuffle_v4i32_v3i32__0_5_5_5:
12541; GFX90A:       ; %bb.0:
12542; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12543; GFX90A-NEXT:    ;;#ASMSTART
12544; GFX90A-NEXT:    ; def s[8:10]
12545; GFX90A-NEXT:    ;;#ASMEND
12546; GFX90A-NEXT:    ;;#ASMSTART
12547; GFX90A-NEXT:    ; def s[4:6]
12548; GFX90A-NEXT:    ;;#ASMEND
12549; GFX90A-NEXT:    s_mov_b32 s9, s6
12550; GFX90A-NEXT:    s_mov_b32 s10, s6
12551; GFX90A-NEXT:    s_mov_b32 s11, s6
12552; GFX90A-NEXT:    ;;#ASMSTART
12553; GFX90A-NEXT:    ; use s[8:11]
12554; GFX90A-NEXT:    ;;#ASMEND
12555; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12556;
12557; GFX940-LABEL: s_shuffle_v4i32_v3i32__0_5_5_5:
12558; GFX940:       ; %bb.0:
12559; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12560; GFX940-NEXT:    ;;#ASMSTART
12561; GFX940-NEXT:    ; def s[8:10]
12562; GFX940-NEXT:    ;;#ASMEND
12563; GFX940-NEXT:    ;;#ASMSTART
12564; GFX940-NEXT:    ; def s[0:2]
12565; GFX940-NEXT:    ;;#ASMEND
12566; GFX940-NEXT:    s_mov_b32 s9, s2
12567; GFX940-NEXT:    s_mov_b32 s10, s2
12568; GFX940-NEXT:    s_mov_b32 s11, s2
12569; GFX940-NEXT:    ;;#ASMSTART
12570; GFX940-NEXT:    ; use s[8:11]
12571; GFX940-NEXT:    ;;#ASMEND
12572; GFX940-NEXT:    s_setpc_b64 s[30:31]
12573  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12574  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12575  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
12576  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12577  ret void
12578}
12579
; 's'-constraint (scalar register) case, mask <1, 5, 5, 5>.
; Lane 0 = %vec0[1], lanes 1-3 = %vec1[2].
; In the expected output the input defined in s[8:10] supplies lanes 1-3 (s10
; is left in place and copied to s9 and s11), while lane 0 is copied from the
; middle element of the other input.
12580define void @s_shuffle_v4i32_v3i32__1_5_5_5() {
12581; GFX900-LABEL: s_shuffle_v4i32_v3i32__1_5_5_5:
12582; GFX900:       ; %bb.0:
12583; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12584; GFX900-NEXT:    ;;#ASMSTART
12585; GFX900-NEXT:    ; def s[8:10]
12586; GFX900-NEXT:    ;;#ASMEND
12587; GFX900-NEXT:    ;;#ASMSTART
12588; GFX900-NEXT:    ; def s[4:6]
12589; GFX900-NEXT:    ;;#ASMEND
12590; GFX900-NEXT:    s_mov_b32 s8, s5
12591; GFX900-NEXT:    s_mov_b32 s9, s10
12592; GFX900-NEXT:    s_mov_b32 s11, s10
12593; GFX900-NEXT:    ;;#ASMSTART
12594; GFX900-NEXT:    ; use s[8:11]
12595; GFX900-NEXT:    ;;#ASMEND
12596; GFX900-NEXT:    s_setpc_b64 s[30:31]
12597;
12598; GFX90A-LABEL: s_shuffle_v4i32_v3i32__1_5_5_5:
12599; GFX90A:       ; %bb.0:
12600; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12601; GFX90A-NEXT:    ;;#ASMSTART
12602; GFX90A-NEXT:    ; def s[8:10]
12603; GFX90A-NEXT:    ;;#ASMEND
12604; GFX90A-NEXT:    ;;#ASMSTART
12605; GFX90A-NEXT:    ; def s[4:6]
12606; GFX90A-NEXT:    ;;#ASMEND
12607; GFX90A-NEXT:    s_mov_b32 s8, s5
12608; GFX90A-NEXT:    s_mov_b32 s9, s10
12609; GFX90A-NEXT:    s_mov_b32 s11, s10
12610; GFX90A-NEXT:    ;;#ASMSTART
12611; GFX90A-NEXT:    ; use s[8:11]
12612; GFX90A-NEXT:    ;;#ASMEND
12613; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12614;
12615; GFX940-LABEL: s_shuffle_v4i32_v3i32__1_5_5_5:
12616; GFX940:       ; %bb.0:
12617; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12618; GFX940-NEXT:    ;;#ASMSTART
12619; GFX940-NEXT:    ; def s[8:10]
12620; GFX940-NEXT:    ;;#ASMEND
12621; GFX940-NEXT:    ;;#ASMSTART
12622; GFX940-NEXT:    ; def s[0:2]
12623; GFX940-NEXT:    ;;#ASMEND
12624; GFX940-NEXT:    s_mov_b32 s8, s1
12625; GFX940-NEXT:    s_mov_b32 s9, s10
12626; GFX940-NEXT:    s_mov_b32 s11, s10
12627; GFX940-NEXT:    ;;#ASMSTART
12628; GFX940-NEXT:    ; use s[8:11]
12629; GFX940-NEXT:    ;;#ASMEND
12630; GFX940-NEXT:    s_setpc_b64 s[30:31]
12631  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12632  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12633  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
12634  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12635  ret void
12636}
12637
; 's'-constraint (scalar register) case, mask <2, 5, 5, 5>.
; Lane 0 = %vec0[2], lanes 1-3 = %vec1[2].
; In the expected output the input defined in s[8:10] supplies lanes 1-3 (s10
; is left in place and copied to s9 and s11), while lane 0 is copied from the
; last element of the other input.
12638define void @s_shuffle_v4i32_v3i32__2_5_5_5() {
12639; GFX900-LABEL: s_shuffle_v4i32_v3i32__2_5_5_5:
12640; GFX900:       ; %bb.0:
12641; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12642; GFX900-NEXT:    ;;#ASMSTART
12643; GFX900-NEXT:    ; def s[8:10]
12644; GFX900-NEXT:    ;;#ASMEND
12645; GFX900-NEXT:    ;;#ASMSTART
12646; GFX900-NEXT:    ; def s[4:6]
12647; GFX900-NEXT:    ;;#ASMEND
12648; GFX900-NEXT:    s_mov_b32 s8, s6
12649; GFX900-NEXT:    s_mov_b32 s9, s10
12650; GFX900-NEXT:    s_mov_b32 s11, s10
12651; GFX900-NEXT:    ;;#ASMSTART
12652; GFX900-NEXT:    ; use s[8:11]
12653; GFX900-NEXT:    ;;#ASMEND
12654; GFX900-NEXT:    s_setpc_b64 s[30:31]
12655;
12656; GFX90A-LABEL: s_shuffle_v4i32_v3i32__2_5_5_5:
12657; GFX90A:       ; %bb.0:
12658; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12659; GFX90A-NEXT:    ;;#ASMSTART
12660; GFX90A-NEXT:    ; def s[8:10]
12661; GFX90A-NEXT:    ;;#ASMEND
12662; GFX90A-NEXT:    ;;#ASMSTART
12663; GFX90A-NEXT:    ; def s[4:6]
12664; GFX90A-NEXT:    ;;#ASMEND
12665; GFX90A-NEXT:    s_mov_b32 s8, s6
12666; GFX90A-NEXT:    s_mov_b32 s9, s10
12667; GFX90A-NEXT:    s_mov_b32 s11, s10
12668; GFX90A-NEXT:    ;;#ASMSTART
12669; GFX90A-NEXT:    ; use s[8:11]
12670; GFX90A-NEXT:    ;;#ASMEND
12671; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12672;
12673; GFX940-LABEL: s_shuffle_v4i32_v3i32__2_5_5_5:
12674; GFX940:       ; %bb.0:
12675; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12676; GFX940-NEXT:    ;;#ASMSTART
12677; GFX940-NEXT:    ; def s[8:10]
12678; GFX940-NEXT:    ;;#ASMEND
12679; GFX940-NEXT:    ;;#ASMSTART
12680; GFX940-NEXT:    ; def s[0:2]
12681; GFX940-NEXT:    ;;#ASMEND
12682; GFX940-NEXT:    s_mov_b32 s8, s2
12683; GFX940-NEXT:    s_mov_b32 s9, s10
12684; GFX940-NEXT:    s_mov_b32 s11, s10
12685; GFX940-NEXT:    ;;#ASMSTART
12686; GFX940-NEXT:    ; use s[8:11]
12687; GFX940-NEXT:    ;;#ASMEND
12688; GFX940-NEXT:    s_setpc_b64 s[30:31]
12689  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12690  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12691  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
12692  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12693  ret void
12694}
12695
; 's'-constraint (scalar register) case, mask <3, 5, 5, 5>.
; Lane 0 = %vec1[0], lanes 1-3 = %vec1[2]. No lane selects from %vec0.
; The expected output defines the single input directly in s[8:10], so
; lanes 0 and 2 are already in place and only s9 and s11 are written.
; All three run lines share one set of expected output here.
12696define void @s_shuffle_v4i32_v3i32__3_5_5_5() {
12697; GFX9-LABEL: s_shuffle_v4i32_v3i32__3_5_5_5:
12698; GFX9:       ; %bb.0:
12699; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12700; GFX9-NEXT:    ;;#ASMSTART
12701; GFX9-NEXT:    ; def s[8:10]
12702; GFX9-NEXT:    ;;#ASMEND
12703; GFX9-NEXT:    s_mov_b32 s9, s10
12704; GFX9-NEXT:    s_mov_b32 s11, s10
12705; GFX9-NEXT:    ;;#ASMSTART
12706; GFX9-NEXT:    ; use s[8:11]
12707; GFX9-NEXT:    ;;#ASMEND
12708; GFX9-NEXT:    s_setpc_b64 s[30:31]
12709  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12710  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12711  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
12712  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12713  ret void
12714}
12715
; 's'-constraint (scalar register) case, mask <4, 5, 5, 5>.
; Lane 0 = %vec1[1], lanes 1-3 = %vec1[2]. No lane selects from %vec0.
; The expected output defines the single input directly in s[8:10] and
; permutes it in place: s8 is loaded from s9 before s9 is overwritten with
; s10, so the order of those two copies matters. s10 is left untouched.
12716define void @s_shuffle_v4i32_v3i32__4_5_5_5() {
12717; GFX9-LABEL: s_shuffle_v4i32_v3i32__4_5_5_5:
12718; GFX9:       ; %bb.0:
12719; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12720; GFX9-NEXT:    ;;#ASMSTART
12721; GFX9-NEXT:    ; def s[8:10]
12722; GFX9-NEXT:    ;;#ASMEND
12723; GFX9-NEXT:    s_mov_b32 s8, s9
12724; GFX9-NEXT:    s_mov_b32 s9, s10
12725; GFX9-NEXT:    s_mov_b32 s11, s10
12726; GFX9-NEXT:    ;;#ASMSTART
12727; GFX9-NEXT:    ; use s[8:11]
12728; GFX9-NEXT:    ;;#ASMEND
12729; GFX9-NEXT:    s_setpc_b64 s[30:31]
12730  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12731  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12732  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
12733  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12734  ret void
12735}
12736
; 's'-constraint (scalar register) case, mask <5, poison, 5, 5>.
; Lanes 0, 2 and 3 = %vec1[2]; lane 1 is unspecified, so s9 is not written.
; No lane selects from %vec0. The expected output defines the single input
; directly in s[8:10], so s10 is already in place and only s8 and s11 are
; written. All three run lines share one set of expected output here.
12737define void @s_shuffle_v4i32_v3i32__5_u_5_5() {
12738; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_u_5_5:
12739; GFX9:       ; %bb.0:
12740; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12741; GFX9-NEXT:    ;;#ASMSTART
12742; GFX9-NEXT:    ; def s[8:10]
12743; GFX9-NEXT:    ;;#ASMEND
12744; GFX9-NEXT:    s_mov_b32 s8, s10
12745; GFX9-NEXT:    s_mov_b32 s11, s10
12746; GFX9-NEXT:    ;;#ASMSTART
12747; GFX9-NEXT:    ; use s[8:11]
12748; GFX9-NEXT:    ;;#ASMEND
12749; GFX9-NEXT:    s_setpc_b64 s[30:31]
12750  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12751  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12752  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
12753  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12754  ret void
12755}
12756
; Scalar-register shuffle with mask <5, 0, 5, 5>. Mask indices 0-2 address
; %vec0 and 3-5 address %vec1, so the result is
; <vec1[2], vec0[0], vec1[2], vec1[2]> and both inputs are live.
; The "{s[8:11]}" constraint pins the result to s[8:11].
12757define void @s_shuffle_v4i32_v3i32__5_0_5_5() {
12758; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_0_5_5:
12759; GFX900:       ; %bb.0:
12760; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12761; GFX900-NEXT:    ;;#ASMSTART
12762; GFX900-NEXT:    ; def s[8:10]
12763; GFX900-NEXT:    ;;#ASMEND
12764; GFX900-NEXT:    ;;#ASMSTART
12765; GFX900-NEXT:    ; def s[4:6]
12766; GFX900-NEXT:    ;;#ASMEND
12767; GFX900-NEXT:    s_mov_b32 s8, s10
12768; GFX900-NEXT:    s_mov_b32 s9, s4
12769; GFX900-NEXT:    s_mov_b32 s11, s10
12770; GFX900-NEXT:    ;;#ASMSTART
12771; GFX900-NEXT:    ; use s[8:11]
12772; GFX900-NEXT:    ;;#ASMEND
12773; GFX900-NEXT:    s_setpc_b64 s[30:31]
12774;
12775; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_0_5_5:
12776; GFX90A:       ; %bb.0:
12777; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12778; GFX90A-NEXT:    ;;#ASMSTART
12779; GFX90A-NEXT:    ; def s[8:10]
12780; GFX90A-NEXT:    ;;#ASMEND
12781; GFX90A-NEXT:    ;;#ASMSTART
12782; GFX90A-NEXT:    ; def s[4:6]
12783; GFX90A-NEXT:    ;;#ASMEND
12784; GFX90A-NEXT:    s_mov_b32 s8, s10
12785; GFX90A-NEXT:    s_mov_b32 s9, s4
12786; GFX90A-NEXT:    s_mov_b32 s11, s10
12787; GFX90A-NEXT:    ;;#ASMSTART
12788; GFX90A-NEXT:    ; use s[8:11]
12789; GFX90A-NEXT:    ;;#ASMEND
12790; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12791;
12792; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_0_5_5:
12793; GFX940:       ; %bb.0:
12794; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12795; GFX940-NEXT:    ;;#ASMSTART
12796; GFX940-NEXT:    ; def s[8:10]
12797; GFX940-NEXT:    ;;#ASMEND
12798; GFX940-NEXT:    ;;#ASMSTART
12799; GFX940-NEXT:    ; def s[0:2]
12800; GFX940-NEXT:    ;;#ASMEND
12801; GFX940-NEXT:    s_mov_b32 s8, s10
12802; GFX940-NEXT:    s_mov_b32 s9, s0
12803; GFX940-NEXT:    s_mov_b32 s11, s10
12804; GFX940-NEXT:    ;;#ASMSTART
12805; GFX940-NEXT:    ; use s[8:11]
12806; GFX940-NEXT:    ;;#ASMEND
12807; GFX940-NEXT:    s_setpc_b64 s[30:31]
12808  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12809  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12810  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
12811  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12812  ret void
12813}
12814
; Scalar-register shuffle with mask <5, 1, 5, 5>. Mask indices 0-2 address
; %vec0 and 3-5 address %vec1, so the result is
; <vec1[2], vec0[1], vec1[2], vec1[2]> and both inputs are live.
; The "{s[8:11]}" constraint pins the result to s[8:11].
12815define void @s_shuffle_v4i32_v3i32__5_1_5_5() {
12816; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_1_5_5:
12817; GFX900:       ; %bb.0:
12818; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12819; GFX900-NEXT:    ;;#ASMSTART
12820; GFX900-NEXT:    ; def s[8:10]
12821; GFX900-NEXT:    ;;#ASMEND
12822; GFX900-NEXT:    ;;#ASMSTART
12823; GFX900-NEXT:    ; def s[4:6]
12824; GFX900-NEXT:    ;;#ASMEND
12825; GFX900-NEXT:    s_mov_b32 s8, s6
12826; GFX900-NEXT:    s_mov_b32 s10, s6
12827; GFX900-NEXT:    s_mov_b32 s11, s6
12828; GFX900-NEXT:    ;;#ASMSTART
12829; GFX900-NEXT:    ; use s[8:11]
12830; GFX900-NEXT:    ;;#ASMEND
12831; GFX900-NEXT:    s_setpc_b64 s[30:31]
12832;
12833; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_1_5_5:
12834; GFX90A:       ; %bb.0:
12835; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12836; GFX90A-NEXT:    ;;#ASMSTART
12837; GFX90A-NEXT:    ; def s[8:10]
12838; GFX90A-NEXT:    ;;#ASMEND
12839; GFX90A-NEXT:    ;;#ASMSTART
12840; GFX90A-NEXT:    ; def s[4:6]
12841; GFX90A-NEXT:    ;;#ASMEND
12842; GFX90A-NEXT:    s_mov_b32 s8, s6
12843; GFX90A-NEXT:    s_mov_b32 s10, s6
12844; GFX90A-NEXT:    s_mov_b32 s11, s6
12845; GFX90A-NEXT:    ;;#ASMSTART
12846; GFX90A-NEXT:    ; use s[8:11]
12847; GFX90A-NEXT:    ;;#ASMEND
12848; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12849;
12850; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_1_5_5:
12851; GFX940:       ; %bb.0:
12852; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12853; GFX940-NEXT:    ;;#ASMSTART
12854; GFX940-NEXT:    ; def s[8:10]
12855; GFX940-NEXT:    ;;#ASMEND
12856; GFX940-NEXT:    ;;#ASMSTART
12857; GFX940-NEXT:    ; def s[0:2]
12858; GFX940-NEXT:    ;;#ASMEND
12859; GFX940-NEXT:    s_mov_b32 s8, s2
12860; GFX940-NEXT:    s_mov_b32 s10, s2
12861; GFX940-NEXT:    s_mov_b32 s11, s2
12862; GFX940-NEXT:    ;;#ASMSTART
12863; GFX940-NEXT:    ; use s[8:11]
12864; GFX940-NEXT:    ;;#ASMEND
12865; GFX940-NEXT:    s_setpc_b64 s[30:31]
12866  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12867  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12868  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
12869  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12870  ret void
12871}
12872
; Scalar-register shuffle with mask <5, 2, 5, 5>. Mask indices 0-2 address
; %vec0 and 3-5 address %vec1, so the result is
; <vec1[2], vec0[2], vec1[2], vec1[2]> and both inputs are live.
; The "{s[8:11]}" constraint pins the result to s[8:11].
12873define void @s_shuffle_v4i32_v3i32__5_2_5_5() {
12874; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_2_5_5:
12875; GFX900:       ; %bb.0:
12876; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12877; GFX900-NEXT:    ;;#ASMSTART
12878; GFX900-NEXT:    ; def s[8:10]
12879; GFX900-NEXT:    ;;#ASMEND
12880; GFX900-NEXT:    ;;#ASMSTART
12881; GFX900-NEXT:    ; def s[4:6]
12882; GFX900-NEXT:    ;;#ASMEND
12883; GFX900-NEXT:    s_mov_b32 s8, s10
12884; GFX900-NEXT:    s_mov_b32 s9, s6
12885; GFX900-NEXT:    s_mov_b32 s11, s10
12886; GFX900-NEXT:    ;;#ASMSTART
12887; GFX900-NEXT:    ; use s[8:11]
12888; GFX900-NEXT:    ;;#ASMEND
12889; GFX900-NEXT:    s_setpc_b64 s[30:31]
12890;
12891; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_2_5_5:
12892; GFX90A:       ; %bb.0:
12893; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12894; GFX90A-NEXT:    ;;#ASMSTART
12895; GFX90A-NEXT:    ; def s[8:10]
12896; GFX90A-NEXT:    ;;#ASMEND
12897; GFX90A-NEXT:    ;;#ASMSTART
12898; GFX90A-NEXT:    ; def s[4:6]
12899; GFX90A-NEXT:    ;;#ASMEND
12900; GFX90A-NEXT:    s_mov_b32 s8, s10
12901; GFX90A-NEXT:    s_mov_b32 s9, s6
12902; GFX90A-NEXT:    s_mov_b32 s11, s10
12903; GFX90A-NEXT:    ;;#ASMSTART
12904; GFX90A-NEXT:    ; use s[8:11]
12905; GFX90A-NEXT:    ;;#ASMEND
12906; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12907;
12908; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_2_5_5:
12909; GFX940:       ; %bb.0:
12910; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12911; GFX940-NEXT:    ;;#ASMSTART
12912; GFX940-NEXT:    ; def s[8:10]
12913; GFX940-NEXT:    ;;#ASMEND
12914; GFX940-NEXT:    ;;#ASMSTART
12915; GFX940-NEXT:    ; def s[0:2]
12916; GFX940-NEXT:    ;;#ASMEND
12917; GFX940-NEXT:    s_mov_b32 s8, s10
12918; GFX940-NEXT:    s_mov_b32 s9, s2
12919; GFX940-NEXT:    s_mov_b32 s11, s10
12920; GFX940-NEXT:    ;;#ASMSTART
12921; GFX940-NEXT:    ; use s[8:11]
12922; GFX940-NEXT:    ;;#ASMEND
12923; GFX940-NEXT:    s_setpc_b64 s[30:31]
12924  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12925  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12926  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
12927  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12928  ret void
12929}
12930
; Scalar-register shuffle with mask <5, 3, 5, 5>. Mask indices 3-5 address
; %vec1, so the result is <vec1[2], vec1[0], vec1[2], vec1[2]> and %vec0 is
; never read. The "{s[8:11]}" constraint pins the result to s[8:11].
12931define void @s_shuffle_v4i32_v3i32__5_3_5_5() {
12932; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_3_5_5:
12933; GFX900:       ; %bb.0:
12934; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12935; GFX900-NEXT:    ;;#ASMSTART
12936; GFX900-NEXT:    ; def s[4:6]
12937; GFX900-NEXT:    ;;#ASMEND
12938; GFX900-NEXT:    s_mov_b32 s8, s6
12939; GFX900-NEXT:    s_mov_b32 s9, s4
12940; GFX900-NEXT:    s_mov_b32 s10, s6
12941; GFX900-NEXT:    s_mov_b32 s11, s6
12942; GFX900-NEXT:    ;;#ASMSTART
12943; GFX900-NEXT:    ; use s[8:11]
12944; GFX900-NEXT:    ;;#ASMEND
12945; GFX900-NEXT:    s_setpc_b64 s[30:31]
12946;
12947; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_3_5_5:
12948; GFX90A:       ; %bb.0:
12949; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12950; GFX90A-NEXT:    ;;#ASMSTART
12951; GFX90A-NEXT:    ; def s[4:6]
12952; GFX90A-NEXT:    ;;#ASMEND
12953; GFX90A-NEXT:    s_mov_b32 s8, s6
12954; GFX90A-NEXT:    s_mov_b32 s9, s4
12955; GFX90A-NEXT:    s_mov_b32 s10, s6
12956; GFX90A-NEXT:    s_mov_b32 s11, s6
12957; GFX90A-NEXT:    ;;#ASMSTART
12958; GFX90A-NEXT:    ; use s[8:11]
12959; GFX90A-NEXT:    ;;#ASMEND
12960; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12961;
12962; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_3_5_5:
12963; GFX940:       ; %bb.0:
12964; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12965; GFX940-NEXT:    ;;#ASMSTART
12966; GFX940-NEXT:    ; def s[0:2]
12967; GFX940-NEXT:    ;;#ASMEND
12968; GFX940-NEXT:    s_mov_b32 s8, s2
12969; GFX940-NEXT:    s_mov_b32 s9, s0
12970; GFX940-NEXT:    s_mov_b32 s10, s2
12971; GFX940-NEXT:    s_mov_b32 s11, s2
12972; GFX940-NEXT:    ;;#ASMSTART
12973; GFX940-NEXT:    ; use s[8:11]
12974; GFX940-NEXT:    ;;#ASMEND
12975; GFX940-NEXT:    s_setpc_b64 s[30:31]
12976  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12977  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12978  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
12979  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
12980  ret void
12981}
12982
; Scalar-register shuffle with mask <5, 4, 5, 5>. Mask indices 3-5 address
; %vec1, so the result is <vec1[2], vec1[1], vec1[2], vec1[2]> and %vec0 is
; never read. The "{s[8:11]}" constraint pins the result to s[8:11].
12983define void @s_shuffle_v4i32_v3i32__5_4_5_5() {
12984; GFX9-LABEL: s_shuffle_v4i32_v3i32__5_4_5_5:
12985; GFX9:       ; %bb.0:
12986; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12987; GFX9-NEXT:    ;;#ASMSTART
12988; GFX9-NEXT:    ; def s[8:10]
12989; GFX9-NEXT:    ;;#ASMEND
12990; GFX9-NEXT:    s_mov_b32 s8, s10
12991; GFX9-NEXT:    s_mov_b32 s11, s10
12992; GFX9-NEXT:    ;;#ASMSTART
12993; GFX9-NEXT:    ; use s[8:11]
12994; GFX9-NEXT:    ;;#ASMEND
12995; GFX9-NEXT:    s_setpc_b64 s[30:31]
12996  %vec0 = call <3 x i32> asm "; def $0", "=s"()
12997  %vec1 = call <3 x i32> asm "; def $0", "=s"()
12998  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
12999  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
13000  ret void
13001}
13002
; Scalar-register shuffle with mask <5, 5, poison, 5>. Lanes 0, 1 and 3 take
; vec1[2] (mask index 5 addresses the last lane of %vec1); lane 2 is a
; don't-care and %vec0 is never read. The result is pinned to s[8:11].
13003define void @s_shuffle_v4i32_v3i32__5_5_u_5() {
13004; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5:
13005; GFX900:       ; %bb.0:
13006; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13007; GFX900-NEXT:    ;;#ASMSTART
13008; GFX900-NEXT:    ; def s[4:6]
13009; GFX900-NEXT:    ;;#ASMEND
13010; GFX900-NEXT:    s_mov_b32 s8, s6
13011; GFX900-NEXT:    s_mov_b32 s9, s6
13012; GFX900-NEXT:    s_mov_b32 s11, s6
13013; GFX900-NEXT:    ;;#ASMSTART
13014; GFX900-NEXT:    ; use s[8:11]
13015; GFX900-NEXT:    ;;#ASMEND
13016; GFX900-NEXT:    s_setpc_b64 s[30:31]
13017;
13018; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5:
13019; GFX90A:       ; %bb.0:
13020; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13021; GFX90A-NEXT:    ;;#ASMSTART
13022; GFX90A-NEXT:    ; def s[4:6]
13023; GFX90A-NEXT:    ;;#ASMEND
13024; GFX90A-NEXT:    s_mov_b32 s8, s6
13025; GFX90A-NEXT:    s_mov_b32 s9, s6
13026; GFX90A-NEXT:    s_mov_b32 s11, s6
13027; GFX90A-NEXT:    ;;#ASMSTART
13028; GFX90A-NEXT:    ; use s[8:11]
13029; GFX90A-NEXT:    ;;#ASMEND
13030; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13031;
13032; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_u_5:
13033; GFX940:       ; %bb.0:
13034; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13035; GFX940-NEXT:    ;;#ASMSTART
13036; GFX940-NEXT:    ; def s[0:2]
13037; GFX940-NEXT:    ;;#ASMEND
13038; GFX940-NEXT:    s_mov_b32 s8, s2
13039; GFX940-NEXT:    s_mov_b32 s9, s2
13040; GFX940-NEXT:    s_mov_b32 s11, s2
13041; GFX940-NEXT:    ;;#ASMSTART
13042; GFX940-NEXT:    ; use s[8:11]
13043; GFX940-NEXT:    ;;#ASMEND
13044; GFX940-NEXT:    s_setpc_b64 s[30:31]
13045  %vec0 = call <3 x i32> asm "; def $0", "=s"()
13046  %vec1 = call <3 x i32> asm "; def $0", "=s"()
13047  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
13048  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
13049  ret void
13050}
13051
; Scalar-register shuffle with mask <5, 5, 0, 5>. Mask indices 0-2 address
; %vec0 and 3-5 address %vec1, so the result is
; <vec1[2], vec1[2], vec0[0], vec1[2]> and both inputs are live.
; The "{s[8:11]}" constraint pins the result to s[8:11].
13052define void @s_shuffle_v4i32_v3i32__5_5_0_5() {
13053; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_0_5:
13054; GFX900:       ; %bb.0:
13055; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13056; GFX900-NEXT:    ;;#ASMSTART
13057; GFX900-NEXT:    ; def s[4:6]
13058; GFX900-NEXT:    ;;#ASMEND
13059; GFX900-NEXT:    ;;#ASMSTART
13060; GFX900-NEXT:    ; def s[12:14]
13061; GFX900-NEXT:    ;;#ASMEND
13062; GFX900-NEXT:    s_mov_b32 s8, s14
13063; GFX900-NEXT:    s_mov_b32 s9, s14
13064; GFX900-NEXT:    s_mov_b32 s10, s4
13065; GFX900-NEXT:    s_mov_b32 s11, s14
13066; GFX900-NEXT:    ;;#ASMSTART
13067; GFX900-NEXT:    ; use s[8:11]
13068; GFX900-NEXT:    ;;#ASMEND
13069; GFX900-NEXT:    s_setpc_b64 s[30:31]
13070;
13071; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_0_5:
13072; GFX90A:       ; %bb.0:
13073; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13074; GFX90A-NEXT:    ;;#ASMSTART
13075; GFX90A-NEXT:    ; def s[4:6]
13076; GFX90A-NEXT:    ;;#ASMEND
13077; GFX90A-NEXT:    ;;#ASMSTART
13078; GFX90A-NEXT:    ; def s[12:14]
13079; GFX90A-NEXT:    ;;#ASMEND
13080; GFX90A-NEXT:    s_mov_b32 s8, s14
13081; GFX90A-NEXT:    s_mov_b32 s9, s14
13082; GFX90A-NEXT:    s_mov_b32 s10, s4
13083; GFX90A-NEXT:    s_mov_b32 s11, s14
13084; GFX90A-NEXT:    ;;#ASMSTART
13085; GFX90A-NEXT:    ; use s[8:11]
13086; GFX90A-NEXT:    ;;#ASMEND
13087; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13088;
13089; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_0_5:
13090; GFX940:       ; %bb.0:
13091; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13092; GFX940-NEXT:    ;;#ASMSTART
13093; GFX940-NEXT:    ; def s[0:2]
13094; GFX940-NEXT:    ;;#ASMEND
13095; GFX940-NEXT:    ;;#ASMSTART
13096; GFX940-NEXT:    ; def s[4:6]
13097; GFX940-NEXT:    ;;#ASMEND
13098; GFX940-NEXT:    s_mov_b32 s8, s6
13099; GFX940-NEXT:    s_mov_b32 s9, s6
13100; GFX940-NEXT:    s_mov_b32 s10, s0
13101; GFX940-NEXT:    s_mov_b32 s11, s6
13102; GFX940-NEXT:    ;;#ASMSTART
13103; GFX940-NEXT:    ; use s[8:11]
13104; GFX940-NEXT:    ;;#ASMEND
13105; GFX940-NEXT:    s_setpc_b64 s[30:31]
13106  %vec0 = call <3 x i32> asm "; def $0", "=s"()
13107  %vec1 = call <3 x i32> asm "; def $0", "=s"()
13108  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
13109  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
13110  ret void
13111}
13112
; Scalar-register shuffle with mask <5, 5, 1, 5>. Mask indices 0-2 address
; %vec0 and 3-5 address %vec1, so the result is
; <vec1[2], vec1[2], vec0[1], vec1[2]> and both inputs are live.
; The "{s[8:11]}" constraint pins the result to s[8:11].
13113define void @s_shuffle_v4i32_v3i32__5_5_1_5() {
13114; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_1_5:
13115; GFX900:       ; %bb.0:
13116; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13117; GFX900-NEXT:    ;;#ASMSTART
13118; GFX900-NEXT:    ; def s[4:6]
13119; GFX900-NEXT:    ;;#ASMEND
13120; GFX900-NEXT:    ;;#ASMSTART
13121; GFX900-NEXT:    ; def s[12:14]
13122; GFX900-NEXT:    ;;#ASMEND
13123; GFX900-NEXT:    s_mov_b32 s8, s14
13124; GFX900-NEXT:    s_mov_b32 s9, s14
13125; GFX900-NEXT:    s_mov_b32 s10, s5
13126; GFX900-NEXT:    s_mov_b32 s11, s14
13127; GFX900-NEXT:    ;;#ASMSTART
13128; GFX900-NEXT:    ; use s[8:11]
13129; GFX900-NEXT:    ;;#ASMEND
13130; GFX900-NEXT:    s_setpc_b64 s[30:31]
13131;
13132; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_1_5:
13133; GFX90A:       ; %bb.0:
13134; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13135; GFX90A-NEXT:    ;;#ASMSTART
13136; GFX90A-NEXT:    ; def s[4:6]
13137; GFX90A-NEXT:    ;;#ASMEND
13138; GFX90A-NEXT:    ;;#ASMSTART
13139; GFX90A-NEXT:    ; def s[12:14]
13140; GFX90A-NEXT:    ;;#ASMEND
13141; GFX90A-NEXT:    s_mov_b32 s8, s14
13142; GFX90A-NEXT:    s_mov_b32 s9, s14
13143; GFX90A-NEXT:    s_mov_b32 s10, s5
13144; GFX90A-NEXT:    s_mov_b32 s11, s14
13145; GFX90A-NEXT:    ;;#ASMSTART
13146; GFX90A-NEXT:    ; use s[8:11]
13147; GFX90A-NEXT:    ;;#ASMEND
13148; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13149;
13150; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_1_5:
13151; GFX940:       ; %bb.0:
13152; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13153; GFX940-NEXT:    ;;#ASMSTART
13154; GFX940-NEXT:    ; def s[0:2]
13155; GFX940-NEXT:    ;;#ASMEND
13156; GFX940-NEXT:    ;;#ASMSTART
13157; GFX940-NEXT:    ; def s[4:6]
13158; GFX940-NEXT:    ;;#ASMEND
13159; GFX940-NEXT:    s_mov_b32 s8, s6
13160; GFX940-NEXT:    s_mov_b32 s9, s6
13161; GFX940-NEXT:    s_mov_b32 s10, s1
13162; GFX940-NEXT:    s_mov_b32 s11, s6
13163; GFX940-NEXT:    ;;#ASMSTART
13164; GFX940-NEXT:    ; use s[8:11]
13165; GFX940-NEXT:    ;;#ASMEND
13166; GFX940-NEXT:    s_setpc_b64 s[30:31]
13167  %vec0 = call <3 x i32> asm "; def $0", "=s"()
13168  %vec1 = call <3 x i32> asm "; def $0", "=s"()
13169  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
13170  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
13171  ret void
13172}
13173
; Scalar-register shuffle with mask <5, 5, 2, 5>. Mask indices 0-2 address
; %vec0 and 3-5 address %vec1, so the result is
; <vec1[2], vec1[2], vec0[2], vec1[2]> and both inputs are live.
; The "{s[8:11]}" constraint pins the result to s[8:11].
13174define void @s_shuffle_v4i32_v3i32__5_5_2_5() {
13175; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_2_5:
13176; GFX900:       ; %bb.0:
13177; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13178; GFX900-NEXT:    ;;#ASMSTART
13179; GFX900-NEXT:    ; def s[8:10]
13180; GFX900-NEXT:    ;;#ASMEND
13181; GFX900-NEXT:    ;;#ASMSTART
13182; GFX900-NEXT:    ; def s[4:6]
13183; GFX900-NEXT:    ;;#ASMEND
13184; GFX900-NEXT:    s_mov_b32 s8, s6
13185; GFX900-NEXT:    s_mov_b32 s9, s6
13186; GFX900-NEXT:    s_mov_b32 s11, s6
13187; GFX900-NEXT:    ;;#ASMSTART
13188; GFX900-NEXT:    ; use s[8:11]
13189; GFX900-NEXT:    ;;#ASMEND
13190; GFX900-NEXT:    s_setpc_b64 s[30:31]
13191;
13192; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_2_5:
13193; GFX90A:       ; %bb.0:
13194; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13195; GFX90A-NEXT:    ;;#ASMSTART
13196; GFX90A-NEXT:    ; def s[8:10]
13197; GFX90A-NEXT:    ;;#ASMEND
13198; GFX90A-NEXT:    ;;#ASMSTART
13199; GFX90A-NEXT:    ; def s[4:6]
13200; GFX90A-NEXT:    ;;#ASMEND
13201; GFX90A-NEXT:    s_mov_b32 s8, s6
13202; GFX90A-NEXT:    s_mov_b32 s9, s6
13203; GFX90A-NEXT:    s_mov_b32 s11, s6
13204; GFX90A-NEXT:    ;;#ASMSTART
13205; GFX90A-NEXT:    ; use s[8:11]
13206; GFX90A-NEXT:    ;;#ASMEND
13207; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13208;
13209; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_2_5:
13210; GFX940:       ; %bb.0:
13211; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13212; GFX940-NEXT:    ;;#ASMSTART
13213; GFX940-NEXT:    ; def s[8:10]
13214; GFX940-NEXT:    ;;#ASMEND
13215; GFX940-NEXT:    ;;#ASMSTART
13216; GFX940-NEXT:    ; def s[0:2]
13217; GFX940-NEXT:    ;;#ASMEND
13218; GFX940-NEXT:    s_mov_b32 s8, s2
13219; GFX940-NEXT:    s_mov_b32 s9, s2
13220; GFX940-NEXT:    s_mov_b32 s11, s2
13221; GFX940-NEXT:    ;;#ASMSTART
13222; GFX940-NEXT:    ; use s[8:11]
13223; GFX940-NEXT:    ;;#ASMEND
13224; GFX940-NEXT:    s_setpc_b64 s[30:31]
13225  %vec0 = call <3 x i32> asm "; def $0", "=s"()
13226  %vec1 = call <3 x i32> asm "; def $0", "=s"()
13227  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
13228  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
13229  ret void
13230}
13231
; Scalar-register shuffle with mask <5, 5, 3, 5>. Mask indices 3-5 address
; %vec1, so the result is <vec1[2], vec1[2], vec1[0], vec1[2]> and %vec0 is
; never read. The "{s[8:11]}" constraint pins the result to s[8:11].
13232define void @s_shuffle_v4i32_v3i32__5_5_3_5() {
13233; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_3_5:
13234; GFX900:       ; %bb.0:
13235; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13236; GFX900-NEXT:    ;;#ASMSTART
13237; GFX900-NEXT:    ; def s[4:6]
13238; GFX900-NEXT:    ;;#ASMEND
13239; GFX900-NEXT:    s_mov_b32 s8, s6
13240; GFX900-NEXT:    s_mov_b32 s9, s6
13241; GFX900-NEXT:    s_mov_b32 s10, s4
13242; GFX900-NEXT:    s_mov_b32 s11, s6
13243; GFX900-NEXT:    ;;#ASMSTART
13244; GFX900-NEXT:    ; use s[8:11]
13245; GFX900-NEXT:    ;;#ASMEND
13246; GFX900-NEXT:    s_setpc_b64 s[30:31]
13247;
13248; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_3_5:
13249; GFX90A:       ; %bb.0:
13250; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13251; GFX90A-NEXT:    ;;#ASMSTART
13252; GFX90A-NEXT:    ; def s[4:6]
13253; GFX90A-NEXT:    ;;#ASMEND
13254; GFX90A-NEXT:    s_mov_b32 s8, s6
13255; GFX90A-NEXT:    s_mov_b32 s9, s6
13256; GFX90A-NEXT:    s_mov_b32 s10, s4
13257; GFX90A-NEXT:    s_mov_b32 s11, s6
13258; GFX90A-NEXT:    ;;#ASMSTART
13259; GFX90A-NEXT:    ; use s[8:11]
13260; GFX90A-NEXT:    ;;#ASMEND
13261; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13262;
13263; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_3_5:
13264; GFX940:       ; %bb.0:
13265; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13266; GFX940-NEXT:    ;;#ASMSTART
13267; GFX940-NEXT:    ; def s[0:2]
13268; GFX940-NEXT:    ;;#ASMEND
13269; GFX940-NEXT:    s_mov_b32 s8, s2
13270; GFX940-NEXT:    s_mov_b32 s9, s2
13271; GFX940-NEXT:    s_mov_b32 s10, s0
13272; GFX940-NEXT:    s_mov_b32 s11, s2
13273; GFX940-NEXT:    ;;#ASMSTART
13274; GFX940-NEXT:    ; use s[8:11]
13275; GFX940-NEXT:    ;;#ASMEND
13276; GFX940-NEXT:    s_setpc_b64 s[30:31]
13277  %vec0 = call <3 x i32> asm "; def $0", "=s"()
13278  %vec1 = call <3 x i32> asm "; def $0", "=s"()
13279  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
13280  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
13281  ret void
13282}
13283
; Scalar-register shuffle with mask <5, 5, 4, 5>. Mask indices 3-5 address
; %vec1, so the result is <vec1[2], vec1[2], vec1[1], vec1[2]> and %vec0 is
; never read. The "{s[8:11]}" constraint pins the result to s[8:11].
13284define void @s_shuffle_v4i32_v3i32__5_5_4_5() {
13285; GFX900-LABEL: s_shuffle_v4i32_v3i32__5_5_4_5:
13286; GFX900:       ; %bb.0:
13287; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13288; GFX900-NEXT:    ;;#ASMSTART
13289; GFX900-NEXT:    ; def s[4:6]
13290; GFX900-NEXT:    ;;#ASMEND
13291; GFX900-NEXT:    s_mov_b32 s8, s6
13292; GFX900-NEXT:    s_mov_b32 s9, s6
13293; GFX900-NEXT:    s_mov_b32 s10, s5
13294; GFX900-NEXT:    s_mov_b32 s11, s6
13295; GFX900-NEXT:    ;;#ASMSTART
13296; GFX900-NEXT:    ; use s[8:11]
13297; GFX900-NEXT:    ;;#ASMEND
13298; GFX900-NEXT:    s_setpc_b64 s[30:31]
13299;
13300; GFX90A-LABEL: s_shuffle_v4i32_v3i32__5_5_4_5:
13301; GFX90A:       ; %bb.0:
13302; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13303; GFX90A-NEXT:    ;;#ASMSTART
13304; GFX90A-NEXT:    ; def s[4:6]
13305; GFX90A-NEXT:    ;;#ASMEND
13306; GFX90A-NEXT:    s_mov_b32 s8, s6
13307; GFX90A-NEXT:    s_mov_b32 s9, s6
13308; GFX90A-NEXT:    s_mov_b32 s10, s5
13309; GFX90A-NEXT:    s_mov_b32 s11, s6
13310; GFX90A-NEXT:    ;;#ASMSTART
13311; GFX90A-NEXT:    ; use s[8:11]
13312; GFX90A-NEXT:    ;;#ASMEND
13313; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13314;
13315; GFX940-LABEL: s_shuffle_v4i32_v3i32__5_5_4_5:
13316; GFX940:       ; %bb.0:
13317; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13318; GFX940-NEXT:    ;;#ASMSTART
13319; GFX940-NEXT:    ; def s[0:2]
13320; GFX940-NEXT:    ;;#ASMEND
13321; GFX940-NEXT:    s_mov_b32 s8, s2
13322; GFX940-NEXT:    s_mov_b32 s9, s2
13323; GFX940-NEXT:    s_mov_b32 s10, s1
13324; GFX940-NEXT:    s_mov_b32 s11, s2
13325; GFX940-NEXT:    ;;#ASMSTART
13326; GFX940-NEXT:    ; use s[8:11]
13327; GFX940-NEXT:    ;;#ASMEND
13328; GFX940-NEXT:    s_setpc_b64 s[30:31]
13329  %vec0 = call <3 x i32> asm "; def $0", "=s"()
13330  %vec1 = call <3 x i32> asm "; def $0", "=s"()
13331  %shuf = shufflevector <3 x i32> %vec0, <3 x i32> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
13332  call void asm sideeffect "; use $0", "{s[8:11]}"(<4 x i32> %shuf)
13333  ret void
13334}
13335;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
13336; GFX90APLUS: {{.*}}
13337