xref: /llvm-project/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v3i16.ll (revision 585858aeb6247b3892218edb9d353c63f1c33186)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
5
6
; Shuffle whose whole mask is poison: no lane of the stored value is defined.
; The assertions below expect only the entry s_waitcnt and the return, with
; no inline-asm block and no store instruction in the output.
7define void @v_shuffle_v4i16_v3i16__u_u_u_u(ptr addrspace(1) inreg %ptr) {
8; GFX9-LABEL: v_shuffle_v4i16_v3i16__u_u_u_u:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    s_setpc_b64 s[30:31]
12  %vec0 = call <4 x i16> asm "; def $0", "=v"()
13  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
14  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> poison
15  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
16  ret void
17}
18
; Result lane 0 takes element 0 of %extract3 (the first three elements of
; %vec0); lanes 1-3 are poison. The assertions expect the register pair
; defined by the inline asm to be stored as-is, with no lane-moving
; instruction between the asm block and the store.
19define void @v_shuffle_v4i16_v3i16__0_u_u_u(ptr addrspace(1) inreg %ptr) {
20; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_u_u_u:
21; GFX900:       ; %bb.0:
22; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GFX900-NEXT:    v_mov_b32_e32 v2, 0
24; GFX900-NEXT:    ;;#ASMSTART
25; GFX900-NEXT:    ; def v[0:1]
26; GFX900-NEXT:    ;;#ASMEND
27; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
28; GFX900-NEXT:    s_waitcnt vmcnt(0)
29; GFX900-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_u_u_u:
32; GFX90A:       ; %bb.0:
33; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
35; GFX90A-NEXT:    ;;#ASMSTART
36; GFX90A-NEXT:    ; def v[0:1]
37; GFX90A-NEXT:    ;;#ASMEND
38; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
39; GFX90A-NEXT:    s_waitcnt vmcnt(0)
40; GFX90A-NEXT:    s_setpc_b64 s[30:31]
41;
42; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_u_u_u:
43; GFX940:       ; %bb.0:
44; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GFX940-NEXT:    v_mov_b32_e32 v2, 0
46; GFX940-NEXT:    ;;#ASMSTART
47; GFX940-NEXT:    ; def v[0:1]
48; GFX940-NEXT:    ;;#ASMEND
49; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
50; GFX940-NEXT:    s_waitcnt vmcnt(0)
51; GFX940-NEXT:    s_setpc_b64 s[30:31]
52  %vec0 = call <4 x i16> asm "; def $0", "=v"()
53  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
54  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
55  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
56  ret void
57}
58
; Result lane 0 takes element 1 of %extract3 (the first three elements of
; %vec0); lanes 1-3 are poison. The assertions expect one v_alignbit_b32 with
; a shift amount of 16 on v0 between the inline-asm def and the store.
59define void @v_shuffle_v4i16_v3i16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
60; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_u_u_u:
61; GFX900:       ; %bb.0:
62; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63; GFX900-NEXT:    ;;#ASMSTART
64; GFX900-NEXT:    ; def v[0:1]
65; GFX900-NEXT:    ;;#ASMEND
66; GFX900-NEXT:    v_mov_b32_e32 v2, 0
67; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
68; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
69; GFX900-NEXT:    s_waitcnt vmcnt(0)
70; GFX900-NEXT:    s_setpc_b64 s[30:31]
71;
72; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_u_u_u:
73; GFX90A:       ; %bb.0:
74; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX90A-NEXT:    ;;#ASMSTART
76; GFX90A-NEXT:    ; def v[0:1]
77; GFX90A-NEXT:    ;;#ASMEND
78; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
79; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
80; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
81; GFX90A-NEXT:    s_waitcnt vmcnt(0)
82; GFX90A-NEXT:    s_setpc_b64 s[30:31]
83;
84; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_u_u_u:
85; GFX940:       ; %bb.0:
86; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87; GFX940-NEXT:    ;;#ASMSTART
88; GFX940-NEXT:    ; def v[0:1]
89; GFX940-NEXT:    ;;#ASMEND
90; GFX940-NEXT:    v_mov_b32_e32 v2, 0
91; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
92; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
93; GFX940-NEXT:    s_waitcnt vmcnt(0)
94; GFX940-NEXT:    s_setpc_b64 s[30:31]
95  %vec0 = call <4 x i16> asm "; def $0", "=v"()
96  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
97  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
98  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
99  ret void
100}
101
; Result lane 0 takes element 2 of %extract3 (the first three elements of
; %vec0); lanes 1-3 are poison. The assertions expect a plain v_mov_b32 copy
; of v1 into v0 between the inline-asm def and the store.
102define void @v_shuffle_v4i16_v3i16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
103; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u:
104; GFX900:       ; %bb.0:
105; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX900-NEXT:    ;;#ASMSTART
107; GFX900-NEXT:    ; def v[0:1]
108; GFX900-NEXT:    ;;#ASMEND
109; GFX900-NEXT:    v_mov_b32_e32 v2, 0
110; GFX900-NEXT:    v_mov_b32_e32 v0, v1
111; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
112; GFX900-NEXT:    s_waitcnt vmcnt(0)
113; GFX900-NEXT:    s_setpc_b64 s[30:31]
114;
115; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u:
116; GFX90A:       ; %bb.0:
117; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118; GFX90A-NEXT:    ;;#ASMSTART
119; GFX90A-NEXT:    ; def v[0:1]
120; GFX90A-NEXT:    ;;#ASMEND
121; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
122; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
123; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
124; GFX90A-NEXT:    s_waitcnt vmcnt(0)
125; GFX90A-NEXT:    s_setpc_b64 s[30:31]
126;
127; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_u_u_u:
128; GFX940:       ; %bb.0:
129; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; GFX940-NEXT:    ;;#ASMSTART
131; GFX940-NEXT:    ; def v[0:1]
132; GFX940-NEXT:    ;;#ASMEND
133; GFX940-NEXT:    v_mov_b32_e32 v2, 0
134; GFX940-NEXT:    v_mov_b32_e32 v0, v1
135; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
136; GFX940-NEXT:    s_waitcnt vmcnt(0)
137; GFX940-NEXT:    s_setpc_b64 s[30:31]
138  %vec0 = call <4 x i16> asm "; def $0", "=v"()
139  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
140  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
141  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
142  ret void
143}
144
; Mask index 3 names element 0 of the second shuffle operand, and that operand
; is poison in this function, so no lane of the result is defined. The
; assertions expect only the entry s_waitcnt and the return, as in the
; all-poison-mask case.
145define void @v_shuffle_v4i16_v3i16__3_u_u_u(ptr addrspace(1) inreg %ptr) {
146; GFX9-LABEL: v_shuffle_v4i16_v3i16__3_u_u_u:
147; GFX9:       ; %bb.0:
148; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149; GFX9-NEXT:    s_setpc_b64 s[30:31]
150  %vec0 = call <4 x i16> asm "; def $0", "=v"()
151  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
152  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
153  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
154  ret void
155}
156
; Mask index 4 names element 1 of the second shuffle operand, %extract31 (the
; first three elements of %vec1); lanes 1-3 are poison. The IR has two
; inline-asm defs but the assertions expect only one asm block in the output
; (presumably because %vec0 is never read), followed by a v_alignbit_b32 with
; a shift amount of 16.
157define void @v_shuffle_v4i16_v3i16__4_u_u_u(ptr addrspace(1) inreg %ptr) {
158; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_u_u_u:
159; GFX900:       ; %bb.0:
160; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161; GFX900-NEXT:    ;;#ASMSTART
162; GFX900-NEXT:    ; def v[0:1]
163; GFX900-NEXT:    ;;#ASMEND
164; GFX900-NEXT:    v_mov_b32_e32 v2, 0
165; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
166; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
167; GFX900-NEXT:    s_waitcnt vmcnt(0)
168; GFX900-NEXT:    s_setpc_b64 s[30:31]
169;
170; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_u_u_u:
171; GFX90A:       ; %bb.0:
172; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX90A-NEXT:    ;;#ASMSTART
174; GFX90A-NEXT:    ; def v[0:1]
175; GFX90A-NEXT:    ;;#ASMEND
176; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
177; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
178; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
179; GFX90A-NEXT:    s_waitcnt vmcnt(0)
180; GFX90A-NEXT:    s_setpc_b64 s[30:31]
181;
182; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_u_u_u:
183; GFX940:       ; %bb.0:
184; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185; GFX940-NEXT:    ;;#ASMSTART
186; GFX940-NEXT:    ; def v[0:1]
187; GFX940-NEXT:    ;;#ASMEND
188; GFX940-NEXT:    v_mov_b32_e32 v2, 0
189; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
190; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
191; GFX940-NEXT:    s_waitcnt vmcnt(0)
192; GFX940-NEXT:    s_setpc_b64 s[30:31]
193  %vec0 = call <4 x i16> asm "; def $0", "=v"()
194  %vec1 = call <4 x i16> asm "; def $0", "=v"()
195  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
196  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
197  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
198  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
199  ret void
200}
201
; Mask index 5 names element 2 of the second shuffle operand, %extract31 (the
; first three elements of %vec1); lanes 1-3 are poison. The assertions expect
; a single asm block in the output (presumably because %vec0 is never read)
; and a v_mov_b32 copy of v1 into v0 before the store.
202define void @v_shuffle_v4i16_v3i16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
203; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u:
204; GFX900:       ; %bb.0:
205; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX900-NEXT:    ;;#ASMSTART
207; GFX900-NEXT:    ; def v[0:1]
208; GFX900-NEXT:    ;;#ASMEND
209; GFX900-NEXT:    v_mov_b32_e32 v2, 0
210; GFX900-NEXT:    v_mov_b32_e32 v0, v1
211; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
212; GFX900-NEXT:    s_waitcnt vmcnt(0)
213; GFX900-NEXT:    s_setpc_b64 s[30:31]
214;
215; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u:
216; GFX90A:       ; %bb.0:
217; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218; GFX90A-NEXT:    ;;#ASMSTART
219; GFX90A-NEXT:    ; def v[0:1]
220; GFX90A-NEXT:    ;;#ASMEND
221; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
222; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
223; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
224; GFX90A-NEXT:    s_waitcnt vmcnt(0)
225; GFX90A-NEXT:    s_setpc_b64 s[30:31]
226;
227; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_u_u:
228; GFX940:       ; %bb.0:
229; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230; GFX940-NEXT:    ;;#ASMSTART
231; GFX940-NEXT:    ; def v[0:1]
232; GFX940-NEXT:    ;;#ASMEND
233; GFX940-NEXT:    v_mov_b32_e32 v2, 0
234; GFX940-NEXT:    v_mov_b32_e32 v0, v1
235; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
236; GFX940-NEXT:    s_waitcnt vmcnt(0)
237; GFX940-NEXT:    s_setpc_b64 s[30:31]
238  %vec0 = call <4 x i16> asm "; def $0", "=v"()
239  %vec1 = call <4 x i16> asm "; def $0", "=v"()
240  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
241  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
242  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
243  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
244  ret void
245}
246
; Lane 0 takes element 2 of %extract31 (from %vec1) and lane 1 takes element 0
; of %extract3 (from %vec0); lanes 2-3 are poison. Both inline-asm defs are
; expected in the output, and the two halves are expected to be combined by a
; single v_perm_b32 whose selector is the constant 0x5040100.
247define void @v_shuffle_v4i16_v3i16__5_0_u_u(ptr addrspace(1) inreg %ptr) {
248; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_u_u:
249; GFX900:       ; %bb.0:
250; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251; GFX900-NEXT:    ;;#ASMSTART
252; GFX900-NEXT:    ; def v[0:1]
253; GFX900-NEXT:    ;;#ASMEND
254; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
255; GFX900-NEXT:    v_mov_b32_e32 v3, 0
256; GFX900-NEXT:    ;;#ASMSTART
257; GFX900-NEXT:    ; def v[1:2]
258; GFX900-NEXT:    ;;#ASMEND
259; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
260; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
261; GFX900-NEXT:    s_waitcnt vmcnt(0)
262; GFX900-NEXT:    s_setpc_b64 s[30:31]
263;
264; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_u_u:
265; GFX90A:       ; %bb.0:
266; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267; GFX90A-NEXT:    ;;#ASMSTART
268; GFX90A-NEXT:    ; def v[0:1]
269; GFX90A-NEXT:    ;;#ASMEND
270; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
271; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
272; GFX90A-NEXT:    ;;#ASMSTART
273; GFX90A-NEXT:    ; def v[2:3]
274; GFX90A-NEXT:    ;;#ASMEND
275; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
276; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
277; GFX90A-NEXT:    s_waitcnt vmcnt(0)
278; GFX90A-NEXT:    s_setpc_b64 s[30:31]
279;
280; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_u_u:
281; GFX940:       ; %bb.0:
282; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283; GFX940-NEXT:    ;;#ASMSTART
284; GFX940-NEXT:    ; def v[0:1]
285; GFX940-NEXT:    ;;#ASMEND
286; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
287; GFX940-NEXT:    v_mov_b32_e32 v4, 0
288; GFX940-NEXT:    ;;#ASMSTART
289; GFX940-NEXT:    ; def v[2:3]
290; GFX940-NEXT:    ;;#ASMEND
291; GFX940-NEXT:    s_nop 0
292; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
293; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
294; GFX940-NEXT:    s_waitcnt vmcnt(0)
295; GFX940-NEXT:    s_setpc_b64 s[30:31]
296  %vec0 = call <4 x i16> asm "; def $0", "=v"()
297  %vec1 = call <4 x i16> asm "; def $0", "=v"()
298  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
299  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
300  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
301  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
302  ret void
303}
304
; Lane 0 takes element 2 of %extract31 (from %vec1) and lane 1 takes element 1
; of %extract3 (from %vec0); lanes 2-3 are poison. Both inline-asm defs are
; expected in the output, and the two halves are expected to be merged by a
; single v_bfi_b32 using the mask constant 0xffff.
305define void @v_shuffle_v4i16_v3i16__5_1_u_u(ptr addrspace(1) inreg %ptr) {
306; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_u_u:
307; GFX900:       ; %bb.0:
308; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309; GFX900-NEXT:    ;;#ASMSTART
310; GFX900-NEXT:    ; def v[0:1]
311; GFX900-NEXT:    ;;#ASMEND
312; GFX900-NEXT:    s_mov_b32 s4, 0xffff
313; GFX900-NEXT:    v_mov_b32_e32 v3, 0
314; GFX900-NEXT:    ;;#ASMSTART
315; GFX900-NEXT:    ; def v[1:2]
316; GFX900-NEXT:    ;;#ASMEND
317; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
318; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
319; GFX900-NEXT:    s_waitcnt vmcnt(0)
320; GFX900-NEXT:    s_setpc_b64 s[30:31]
321;
322; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_u_u:
323; GFX90A:       ; %bb.0:
324; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
325; GFX90A-NEXT:    ;;#ASMSTART
326; GFX90A-NEXT:    ; def v[0:1]
327; GFX90A-NEXT:    ;;#ASMEND
328; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
329; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
330; GFX90A-NEXT:    ;;#ASMSTART
331; GFX90A-NEXT:    ; def v[2:3]
332; GFX90A-NEXT:    ;;#ASMEND
333; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
334; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
335; GFX90A-NEXT:    s_waitcnt vmcnt(0)
336; GFX90A-NEXT:    s_setpc_b64 s[30:31]
337;
338; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_u_u:
339; GFX940:       ; %bb.0:
340; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341; GFX940-NEXT:    ;;#ASMSTART
342; GFX940-NEXT:    ; def v[0:1]
343; GFX940-NEXT:    ;;#ASMEND
344; GFX940-NEXT:    s_mov_b32 s2, 0xffff
345; GFX940-NEXT:    v_mov_b32_e32 v4, 0
346; GFX940-NEXT:    ;;#ASMSTART
347; GFX940-NEXT:    ; def v[2:3]
348; GFX940-NEXT:    ;;#ASMEND
349; GFX940-NEXT:    s_nop 0
350; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
351; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
352; GFX940-NEXT:    s_waitcnt vmcnt(0)
353; GFX940-NEXT:    s_setpc_b64 s[30:31]
354  %vec0 = call <4 x i16> asm "; def $0", "=v"()
355  %vec1 = call <4 x i16> asm "; def $0", "=v"()
356  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
357  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
358  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
359  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
360  ret void
361}
362
; Lane 0 takes element 2 of %extract31 (from %vec1) and lane 1 takes element 2
; of %extract3 (from %vec0); lanes 2-3 are poison. Both inline-asm defs are
; expected in the output, and the upper register of each pair (v1 and v3) is
; expected to feed a single v_perm_b32 with selector 0x5040100.
363define void @v_shuffle_v4i16_v3i16__5_2_u_u(ptr addrspace(1) inreg %ptr) {
364; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_u_u:
365; GFX900:       ; %bb.0:
366; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
367; GFX900-NEXT:    ;;#ASMSTART
368; GFX900-NEXT:    ; def v[0:1]
369; GFX900-NEXT:    ;;#ASMEND
370; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
371; GFX900-NEXT:    v_mov_b32_e32 v4, 0
372; GFX900-NEXT:    ;;#ASMSTART
373; GFX900-NEXT:    ; def v[2:3]
374; GFX900-NEXT:    ;;#ASMEND
375; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
376; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
377; GFX900-NEXT:    s_waitcnt vmcnt(0)
378; GFX900-NEXT:    s_setpc_b64 s[30:31]
379;
380; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_u_u:
381; GFX90A:       ; %bb.0:
382; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383; GFX90A-NEXT:    ;;#ASMSTART
384; GFX90A-NEXT:    ; def v[0:1]
385; GFX90A-NEXT:    ;;#ASMEND
386; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
387; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
388; GFX90A-NEXT:    ;;#ASMSTART
389; GFX90A-NEXT:    ; def v[2:3]
390; GFX90A-NEXT:    ;;#ASMEND
391; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
392; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
393; GFX90A-NEXT:    s_waitcnt vmcnt(0)
394; GFX90A-NEXT:    s_setpc_b64 s[30:31]
395;
396; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_u_u:
397; GFX940:       ; %bb.0:
398; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399; GFX940-NEXT:    ;;#ASMSTART
400; GFX940-NEXT:    ; def v[0:1]
401; GFX940-NEXT:    ;;#ASMEND
402; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
403; GFX940-NEXT:    v_mov_b32_e32 v4, 0
404; GFX940-NEXT:    ;;#ASMSTART
405; GFX940-NEXT:    ; def v[2:3]
406; GFX940-NEXT:    ;;#ASMEND
407; GFX940-NEXT:    s_nop 0
408; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
409; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
410; GFX940-NEXT:    s_waitcnt vmcnt(0)
411; GFX940-NEXT:    s_setpc_b64 s[30:31]
412  %vec0 = call <4 x i16> asm "; def $0", "=v"()
413  %vec1 = call <4 x i16> asm "; def $0", "=v"()
414  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
415  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
416  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
417  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
418  ret void
419}
420
; Lane 0 takes element 2 and lane 1 takes element 0 of %extract31 (mask
; indices 5 and 3 both fall in the second operand, derived from %vec1);
; lanes 2-3 are poison. The assertions expect a single asm block in the
; output (presumably because %vec0 is never read) and one v_perm_b32 that
; reads both registers of that pair.
421define void @v_shuffle_v4i16_v3i16__5_3_u_u(ptr addrspace(1) inreg %ptr) {
422; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_u_u:
423; GFX900:       ; %bb.0:
424; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425; GFX900-NEXT:    ;;#ASMSTART
426; GFX900-NEXT:    ; def v[0:1]
427; GFX900-NEXT:    ;;#ASMEND
428; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
429; GFX900-NEXT:    v_mov_b32_e32 v2, 0
430; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
431; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
432; GFX900-NEXT:    s_waitcnt vmcnt(0)
433; GFX900-NEXT:    s_setpc_b64 s[30:31]
434;
435; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_u_u:
436; GFX90A:       ; %bb.0:
437; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438; GFX90A-NEXT:    ;;#ASMSTART
439; GFX90A-NEXT:    ; def v[0:1]
440; GFX90A-NEXT:    ;;#ASMEND
441; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
442; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
443; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
444; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
445; GFX90A-NEXT:    s_waitcnt vmcnt(0)
446; GFX90A-NEXT:    s_setpc_b64 s[30:31]
447;
448; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_u_u:
449; GFX940:       ; %bb.0:
450; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
451; GFX940-NEXT:    ;;#ASMSTART
452; GFX940-NEXT:    ; def v[0:1]
453; GFX940-NEXT:    ;;#ASMEND
454; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
455; GFX940-NEXT:    v_mov_b32_e32 v2, 0
456; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
457; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
458; GFX940-NEXT:    s_waitcnt vmcnt(0)
459; GFX940-NEXT:    s_setpc_b64 s[30:31]
460  %vec0 = call <4 x i16> asm "; def $0", "=v"()
461  %vec1 = call <4 x i16> asm "; def $0", "=v"()
462  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
463  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
464  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
465  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
466  ret void
467}
468
; Lane 0 takes element 2 and lane 1 takes element 1 of %extract31 (mask
; indices 5 and 4 both fall in the second operand, derived from %vec1);
; lanes 2-3 are poison. The assertions expect a single asm block in the
; output (presumably because %vec0 is never read) and one v_bfi_b32 using
; the mask constant 0xffff.
469define void @v_shuffle_v4i16_v3i16__5_4_u_u(ptr addrspace(1) inreg %ptr) {
470; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_u_u:
471; GFX900:       ; %bb.0:
472; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473; GFX900-NEXT:    ;;#ASMSTART
474; GFX900-NEXT:    ; def v[0:1]
475; GFX900-NEXT:    ;;#ASMEND
476; GFX900-NEXT:    s_mov_b32 s4, 0xffff
477; GFX900-NEXT:    v_mov_b32_e32 v2, 0
478; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v0
479; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
480; GFX900-NEXT:    s_waitcnt vmcnt(0)
481; GFX900-NEXT:    s_setpc_b64 s[30:31]
482;
483; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_u_u:
484; GFX90A:       ; %bb.0:
485; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486; GFX90A-NEXT:    ;;#ASMSTART
487; GFX90A-NEXT:    ; def v[0:1]
488; GFX90A-NEXT:    ;;#ASMEND
489; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
490; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
491; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v0
492; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
493; GFX90A-NEXT:    s_waitcnt vmcnt(0)
494; GFX90A-NEXT:    s_setpc_b64 s[30:31]
495;
496; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_u_u:
497; GFX940:       ; %bb.0:
498; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499; GFX940-NEXT:    ;;#ASMSTART
500; GFX940-NEXT:    ; def v[0:1]
501; GFX940-NEXT:    ;;#ASMEND
502; GFX940-NEXT:    s_mov_b32 s2, 0xffff
503; GFX940-NEXT:    v_mov_b32_e32 v2, 0
504; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v0
505; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
506; GFX940-NEXT:    s_waitcnt vmcnt(0)
507; GFX940-NEXT:    s_setpc_b64 s[30:31]
508  %vec0 = call <4 x i16> asm "; def $0", "=v"()
509  %vec1 = call <4 x i16> asm "; def $0", "=v"()
510  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
511  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
512  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
513  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
514  ret void
515}
516
; Lanes 0 and 1 both take element 2 of %extract31 (from %vec1); lanes 2-3 are
; poison. The assertions expect a single asm block in the output (presumably
; because %vec0 is never read) and one v_perm_b32 that uses v1 for both of
; its data operands, with selector 0x5040100.
517define void @v_shuffle_v4i16_v3i16__5_5_u_u(ptr addrspace(1) inreg %ptr) {
518; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_u:
519; GFX900:       ; %bb.0:
520; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521; GFX900-NEXT:    ;;#ASMSTART
522; GFX900-NEXT:    ; def v[0:1]
523; GFX900-NEXT:    ;;#ASMEND
524; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
525; GFX900-NEXT:    v_mov_b32_e32 v2, 0
526; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
527; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
528; GFX900-NEXT:    s_waitcnt vmcnt(0)
529; GFX900-NEXT:    s_setpc_b64 s[30:31]
530;
531; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_u:
532; GFX90A:       ; %bb.0:
533; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
534; GFX90A-NEXT:    ;;#ASMSTART
535; GFX90A-NEXT:    ; def v[0:1]
536; GFX90A-NEXT:    ;;#ASMEND
537; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
538; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
539; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
540; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
541; GFX90A-NEXT:    s_waitcnt vmcnt(0)
542; GFX90A-NEXT:    s_setpc_b64 s[30:31]
543;
544; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_u:
545; GFX940:       ; %bb.0:
546; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547; GFX940-NEXT:    ;;#ASMSTART
548; GFX940-NEXT:    ; def v[0:1]
549; GFX940-NEXT:    ;;#ASMEND
550; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
551; GFX940-NEXT:    v_mov_b32_e32 v2, 0
552; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
553; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
554; GFX940-NEXT:    s_waitcnt vmcnt(0)
555; GFX940-NEXT:    s_setpc_b64 s[30:31]
556  %vec0 = call <4 x i16> asm "; def $0", "=v"()
557  %vec1 = call <4 x i16> asm "; def $0", "=v"()
558  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
559  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
560  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
561  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
562  ret void
563}
564
; Lanes 0 and 1 both take element 2 of %extract31 (from %vec1), lane 2 takes
; element 0 of %extract3 (from %vec0), and lane 3 is poison. The assertions
; expect both asm blocks, a v_perm_b32 producing the first stored register,
; and a v_mov_b32 copy of v0 into the second stored register.
565define void @v_shuffle_v4i16_v3i16__5_5_0_u(ptr addrspace(1) inreg %ptr) {
566; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_u:
567; GFX900:       ; %bb.0:
568; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
569; GFX900-NEXT:    ;;#ASMSTART
570; GFX900-NEXT:    ; def v[0:1]
571; GFX900-NEXT:    ;;#ASMEND
572; GFX900-NEXT:    ;;#ASMSTART
573; GFX900-NEXT:    ; def v[1:2]
574; GFX900-NEXT:    ;;#ASMEND
575; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
576; GFX900-NEXT:    v_mov_b32_e32 v3, 0
577; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
578; GFX900-NEXT:    v_mov_b32_e32 v2, v0
579; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
580; GFX900-NEXT:    s_waitcnt vmcnt(0)
581; GFX900-NEXT:    s_setpc_b64 s[30:31]
582;
583; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_u:
584; GFX90A:       ; %bb.0:
585; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586; GFX90A-NEXT:    ;;#ASMSTART
587; GFX90A-NEXT:    ; def v[2:3]
588; GFX90A-NEXT:    ;;#ASMEND
589; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
590; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
591; GFX90A-NEXT:    ;;#ASMSTART
592; GFX90A-NEXT:    ; def v[0:1]
593; GFX90A-NEXT:    ;;#ASMEND
594; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
595; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
596; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
597; GFX90A-NEXT:    s_waitcnt vmcnt(0)
598; GFX90A-NEXT:    s_setpc_b64 s[30:31]
599;
600; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_u:
601; GFX940:       ; %bb.0:
602; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
603; GFX940-NEXT:    ;;#ASMSTART
604; GFX940-NEXT:    ; def v[2:3]
605; GFX940-NEXT:    ;;#ASMEND
606; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
607; GFX940-NEXT:    v_mov_b32_e32 v4, 0
608; GFX940-NEXT:    ;;#ASMSTART
609; GFX940-NEXT:    ; def v[0:1]
610; GFX940-NEXT:    ;;#ASMEND
611; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
612; GFX940-NEXT:    v_mov_b32_e32 v3, v0
613; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
614; GFX940-NEXT:    s_waitcnt vmcnt(0)
615; GFX940-NEXT:    s_setpc_b64 s[30:31]
616  %vec0 = call <4 x i16> asm "; def $0", "=v"()
617  %vec1 = call <4 x i16> asm "; def $0", "=v"()
618  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
619  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
620  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
621  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
622  ret void
623}
624
; Lanes 0 and 1 both take element 2 of %extract31 (from %vec1), lane 2 takes
; element 1 of %extract3 (from %vec0), and lane 3 is poison. The assertions
; expect both asm blocks, a v_alignbit_b32 with a shift amount of 16 writing
; v1, and a v_perm_b32 writing v0; the relative order of these instructions
; differs between the three targets.
625define void @v_shuffle_v4i16_v3i16__5_5_1_u(ptr addrspace(1) inreg %ptr) {
626; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_u:
627; GFX900:       ; %bb.0:
628; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629; GFX900-NEXT:    ;;#ASMSTART
630; GFX900-NEXT:    ; def v[0:1]
631; GFX900-NEXT:    ;;#ASMEND
632; GFX900-NEXT:    ;;#ASMSTART
633; GFX900-NEXT:    ; def v[1:2]
634; GFX900-NEXT:    ;;#ASMEND
635; GFX900-NEXT:    v_alignbit_b32 v1, s4, v0, 16
636; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
637; GFX900-NEXT:    v_mov_b32_e32 v3, 0
638; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
639; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
640; GFX900-NEXT:    s_waitcnt vmcnt(0)
641; GFX900-NEXT:    s_setpc_b64 s[30:31]
642;
643; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_u:
644; GFX90A:       ; %bb.0:
645; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; GFX90A-NEXT:    ;;#ASMSTART
647; GFX90A-NEXT:    ; def v[0:1]
648; GFX90A-NEXT:    ;;#ASMEND
649; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v0, 16
650; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
651; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
652; GFX90A-NEXT:    ;;#ASMSTART
653; GFX90A-NEXT:    ; def v[2:3]
654; GFX90A-NEXT:    ;;#ASMEND
655; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
656; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
657; GFX90A-NEXT:    s_waitcnt vmcnt(0)
658; GFX90A-NEXT:    s_setpc_b64 s[30:31]
659;
660; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_u:
661; GFX940:       ; %bb.0:
662; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
663; GFX940-NEXT:    ;;#ASMSTART
664; GFX940-NEXT:    ; def v[0:1]
665; GFX940-NEXT:    ;;#ASMEND
666; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
667; GFX940-NEXT:    v_mov_b32_e32 v4, 0
668; GFX940-NEXT:    ;;#ASMSTART
669; GFX940-NEXT:    ; def v[2:3]
670; GFX940-NEXT:    ;;#ASMEND
671; GFX940-NEXT:    v_alignbit_b32 v1, s0, v0, 16
672; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
673; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
674; GFX940-NEXT:    s_waitcnt vmcnt(0)
675; GFX940-NEXT:    s_setpc_b64 s[30:31]
676  %vec0 = call <4 x i16> asm "; def $0", "=v"()
677  %vec1 = call <4 x i16> asm "; def $0", "=v"()
678  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
679  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
680  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
681  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
682  ret void
683}
684
; Lanes 0 and 1 both take element 2 of %extract31 (from %vec1), lane 2 takes
; element 2 of %extract3 (from %vec0), and lane 3 is poison. The assertions
; expect both asm blocks and a single v_perm_b32 writing v0; v1 from the first
; asm def is stored unchanged as the second register of the pair.
685define void @v_shuffle_v4i16_v3i16__5_5_2_u(ptr addrspace(1) inreg %ptr) {
686; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_u:
687; GFX900:       ; %bb.0:
688; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
689; GFX900-NEXT:    ;;#ASMSTART
690; GFX900-NEXT:    ; def v[0:1]
691; GFX900-NEXT:    ;;#ASMEND
692; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
693; GFX900-NEXT:    v_mov_b32_e32 v4, 0
694; GFX900-NEXT:    ;;#ASMSTART
695; GFX900-NEXT:    ; def v[2:3]
696; GFX900-NEXT:    ;;#ASMEND
697; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
698; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
699; GFX900-NEXT:    s_waitcnt vmcnt(0)
700; GFX900-NEXT:    s_setpc_b64 s[30:31]
701;
702; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_u:
703; GFX90A:       ; %bb.0:
704; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
705; GFX90A-NEXT:    ;;#ASMSTART
706; GFX90A-NEXT:    ; def v[0:1]
707; GFX90A-NEXT:    ;;#ASMEND
708; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
709; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
710; GFX90A-NEXT:    ;;#ASMSTART
711; GFX90A-NEXT:    ; def v[2:3]
712; GFX90A-NEXT:    ;;#ASMEND
713; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
714; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
715; GFX90A-NEXT:    s_waitcnt vmcnt(0)
716; GFX90A-NEXT:    s_setpc_b64 s[30:31]
717;
718; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_u:
719; GFX940:       ; %bb.0:
720; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
721; GFX940-NEXT:    ;;#ASMSTART
722; GFX940-NEXT:    ; def v[0:1]
723; GFX940-NEXT:    ;;#ASMEND
724; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
725; GFX940-NEXT:    v_mov_b32_e32 v4, 0
726; GFX940-NEXT:    ;;#ASMSTART
727; GFX940-NEXT:    ; def v[2:3]
728; GFX940-NEXT:    ;;#ASMEND
729; GFX940-NEXT:    s_nop 0
730; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
731; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
732; GFX940-NEXT:    s_waitcnt vmcnt(0)
733; GFX940-NEXT:    s_setpc_b64 s[30:31]
734  %vec0 = call <4 x i16> asm "; def $0", "=v"()
735  %vec1 = call <4 x i16> asm "; def $0", "=v"()
736  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
737  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
738  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
739  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
740  ret void
741}
742
; Shuffle test, mask <5, 5, 3, poison>. Mask indices 0-2 address the first
; operand (%extract3) and 3-5 the second (%extract31). Result lanes 0 and 1 read
; index 5 (lane 2 of %extract31), lane 2 reads index 3 (lane 0 of %extract31),
; and lane 3 is poison. No mask index refers to %extract3, and the expected
; output below contains a single "; def" block.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
743define void @v_shuffle_v4i16_v3i16__5_5_3_u(ptr addrspace(1) inreg %ptr) {
744; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_u:
745; GFX900:       ; %bb.0:
746; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
747; GFX900-NEXT:    ;;#ASMSTART
748; GFX900-NEXT:    ; def v[0:1]
749; GFX900-NEXT:    ;;#ASMEND
750; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
751; GFX900-NEXT:    v_mov_b32_e32 v3, 0
752; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
753; GFX900-NEXT:    v_mov_b32_e32 v2, v0
754; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
755; GFX900-NEXT:    s_waitcnt vmcnt(0)
756; GFX900-NEXT:    s_setpc_b64 s[30:31]
757;
758; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_u:
759; GFX90A:       ; %bb.0:
760; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
761; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
762; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
763; GFX90A-NEXT:    ;;#ASMSTART
764; GFX90A-NEXT:    ; def v[0:1]
765; GFX90A-NEXT:    ;;#ASMEND
766; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
767; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
768; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
769; GFX90A-NEXT:    s_waitcnt vmcnt(0)
770; GFX90A-NEXT:    s_setpc_b64 s[30:31]
771;
772; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_u:
773; GFX940:       ; %bb.0:
774; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
775; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
776; GFX940-NEXT:    v_mov_b32_e32 v4, 0
777; GFX940-NEXT:    ;;#ASMSTART
778; GFX940-NEXT:    ; def v[0:1]
779; GFX940-NEXT:    ;;#ASMEND
780; GFX940-NEXT:    s_nop 0
781; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
782; GFX940-NEXT:    v_mov_b32_e32 v3, v0
783; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
784; GFX940-NEXT:    s_waitcnt vmcnt(0)
785; GFX940-NEXT:    s_setpc_b64 s[30:31]
786  %vec0 = call <4 x i16> asm "; def $0", "=v"()
787  %vec1 = call <4 x i16> asm "; def $0", "=v"()
788  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
789  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
790  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
791  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
792  ret void
793}
794
; Shuffle test, mask <5, 5, 4, poison>. Mask indices 0-2 address the first
; operand (%extract3) and 3-5 the second (%extract31). Result lanes 0 and 1 read
; index 5 (lane 2 of %extract31), lane 2 reads index 4 (lane 1 of %extract31),
; and lane 3 is poison. No mask index refers to %extract3.
; NOTE(review): the expected v_alignbit_b32 reads an SGPR (s4 / s0) that this
; function has not written; presumably that operand only supplies the poison
; lane 3 - confirm before treating it as a codegen bug.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
795define void @v_shuffle_v4i16_v3i16__5_5_4_u(ptr addrspace(1) inreg %ptr) {
796; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_u:
797; GFX900:       ; %bb.0:
798; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
799; GFX900-NEXT:    ;;#ASMSTART
800; GFX900-NEXT:    ; def v[0:1]
801; GFX900-NEXT:    ;;#ASMEND
802; GFX900-NEXT:    v_alignbit_b32 v2, s4, v0, 16
803; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
804; GFX900-NEXT:    v_mov_b32_e32 v3, 0
805; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
806; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
807; GFX900-NEXT:    s_waitcnt vmcnt(0)
808; GFX900-NEXT:    s_setpc_b64 s[30:31]
809;
810; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_u:
811; GFX90A:       ; %bb.0:
812; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
813; GFX90A-NEXT:    ;;#ASMSTART
814; GFX90A-NEXT:    ; def v[0:1]
815; GFX90A-NEXT:    ;;#ASMEND
816; GFX90A-NEXT:    v_alignbit_b32 v3, s4, v0, 16
817; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
818; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
819; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
820; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
821; GFX90A-NEXT:    s_waitcnt vmcnt(0)
822; GFX90A-NEXT:    s_setpc_b64 s[30:31]
823;
824; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_u:
825; GFX940:       ; %bb.0:
826; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
827; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
828; GFX940-NEXT:    v_mov_b32_e32 v4, 0
829; GFX940-NEXT:    ;;#ASMSTART
830; GFX940-NEXT:    ; def v[0:1]
831; GFX940-NEXT:    ;;#ASMEND
832; GFX940-NEXT:    s_nop 0
833; GFX940-NEXT:    v_alignbit_b32 v3, s0, v0, 16
834; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
835; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
836; GFX940-NEXT:    s_waitcnt vmcnt(0)
837; GFX940-NEXT:    s_setpc_b64 s[30:31]
838  %vec0 = call <4 x i16> asm "; def $0", "=v"()
839  %vec1 = call <4 x i16> asm "; def $0", "=v"()
840  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
841  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
842  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
843  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
844  ret void
845}
846
; Shuffle test, mask <5, 5, 5, poison>. Mask indices 0-2 address the first
; operand (%extract3) and 3-5 the second (%extract31). Result lanes 0-2 all read
; index 5 (lane 2 of %extract31) and lane 3 is poison. No mask index refers to
; %extract3.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
847define void @v_shuffle_v4i16_v3i16__5_5_5_u(ptr addrspace(1) inreg %ptr) {
848; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_u:
849; GFX900:       ; %bb.0:
850; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
851; GFX900-NEXT:    ;;#ASMSTART
852; GFX900-NEXT:    ; def v[0:1]
853; GFX900-NEXT:    ;;#ASMEND
854; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
855; GFX900-NEXT:    v_mov_b32_e32 v2, 0
856; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
857; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
858; GFX900-NEXT:    s_waitcnt vmcnt(0)
859; GFX900-NEXT:    s_setpc_b64 s[30:31]
860;
861; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_u:
862; GFX90A:       ; %bb.0:
863; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864; GFX90A-NEXT:    ;;#ASMSTART
865; GFX90A-NEXT:    ; def v[0:1]
866; GFX90A-NEXT:    ;;#ASMEND
867; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
868; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
869; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
870; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
871; GFX90A-NEXT:    s_waitcnt vmcnt(0)
872; GFX90A-NEXT:    s_setpc_b64 s[30:31]
873;
874; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_u:
875; GFX940:       ; %bb.0:
876; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
877; GFX940-NEXT:    ;;#ASMSTART
878; GFX940-NEXT:    ; def v[0:1]
879; GFX940-NEXT:    ;;#ASMEND
880; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
881; GFX940-NEXT:    v_mov_b32_e32 v2, 0
882; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
883; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
884; GFX940-NEXT:    s_waitcnt vmcnt(0)
885; GFX940-NEXT:    s_setpc_b64 s[30:31]
886  %vec0 = call <4 x i16> asm "; def $0", "=v"()
887  %vec1 = call <4 x i16> asm "; def $0", "=v"()
888  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
889  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
890  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
891  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
892  ret void
893}
894
; Shuffle test, mask <5, 5, 5, 0>. Mask indices 0-2 address the first operand
; (%extract3) and 3-5 the second (%extract31). Result lanes 0-2 all read index 5
; (lane 2 of %extract31) and lane 3 reads index 0 (lane 0 of %extract3), so
; both inline-asm values are needed and two "; def" blocks are expected.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
895define void @v_shuffle_v4i16_v3i16__5_5_5_0(ptr addrspace(1) inreg %ptr) {
896; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_0:
897; GFX900:       ; %bb.0:
898; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
899; GFX900-NEXT:    ;;#ASMSTART
900; GFX900-NEXT:    ; def v[0:1]
901; GFX900-NEXT:    ;;#ASMEND
902; GFX900-NEXT:    ;;#ASMSTART
903; GFX900-NEXT:    ; def v[1:2]
904; GFX900-NEXT:    ;;#ASMEND
905; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
906; GFX900-NEXT:    v_mov_b32_e32 v3, 0
907; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
908; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
909; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
910; GFX900-NEXT:    s_waitcnt vmcnt(0)
911; GFX900-NEXT:    s_setpc_b64 s[30:31]
912;
913; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_0:
914; GFX90A:       ; %bb.0:
915; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
916; GFX90A-NEXT:    ;;#ASMSTART
917; GFX90A-NEXT:    ; def v[0:1]
918; GFX90A-NEXT:    ;;#ASMEND
919; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
920; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
921; GFX90A-NEXT:    ;;#ASMSTART
922; GFX90A-NEXT:    ; def v[2:3]
923; GFX90A-NEXT:    ;;#ASMEND
924; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
925; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
926; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
927; GFX90A-NEXT:    s_waitcnt vmcnt(0)
928; GFX90A-NEXT:    s_setpc_b64 s[30:31]
929;
930; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_0:
931; GFX940:       ; %bb.0:
932; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
933; GFX940-NEXT:    ;;#ASMSTART
934; GFX940-NEXT:    ; def v[0:1]
935; GFX940-NEXT:    ;;#ASMEND
936; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
937; GFX940-NEXT:    v_mov_b32_e32 v4, 0
938; GFX940-NEXT:    ;;#ASMSTART
939; GFX940-NEXT:    ; def v[2:3]
940; GFX940-NEXT:    ;;#ASMEND
941; GFX940-NEXT:    s_nop 0
942; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
943; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
944; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
945; GFX940-NEXT:    s_waitcnt vmcnt(0)
946; GFX940-NEXT:    s_setpc_b64 s[30:31]
947  %vec0 = call <4 x i16> asm "; def $0", "=v"()
948  %vec1 = call <4 x i16> asm "; def $0", "=v"()
949  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
950  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
951  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 0>
952  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
953  ret void
954}
955
; Shuffle test, mask <5, 5, 5, 1>. Mask indices 0-2 address the first operand
; (%extract3) and 3-5 the second (%extract31). Result lanes 0-2 all read index 5
; (lane 2 of %extract31) and lane 3 reads index 1 (lane 1 of %extract3), so
; both inline-asm values are needed and two "; def" blocks are expected.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
956define void @v_shuffle_v4i16_v3i16__5_5_5_1(ptr addrspace(1) inreg %ptr) {
957; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_1:
958; GFX900:       ; %bb.0:
959; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
960; GFX900-NEXT:    ;;#ASMSTART
961; GFX900-NEXT:    ; def v[0:1]
962; GFX900-NEXT:    ;;#ASMEND
963; GFX900-NEXT:    ;;#ASMSTART
964; GFX900-NEXT:    ; def v[1:2]
965; GFX900-NEXT:    ;;#ASMEND
966; GFX900-NEXT:    s_mov_b32 s4, 0xffff
967; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v0
968; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
969; GFX900-NEXT:    v_mov_b32_e32 v3, 0
970; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
971; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
972; GFX900-NEXT:    s_waitcnt vmcnt(0)
973; GFX900-NEXT:    s_setpc_b64 s[30:31]
974;
975; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_1:
976; GFX90A:       ; %bb.0:
977; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
978; GFX90A-NEXT:    ;;#ASMSTART
979; GFX90A-NEXT:    ; def v[0:1]
980; GFX90A-NEXT:    ;;#ASMEND
981; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
982; GFX90A-NEXT:    ;;#ASMSTART
983; GFX90A-NEXT:    ; def v[2:3]
984; GFX90A-NEXT:    ;;#ASMEND
985; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v0
986; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
987; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
988; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
989; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
990; GFX90A-NEXT:    s_waitcnt vmcnt(0)
991; GFX90A-NEXT:    s_setpc_b64 s[30:31]
992;
993; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_1:
994; GFX940:       ; %bb.0:
995; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
996; GFX940-NEXT:    ;;#ASMSTART
997; GFX940-NEXT:    ; def v[0:1]
998; GFX940-NEXT:    ;;#ASMEND
999; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1000; GFX940-NEXT:    ;;#ASMSTART
1001; GFX940-NEXT:    ; def v[2:3]
1002; GFX940-NEXT:    ;;#ASMEND
1003; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1004; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v0
1005; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1006; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
1007; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1008; GFX940-NEXT:    s_waitcnt vmcnt(0)
1009; GFX940-NEXT:    s_setpc_b64 s[30:31]
1010  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1011  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1012  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1013  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1014  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
1015  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1016  ret void
1017}
1018
; Shuffle test, mask <5, 5, 5, 2>. Mask indices 0-2 address the first operand
; (%extract3) and 3-5 the second (%extract31). Result lanes 0-2 all read index 5
; (lane 2 of %extract31) and lane 3 reads index 2 (lane 2 of %extract3), so
; both inline-asm values are needed and two "; def" blocks are expected.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
1019define void @v_shuffle_v4i16_v3i16__5_5_5_2(ptr addrspace(1) inreg %ptr) {
1020; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_2:
1021; GFX900:       ; %bb.0:
1022; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1023; GFX900-NEXT:    ;;#ASMSTART
1024; GFX900-NEXT:    ; def v[0:1]
1025; GFX900-NEXT:    ;;#ASMEND
1026; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1027; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1028; GFX900-NEXT:    ;;#ASMSTART
1029; GFX900-NEXT:    ; def v[2:3]
1030; GFX900-NEXT:    ;;#ASMEND
1031; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
1032; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
1033; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1034; GFX900-NEXT:    s_waitcnt vmcnt(0)
1035; GFX900-NEXT:    s_setpc_b64 s[30:31]
1036;
1037; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_2:
1038; GFX90A:       ; %bb.0:
1039; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1040; GFX90A-NEXT:    ;;#ASMSTART
1041; GFX90A-NEXT:    ; def v[0:1]
1042; GFX90A-NEXT:    ;;#ASMEND
1043; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1044; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1045; GFX90A-NEXT:    ;;#ASMSTART
1046; GFX90A-NEXT:    ; def v[2:3]
1047; GFX90A-NEXT:    ;;#ASMEND
1048; GFX90A-NEXT:    v_perm_b32 v1, v1, v3, s4
1049; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
1050; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1051; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1052; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1053;
1054; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_2:
1055; GFX940:       ; %bb.0:
1056; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057; GFX940-NEXT:    ;;#ASMSTART
1058; GFX940-NEXT:    ; def v[0:1]
1059; GFX940-NEXT:    ;;#ASMEND
1060; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1061; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1062; GFX940-NEXT:    ;;#ASMSTART
1063; GFX940-NEXT:    ; def v[2:3]
1064; GFX940-NEXT:    ;;#ASMEND
1065; GFX940-NEXT:    s_nop 0
1066; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s2
1067; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
1068; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1069; GFX940-NEXT:    s_waitcnt vmcnt(0)
1070; GFX940-NEXT:    s_setpc_b64 s[30:31]
1071  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1072  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1073  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1074  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1075  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
1076  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1077  ret void
1078}
1079
; Shuffle test, mask <5, 5, 5, 3>. Mask indices 0-2 address the first operand
; (%extract3) and 3-5 the second (%extract31). Result lanes 0-2 all read index 5
; (lane 2 of %extract31) and lane 3 reads index 3 (lane 0 of %extract31). No
; mask index refers to %extract3, and the expected output below contains a
; single "; def" block.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
1080define void @v_shuffle_v4i16_v3i16__5_5_5_3(ptr addrspace(1) inreg %ptr) {
1081; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_3:
1082; GFX900:       ; %bb.0:
1083; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1084; GFX900-NEXT:    ;;#ASMSTART
1085; GFX900-NEXT:    ; def v[0:1]
1086; GFX900-NEXT:    ;;#ASMEND
1087; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1088; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1089; GFX900-NEXT:    v_perm_b32 v2, v0, v1, s4
1090; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1091; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1092; GFX900-NEXT:    s_waitcnt vmcnt(0)
1093; GFX900-NEXT:    s_setpc_b64 s[30:31]
1094;
1095; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_3:
1096; GFX90A:       ; %bb.0:
1097; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1098; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1099; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1100; GFX90A-NEXT:    ;;#ASMSTART
1101; GFX90A-NEXT:    ; def v[0:1]
1102; GFX90A-NEXT:    ;;#ASMEND
1103; GFX90A-NEXT:    v_perm_b32 v3, v0, v1, s4
1104; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
1105; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1106; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1107; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1108;
1109; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_3:
1110; GFX940:       ; %bb.0:
1111; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1112; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1113; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1114; GFX940-NEXT:    ;;#ASMSTART
1115; GFX940-NEXT:    ; def v[0:1]
1116; GFX940-NEXT:    ;;#ASMEND
1117; GFX940-NEXT:    s_nop 0
1118; GFX940-NEXT:    v_perm_b32 v3, v0, v1, s2
1119; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
1120; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1121; GFX940-NEXT:    s_waitcnt vmcnt(0)
1122; GFX940-NEXT:    s_setpc_b64 s[30:31]
1123  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1124  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1125  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1126  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1127  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
1128  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1129  ret void
1130}
1131
; Shuffle test, mask <5, 5, 5, 4>. Mask indices 0-2 address the first operand
; (%extract3) and 3-5 the second (%extract31). Result lanes 0-2 all read index 5
; (lane 2 of %extract31) and lane 3 reads index 4 (lane 1 of %extract31). No
; mask index refers to %extract3, and the expected output below contains a
; single "; def" block.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
1132define void @v_shuffle_v4i16_v3i16__5_5_5_4(ptr addrspace(1) inreg %ptr) {
1133; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_4:
1134; GFX900:       ; %bb.0:
1135; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1136; GFX900-NEXT:    s_mov_b32 s4, 0xffff
1137; GFX900-NEXT:    ;;#ASMSTART
1138; GFX900-NEXT:    ; def v[0:1]
1139; GFX900-NEXT:    ;;#ASMEND
1140; GFX900-NEXT:    v_bfi_b32 v2, s4, v1, v0
1141; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1142; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1143; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1144; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1145; GFX900-NEXT:    s_waitcnt vmcnt(0)
1146; GFX900-NEXT:    s_setpc_b64 s[30:31]
1147;
1148; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_4:
1149; GFX90A:       ; %bb.0:
1150; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1151; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
1152; GFX90A-NEXT:    ;;#ASMSTART
1153; GFX90A-NEXT:    ; def v[0:1]
1154; GFX90A-NEXT:    ;;#ASMEND
1155; GFX90A-NEXT:    v_bfi_b32 v3, s4, v1, v0
1156; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1157; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1158; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
1159; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1160; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1161; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1162;
1163; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_4:
1164; GFX940:       ; %bb.0:
1165; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1166; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1167; GFX940-NEXT:    ;;#ASMSTART
1168; GFX940-NEXT:    ; def v[0:1]
1169; GFX940-NEXT:    ;;#ASMEND
1170; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1171; GFX940-NEXT:    v_bfi_b32 v3, s2, v1, v0
1172; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1173; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
1174; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1175; GFX940-NEXT:    s_waitcnt vmcnt(0)
1176; GFX940-NEXT:    s_setpc_b64 s[30:31]
1177  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1178  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1179  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1180  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1181  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
1182  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1183  ret void
1184}
1185
; Shuffle test, mask <5, 5, 5, 5>. Mask indices 0-2 address the first operand
; (%extract3) and 3-5 the second (%extract31). All four result lanes read
; index 5 (lane 2 of %extract31), i.e. a splat of that element. No mask index
; refers to %extract3, and the expected output below contains a single
; "; def" block.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
1186define void @v_shuffle_v4i16_v3i16__5_5_5_5(ptr addrspace(1) inreg %ptr) {
1187; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_5_5:
1188; GFX900:       ; %bb.0:
1189; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1190; GFX900-NEXT:    ;;#ASMSTART
1191; GFX900-NEXT:    ; def v[0:1]
1192; GFX900-NEXT:    ;;#ASMEND
1193; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1194; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
1195; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1196; GFX900-NEXT:    v_mov_b32_e32 v1, v0
1197; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1198; GFX900-NEXT:    s_waitcnt vmcnt(0)
1199; GFX900-NEXT:    s_setpc_b64 s[30:31]
1200;
1201; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_5_5:
1202; GFX90A:       ; %bb.0:
1203; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1204; GFX90A-NEXT:    ;;#ASMSTART
1205; GFX90A-NEXT:    ; def v[0:1]
1206; GFX90A-NEXT:    ;;#ASMEND
1207; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1208; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
1209; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1210; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
1211; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1212; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1213; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1214;
1215; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_5_5:
1216; GFX940:       ; %bb.0:
1217; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1218; GFX940-NEXT:    ;;#ASMSTART
1219; GFX940-NEXT:    ; def v[0:1]
1220; GFX940-NEXT:    ;;#ASMEND
1221; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1222; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
1223; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1224; GFX940-NEXT:    v_mov_b32_e32 v1, v0
1225; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1226; GFX940-NEXT:    s_waitcnt vmcnt(0)
1227; GFX940-NEXT:    s_setpc_b64 s[30:31]
1228  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1229  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1230  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1231  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1232  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
1233  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1234  ret void
1235}
1236
; Single-input shuffle test, mask <poison, 0, 0, 0>. The second shufflevector
; operand is poison, so only %extract3 (the low three lanes of the inline-asm
; value %vec0) supplies data. Result lane 0 is poison (the "u" in the function
; name) and lanes 1-3 all read index 0 (lane 0 of %extract3).
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
1237define void @v_shuffle_v4i16_v3i16__u_0_0_0(ptr addrspace(1) inreg %ptr) {
1238; GFX900-LABEL: v_shuffle_v4i16_v3i16__u_0_0_0:
1239; GFX900:       ; %bb.0:
1240; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241; GFX900-NEXT:    ;;#ASMSTART
1242; GFX900-NEXT:    ; def v[0:1]
1243; GFX900-NEXT:    ;;#ASMEND
1244; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1245; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1246; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1247; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1248; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1249; GFX900-NEXT:    s_waitcnt vmcnt(0)
1250; GFX900-NEXT:    s_setpc_b64 s[30:31]
1251;
1252; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_0_0_0:
1253; GFX90A:       ; %bb.0:
1254; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255; GFX90A-NEXT:    ;;#ASMSTART
1256; GFX90A-NEXT:    ; def v[0:1]
1257; GFX90A-NEXT:    ;;#ASMEND
1258; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1259; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1260; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1261; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1262; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1263; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1264; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1265;
1266; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_0_0_0:
1267; GFX940:       ; %bb.0:
1268; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1269; GFX940-NEXT:    ;;#ASMSTART
1270; GFX940-NEXT:    ; def v[0:1]
1271; GFX940-NEXT:    ;;#ASMEND
1272; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1273; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1274; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1275; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1276; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1277; GFX940-NEXT:    s_waitcnt vmcnt(0)
1278; GFX940-NEXT:    s_setpc_b64 s[30:31]
1279  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1280  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1281  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
1282  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1283  ret void
1284}
1285
; Single-input shuffle test with a zeroinitializer mask, i.e. <0, 0, 0, 0>.
; The second shufflevector operand is poison, so only %extract3 (the low three
; lanes of the inline-asm value %vec0) supplies data. All four result lanes
; read index 0 (lane 0 of %extract3), a splat of that element.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
1286define void @v_shuffle_v4i16_v3i16__0_0_0_0(ptr addrspace(1) inreg %ptr) {
1287; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_0_0_0:
1288; GFX900:       ; %bb.0:
1289; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1290; GFX900-NEXT:    ;;#ASMSTART
1291; GFX900-NEXT:    ; def v[0:1]
1292; GFX900-NEXT:    ;;#ASMEND
1293; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1294; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
1295; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1296; GFX900-NEXT:    v_mov_b32_e32 v1, v0
1297; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1298; GFX900-NEXT:    s_waitcnt vmcnt(0)
1299; GFX900-NEXT:    s_setpc_b64 s[30:31]
1300;
1301; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_0_0_0:
1302; GFX90A:       ; %bb.0:
1303; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1304; GFX90A-NEXT:    ;;#ASMSTART
1305; GFX90A-NEXT:    ; def v[0:1]
1306; GFX90A-NEXT:    ;;#ASMEND
1307; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1308; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
1309; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1310; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
1311; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1312; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1313; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1314;
1315; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_0_0_0:
1316; GFX940:       ; %bb.0:
1317; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1318; GFX940-NEXT:    ;;#ASMSTART
1319; GFX940-NEXT:    ; def v[0:1]
1320; GFX940-NEXT:    ;;#ASMEND
1321; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1322; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
1323; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1324; GFX940-NEXT:    v_mov_b32_e32 v1, v0
1325; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1326; GFX940-NEXT:    s_waitcnt vmcnt(0)
1327; GFX940-NEXT:    s_setpc_b64 s[30:31]
1328  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1329  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1330  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> zeroinitializer
1331  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1332  ret void
1333}
1334
; Single-input shuffle test, mask <1, 0, 0, 0>. The second shufflevector
; operand is poison, so only %extract3 (the low three lanes of the inline-asm
; value %vec0) supplies data. Result lane 0 reads index 1 (lane 1 of
; %extract3) and lanes 1-3 all read index 0 (lane 0 of %extract3).
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
1335define void @v_shuffle_v4i16_v3i16__1_0_0_0(ptr addrspace(1) inreg %ptr) {
1336; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_0_0_0:
1337; GFX900:       ; %bb.0:
1338; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1339; GFX900-NEXT:    ;;#ASMSTART
1340; GFX900-NEXT:    ; def v[0:1]
1341; GFX900-NEXT:    ;;#ASMEND
1342; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1343; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1344; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1345; GFX900-NEXT:    v_alignbit_b32 v0, v0, v0, 16
1346; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1347; GFX900-NEXT:    s_waitcnt vmcnt(0)
1348; GFX900-NEXT:    s_setpc_b64 s[30:31]
1349;
1350; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_0_0_0:
1351; GFX90A:       ; %bb.0:
1352; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353; GFX90A-NEXT:    ;;#ASMSTART
1354; GFX90A-NEXT:    ; def v[0:1]
1355; GFX90A-NEXT:    ;;#ASMEND
1356; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1357; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1358; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1359; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v0, 16
1360; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1361; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1362; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1363;
1364; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_0_0_0:
1365; GFX940:       ; %bb.0:
1366; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1367; GFX940-NEXT:    ;;#ASMSTART
1368; GFX940-NEXT:    ; def v[0:1]
1369; GFX940-NEXT:    ;;#ASMEND
1370; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1371; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1372; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1373; GFX940-NEXT:    v_alignbit_b32 v0, v0, v0, 16
1374; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1375; GFX940-NEXT:    s_waitcnt vmcnt(0)
1376; GFX940-NEXT:    s_setpc_b64 s[30:31]
1377  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1378  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1379  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
1380  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1381  ret void
1382}
1383
; Single-input shuffle test, mask <2, 0, 0, 0>. The second shufflevector
; operand is poison, so only %extract3 (the low three lanes of the inline-asm
; value %vec0) supplies data. Result lane 0 reads index 2 (lane 2 of
; %extract3) and lanes 1-3 all read index 0 (lane 0 of %extract3).
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
1384define void @v_shuffle_v4i16_v3i16__2_0_0_0(ptr addrspace(1) inreg %ptr) {
1385; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_0_0_0:
1386; GFX900:       ; %bb.0:
1387; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1388; GFX900-NEXT:    ;;#ASMSTART
1389; GFX900-NEXT:    ; def v[0:1]
1390; GFX900-NEXT:    ;;#ASMEND
1391; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1392; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1393; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
1394; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1395; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1396; GFX900-NEXT:    s_waitcnt vmcnt(0)
1397; GFX900-NEXT:    s_setpc_b64 s[30:31]
1398;
1399; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_0_0_0:
1400; GFX90A:       ; %bb.0:
1401; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1403; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1404; GFX90A-NEXT:    ;;#ASMSTART
1405; GFX90A-NEXT:    ; def v[0:1]
1406; GFX90A-NEXT:    ;;#ASMEND
1407; GFX90A-NEXT:    v_perm_b32 v2, v0, v1, s4
1408; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
1409; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1410; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1411; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1412;
1413; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_0_0_0:
1414; GFX940:       ; %bb.0:
1415; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1416; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1417; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1418; GFX940-NEXT:    ;;#ASMSTART
1419; GFX940-NEXT:    ; def v[0:1]
1420; GFX940-NEXT:    ;;#ASMEND
1421; GFX940-NEXT:    s_nop 0
1422; GFX940-NEXT:    v_perm_b32 v2, v0, v1, s2
1423; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
1424; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1425; GFX940-NEXT:    s_waitcnt vmcnt(0)
1426; GFX940-NEXT:    s_setpc_b64 s[30:31]
1427  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1428  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1429  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
1430  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1431  ret void
1432}
1433
; Single-input shuffle test, mask <3, 0, 0, 0>. The second shufflevector
; operand is poison, so only %extract3 (the low three lanes of the inline-asm
; value %vec0) supplies data. Mask index 3 addresses lane 0 of that poison
; second operand, so result lane 0 carries no defined value; lanes 1-3 all
; read index 0 (lane 0 of %extract3). The expected instruction sequences below
; are the same as those of the __u_0_0_0 test in this file.
; The expected-output lines are script-generated (see the NOTE on the first
; line of this file); regenerate them rather than editing by hand.
1434define void @v_shuffle_v4i16_v3i16__3_0_0_0(ptr addrspace(1) inreg %ptr) {
1435; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_0_0_0:
1436; GFX900:       ; %bb.0:
1437; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1438; GFX900-NEXT:    ;;#ASMSTART
1439; GFX900-NEXT:    ; def v[0:1]
1440; GFX900-NEXT:    ;;#ASMEND
1441; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1442; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1443; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1444; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1445; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1446; GFX900-NEXT:    s_waitcnt vmcnt(0)
1447; GFX900-NEXT:    s_setpc_b64 s[30:31]
1448;
1449; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_0_0_0:
1450; GFX90A:       ; %bb.0:
1451; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1452; GFX90A-NEXT:    ;;#ASMSTART
1453; GFX90A-NEXT:    ; def v[0:1]
1454; GFX90A-NEXT:    ;;#ASMEND
1455; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1456; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1457; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1458; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1459; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1460; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1461; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1462;
1463; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_0_0_0:
1464; GFX940:       ; %bb.0:
1465; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1466; GFX940-NEXT:    ;;#ASMSTART
1467; GFX940-NEXT:    ; def v[0:1]
1468; GFX940-NEXT:    ;;#ASMEND
1469; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1470; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1471; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1472; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1473; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1474; GFX940-NEXT:    s_waitcnt vmcnt(0)
1475; GFX940-NEXT:    s_setpc_b64 s[30:31]
1476  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1477  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1478  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
1479  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1480  ret void
1481}
1482
; Shuffle mask <4, 0, 0, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Mask indices 0-2 pick
; from %extract3 and 3-5 from %extract31, so lane 0 is element 1 of %extract31
; and lanes 1-3 replicate element 0 of %extract3. Result is stored to %ptr.
1483define void @v_shuffle_v4i16_v3i16__4_0_0_0(ptr addrspace(1) inreg %ptr) {
1484; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_0_0_0:
1485; GFX900:       ; %bb.0:
1486; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1487; GFX900-NEXT:    ;;#ASMSTART
1488; GFX900-NEXT:    ; def v[0:1]
1489; GFX900-NEXT:    ;;#ASMEND
1490; GFX900-NEXT:    ;;#ASMSTART
1491; GFX900-NEXT:    ; def v[1:2]
1492; GFX900-NEXT:    ;;#ASMEND
1493; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1494; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1495; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1496; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1497; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1498; GFX900-NEXT:    s_waitcnt vmcnt(0)
1499; GFX900-NEXT:    s_setpc_b64 s[30:31]
1500;
1501; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_0_0_0:
1502; GFX90A:       ; %bb.0:
1503; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1504; GFX90A-NEXT:    ;;#ASMSTART
1505; GFX90A-NEXT:    ; def v[0:1]
1506; GFX90A-NEXT:    ;;#ASMEND
1507; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1508; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1509; GFX90A-NEXT:    ;;#ASMSTART
1510; GFX90A-NEXT:    ; def v[2:3]
1511; GFX90A-NEXT:    ;;#ASMEND
1512; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1513; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v2, 16
1514; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1515; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1516; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1517;
1518; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_0_0_0:
1519; GFX940:       ; %bb.0:
1520; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1521; GFX940-NEXT:    ;;#ASMSTART
1522; GFX940-NEXT:    ; def v[0:1]
1523; GFX940-NEXT:    ;;#ASMEND
1524; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1525; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1526; GFX940-NEXT:    ;;#ASMSTART
1527; GFX940-NEXT:    ; def v[2:3]
1528; GFX940-NEXT:    ;;#ASMEND
1529; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1530; GFX940-NEXT:    v_alignbit_b32 v0, v0, v2, 16
1531; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1532; GFX940-NEXT:    s_waitcnt vmcnt(0)
1533; GFX940-NEXT:    s_setpc_b64 s[30:31]
1534  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1535  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1536  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1537  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1538  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
1539  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1540  ret void
1541}
1542
; Shuffle mask <5, 0, 0, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Mask indices 0-2 pick
; from %extract3 and 3-5 from %extract31, so lane 0 is element 2 of %extract31
; and lanes 1-3 replicate element 0 of %extract3. Result is stored to %ptr.
1543define void @v_shuffle_v4i16_v3i16__5_0_0_0(ptr addrspace(1) inreg %ptr) {
1544; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_0_0:
1545; GFX900:       ; %bb.0:
1546; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1547; GFX900-NEXT:    ;;#ASMSTART
1548; GFX900-NEXT:    ; def v[0:1]
1549; GFX900-NEXT:    ;;#ASMEND
1550; GFX900-NEXT:    ;;#ASMSTART
1551; GFX900-NEXT:    ; def v[1:2]
1552; GFX900-NEXT:    ;;#ASMEND
1553; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1554; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1555; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
1556; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1557; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1558; GFX900-NEXT:    s_waitcnt vmcnt(0)
1559; GFX900-NEXT:    s_setpc_b64 s[30:31]
1560;
1561; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_0_0:
1562; GFX90A:       ; %bb.0:
1563; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1564; GFX90A-NEXT:    ;;#ASMSTART
1565; GFX90A-NEXT:    ; def v[2:3]
1566; GFX90A-NEXT:    ;;#ASMEND
1567; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1568; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1569; GFX90A-NEXT:    ;;#ASMSTART
1570; GFX90A-NEXT:    ; def v[0:1]
1571; GFX90A-NEXT:    ;;#ASMEND
1572; GFX90A-NEXT:    v_perm_b32 v2, v0, v3, s4
1573; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
1574; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1575; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1576; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1577;
1578; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_0_0:
1579; GFX940:       ; %bb.0:
1580; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1581; GFX940-NEXT:    ;;#ASMSTART
1582; GFX940-NEXT:    ; def v[2:3]
1583; GFX940-NEXT:    ;;#ASMEND
1584; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1585; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1586; GFX940-NEXT:    ;;#ASMSTART
1587; GFX940-NEXT:    ; def v[0:1]
1588; GFX940-NEXT:    ;;#ASMEND
1589; GFX940-NEXT:    s_nop 0
1590; GFX940-NEXT:    v_perm_b32 v2, v0, v3, s2
1591; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
1592; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1593; GFX940-NEXT:    s_waitcnt vmcnt(0)
1594; GFX940-NEXT:    s_setpc_b64 s[30:31]
1595  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1596  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1597  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1598  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1599  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
1600  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1601  ret void
1602}
1603
; Shuffle mask <5, poison, 0, 0> over two <3 x i16> inputs, each taken from the
; low three elements of a separate inline-asm <4 x i16> def. Lane 0 is element 2
; of %extract31, lane 1 is unconstrained (poison mask entry), and lanes 2-3
; replicate element 0 of %extract3. Result is stored (align 8) to global %ptr.
1604define void @v_shuffle_v4i16_v3i16__5_u_0_0(ptr addrspace(1) inreg %ptr) {
1605; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_0_0:
1606; GFX900:       ; %bb.0:
1607; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1608; GFX900-NEXT:    ;;#ASMSTART
1609; GFX900-NEXT:    ; def v[0:1]
1610; GFX900-NEXT:    ;;#ASMEND
1611; GFX900-NEXT:    ;;#ASMSTART
1612; GFX900-NEXT:    ; def v[1:2]
1613; GFX900-NEXT:    ;;#ASMEND
1614; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1615; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1616; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1617; GFX900-NEXT:    v_mov_b32_e32 v0, v2
1618; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
1619; GFX900-NEXT:    s_waitcnt vmcnt(0)
1620; GFX900-NEXT:    s_setpc_b64 s[30:31]
1621;
1622; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_0_0:
1623; GFX90A:       ; %bb.0:
1624; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625; GFX90A-NEXT:    ;;#ASMSTART
1626; GFX90A-NEXT:    ; def v[0:1]
1627; GFX90A-NEXT:    ;;#ASMEND
1628; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1629; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1630; GFX90A-NEXT:    ;;#ASMSTART
1631; GFX90A-NEXT:    ; def v[2:3]
1632; GFX90A-NEXT:    ;;#ASMEND
1633; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1634; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
1635; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1636; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1637; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1638;
1639; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_0_0:
1640; GFX940:       ; %bb.0:
1641; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1642; GFX940-NEXT:    ;;#ASMSTART
1643; GFX940-NEXT:    ; def v[0:1]
1644; GFX940-NEXT:    ;;#ASMEND
1645; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1646; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1647; GFX940-NEXT:    ;;#ASMSTART
1648; GFX940-NEXT:    ; def v[2:3]
1649; GFX940-NEXT:    ;;#ASMEND
1650; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1651; GFX940-NEXT:    v_mov_b32_e32 v0, v3
1652; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1653; GFX940-NEXT:    s_waitcnt vmcnt(0)
1654; GFX940-NEXT:    s_setpc_b64 s[30:31]
1655  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1656  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1657  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1658  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1659  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
1660  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1661  ret void
1662}
1663
; Shuffle mask <5, 1, 0, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Lane 0 is element 2 of
; %extract31, lane 1 is element 1 of %extract3, and lanes 2-3 replicate
; element 0 of %extract3. Result is stored (align 8) to global %ptr.
1664define void @v_shuffle_v4i16_v3i16__5_1_0_0(ptr addrspace(1) inreg %ptr) {
1665; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_0_0:
1666; GFX900:       ; %bb.0:
1667; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1668; GFX900-NEXT:    ;;#ASMSTART
1669; GFX900-NEXT:    ; def v[0:1]
1670; GFX900-NEXT:    ;;#ASMEND
1671; GFX900-NEXT:    ;;#ASMSTART
1672; GFX900-NEXT:    ; def v[1:2]
1673; GFX900-NEXT:    ;;#ASMEND
1674; GFX900-NEXT:    s_mov_b32 s4, 0xffff
1675; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v0
1676; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1677; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1678; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1679; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1680; GFX900-NEXT:    s_waitcnt vmcnt(0)
1681; GFX900-NEXT:    s_setpc_b64 s[30:31]
1682;
1683; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_0_0:
1684; GFX90A:       ; %bb.0:
1685; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1686; GFX90A-NEXT:    ;;#ASMSTART
1687; GFX90A-NEXT:    ; def v[2:3]
1688; GFX90A-NEXT:    ;;#ASMEND
1689; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
1690; GFX90A-NEXT:    ;;#ASMSTART
1691; GFX90A-NEXT:    ; def v[0:1]
1692; GFX90A-NEXT:    ;;#ASMEND
1693; GFX90A-NEXT:    v_bfi_b32 v2, s4, v3, v0
1694; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1695; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1696; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
1697; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1698; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1699; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1700;
1701; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_0_0:
1702; GFX940:       ; %bb.0:
1703; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1704; GFX940-NEXT:    ;;#ASMSTART
1705; GFX940-NEXT:    ; def v[2:3]
1706; GFX940-NEXT:    ;;#ASMEND
1707; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1708; GFX940-NEXT:    ;;#ASMSTART
1709; GFX940-NEXT:    ; def v[0:1]
1710; GFX940-NEXT:    ;;#ASMEND
1711; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1712; GFX940-NEXT:    v_bfi_b32 v2, s2, v3, v0
1713; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1714; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
1715; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1716; GFX940-NEXT:    s_waitcnt vmcnt(0)
1717; GFX940-NEXT:    s_setpc_b64 s[30:31]
1718  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1719  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1720  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1721  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1722  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
1723  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1724  ret void
1725}
1726
; Shuffle mask <5, 2, 0, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Lane 0 is element 2 of
; %extract31, lane 1 is element 2 of %extract3, and lanes 2-3 replicate
; element 0 of %extract3. Result is stored (align 8) to global %ptr.
1727define void @v_shuffle_v4i16_v3i16__5_2_0_0(ptr addrspace(1) inreg %ptr) {
1728; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_0_0:
1729; GFX900:       ; %bb.0:
1730; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1731; GFX900-NEXT:    ;;#ASMSTART
1732; GFX900-NEXT:    ; def v[0:1]
1733; GFX900-NEXT:    ;;#ASMEND
1734; GFX900-NEXT:    ;;#ASMSTART
1735; GFX900-NEXT:    ; def v[2:3]
1736; GFX900-NEXT:    ;;#ASMEND
1737; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1738; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1739; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
1740; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1741; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
1742; GFX900-NEXT:    s_waitcnt vmcnt(0)
1743; GFX900-NEXT:    s_setpc_b64 s[30:31]
1744;
1745; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_0_0:
1746; GFX90A:       ; %bb.0:
1747; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1748; GFX90A-NEXT:    ;;#ASMSTART
1749; GFX90A-NEXT:    ; def v[2:3]
1750; GFX90A-NEXT:    ;;#ASMEND
1751; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1752; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1753; GFX90A-NEXT:    ;;#ASMSTART
1754; GFX90A-NEXT:    ; def v[0:1]
1755; GFX90A-NEXT:    ;;#ASMEND
1756; GFX90A-NEXT:    v_perm_b32 v2, v1, v3, s4
1757; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
1758; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1759; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1760; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1761;
1762; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_0_0:
1763; GFX940:       ; %bb.0:
1764; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1765; GFX940-NEXT:    ;;#ASMSTART
1766; GFX940-NEXT:    ; def v[2:3]
1767; GFX940-NEXT:    ;;#ASMEND
1768; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1769; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1770; GFX940-NEXT:    ;;#ASMSTART
1771; GFX940-NEXT:    ; def v[0:1]
1772; GFX940-NEXT:    ;;#ASMEND
1773; GFX940-NEXT:    s_nop 0
1774; GFX940-NEXT:    v_perm_b32 v2, v1, v3, s2
1775; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
1776; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1777; GFX940-NEXT:    s_waitcnt vmcnt(0)
1778; GFX940-NEXT:    s_setpc_b64 s[30:31]
1779  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1780  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1781  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1782  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1783  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
1784  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1785  ret void
1786}
1787
; Shuffle mask <5, 3, 0, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Lane 0 is element 2 of
; %extract31, lane 1 is element 0 of %extract31, and lanes 2-3 replicate
; element 0 of %extract3. Result is stored (align 8) to global %ptr.
1788define void @v_shuffle_v4i16_v3i16__5_3_0_0(ptr addrspace(1) inreg %ptr) {
1789; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_0_0:
1790; GFX900:       ; %bb.0:
1791; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1792; GFX900-NEXT:    ;;#ASMSTART
1793; GFX900-NEXT:    ; def v[0:1]
1794; GFX900-NEXT:    ;;#ASMEND
1795; GFX900-NEXT:    ;;#ASMSTART
1796; GFX900-NEXT:    ; def v[1:2]
1797; GFX900-NEXT:    ;;#ASMEND
1798; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1799; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1800; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
1801; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1802; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1803; GFX900-NEXT:    s_waitcnt vmcnt(0)
1804; GFX900-NEXT:    s_setpc_b64 s[30:31]
1805;
1806; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_0_0:
1807; GFX90A:       ; %bb.0:
1808; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1809; GFX90A-NEXT:    ;;#ASMSTART
1810; GFX90A-NEXT:    ; def v[2:3]
1811; GFX90A-NEXT:    ;;#ASMEND
1812; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1813; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1814; GFX90A-NEXT:    ;;#ASMSTART
1815; GFX90A-NEXT:    ; def v[0:1]
1816; GFX90A-NEXT:    ;;#ASMEND
1817; GFX90A-NEXT:    v_perm_b32 v2, v2, v3, s4
1818; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
1819; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1820; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1821; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1822;
1823; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_0_0:
1824; GFX940:       ; %bb.0:
1825; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826; GFX940-NEXT:    ;;#ASMSTART
1827; GFX940-NEXT:    ; def v[2:3]
1828; GFX940-NEXT:    ;;#ASMEND
1829; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1830; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1831; GFX940-NEXT:    ;;#ASMSTART
1832; GFX940-NEXT:    ; def v[0:1]
1833; GFX940-NEXT:    ;;#ASMEND
1834; GFX940-NEXT:    v_perm_b32 v2, v2, v3, s2
1835; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
1836; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1837; GFX940-NEXT:    s_waitcnt vmcnt(0)
1838; GFX940-NEXT:    s_setpc_b64 s[30:31]
1839  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1840  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1841  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1842  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1843  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
1844  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1845  ret void
1846}
1847
; Shuffle mask <5, 4, 0, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Lane 0 is element 2 of
; %extract31, lane 1 is element 1 of %extract31, and lanes 2-3 replicate
; element 0 of %extract3. Result is stored (align 8) to global %ptr.
1848define void @v_shuffle_v4i16_v3i16__5_4_0_0(ptr addrspace(1) inreg %ptr) {
1849; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_0_0:
1850; GFX900:       ; %bb.0:
1851; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1852; GFX900-NEXT:    ;;#ASMSTART
1853; GFX900-NEXT:    ; def v[0:1]
1854; GFX900-NEXT:    ;;#ASMEND
1855; GFX900-NEXT:    ;;#ASMSTART
1856; GFX900-NEXT:    ; def v[1:2]
1857; GFX900-NEXT:    ;;#ASMEND
1858; GFX900-NEXT:    s_mov_b32 s4, 0xffff
1859; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v1
1860; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1861; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1862; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1863; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1864; GFX900-NEXT:    s_waitcnt vmcnt(0)
1865; GFX900-NEXT:    s_setpc_b64 s[30:31]
1866;
1867; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_0_0:
1868; GFX90A:       ; %bb.0:
1869; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1870; GFX90A-NEXT:    ;;#ASMSTART
1871; GFX90A-NEXT:    ; def v[2:3]
1872; GFX90A-NEXT:    ;;#ASMEND
1873; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
1874; GFX90A-NEXT:    v_bfi_b32 v2, s4, v3, v2
1875; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1876; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1877; GFX90A-NEXT:    ;;#ASMSTART
1878; GFX90A-NEXT:    ; def v[0:1]
1879; GFX90A-NEXT:    ;;#ASMEND
1880; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
1881; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1882; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1883; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1884;
1885; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_0_0:
1886; GFX940:       ; %bb.0:
1887; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1888; GFX940-NEXT:    ;;#ASMSTART
1889; GFX940-NEXT:    ; def v[2:3]
1890; GFX940-NEXT:    ;;#ASMEND
1891; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1892; GFX940-NEXT:    v_bfi_b32 v2, s2, v3, v2
1893; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1894; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1895; GFX940-NEXT:    ;;#ASMSTART
1896; GFX940-NEXT:    ; def v[0:1]
1897; GFX940-NEXT:    ;;#ASMEND
1898; GFX940-NEXT:    s_nop 0
1899; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
1900; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1901; GFX940-NEXT:    s_waitcnt vmcnt(0)
1902; GFX940-NEXT:    s_setpc_b64 s[30:31]
1903  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1904  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1905  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1906  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1907  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
1908  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1909  ret void
1910}
1911
; Shuffle mask <5, 5, 0, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Lanes 0-1 replicate
; element 2 of %extract31 and lanes 2-3 replicate element 0 of %extract3.
; Result is stored (align 8) to global %ptr.
1912define void @v_shuffle_v4i16_v3i16__5_5_0_0(ptr addrspace(1) inreg %ptr) {
1913; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_0:
1914; GFX900:       ; %bb.0:
1915; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1916; GFX900-NEXT:    ;;#ASMSTART
1917; GFX900-NEXT:    ; def v[0:1]
1918; GFX900-NEXT:    ;;#ASMEND
1919; GFX900-NEXT:    ;;#ASMSTART
1920; GFX900-NEXT:    ; def v[1:2]
1921; GFX900-NEXT:    ;;#ASMEND
1922; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1923; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1924; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1925; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
1926; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
1927; GFX900-NEXT:    s_waitcnt vmcnt(0)
1928; GFX900-NEXT:    s_setpc_b64 s[30:31]
1929;
1930; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_0:
1931; GFX90A:       ; %bb.0:
1932; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1933; GFX90A-NEXT:    ;;#ASMSTART
1934; GFX90A-NEXT:    ; def v[0:1]
1935; GFX90A-NEXT:    ;;#ASMEND
1936; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1937; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1938; GFX90A-NEXT:    ;;#ASMSTART
1939; GFX90A-NEXT:    ; def v[2:3]
1940; GFX90A-NEXT:    ;;#ASMEND
1941; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1942; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
1943; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1944; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1945; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1946;
1947; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_0:
1948; GFX940:       ; %bb.0:
1949; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1950; GFX940-NEXT:    ;;#ASMSTART
1951; GFX940-NEXT:    ; def v[0:1]
1952; GFX940-NEXT:    ;;#ASMEND
1953; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1954; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1955; GFX940-NEXT:    ;;#ASMSTART
1956; GFX940-NEXT:    ; def v[2:3]
1957; GFX940-NEXT:    ;;#ASMEND
1958; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1959; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
1960; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1961; GFX940-NEXT:    s_waitcnt vmcnt(0)
1962; GFX940-NEXT:    s_setpc_b64 s[30:31]
1963  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1964  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1965  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1966  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1967  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
1968  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1969  ret void
1970}
1971
; Shuffle mask <5, 5, poison, 0> over two <3 x i16> inputs, each taken from the
; low three elements of a separate inline-asm <4 x i16> def. Lanes 0-1 replicate
; element 2 of %extract31, lane 2 is unconstrained (poison mask entry), and
; lane 3 is element 0 of %extract3. Result is stored (align 8) to global %ptr.
1972define void @v_shuffle_v4i16_v3i16__5_5_u_0(ptr addrspace(1) inreg %ptr) {
1973; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_0:
1974; GFX900:       ; %bb.0:
1975; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1976; GFX900-NEXT:    ;;#ASMSTART
1977; GFX900-NEXT:    ; def v[0:1]
1978; GFX900-NEXT:    ;;#ASMEND
1979; GFX900-NEXT:    ;;#ASMSTART
1980; GFX900-NEXT:    ; def v[1:2]
1981; GFX900-NEXT:    ;;#ASMEND
1982; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1983; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1984; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
1985; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
1986; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1987; GFX900-NEXT:    s_waitcnt vmcnt(0)
1988; GFX900-NEXT:    s_setpc_b64 s[30:31]
1989;
1990; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_0:
1991; GFX90A:       ; %bb.0:
1992; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1993; GFX90A-NEXT:    ;;#ASMSTART
1994; GFX90A-NEXT:    ; def v[2:3]
1995; GFX90A-NEXT:    ;;#ASMEND
1996; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1997; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1998; GFX90A-NEXT:    ;;#ASMSTART
1999; GFX90A-NEXT:    ; def v[0:1]
2000; GFX90A-NEXT:    ;;#ASMEND
2001; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
2002; GFX90A-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
2003; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2004; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2005; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2006;
2007; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_0:
2008; GFX940:       ; %bb.0:
2009; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2010; GFX940-NEXT:    ;;#ASMSTART
2011; GFX940-NEXT:    ; def v[2:3]
2012; GFX940-NEXT:    ;;#ASMEND
2013; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2014; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2015; GFX940-NEXT:    ;;#ASMSTART
2016; GFX940-NEXT:    ; def v[0:1]
2017; GFX940-NEXT:    ;;#ASMEND
2018; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
2019; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
2020; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2021; GFX940-NEXT:    s_waitcnt vmcnt(0)
2022; GFX940-NEXT:    s_setpc_b64 s[30:31]
2023  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2024  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2025  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2026  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2027  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
2028  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2029  ret void
2030}
2031
; Shuffle mask <5, 5, 1, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Lanes 0-1 replicate
; element 2 of %extract31, lane 2 is element 1 of %extract3, and lane 3 is
; element 0 of %extract3. Result is stored (align 8) to global %ptr.
2032define void @v_shuffle_v4i16_v3i16__5_5_1_0(ptr addrspace(1) inreg %ptr) {
2033; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_0:
2034; GFX900:       ; %bb.0:
2035; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2036; GFX900-NEXT:    ;;#ASMSTART
2037; GFX900-NEXT:    ; def v[0:1]
2038; GFX900-NEXT:    ;;#ASMEND
2039; GFX900-NEXT:    ;;#ASMSTART
2040; GFX900-NEXT:    ; def v[1:2]
2041; GFX900-NEXT:    ;;#ASMEND
2042; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2043; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2044; GFX900-NEXT:    v_alignbit_b32 v1, v0, v0, 16
2045; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
2046; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2047; GFX900-NEXT:    s_waitcnt vmcnt(0)
2048; GFX900-NEXT:    s_setpc_b64 s[30:31]
2049;
2050; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_0:
2051; GFX90A:       ; %bb.0:
2052; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2053; GFX90A-NEXT:    ;;#ASMSTART
2054; GFX90A-NEXT:    ; def v[0:1]
2055; GFX90A-NEXT:    ;;#ASMEND
2056; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2057; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2058; GFX90A-NEXT:    ;;#ASMSTART
2059; GFX90A-NEXT:    ; def v[2:3]
2060; GFX90A-NEXT:    ;;#ASMEND
2061; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v0, 16
2062; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2063; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2064; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2065; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2066;
2067; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_0:
2068; GFX940:       ; %bb.0:
2069; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2070; GFX940-NEXT:    ;;#ASMSTART
2071; GFX940-NEXT:    ; def v[0:1]
2072; GFX940-NEXT:    ;;#ASMEND
2073; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2074; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2075; GFX940-NEXT:    ;;#ASMSTART
2076; GFX940-NEXT:    ; def v[2:3]
2077; GFX940-NEXT:    ;;#ASMEND
2078; GFX940-NEXT:    v_alignbit_b32 v1, v0, v0, 16
2079; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2080; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2081; GFX940-NEXT:    s_waitcnt vmcnt(0)
2082; GFX940-NEXT:    s_setpc_b64 s[30:31]
2083  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2084  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2085  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2086  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2087  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
2088  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2089  ret void
2090}
2091
; Shuffle mask <5, 5, 2, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Lanes 0-1 replicate
; element 2 of %extract31, lane 2 is element 2 of %extract3, and lane 3 is
; element 0 of %extract3. Result is stored (align 8) to global %ptr.
2092define void @v_shuffle_v4i16_v3i16__5_5_2_0(ptr addrspace(1) inreg %ptr) {
2093; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_0:
2094; GFX900:       ; %bb.0:
2095; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2096; GFX900-NEXT:    ;;#ASMSTART
2097; GFX900-NEXT:    ; def v[0:1]
2098; GFX900-NEXT:    ;;#ASMEND
2099; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2100; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2101; GFX900-NEXT:    ;;#ASMSTART
2102; GFX900-NEXT:    ; def v[2:3]
2103; GFX900-NEXT:    ;;#ASMEND
2104; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
2105; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
2106; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2107; GFX900-NEXT:    s_waitcnt vmcnt(0)
2108; GFX900-NEXT:    s_setpc_b64 s[30:31]
2109;
2110; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_0:
2111; GFX90A:       ; %bb.0:
2112; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2113; GFX90A-NEXT:    ;;#ASMSTART
2114; GFX90A-NEXT:    ; def v[0:1]
2115; GFX90A-NEXT:    ;;#ASMEND
2116; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2117; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2118; GFX90A-NEXT:    ;;#ASMSTART
2119; GFX90A-NEXT:    ; def v[2:3]
2120; GFX90A-NEXT:    ;;#ASMEND
2121; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
2122; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2123; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2124; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2125; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2126;
2127; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_0:
2128; GFX940:       ; %bb.0:
2129; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2130; GFX940-NEXT:    ;;#ASMSTART
2131; GFX940-NEXT:    ; def v[0:1]
2132; GFX940-NEXT:    ;;#ASMEND
2133; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2134; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2135; GFX940-NEXT:    ;;#ASMSTART
2136; GFX940-NEXT:    ; def v[2:3]
2137; GFX940-NEXT:    ;;#ASMEND
2138; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
2139; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2140; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2141; GFX940-NEXT:    s_waitcnt vmcnt(0)
2142; GFX940-NEXT:    s_setpc_b64 s[30:31]
2143  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2144  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2145  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2146  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2147  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
2148  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2149  ret void
2150}
2151
; Shuffle mask <5, 5, 3, 0> over two <3 x i16> inputs, each taken from the low
; three elements of a separate inline-asm <4 x i16> def. Lanes 0-1 replicate
; element 2 of %extract31, lane 2 is element 0 of %extract31, and lane 3 is
; element 0 of %extract3. Result is stored (align 8) to global %ptr.
2152define void @v_shuffle_v4i16_v3i16__5_5_3_0(ptr addrspace(1) inreg %ptr) {
2153; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_0:
2154; GFX900:       ; %bb.0:
2155; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2156; GFX900-NEXT:    ;;#ASMSTART
2157; GFX900-NEXT:    ; def v[0:1]
2158; GFX900-NEXT:    ;;#ASMEND
2159; GFX900-NEXT:    ;;#ASMSTART
2160; GFX900-NEXT:    ; def v[1:2]
2161; GFX900-NEXT:    ;;#ASMEND
2162; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2163; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2164; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
2165; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
2166; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2167; GFX900-NEXT:    s_waitcnt vmcnt(0)
2168; GFX900-NEXT:    s_setpc_b64 s[30:31]
2169;
2170; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_0:
2171; GFX90A:       ; %bb.0:
2172; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2173; GFX90A-NEXT:    ;;#ASMSTART
2174; GFX90A-NEXT:    ; def v[0:1]
2175; GFX90A-NEXT:    ;;#ASMEND
2176; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2177; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2178; GFX90A-NEXT:    ;;#ASMSTART
2179; GFX90A-NEXT:    ; def v[2:3]
2180; GFX90A-NEXT:    ;;#ASMEND
2181; GFX90A-NEXT:    v_perm_b32 v1, v0, v2, s4
2182; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2183; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2184; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2185; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2186;
2187; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_0:
2188; GFX940:       ; %bb.0:
2189; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2190; GFX940-NEXT:    ;;#ASMSTART
2191; GFX940-NEXT:    ; def v[0:1]
2192; GFX940-NEXT:    ;;#ASMEND
2193; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2194; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2195; GFX940-NEXT:    ;;#ASMSTART
2196; GFX940-NEXT:    ; def v[2:3]
2197; GFX940-NEXT:    ;;#ASMEND
2198; GFX940-NEXT:    s_nop 0
2199; GFX940-NEXT:    v_perm_b32 v1, v0, v2, s2
2200; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2201; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2202; GFX940-NEXT:    s_waitcnt vmcnt(0)
2203; GFX940-NEXT:    s_setpc_b64 s[30:31]
2204  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2205  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2206  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2207  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2208  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
2209  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2210  ret void
2211}
2212
; Codegen test: <4 x i16> shuffle of two <3 x i16> inputs with mask <5, 5, 4, 0>.
; Each input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2. Mask indices 0-2 pick from %extract3 and
; 3-5 from %extract31, so the stored value is
; { %extract31[2], %extract31[2], %extract31[1], %extract3[0] }.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2213define void @v_shuffle_v4i16_v3i16__5_5_4_0(ptr addrspace(1) inreg %ptr) {
2214; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_0:
2215; GFX900:       ; %bb.0:
2216; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2217; GFX900-NEXT:    ;;#ASMSTART
2218; GFX900-NEXT:    ; def v[0:1]
2219; GFX900-NEXT:    ;;#ASMEND
2220; GFX900-NEXT:    ;;#ASMSTART
2221; GFX900-NEXT:    ; def v[1:2]
2222; GFX900-NEXT:    ;;#ASMEND
2223; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2224; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2225; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
2226; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
2227; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2228; GFX900-NEXT:    s_waitcnt vmcnt(0)
2229; GFX900-NEXT:    s_setpc_b64 s[30:31]
2230;
2231; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_0:
2232; GFX90A:       ; %bb.0:
2233; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2234; GFX90A-NEXT:    ;;#ASMSTART
2235; GFX90A-NEXT:    ; def v[0:1]
2236; GFX90A-NEXT:    ;;#ASMEND
2237; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2238; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2239; GFX90A-NEXT:    ;;#ASMSTART
2240; GFX90A-NEXT:    ; def v[2:3]
2241; GFX90A-NEXT:    ;;#ASMEND
2242; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v2, 16
2243; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2244; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2245; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2246; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2247;
2248; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_0:
2249; GFX940:       ; %bb.0:
2250; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2251; GFX940-NEXT:    ;;#ASMSTART
2252; GFX940-NEXT:    ; def v[0:1]
2253; GFX940-NEXT:    ;;#ASMEND
2254; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2255; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2256; GFX940-NEXT:    ;;#ASMSTART
2257; GFX940-NEXT:    ; def v[2:3]
2258; GFX940-NEXT:    ;;#ASMEND
2259; GFX940-NEXT:    s_nop 0
2260; GFX940-NEXT:    v_alignbit_b32 v1, v0, v2, 16
2261; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2262; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2263; GFX940-NEXT:    s_waitcnt vmcnt(0)
2264; GFX940-NEXT:    s_setpc_b64 s[30:31]
2265  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2266  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2267  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2268  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2269  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
2270  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2271  ret void
2272}
2273
; Codegen test: <4 x i16> shuffle of one <3 x i16> input with mask
; <poison, 1, 1, 1>. The input is an opaque <4 x i16> produced by inline asm
; ("; def $0", "=v") and narrowed to its elements 0..2; the second shuffle
; operand is poison. Lane 0 of the stored value is unconstrained and lanes
; 1..3 are all %extract3[1].
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2274define void @v_shuffle_v4i16_v3i16__u_1_1_1(ptr addrspace(1) inreg %ptr) {
2275; GFX900-LABEL: v_shuffle_v4i16_v3i16__u_1_1_1:
2276; GFX900:       ; %bb.0:
2277; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2278; GFX900-NEXT:    ;;#ASMSTART
2279; GFX900-NEXT:    ; def v[0:1]
2280; GFX900-NEXT:    ;;#ASMEND
2281; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2282; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2283; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2284; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2285; GFX900-NEXT:    s_waitcnt vmcnt(0)
2286; GFX900-NEXT:    s_setpc_b64 s[30:31]
2287;
2288; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_1_1_1:
2289; GFX90A:       ; %bb.0:
2290; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2291; GFX90A-NEXT:    ;;#ASMSTART
2292; GFX90A-NEXT:    ; def v[0:1]
2293; GFX90A-NEXT:    ;;#ASMEND
2294; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2295; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2296; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2297; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2298; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2299; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2300;
2301; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_1_1_1:
2302; GFX940:       ; %bb.0:
2303; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2304; GFX940-NEXT:    ;;#ASMSTART
2305; GFX940-NEXT:    ; def v[0:1]
2306; GFX940-NEXT:    ;;#ASMEND
2307; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2308; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2309; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2310; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2311; GFX940-NEXT:    s_waitcnt vmcnt(0)
2312; GFX940-NEXT:    s_setpc_b64 s[30:31]
2313  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2314  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2315  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
2316  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2317  ret void
2318}
2319
; Codegen test: <4 x i16> shuffle of one <3 x i16> input with mask <0, 1, 1, 1>.
; The input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2; the second shuffle operand is poison.
; The stored value is
; { %extract3[0], %extract3[1], %extract3[1], %extract3[1] }.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2320define void @v_shuffle_v4i16_v3i16__0_1_1_1(ptr addrspace(1) inreg %ptr) {
2321; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_1_1_1:
2322; GFX900:       ; %bb.0:
2323; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2324; GFX900-NEXT:    ;;#ASMSTART
2325; GFX900-NEXT:    ; def v[0:1]
2326; GFX900-NEXT:    ;;#ASMEND
2327; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2328; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2329; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2330; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2331; GFX900-NEXT:    s_waitcnt vmcnt(0)
2332; GFX900-NEXT:    s_setpc_b64 s[30:31]
2333;
2334; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_1_1_1:
2335; GFX90A:       ; %bb.0:
2336; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2337; GFX90A-NEXT:    ;;#ASMSTART
2338; GFX90A-NEXT:    ; def v[0:1]
2339; GFX90A-NEXT:    ;;#ASMEND
2340; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2341; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2342; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2343; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2344; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2345; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2346;
2347; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_1_1_1:
2348; GFX940:       ; %bb.0:
2349; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2350; GFX940-NEXT:    ;;#ASMSTART
2351; GFX940-NEXT:    ; def v[0:1]
2352; GFX940-NEXT:    ;;#ASMEND
2353; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2354; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2355; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2356; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2357; GFX940-NEXT:    s_waitcnt vmcnt(0)
2358; GFX940-NEXT:    s_setpc_b64 s[30:31]
2359  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2360  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2361  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
2362  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2363  ret void
2364}
2365
; Codegen test: <4 x i16> shuffle of one <3 x i16> input with mask <1, 1, 1, 1>.
; The input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2; the second shuffle operand is poison.
; The stored value is %extract3[1] replicated into all four lanes.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2366define void @v_shuffle_v4i16_v3i16__1_1_1_1(ptr addrspace(1) inreg %ptr) {
2367; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_1_1_1:
2368; GFX900:       ; %bb.0:
2369; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2370; GFX900-NEXT:    ;;#ASMSTART
2371; GFX900-NEXT:    ; def v[0:1]
2372; GFX900-NEXT:    ;;#ASMEND
2373; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2374; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
2375; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2376; GFX900-NEXT:    v_mov_b32_e32 v1, v0
2377; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2378; GFX900-NEXT:    s_waitcnt vmcnt(0)
2379; GFX900-NEXT:    s_setpc_b64 s[30:31]
2380;
2381; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_1_1_1:
2382; GFX90A:       ; %bb.0:
2383; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2384; GFX90A-NEXT:    ;;#ASMSTART
2385; GFX90A-NEXT:    ; def v[0:1]
2386; GFX90A-NEXT:    ;;#ASMEND
2387; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2388; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
2389; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2390; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
2391; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2392; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2393; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2394;
2395; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_1_1_1:
2396; GFX940:       ; %bb.0:
2397; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2398; GFX940-NEXT:    ;;#ASMSTART
2399; GFX940-NEXT:    ; def v[0:1]
2400; GFX940-NEXT:    ;;#ASMEND
2401; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2402; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
2403; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2404; GFX940-NEXT:    v_mov_b32_e32 v1, v0
2405; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2406; GFX940-NEXT:    s_waitcnt vmcnt(0)
2407; GFX940-NEXT:    s_setpc_b64 s[30:31]
2408  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2409  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2410  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2411  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2412  ret void
2413}
2414
; Codegen test: <4 x i16> shuffle of one <3 x i16> input with mask <2, 1, 1, 1>.
; The input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2; the second shuffle operand is poison.
; The stored value is
; { %extract3[2], %extract3[1], %extract3[1], %extract3[1] }.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2415define void @v_shuffle_v4i16_v3i16__2_1_1_1(ptr addrspace(1) inreg %ptr) {
2416; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_1_1_1:
2417; GFX900:       ; %bb.0:
2418; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2419; GFX900-NEXT:    ;;#ASMSTART
2420; GFX900-NEXT:    ; def v[0:1]
2421; GFX900-NEXT:    ;;#ASMEND
2422; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2423; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
2424; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2425; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2426; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2427; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
2428; GFX900-NEXT:    s_waitcnt vmcnt(0)
2429; GFX900-NEXT:    s_setpc_b64 s[30:31]
2430;
2431; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_1_1_1:
2432; GFX90A:       ; %bb.0:
2433; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2434; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2435; GFX90A-NEXT:    ;;#ASMSTART
2436; GFX90A-NEXT:    ; def v[0:1]
2437; GFX90A-NEXT:    ;;#ASMEND
2438; GFX90A-NEXT:    v_bfi_b32 v2, s4, v1, v0
2439; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2440; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2441; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2442; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2443; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2444; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2445;
2446; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_1_1_1:
2447; GFX940:       ; %bb.0:
2448; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2449; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2450; GFX940-NEXT:    ;;#ASMSTART
2451; GFX940-NEXT:    ; def v[0:1]
2452; GFX940-NEXT:    ;;#ASMEND
2453; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2454; GFX940-NEXT:    v_bfi_b32 v2, s2, v1, v0
2455; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2456; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2457; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2458; GFX940-NEXT:    s_waitcnt vmcnt(0)
2459; GFX940-NEXT:    s_setpc_b64 s[30:31]
2460  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2461  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2462  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
2463  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2464  ret void
2465}
2466
; Codegen test: <4 x i16> shuffle of one <3 x i16> input with mask <3, 1, 1, 1>.
; The input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2. The second shuffle operand is poison, so
; mask index 3 (its element 0) selects a poison lane; lanes 1..3 of the stored
; value are all %extract3[1].
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2467define void @v_shuffle_v4i16_v3i16__3_1_1_1(ptr addrspace(1) inreg %ptr) {
2468; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_1_1_1:
2469; GFX900:       ; %bb.0:
2470; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2471; GFX900-NEXT:    ;;#ASMSTART
2472; GFX900-NEXT:    ; def v[0:1]
2473; GFX900-NEXT:    ;;#ASMEND
2474; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2475; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2476; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2477; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2478; GFX900-NEXT:    s_waitcnt vmcnt(0)
2479; GFX900-NEXT:    s_setpc_b64 s[30:31]
2480;
2481; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_1_1_1:
2482; GFX90A:       ; %bb.0:
2483; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2484; GFX90A-NEXT:    ;;#ASMSTART
2485; GFX90A-NEXT:    ; def v[0:1]
2486; GFX90A-NEXT:    ;;#ASMEND
2487; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2488; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2489; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2490; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2491; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2492; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2493;
2494; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_1_1_1:
2495; GFX940:       ; %bb.0:
2496; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2497; GFX940-NEXT:    ;;#ASMSTART
2498; GFX940-NEXT:    ; def v[0:1]
2499; GFX940-NEXT:    ;;#ASMEND
2500; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2501; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2502; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2503; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2504; GFX940-NEXT:    s_waitcnt vmcnt(0)
2505; GFX940-NEXT:    s_setpc_b64 s[30:31]
2506  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2507  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2508  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
2509  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2510  ret void
2511}
2512
; Codegen test: <4 x i16> shuffle of two <3 x i16> inputs with mask <4, 1, 1, 1>.
; Each input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2. Mask indices 0-2 pick from %extract3 and
; 3-5 from %extract31, so the stored value is
; { %extract31[1], %extract3[1], %extract3[1], %extract3[1] }.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2513define void @v_shuffle_v4i16_v3i16__4_1_1_1(ptr addrspace(1) inreg %ptr) {
2514; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_1_1_1:
2515; GFX900:       ; %bb.0:
2516; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2517; GFX900-NEXT:    ;;#ASMSTART
2518; GFX900-NEXT:    ; def v[0:1]
2519; GFX900-NEXT:    ;;#ASMEND
2520; GFX900-NEXT:    ;;#ASMSTART
2521; GFX900-NEXT:    ; def v[1:2]
2522; GFX900-NEXT:    ;;#ASMEND
2523; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2524; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2525; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
2526; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2527; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
2528; GFX900-NEXT:    s_waitcnt vmcnt(0)
2529; GFX900-NEXT:    s_setpc_b64 s[30:31]
2530;
2531; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_1_1_1:
2532; GFX90A:       ; %bb.0:
2533; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2534; GFX90A-NEXT:    ;;#ASMSTART
2535; GFX90A-NEXT:    ; def v[2:3]
2536; GFX90A-NEXT:    ;;#ASMEND
2537; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2538; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2539; GFX90A-NEXT:    ;;#ASMSTART
2540; GFX90A-NEXT:    ; def v[0:1]
2541; GFX90A-NEXT:    ;;#ASMEND
2542; GFX90A-NEXT:    v_perm_b32 v2, v0, v2, s4
2543; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2544; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2545; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2546; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2547;
2548; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_1_1_1:
2549; GFX940:       ; %bb.0:
2550; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2551; GFX940-NEXT:    ;;#ASMSTART
2552; GFX940-NEXT:    ; def v[2:3]
2553; GFX940-NEXT:    ;;#ASMEND
2554; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2555; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2556; GFX940-NEXT:    ;;#ASMSTART
2557; GFX940-NEXT:    ; def v[0:1]
2558; GFX940-NEXT:    ;;#ASMEND
2559; GFX940-NEXT:    s_nop 0
2560; GFX940-NEXT:    v_perm_b32 v2, v0, v2, s2
2561; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2562; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2563; GFX940-NEXT:    s_waitcnt vmcnt(0)
2564; GFX940-NEXT:    s_setpc_b64 s[30:31]
2565  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2566  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2567  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2568  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2569  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
2570  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2571  ret void
2572}
2573
; Codegen test: <4 x i16> shuffle of two <3 x i16> inputs with mask <5, 1, 1, 1>.
; Each input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2. Mask indices 0-2 pick from %extract3 and
; 3-5 from %extract31, so the stored value is
; { %extract31[2], %extract3[1], %extract3[1], %extract3[1] }.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2574define void @v_shuffle_v4i16_v3i16__5_1_1_1(ptr addrspace(1) inreg %ptr) {
2575; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_1_1:
2576; GFX900:       ; %bb.0:
2577; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2578; GFX900-NEXT:    ;;#ASMSTART
2579; GFX900-NEXT:    ; def v[0:1]
2580; GFX900-NEXT:    ;;#ASMEND
2581; GFX900-NEXT:    ;;#ASMSTART
2582; GFX900-NEXT:    ; def v[1:2]
2583; GFX900-NEXT:    ;;#ASMEND
2584; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2585; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v0
2586; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2587; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2588; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2589; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
2590; GFX900-NEXT:    s_waitcnt vmcnt(0)
2591; GFX900-NEXT:    s_setpc_b64 s[30:31]
2592;
2593; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_1_1:
2594; GFX90A:       ; %bb.0:
2595; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2596; GFX90A-NEXT:    ;;#ASMSTART
2597; GFX90A-NEXT:    ; def v[2:3]
2598; GFX90A-NEXT:    ;;#ASMEND
2599; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2600; GFX90A-NEXT:    ;;#ASMSTART
2601; GFX90A-NEXT:    ; def v[0:1]
2602; GFX90A-NEXT:    ;;#ASMEND
2603; GFX90A-NEXT:    v_bfi_b32 v2, s4, v3, v0
2604; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2605; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2606; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2607; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2608; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2609; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2610;
2611; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_1_1:
2612; GFX940:       ; %bb.0:
2613; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2614; GFX940-NEXT:    ;;#ASMSTART
2615; GFX940-NEXT:    ; def v[2:3]
2616; GFX940-NEXT:    ;;#ASMEND
2617; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2618; GFX940-NEXT:    ;;#ASMSTART
2619; GFX940-NEXT:    ; def v[0:1]
2620; GFX940-NEXT:    ;;#ASMEND
2621; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2622; GFX940-NEXT:    v_bfi_b32 v2, s2, v3, v0
2623; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2624; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2625; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2626; GFX940-NEXT:    s_waitcnt vmcnt(0)
2627; GFX940-NEXT:    s_setpc_b64 s[30:31]
2628  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2629  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2630  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2631  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2632  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
2633  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2634  ret void
2635}
2636
; Codegen test: <4 x i16> shuffle of two <3 x i16> inputs with mask
; <5, poison, 1, 1>. Each input is an opaque <4 x i16> produced by inline asm
; ("; def $0", "=v") and narrowed to its elements 0..2. Mask indices 0-2 pick
; from %extract3 and 3-5 from %extract31, so lane 0 of the stored value is
; %extract31[2], lane 1 is unconstrained, and lanes 2..3 are %extract3[1].
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2637define void @v_shuffle_v4i16_v3i16__5_u_1_1(ptr addrspace(1) inreg %ptr) {
2638; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_1_1:
2639; GFX900:       ; %bb.0:
2640; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2641; GFX900-NEXT:    ;;#ASMSTART
2642; GFX900-NEXT:    ; def v[0:1]
2643; GFX900-NEXT:    ;;#ASMEND
2644; GFX900-NEXT:    ;;#ASMSTART
2645; GFX900-NEXT:    ; def v[1:2]
2646; GFX900-NEXT:    ;;#ASMEND
2647; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2648; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2649; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2650; GFX900-NEXT:    v_mov_b32_e32 v0, v2
2651; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2652; GFX900-NEXT:    s_waitcnt vmcnt(0)
2653; GFX900-NEXT:    s_setpc_b64 s[30:31]
2654;
2655; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_1_1:
2656; GFX90A:       ; %bb.0:
2657; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2658; GFX90A-NEXT:    ;;#ASMSTART
2659; GFX90A-NEXT:    ; def v[0:1]
2660; GFX90A-NEXT:    ;;#ASMEND
2661; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2662; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2663; GFX90A-NEXT:    ;;#ASMSTART
2664; GFX90A-NEXT:    ; def v[2:3]
2665; GFX90A-NEXT:    ;;#ASMEND
2666; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2667; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
2668; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2669; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2670; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2671;
2672; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_1_1:
2673; GFX940:       ; %bb.0:
2674; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2675; GFX940-NEXT:    ;;#ASMSTART
2676; GFX940-NEXT:    ; def v[0:1]
2677; GFX940-NEXT:    ;;#ASMEND
2678; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2679; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2680; GFX940-NEXT:    ;;#ASMSTART
2681; GFX940-NEXT:    ; def v[2:3]
2682; GFX940-NEXT:    ;;#ASMEND
2683; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2684; GFX940-NEXT:    v_mov_b32_e32 v0, v3
2685; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2686; GFX940-NEXT:    s_waitcnt vmcnt(0)
2687; GFX940-NEXT:    s_setpc_b64 s[30:31]
2688  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2689  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2690  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2691  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2692  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
2693  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2694  ret void
2695}
2696
; Codegen test: <4 x i16> shuffle of two <3 x i16> inputs with mask <5, 0, 1, 1>.
; Each input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2. Mask indices 0-2 pick from %extract3 and
; 3-5 from %extract31, so the stored value is
; { %extract31[2], %extract3[0], %extract3[1], %extract3[1] }.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2697define void @v_shuffle_v4i16_v3i16__5_0_1_1(ptr addrspace(1) inreg %ptr) {
2698; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_1_1:
2699; GFX900:       ; %bb.0:
2700; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2701; GFX900-NEXT:    ;;#ASMSTART
2702; GFX900-NEXT:    ; def v[0:1]
2703; GFX900-NEXT:    ;;#ASMEND
2704; GFX900-NEXT:    ;;#ASMSTART
2705; GFX900-NEXT:    ; def v[1:2]
2706; GFX900-NEXT:    ;;#ASMEND
2707; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2708; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
2709; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2710; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2711; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2712; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
2713; GFX900-NEXT:    s_waitcnt vmcnt(0)
2714; GFX900-NEXT:    s_setpc_b64 s[30:31]
2715;
2716; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_1_1:
2717; GFX90A:       ; %bb.0:
2718; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2719; GFX90A-NEXT:    ;;#ASMSTART
2720; GFX90A-NEXT:    ; def v[2:3]
2721; GFX90A-NEXT:    ;;#ASMEND
2722; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2723; GFX90A-NEXT:    ;;#ASMSTART
2724; GFX90A-NEXT:    ; def v[0:1]
2725; GFX90A-NEXT:    ;;#ASMEND
2726; GFX90A-NEXT:    v_perm_b32 v2, v0, v3, s4
2727; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2728; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2729; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2730; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2731; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2732; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2733;
2734; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_1_1:
2735; GFX940:       ; %bb.0:
2736; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2737; GFX940-NEXT:    ;;#ASMSTART
2738; GFX940-NEXT:    ; def v[2:3]
2739; GFX940-NEXT:    ;;#ASMEND
2740; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2741; GFX940-NEXT:    ;;#ASMSTART
2742; GFX940-NEXT:    ; def v[0:1]
2743; GFX940-NEXT:    ;;#ASMEND
2744; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2745; GFX940-NEXT:    v_perm_b32 v2, v0, v3, s2
2746; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2747; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2748; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2749; GFX940-NEXT:    s_waitcnt vmcnt(0)
2750; GFX940-NEXT:    s_setpc_b64 s[30:31]
2751  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2752  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2753  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2754  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2755  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
2756  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2757  ret void
2758}
2759
; Codegen test: <4 x i16> shuffle of two <3 x i16> inputs with mask <5, 2, 1, 1>.
; Each input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2. Mask indices 0-2 pick from %extract3 and
; 3-5 from %extract31, so the stored value is
; { %extract31[2], %extract3[2], %extract3[1], %extract3[1] }.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2760define void @v_shuffle_v4i16_v3i16__5_2_1_1(ptr addrspace(1) inreg %ptr) {
2761; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_1_1:
2762; GFX900:       ; %bb.0:
2763; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2764; GFX900-NEXT:    ;;#ASMSTART
2765; GFX900-NEXT:    ; def v[0:1]
2766; GFX900-NEXT:    ;;#ASMEND
2767; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2768; GFX900-NEXT:    ;;#ASMSTART
2769; GFX900-NEXT:    ; def v[2:3]
2770; GFX900-NEXT:    ;;#ASMEND
2771; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
2772; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2773; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2774; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2775; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
2776; GFX900-NEXT:    s_waitcnt vmcnt(0)
2777; GFX900-NEXT:    s_setpc_b64 s[30:31]
2778;
2779; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_1_1:
2780; GFX90A:       ; %bb.0:
2781; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2782; GFX90A-NEXT:    ;;#ASMSTART
2783; GFX90A-NEXT:    ; def v[2:3]
2784; GFX90A-NEXT:    ;;#ASMEND
2785; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2786; GFX90A-NEXT:    ;;#ASMSTART
2787; GFX90A-NEXT:    ; def v[0:1]
2788; GFX90A-NEXT:    ;;#ASMEND
2789; GFX90A-NEXT:    v_perm_b32 v2, v1, v3, s4
2790; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2791; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2792; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2793; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2794; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2795; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2796;
2797; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_1_1:
2798; GFX940:       ; %bb.0:
2799; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2800; GFX940-NEXT:    ;;#ASMSTART
2801; GFX940-NEXT:    ; def v[2:3]
2802; GFX940-NEXT:    ;;#ASMEND
2803; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2804; GFX940-NEXT:    ;;#ASMSTART
2805; GFX940-NEXT:    ; def v[0:1]
2806; GFX940-NEXT:    ;;#ASMEND
2807; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2808; GFX940-NEXT:    v_perm_b32 v2, v1, v3, s2
2809; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2810; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2811; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2812; GFX940-NEXT:    s_waitcnt vmcnt(0)
2813; GFX940-NEXT:    s_setpc_b64 s[30:31]
2814  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2815  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2816  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2817  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2818  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
2819  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2820  ret void
2821}
2822
; Codegen test: <4 x i16> shuffle of two <3 x i16> inputs with mask <5, 3, 1, 1>.
; Each input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2. Mask indices 0-2 pick from %extract3 and
; 3-5 from %extract31, so the stored value is
; { %extract31[2], %extract31[0], %extract3[1], %extract3[1] }.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2823define void @v_shuffle_v4i16_v3i16__5_3_1_1(ptr addrspace(1) inreg %ptr) {
2824; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_1_1:
2825; GFX900:       ; %bb.0:
2826; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2827; GFX900-NEXT:    ;;#ASMSTART
2828; GFX900-NEXT:    ; def v[0:1]
2829; GFX900-NEXT:    ;;#ASMEND
2830; GFX900-NEXT:    ;;#ASMSTART
2831; GFX900-NEXT:    ; def v[1:2]
2832; GFX900-NEXT:    ;;#ASMEND
2833; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2834; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
2835; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2836; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2837; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2838; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
2839; GFX900-NEXT:    s_waitcnt vmcnt(0)
2840; GFX900-NEXT:    s_setpc_b64 s[30:31]
2841;
2842; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_1_1:
2843; GFX90A:       ; %bb.0:
2844; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2845; GFX90A-NEXT:    ;;#ASMSTART
2846; GFX90A-NEXT:    ; def v[2:3]
2847; GFX90A-NEXT:    ;;#ASMEND
2848; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2849; GFX90A-NEXT:    v_perm_b32 v2, v2, v3, s4
2850; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2851; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2852; GFX90A-NEXT:    ;;#ASMSTART
2853; GFX90A-NEXT:    ; def v[0:1]
2854; GFX90A-NEXT:    ;;#ASMEND
2855; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2856; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2857; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2858; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2859;
2860; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_1_1:
2861; GFX940:       ; %bb.0:
2862; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2863; GFX940-NEXT:    ;;#ASMSTART
2864; GFX940-NEXT:    ; def v[2:3]
2865; GFX940-NEXT:    ;;#ASMEND
2866; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2867; GFX940-NEXT:    v_perm_b32 v2, v2, v3, s2
2868; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2869; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2870; GFX940-NEXT:    ;;#ASMSTART
2871; GFX940-NEXT:    ; def v[0:1]
2872; GFX940-NEXT:    ;;#ASMEND
2873; GFX940-NEXT:    s_nop 0
2874; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2875; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2876; GFX940-NEXT:    s_waitcnt vmcnt(0)
2877; GFX940-NEXT:    s_setpc_b64 s[30:31]
2878  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2879  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2880  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2881  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2882  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
2883  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2884  ret void
2885}
2886
; Codegen test: <4 x i16> shuffle of two <3 x i16> inputs with mask <5, 4, 1, 1>.
; Each input is an opaque <4 x i16> produced by inline asm ("; def $0", "=v")
; and narrowed to its elements 0..2. Mask indices 0-2 pick from %extract3 and
; 3-5 from %extract31, so the stored value is
; { %extract31[2], %extract31[1], %extract3[1], %extract3[1] }.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
2887define void @v_shuffle_v4i16_v3i16__5_4_1_1(ptr addrspace(1) inreg %ptr) {
2888; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_1_1:
2889; GFX900:       ; %bb.0:
2890; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2891; GFX900-NEXT:    ;;#ASMSTART
2892; GFX900-NEXT:    ; def v[0:1]
2893; GFX900-NEXT:    ;;#ASMEND
2894; GFX900-NEXT:    ;;#ASMSTART
2895; GFX900-NEXT:    ; def v[1:2]
2896; GFX900-NEXT:    ;;#ASMEND
2897; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2898; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v1
2899; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2900; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2901; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2902; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
2903; GFX900-NEXT:    s_waitcnt vmcnt(0)
2904; GFX900-NEXT:    s_setpc_b64 s[30:31]
2905;
2906; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_1_1:
2907; GFX90A:       ; %bb.0:
2908; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2909; GFX90A-NEXT:    ;;#ASMSTART
2910; GFX90A-NEXT:    ; def v[2:3]
2911; GFX90A-NEXT:    ;;#ASMEND
2912; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2913; GFX90A-NEXT:    v_bfi_b32 v2, s4, v3, v2
2914; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2915; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2916; GFX90A-NEXT:    ;;#ASMSTART
2917; GFX90A-NEXT:    ; def v[0:1]
2918; GFX90A-NEXT:    ;;#ASMEND
2919; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2920; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2921; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2922; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2923;
2924; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_1_1:
2925; GFX940:       ; %bb.0:
2926; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2927; GFX940-NEXT:    ;;#ASMSTART
2928; GFX940-NEXT:    ; def v[2:3]
2929; GFX940-NEXT:    ;;#ASMEND
2930; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2931; GFX940-NEXT:    v_bfi_b32 v2, s2, v3, v2
2932; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2933; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2934; GFX940-NEXT:    ;;#ASMSTART
2935; GFX940-NEXT:    ; def v[0:1]
2936; GFX940-NEXT:    ;;#ASMEND
2937; GFX940-NEXT:    s_nop 0
2938; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2939; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2940; GFX940-NEXT:    s_waitcnt vmcnt(0)
2941; GFX940-NEXT:    s_setpc_b64 s[30:31]
2942  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2943  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2944  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2945  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2946  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
2947  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2948  ret void
2949}
2950
; Two-input shuffle with mask <5, 5, 1, 1>. Mask indices 0-2 select from the
; first operand (%extract3) and 3-5 from the second (%extract31), so lanes 0
; and 1 both take element 2 of %extract31 and lanes 2 and 3 both take
; element 1 of %extract3. Each input is an opaque inline-asm <4 x i16> value
; narrowed to its first three elements; the result is stored through %ptr.
; The assertion comments in the body pin the expected llc output for the
; gfx900, gfx90a and gfx940 runs.
2951define void @v_shuffle_v4i16_v3i16__5_5_1_1(ptr addrspace(1) inreg %ptr) {
2952; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_1:
2953; GFX900:       ; %bb.0:
2954; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2955; GFX900-NEXT:    ;;#ASMSTART
2956; GFX900-NEXT:    ; def v[0:1]
2957; GFX900-NEXT:    ;;#ASMEND
2958; GFX900-NEXT:    ;;#ASMSTART
2959; GFX900-NEXT:    ; def v[1:2]
2960; GFX900-NEXT:    ;;#ASMEND
2961; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2962; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2963; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2964; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2965; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
2966; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2967; GFX900-NEXT:    s_waitcnt vmcnt(0)
2968; GFX900-NEXT:    s_setpc_b64 s[30:31]
2969;
2970; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_1:
2971; GFX90A:       ; %bb.0:
2972; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2973; GFX90A-NEXT:    ;;#ASMSTART
2974; GFX90A-NEXT:    ; def v[0:1]
2975; GFX90A-NEXT:    ;;#ASMEND
2976; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2977; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2978; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2979; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2980; GFX90A-NEXT:    ;;#ASMSTART
2981; GFX90A-NEXT:    ; def v[2:3]
2982; GFX90A-NEXT:    ;;#ASMEND
2983; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2984; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2985; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2986; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2987;
2988; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_1:
2989; GFX940:       ; %bb.0:
2990; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2991; GFX940-NEXT:    ;;#ASMSTART
2992; GFX940-NEXT:    ; def v[0:1]
2993; GFX940-NEXT:    ;;#ASMEND
2994; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2995; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2996; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2997; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2998; GFX940-NEXT:    ;;#ASMSTART
2999; GFX940-NEXT:    ; def v[2:3]
3000; GFX940-NEXT:    ;;#ASMEND
3001; GFX940-NEXT:    s_nop 0
3002; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
3003; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3004; GFX940-NEXT:    s_waitcnt vmcnt(0)
3005; GFX940-NEXT:    s_setpc_b64 s[30:31]
3006  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3007  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3008  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3009  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3010  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
3011  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3012  ret void
3013}
3014
; Two-input shuffle with mask <5, 5, poison, 1>. Mask indices 0-2 select from
; the first operand (%extract3) and 3-5 from the second (%extract31), so lanes
; 0 and 1 both take element 2 of %extract31, lane 2 is left unspecified
; (poison mask entry) and lane 3 takes element 1 of %extract3. Each input is
; an opaque inline-asm <4 x i16> value narrowed to its first three elements;
; the result is stored through %ptr. The assertion comments in the body pin
; the expected llc output for the gfx900, gfx90a and gfx940 runs.
3015define void @v_shuffle_v4i16_v3i16__5_5_u_1(ptr addrspace(1) inreg %ptr) {
3016; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_1:
3017; GFX900:       ; %bb.0:
3018; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3019; GFX900-NEXT:    ;;#ASMSTART
3020; GFX900-NEXT:    ; def v[0:1]
3021; GFX900-NEXT:    ;;#ASMEND
3022; GFX900-NEXT:    ;;#ASMSTART
3023; GFX900-NEXT:    ; def v[1:2]
3024; GFX900-NEXT:    ;;#ASMEND
3025; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3026; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3027; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
3028; GFX900-NEXT:    v_mov_b32_e32 v2, v0
3029; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3030; GFX900-NEXT:    s_waitcnt vmcnt(0)
3031; GFX900-NEXT:    s_setpc_b64 s[30:31]
3032;
3033; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_1:
3034; GFX90A:       ; %bb.0:
3035; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3036; GFX90A-NEXT:    ;;#ASMSTART
3037; GFX90A-NEXT:    ; def v[2:3]
3038; GFX90A-NEXT:    ;;#ASMEND
3039; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3040; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3041; GFX90A-NEXT:    ;;#ASMSTART
3042; GFX90A-NEXT:    ; def v[0:1]
3043; GFX90A-NEXT:    ;;#ASMEND
3044; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
3045; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
3046; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3047; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3048; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3049;
3050; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_1:
3051; GFX940:       ; %bb.0:
3052; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3053; GFX940-NEXT:    ;;#ASMSTART
3054; GFX940-NEXT:    ; def v[2:3]
3055; GFX940-NEXT:    ;;#ASMEND
3056; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3057; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3058; GFX940-NEXT:    ;;#ASMSTART
3059; GFX940-NEXT:    ; def v[0:1]
3060; GFX940-NEXT:    ;;#ASMEND
3061; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
3062; GFX940-NEXT:    v_mov_b32_e32 v3, v0
3063; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3064; GFX940-NEXT:    s_waitcnt vmcnt(0)
3065; GFX940-NEXT:    s_setpc_b64 s[30:31]
3066  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3067  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3068  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3069  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3070  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
3071  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3072  ret void
3073}
3074
; Two-input shuffle with mask <5, 5, 0, 1>. Mask indices 0-2 select from the
; first operand (%extract3) and 3-5 from the second (%extract31), so lanes 0
; and 1 both take element 2 of %extract31 and lanes 2 and 3 take elements 0
; and 1 of %extract3. Each input is an opaque inline-asm <4 x i16> value
; narrowed to its first three elements; the result is stored through %ptr.
; The assertion comments in the body pin the expected llc output for the
; gfx900, gfx90a and gfx940 runs.
3075define void @v_shuffle_v4i16_v3i16__5_5_0_1(ptr addrspace(1) inreg %ptr) {
3076; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_1:
3077; GFX900:       ; %bb.0:
3078; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3079; GFX900-NEXT:    ;;#ASMSTART
3080; GFX900-NEXT:    ; def v[0:1]
3081; GFX900-NEXT:    ;;#ASMEND
3082; GFX900-NEXT:    ;;#ASMSTART
3083; GFX900-NEXT:    ; def v[1:2]
3084; GFX900-NEXT:    ;;#ASMEND
3085; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3086; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3087; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
3088; GFX900-NEXT:    v_mov_b32_e32 v2, v0
3089; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3090; GFX900-NEXT:    s_waitcnt vmcnt(0)
3091; GFX900-NEXT:    s_setpc_b64 s[30:31]
3092;
3093; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_1:
3094; GFX90A:       ; %bb.0:
3095; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3096; GFX90A-NEXT:    ;;#ASMSTART
3097; GFX90A-NEXT:    ; def v[2:3]
3098; GFX90A-NEXT:    ;;#ASMEND
3099; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3100; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3101; GFX90A-NEXT:    ;;#ASMSTART
3102; GFX90A-NEXT:    ; def v[0:1]
3103; GFX90A-NEXT:    ;;#ASMEND
3104; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
3105; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
3106; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3107; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3108; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3109;
3110; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_1:
3111; GFX940:       ; %bb.0:
3112; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3113; GFX940-NEXT:    ;;#ASMSTART
3114; GFX940-NEXT:    ; def v[2:3]
3115; GFX940-NEXT:    ;;#ASMEND
3116; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3117; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3118; GFX940-NEXT:    ;;#ASMSTART
3119; GFX940-NEXT:    ; def v[0:1]
3120; GFX940-NEXT:    ;;#ASMEND
3121; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
3122; GFX940-NEXT:    v_mov_b32_e32 v3, v0
3123; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3124; GFX940-NEXT:    s_waitcnt vmcnt(0)
3125; GFX940-NEXT:    s_setpc_b64 s[30:31]
3126  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3127  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3128  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3129  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3130  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
3131  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3132  ret void
3133}
3134
; Two-input shuffle with mask <5, 5, 2, 1>. Mask indices 0-2 select from the
; first operand (%extract3) and 3-5 from the second (%extract31), so lanes 0
; and 1 both take element 2 of %extract31 and lanes 2 and 3 take elements 2
; and 1 of %extract3. Each input is an opaque inline-asm <4 x i16> value
; narrowed to its first three elements; the result is stored through %ptr.
; The assertion comments in the body pin the expected llc output for the
; gfx900, gfx90a and gfx940 runs.
3135define void @v_shuffle_v4i16_v3i16__5_5_2_1(ptr addrspace(1) inreg %ptr) {
3136; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_1:
3137; GFX900:       ; %bb.0:
3138; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3139; GFX900-NEXT:    ;;#ASMSTART
3140; GFX900-NEXT:    ; def v[0:1]
3141; GFX900-NEXT:    ;;#ASMEND
3142; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3143; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
3144; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3145; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3146; GFX900-NEXT:    ;;#ASMSTART
3147; GFX900-NEXT:    ; def v[2:3]
3148; GFX900-NEXT:    ;;#ASMEND
3149; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
3150; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3151; GFX900-NEXT:    s_waitcnt vmcnt(0)
3152; GFX900-NEXT:    s_setpc_b64 s[30:31]
3153;
3154; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_1:
3155; GFX90A:       ; %bb.0:
3156; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3157; GFX90A-NEXT:    ;;#ASMSTART
3158; GFX90A-NEXT:    ; def v[0:1]
3159; GFX90A-NEXT:    ;;#ASMEND
3160; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3161; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v0
3162; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3163; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3164; GFX90A-NEXT:    ;;#ASMSTART
3165; GFX90A-NEXT:    ; def v[2:3]
3166; GFX90A-NEXT:    ;;#ASMEND
3167; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
3168; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3169; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3170; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3171;
3172; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_1:
3173; GFX940:       ; %bb.0:
3174; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3175; GFX940-NEXT:    ;;#ASMSTART
3176; GFX940-NEXT:    ; def v[0:1]
3177; GFX940-NEXT:    ;;#ASMEND
3178; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3179; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v0
3180; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3181; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3182; GFX940-NEXT:    ;;#ASMSTART
3183; GFX940-NEXT:    ; def v[2:3]
3184; GFX940-NEXT:    ;;#ASMEND
3185; GFX940-NEXT:    s_nop 0
3186; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
3187; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3188; GFX940-NEXT:    s_waitcnt vmcnt(0)
3189; GFX940-NEXT:    s_setpc_b64 s[30:31]
3190  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3191  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3192  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3193  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3194  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
3195  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3196  ret void
3197}
3198
; Two-input shuffle with mask <5, 5, 3, 1>. Mask indices 0-2 select from the
; first operand (%extract3) and 3-5 from the second (%extract31), so lanes 0
; and 1 both take element 2 of %extract31, lane 2 takes element 0 of
; %extract31 and lane 3 takes element 1 of %extract3. Each input is an opaque
; inline-asm <4 x i16> value narrowed to its first three elements; the result
; is stored through %ptr. The assertion comments in the body pin the expected
; llc output for the gfx900, gfx90a and gfx940 runs.
3199define void @v_shuffle_v4i16_v3i16__5_5_3_1(ptr addrspace(1) inreg %ptr) {
3200; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_1:
3201; GFX900:       ; %bb.0:
3202; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3203; GFX900-NEXT:    ;;#ASMSTART
3204; GFX900-NEXT:    ; def v[0:1]
3205; GFX900-NEXT:    ;;#ASMEND
3206; GFX900-NEXT:    ;;#ASMSTART
3207; GFX900-NEXT:    ; def v[1:2]
3208; GFX900-NEXT:    ;;#ASMEND
3209; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3210; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
3211; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3212; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3213; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
3214; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
3215; GFX900-NEXT:    s_waitcnt vmcnt(0)
3216; GFX900-NEXT:    s_setpc_b64 s[30:31]
3217;
3218; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_1:
3219; GFX90A:       ; %bb.0:
3220; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3221; GFX90A-NEXT:    ;;#ASMSTART
3222; GFX90A-NEXT:    ; def v[0:1]
3223; GFX90A-NEXT:    ;;#ASMEND
3224; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3225; GFX90A-NEXT:    ;;#ASMSTART
3226; GFX90A-NEXT:    ; def v[2:3]
3227; GFX90A-NEXT:    ;;#ASMEND
3228; GFX90A-NEXT:    v_bfi_b32 v1, s4, v2, v0
3229; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3230; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3231; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
3232; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3233; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3234; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3235;
3236; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_1:
3237; GFX940:       ; %bb.0:
3238; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3239; GFX940-NEXT:    ;;#ASMSTART
3240; GFX940-NEXT:    ; def v[0:1]
3241; GFX940-NEXT:    ;;#ASMEND
3242; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3243; GFX940-NEXT:    ;;#ASMSTART
3244; GFX940-NEXT:    ; def v[2:3]
3245; GFX940-NEXT:    ;;#ASMEND
3246; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3247; GFX940-NEXT:    v_bfi_b32 v1, s2, v2, v0
3248; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3249; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
3250; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3251; GFX940-NEXT:    s_waitcnt vmcnt(0)
3252; GFX940-NEXT:    s_setpc_b64 s[30:31]
3253  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3254  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3255  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3256  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3257  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
3258  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3259  ret void
3260}
3261
; Two-input shuffle with mask <5, 5, 4, 1>. Mask indices 0-2 select from the
; first operand (%extract3) and 3-5 from the second (%extract31), so lanes 0
; and 1 both take element 2 of %extract31, lane 2 takes element 1 of
; %extract31 and lane 3 takes element 1 of %extract3. Each input is an opaque
; inline-asm <4 x i16> value narrowed to its first three elements; the result
; is stored through %ptr. The assertion comments in the body pin the expected
; llc output for the gfx900, gfx90a and gfx940 runs.
3262define void @v_shuffle_v4i16_v3i16__5_5_4_1(ptr addrspace(1) inreg %ptr) {
3263; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_1:
3264; GFX900:       ; %bb.0:
3265; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3266; GFX900-NEXT:    ;;#ASMSTART
3267; GFX900-NEXT:    ; def v[0:1]
3268; GFX900-NEXT:    ;;#ASMEND
3269; GFX900-NEXT:    ;;#ASMSTART
3270; GFX900-NEXT:    ; def v[1:2]
3271; GFX900-NEXT:    ;;#ASMEND
3272; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3273; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
3274; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3275; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3276; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
3277; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
3278; GFX900-NEXT:    s_waitcnt vmcnt(0)
3279; GFX900-NEXT:    s_setpc_b64 s[30:31]
3280;
3281; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_1:
3282; GFX90A:       ; %bb.0:
3283; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3284; GFX90A-NEXT:    ;;#ASMSTART
3285; GFX90A-NEXT:    ; def v[0:1]
3286; GFX90A-NEXT:    ;;#ASMEND
3287; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3288; GFX90A-NEXT:    ;;#ASMSTART
3289; GFX90A-NEXT:    ; def v[2:3]
3290; GFX90A-NEXT:    ;;#ASMEND
3291; GFX90A-NEXT:    v_perm_b32 v1, v0, v2, s4
3292; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3293; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3294; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
3295; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3296; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3297; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3298;
3299; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_1:
3300; GFX940:       ; %bb.0:
3301; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3302; GFX940-NEXT:    ;;#ASMSTART
3303; GFX940-NEXT:    ; def v[0:1]
3304; GFX940-NEXT:    ;;#ASMEND
3305; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3306; GFX940-NEXT:    ;;#ASMSTART
3307; GFX940-NEXT:    ; def v[2:3]
3308; GFX940-NEXT:    ;;#ASMEND
3309; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3310; GFX940-NEXT:    v_perm_b32 v1, v0, v2, s2
3311; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3312; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
3313; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3314; GFX940-NEXT:    s_waitcnt vmcnt(0)
3315; GFX940-NEXT:    s_setpc_b64 s[30:31]
3316  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3317  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3318  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3319  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3320  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
3321  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3322  ret void
3323}
3324
; Single-input shuffle with mask <poison, 2, 2, 2>: lane 0 is left unspecified
; (poison mask entry) and lanes 1-3 all take element 2 of %extract3. The
; second shuffle operand is poison. The input is an opaque inline-asm
; <4 x i16> value narrowed to its first three elements; the result is stored
; through %ptr. The assertion comments in the body pin the expected llc output
; for the gfx900, gfx90a and gfx940 runs.
3325define void @v_shuffle_v4i16_v3i16__u_2_2_2(ptr addrspace(1) inreg %ptr) {
3326; GFX900-LABEL: v_shuffle_v4i16_v3i16__u_2_2_2:
3327; GFX900:       ; %bb.0:
3328; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3329; GFX900-NEXT:    ;;#ASMSTART
3330; GFX900-NEXT:    ; def v[0:1]
3331; GFX900-NEXT:    ;;#ASMEND
3332; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3333; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3334; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
3335; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3336; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3337; GFX900-NEXT:    s_waitcnt vmcnt(0)
3338; GFX900-NEXT:    s_setpc_b64 s[30:31]
3339;
3340; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_2_2_2:
3341; GFX90A:       ; %bb.0:
3342; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3343; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3344; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3345; GFX90A-NEXT:    ;;#ASMSTART
3346; GFX90A-NEXT:    ; def v[0:1]
3347; GFX90A-NEXT:    ;;#ASMEND
3348; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
3349; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
3350; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3351; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3352; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3353;
3354; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_2_2_2:
3355; GFX940:       ; %bb.0:
3356; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3357; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3358; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3359; GFX940-NEXT:    ;;#ASMSTART
3360; GFX940-NEXT:    ; def v[0:1]
3361; GFX940-NEXT:    ;;#ASMEND
3362; GFX940-NEXT:    s_nop 0
3363; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
3364; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
3365; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3366; GFX940-NEXT:    s_waitcnt vmcnt(0)
3367; GFX940-NEXT:    s_setpc_b64 s[30:31]
3368  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3369  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3370  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
3371  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3372  ret void
3373}
3374
; Single-input shuffle with mask <0, 2, 2, 2>: lane 0 takes element 0 of
; %extract3 and lanes 1-3 all take element 2. The second shuffle operand is
; poison. The input is an opaque inline-asm <4 x i16> value narrowed to its
; first three elements; the result is stored through %ptr. The assertion
; comments in the body pin the expected llc output for the gfx900, gfx90a and
; gfx940 runs.
3375define void @v_shuffle_v4i16_v3i16__0_2_2_2(ptr addrspace(1) inreg %ptr) {
3376; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_2_2_2:
3377; GFX900:       ; %bb.0:
3378; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3379; GFX900-NEXT:    ;;#ASMSTART
3380; GFX900-NEXT:    ; def v[0:1]
3381; GFX900-NEXT:    ;;#ASMEND
3382; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3383; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3384; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
3385; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
3386; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
3387; GFX900-NEXT:    s_waitcnt vmcnt(0)
3388; GFX900-NEXT:    s_setpc_b64 s[30:31]
3389;
3390; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_2_2_2:
3391; GFX90A:       ; %bb.0:
3392; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3393; GFX90A-NEXT:    ;;#ASMSTART
3394; GFX90A-NEXT:    ; def v[0:1]
3395; GFX90A-NEXT:    ;;#ASMEND
3396; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3397; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3398; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
3399; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
3400; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
3401; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3402; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3403;
3404; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_2_2_2:
3405; GFX940:       ; %bb.0:
3406; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3407; GFX940-NEXT:    ;;#ASMSTART
3408; GFX940-NEXT:    ; def v[0:1]
3409; GFX940-NEXT:    ;;#ASMEND
3410; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3411; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3412; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
3413; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
3414; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3415; GFX940-NEXT:    s_waitcnt vmcnt(0)
3416; GFX940-NEXT:    s_setpc_b64 s[30:31]
3417  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3418  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3419  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
3420  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3421  ret void
3422}
3423
; Single-input shuffle with mask <1, 2, 2, 2>: lane 0 takes element 1 of
; %extract3 and lanes 1-3 all take element 2. The second shuffle operand is
; poison. The input is an opaque inline-asm <4 x i16> value narrowed to its
; first three elements; the result is stored through %ptr. The assertion
; comments in the body pin the expected llc output for the gfx900, gfx90a and
; gfx940 runs.
3424define void @v_shuffle_v4i16_v3i16__1_2_2_2(ptr addrspace(1) inreg %ptr) {
3425; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_2_2_2:
3426; GFX900:       ; %bb.0:
3427; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3428; GFX900-NEXT:    ;;#ASMSTART
3429; GFX900-NEXT:    ; def v[0:1]
3430; GFX900-NEXT:    ;;#ASMEND
3431; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3432; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3433; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
3434; GFX900-NEXT:    v_alignbit_b32 v1, v1, v0, 16
3435; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3436; GFX900-NEXT:    s_waitcnt vmcnt(0)
3437; GFX900-NEXT:    s_setpc_b64 s[30:31]
3438;
3439; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_2_2_2:
3440; GFX90A:       ; %bb.0:
3441; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3442; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3443; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3444; GFX90A-NEXT:    ;;#ASMSTART
3445; GFX90A-NEXT:    ; def v[0:1]
3446; GFX90A-NEXT:    ;;#ASMEND
3447; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
3448; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v0, 16
3449; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3450; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3451; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3452;
3453; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_2_2_2:
3454; GFX940:       ; %bb.0:
3455; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3456; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3457; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3458; GFX940-NEXT:    ;;#ASMSTART
3459; GFX940-NEXT:    ; def v[0:1]
3460; GFX940-NEXT:    ;;#ASMEND
3461; GFX940-NEXT:    s_nop 0
3462; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
3463; GFX940-NEXT:    v_alignbit_b32 v2, v1, v0, 16
3464; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3465; GFX940-NEXT:    s_waitcnt vmcnt(0)
3466; GFX940-NEXT:    s_setpc_b64 s[30:31]
3467  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3468  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3469  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
3470  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3471  ret void
3472}
3473
; Single-input shuffle with mask <2, 2, 2, 2>: every lane takes element 2 of
; %extract3 (a splat of that element). The second shuffle operand is poison.
; The input is an opaque inline-asm <4 x i16> value narrowed to its first
; three elements; the result is stored through %ptr. The assertion comments in
; the body pin the expected llc output for the gfx900, gfx90a and gfx940 runs.
3474define void @v_shuffle_v4i16_v3i16__2_2_2_2(ptr addrspace(1) inreg %ptr) {
3475; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_2_2_2:
3476; GFX900:       ; %bb.0:
3477; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3478; GFX900-NEXT:    ;;#ASMSTART
3479; GFX900-NEXT:    ; def v[0:1]
3480; GFX900-NEXT:    ;;#ASMEND
3481; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3482; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
3483; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3484; GFX900-NEXT:    v_mov_b32_e32 v1, v0
3485; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
3486; GFX900-NEXT:    s_waitcnt vmcnt(0)
3487; GFX900-NEXT:    s_setpc_b64 s[30:31]
3488;
3489; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_2_2_2:
3490; GFX90A:       ; %bb.0:
3491; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3492; GFX90A-NEXT:    ;;#ASMSTART
3493; GFX90A-NEXT:    ; def v[0:1]
3494; GFX90A-NEXT:    ;;#ASMEND
3495; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3496; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
3497; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3498; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
3499; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
3500; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3501; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3502;
3503; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_2_2_2:
3504; GFX940:       ; %bb.0:
3505; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3506; GFX940-NEXT:    ;;#ASMSTART
3507; GFX940-NEXT:    ; def v[0:1]
3508; GFX940-NEXT:    ;;#ASMEND
3509; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3510; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
3511; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3512; GFX940-NEXT:    v_mov_b32_e32 v1, v0
3513; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3514; GFX940-NEXT:    s_waitcnt vmcnt(0)
3515; GFX940-NEXT:    s_setpc_b64 s[30:31]
3516  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3517  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3518  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
3519  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3520  ret void
3521}
3522
; Shuffle with mask <3, 2, 2, 2> where the second operand is poison: mask
; index 3 names element 0 of that poison operand, so lane 0 carries no defined
; value, while lanes 1-3 all take element 2 of %extract3. The input is an
; opaque inline-asm <4 x i16> value narrowed to its first three elements; the
; result is stored through %ptr. The assertion comments in the body pin the
; expected llc output for the gfx900, gfx90a and gfx940 runs.
3523define void @v_shuffle_v4i16_v3i16__3_2_2_2(ptr addrspace(1) inreg %ptr) {
3524; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_2_2_2:
3525; GFX900:       ; %bb.0:
3526; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3527; GFX900-NEXT:    ;;#ASMSTART
3528; GFX900-NEXT:    ; def v[0:1]
3529; GFX900-NEXT:    ;;#ASMEND
3530; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3531; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3532; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
3533; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3534; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3535; GFX900-NEXT:    s_waitcnt vmcnt(0)
3536; GFX900-NEXT:    s_setpc_b64 s[30:31]
3537;
3538; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_2_2_2:
3539; GFX90A:       ; %bb.0:
3540; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3541; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3542; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3543; GFX90A-NEXT:    ;;#ASMSTART
3544; GFX90A-NEXT:    ; def v[0:1]
3545; GFX90A-NEXT:    ;;#ASMEND
3546; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
3547; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
3548; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3549; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3550; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3551;
3552; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_2_2_2:
3553; GFX940:       ; %bb.0:
3554; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3555; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3556; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3557; GFX940-NEXT:    ;;#ASMSTART
3558; GFX940-NEXT:    ; def v[0:1]
3559; GFX940-NEXT:    ;;#ASMEND
3560; GFX940-NEXT:    s_nop 0
3561; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
3562; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
3563; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3564; GFX940-NEXT:    s_waitcnt vmcnt(0)
3565; GFX940-NEXT:    s_setpc_b64 s[30:31]
3566  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3567  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3568  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
3569  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3570  ret void
3571}
3572
; Two-input shuffle with mask <4, 2, 2, 2>. Mask indices 0-2 select from the
; first operand (%extract3) and 3-5 from the second (%extract31), so lane 0
; takes element 1 of %extract31 and lanes 1-3 all take element 2 of
; %extract3. Each input is an opaque inline-asm <4 x i16> value narrowed to
; its first three elements; the result is stored through %ptr. The assertion
; comments in the body pin the expected llc output for the gfx900, gfx90a and
; gfx940 runs.
3573define void @v_shuffle_v4i16_v3i16__4_2_2_2(ptr addrspace(1) inreg %ptr) {
3574; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_2_2_2:
3575; GFX900:       ; %bb.0:
3576; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3577; GFX900-NEXT:    ;;#ASMSTART
3578; GFX900-NEXT:    ; def v[2:3]
3579; GFX900-NEXT:    ;;#ASMEND
3580; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3581; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3582; GFX900-NEXT:    ;;#ASMSTART
3583; GFX900-NEXT:    ; def v[0:1]
3584; GFX900-NEXT:    ;;#ASMEND
3585; GFX900-NEXT:    v_perm_b32 v3, v1, v1, s4
3586; GFX900-NEXT:    v_alignbit_b32 v2, v1, v2, 16
3587; GFX900-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3588; GFX900-NEXT:    s_waitcnt vmcnt(0)
3589; GFX900-NEXT:    s_setpc_b64 s[30:31]
3590;
3591; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_2_2_2:
3592; GFX90A:       ; %bb.0:
3593; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3594; GFX90A-NEXT:    ;;#ASMSTART
3595; GFX90A-NEXT:    ; def v[2:3]
3596; GFX90A-NEXT:    ;;#ASMEND
3597; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3598; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3599; GFX90A-NEXT:    ;;#ASMSTART
3600; GFX90A-NEXT:    ; def v[0:1]
3601; GFX90A-NEXT:    ;;#ASMEND
3602; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
3603; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v2, 16
3604; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3605; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3606; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3607;
3608; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_2_2_2:
3609; GFX940:       ; %bb.0:
3610; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3611; GFX940-NEXT:    ;;#ASMSTART
3612; GFX940-NEXT:    ; def v[2:3]
3613; GFX940-NEXT:    ;;#ASMEND
3614; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3615; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3616; GFX940-NEXT:    ;;#ASMSTART
3617; GFX940-NEXT:    ; def v[0:1]
3618; GFX940-NEXT:    ;;#ASMEND
3619; GFX940-NEXT:    s_nop 0
3620; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
3621; GFX940-NEXT:    v_alignbit_b32 v2, v1, v2, 16
3622; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3623; GFX940-NEXT:    s_waitcnt vmcnt(0)
3624; GFX940-NEXT:    s_setpc_b64 s[30:31]
3625  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3626  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3627  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3628  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3629  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
3630  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3631  ret void
3632}
3633
; Two-input shuffle with mask <5, 2, 2, 2>. Mask indices 0-2 select from the
; first operand (%extract3) and 3-5 from the second (%extract31), so lane 0
; takes element 2 of %extract31 and lanes 1-3 all take element 2 of
; %extract3. Each input is an opaque inline-asm <4 x i16> value narrowed to
; its first three elements; the result is stored through %ptr. The assertion
; comments in the body pin the expected llc output for the gfx900, gfx90a and
; gfx940 runs.
3634define void @v_shuffle_v4i16_v3i16__5_2_2_2(ptr addrspace(1) inreg %ptr) {
3635; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_2_2:
3636; GFX900:       ; %bb.0:
3637; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3638; GFX900-NEXT:    ;;#ASMSTART
3639; GFX900-NEXT:    ; def v[0:1]
3640; GFX900-NEXT:    ;;#ASMEND
3641; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3642; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3643; GFX900-NEXT:    ;;#ASMSTART
3644; GFX900-NEXT:    ; def v[2:3]
3645; GFX900-NEXT:    ;;#ASMEND
3646; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
3647; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
3648; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3649; GFX900-NEXT:    s_waitcnt vmcnt(0)
3650; GFX900-NEXT:    s_setpc_b64 s[30:31]
3651;
3652; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_2_2:
3653; GFX90A:       ; %bb.0:
3654; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3655; GFX90A-NEXT:    ;;#ASMSTART
3656; GFX90A-NEXT:    ; def v[0:1]
3657; GFX90A-NEXT:    ;;#ASMEND
3658; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3659; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3660; GFX90A-NEXT:    ;;#ASMSTART
3661; GFX90A-NEXT:    ; def v[2:3]
3662; GFX90A-NEXT:    ;;#ASMEND
3663; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
3664; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
3665; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3666; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3667; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3668;
3669; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_2_2:
3670; GFX940:       ; %bb.0:
3671; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3672; GFX940-NEXT:    ;;#ASMSTART
3673; GFX940-NEXT:    ; def v[0:1]
3674; GFX940-NEXT:    ;;#ASMEND
3675; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3676; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3677; GFX940-NEXT:    ;;#ASMSTART
3678; GFX940-NEXT:    ; def v[2:3]
3679; GFX940-NEXT:    ;;#ASMEND
3680; GFX940-NEXT:    s_nop 0
3681; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
3682; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
3683; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3684; GFX940-NEXT:    s_waitcnt vmcnt(0)
3685; GFX940-NEXT:    s_setpc_b64 s[30:31]
3686  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3687  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3688  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3689  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3690  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
3691  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3692  ret void
3693}
3694
; Shuffle mask <5, poison, 2, 2>. Lane 0 reads element 2 of %extract31 (mask
; indices 3-5 address the second operand), lane 1 is left as poison (the "u"
; in the function name), and lanes 2-3 read element 2 of %extract3. Both
; operands are the low three elements of opaque <4 x i16> values defined by
; inline asm; the shuffled <4 x i16> is stored to %ptr. The expected-output
; comments are autogenerated by utils/update_llc_test_checks.py.
3695define void @v_shuffle_v4i16_v3i16__5_u_2_2(ptr addrspace(1) inreg %ptr) {
3696; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_2_2:
3697; GFX900:       ; %bb.0:
3698; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3699; GFX900-NEXT:    ;;#ASMSTART
3700; GFX900-NEXT:    ; def v[0:1]
3701; GFX900-NEXT:    ;;#ASMEND
3702; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3703; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3704; GFX900-NEXT:    ;;#ASMSTART
3705; GFX900-NEXT:    ; def v[2:3]
3706; GFX900-NEXT:    ;;#ASMEND
3707; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
3708; GFX900-NEXT:    v_mov_b32_e32 v0, v3
3709; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3710; GFX900-NEXT:    s_waitcnt vmcnt(0)
3711; GFX900-NEXT:    s_setpc_b64 s[30:31]
3712;
3713; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_2_2:
3714; GFX90A:       ; %bb.0:
3715; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3716; GFX90A-NEXT:    ;;#ASMSTART
3717; GFX90A-NEXT:    ; def v[0:1]
3718; GFX90A-NEXT:    ;;#ASMEND
3719; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3720; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3721; GFX90A-NEXT:    ;;#ASMSTART
3722; GFX90A-NEXT:    ; def v[2:3]
3723; GFX90A-NEXT:    ;;#ASMEND
3724; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
3725; GFX90A-NEXT:    v_mov_b32_e32 v0, v3
3726; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3727; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3728; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3729;
3730; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_2_2:
3731; GFX940:       ; %bb.0:
3732; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3733; GFX940-NEXT:    ;;#ASMSTART
3734; GFX940-NEXT:    ; def v[0:1]
3735; GFX940-NEXT:    ;;#ASMEND
3736; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3737; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3738; GFX940-NEXT:    ;;#ASMSTART
3739; GFX940-NEXT:    ; def v[2:3]
3740; GFX940-NEXT:    ;;#ASMEND
3741; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
3742; GFX940-NEXT:    v_mov_b32_e32 v0, v3
3743; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3744; GFX940-NEXT:    s_waitcnt vmcnt(0)
3745; GFX940-NEXT:    s_setpc_b64 s[30:31]
3746  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3747  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3748  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3749  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3750  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
3751  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3752  ret void
3753}
3754
; Shuffle mask <5, 0, 2, 2>. Lane 0 reads element 2 of %extract31 (mask
; indices 3-5 address the second operand), lane 1 reads element 0 of
; %extract3, and lanes 2-3 read element 2 of %extract3. Both operands are
; the low three elements of opaque <4 x i16> values defined by inline asm;
; the shuffled <4 x i16> is stored to %ptr. The expected-output comments are
; autogenerated by utils/update_llc_test_checks.py.
3755define void @v_shuffle_v4i16_v3i16__5_0_2_2(ptr addrspace(1) inreg %ptr) {
3756; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_2_2:
3757; GFX900:       ; %bb.0:
3758; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3759; GFX900-NEXT:    ;;#ASMSTART
3760; GFX900-NEXT:    ; def v[0:1]
3761; GFX900-NEXT:    ;;#ASMEND
3762; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3763; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3764; GFX900-NEXT:    ;;#ASMSTART
3765; GFX900-NEXT:    ; def v[2:3]
3766; GFX900-NEXT:    ;;#ASMEND
3767; GFX900-NEXT:    v_perm_b32 v0, v0, v3, s4
3768; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
3769; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3770; GFX900-NEXT:    s_waitcnt vmcnt(0)
3771; GFX900-NEXT:    s_setpc_b64 s[30:31]
3772;
3773; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_2_2:
3774; GFX90A:       ; %bb.0:
3775; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3776; GFX90A-NEXT:    ;;#ASMSTART
3777; GFX90A-NEXT:    ; def v[0:1]
3778; GFX90A-NEXT:    ;;#ASMEND
3779; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3780; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3781; GFX90A-NEXT:    ;;#ASMSTART
3782; GFX90A-NEXT:    ; def v[2:3]
3783; GFX90A-NEXT:    ;;#ASMEND
3784; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
3785; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
3786; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3787; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3788; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3789;
3790; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_2_2:
3791; GFX940:       ; %bb.0:
3792; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3793; GFX940-NEXT:    ;;#ASMSTART
3794; GFX940-NEXT:    ; def v[0:1]
3795; GFX940-NEXT:    ;;#ASMEND
3796; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3797; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3798; GFX940-NEXT:    ;;#ASMSTART
3799; GFX940-NEXT:    ; def v[2:3]
3800; GFX940-NEXT:    ;;#ASMEND
3801; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
3802; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
3803; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3804; GFX940-NEXT:    s_waitcnt vmcnt(0)
3805; GFX940-NEXT:    s_setpc_b64 s[30:31]
3806  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3807  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3808  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3809  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3810  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
3811  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3812  ret void
3813}
3814
; Shuffle mask <5, 1, 2, 2>. Lane 0 reads element 2 of %extract31 (mask
; indices 3-5 address the second operand), lane 1 reads element 1 of
; %extract3, and lanes 2-3 read element 2 of %extract3. Both operands are
; the low three elements of opaque <4 x i16> values defined by inline asm;
; the shuffled <4 x i16> is stored to %ptr. The expected-output comments are
; autogenerated by utils/update_llc_test_checks.py.
3815define void @v_shuffle_v4i16_v3i16__5_1_2_2(ptr addrspace(1) inreg %ptr) {
3816; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_2_2:
3817; GFX900:       ; %bb.0:
3818; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3819; GFX900-NEXT:    ;;#ASMSTART
3820; GFX900-NEXT:    ; def v[0:1]
3821; GFX900-NEXT:    ;;#ASMEND
3822; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3823; GFX900-NEXT:    ;;#ASMSTART
3824; GFX900-NEXT:    ; def v[2:3]
3825; GFX900-NEXT:    ;;#ASMEND
3826; GFX900-NEXT:    v_bfi_b32 v0, s4, v3, v0
3827; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3828; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3829; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
3830; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3831; GFX900-NEXT:    s_waitcnt vmcnt(0)
3832; GFX900-NEXT:    s_setpc_b64 s[30:31]
3833;
3834; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_2_2:
3835; GFX90A:       ; %bb.0:
3836; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3837; GFX90A-NEXT:    ;;#ASMSTART
3838; GFX90A-NEXT:    ; def v[0:1]
3839; GFX90A-NEXT:    ;;#ASMEND
3840; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3841; GFX90A-NEXT:    ;;#ASMSTART
3842; GFX90A-NEXT:    ; def v[2:3]
3843; GFX90A-NEXT:    ;;#ASMEND
3844; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
3845; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3846; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3847; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
3848; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3849; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3850; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3851;
3852; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_2_2:
3853; GFX940:       ; %bb.0:
3854; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3855; GFX940-NEXT:    ;;#ASMSTART
3856; GFX940-NEXT:    ; def v[0:1]
3857; GFX940-NEXT:    ;;#ASMEND
3858; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3859; GFX940-NEXT:    ;;#ASMSTART
3860; GFX940-NEXT:    ; def v[2:3]
3861; GFX940-NEXT:    ;;#ASMEND
3862; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3863; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
3864; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3865; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
3866; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3867; GFX940-NEXT:    s_waitcnt vmcnt(0)
3868; GFX940-NEXT:    s_setpc_b64 s[30:31]
3869  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3870  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3871  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3872  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3873  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
3874  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3875  ret void
3876}
3877
; Shuffle mask <5, 3, 2, 2>. Mask indices 3-5 address the second operand,
; so lane 0 reads element 2 of %extract31 and lane 1 reads element 0 of
; %extract31; lanes 2-3 read element 2 of %extract3. Both operands are the
; low three elements of opaque <4 x i16> values defined by inline asm; the
; shuffled <4 x i16> is stored to %ptr. The expected-output comments are
; autogenerated by utils/update_llc_test_checks.py.
3878define void @v_shuffle_v4i16_v3i16__5_3_2_2(ptr addrspace(1) inreg %ptr) {
3879; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_2_2:
3880; GFX900:       ; %bb.0:
3881; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3882; GFX900-NEXT:    ;;#ASMSTART
3883; GFX900-NEXT:    ; def v[0:1]
3884; GFX900-NEXT:    ;;#ASMEND
3885; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3886; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3887; GFX900-NEXT:    ;;#ASMSTART
3888; GFX900-NEXT:    ; def v[2:3]
3889; GFX900-NEXT:    ;;#ASMEND
3890; GFX900-NEXT:    v_perm_b32 v0, v2, v3, s4
3891; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
3892; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3893; GFX900-NEXT:    s_waitcnt vmcnt(0)
3894; GFX900-NEXT:    s_setpc_b64 s[30:31]
3895;
3896; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_2_2:
3897; GFX90A:       ; %bb.0:
3898; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3899; GFX90A-NEXT:    ;;#ASMSTART
3900; GFX90A-NEXT:    ; def v[0:1]
3901; GFX90A-NEXT:    ;;#ASMEND
3902; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3903; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3904; GFX90A-NEXT:    ;;#ASMSTART
3905; GFX90A-NEXT:    ; def v[2:3]
3906; GFX90A-NEXT:    ;;#ASMEND
3907; GFX90A-NEXT:    v_perm_b32 v0, v2, v3, s4
3908; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
3909; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3910; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3911; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3912;
3913; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_2_2:
3914; GFX940:       ; %bb.0:
3915; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3916; GFX940-NEXT:    ;;#ASMSTART
3917; GFX940-NEXT:    ; def v[0:1]
3918; GFX940-NEXT:    ;;#ASMEND
3919; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3920; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3921; GFX940-NEXT:    ;;#ASMSTART
3922; GFX940-NEXT:    ; def v[2:3]
3923; GFX940-NEXT:    ;;#ASMEND
3924; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
3925; GFX940-NEXT:    v_perm_b32 v0, v2, v3, s2
3926; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3927; GFX940-NEXT:    s_waitcnt vmcnt(0)
3928; GFX940-NEXT:    s_setpc_b64 s[30:31]
3929  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3930  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3931  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3932  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3933  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
3934  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3935  ret void
3936}
3937
; Shuffle mask <5, 4, 2, 2>. Mask indices 3-5 address the second operand,
; so lane 0 reads element 2 of %extract31 and lane 1 reads element 1 of
; %extract31; lanes 2-3 read element 2 of %extract3. Both operands are the
; low three elements of opaque <4 x i16> values defined by inline asm; the
; shuffled <4 x i16> is stored to %ptr. The expected-output comments are
; autogenerated by utils/update_llc_test_checks.py.
3938define void @v_shuffle_v4i16_v3i16__5_4_2_2(ptr addrspace(1) inreg %ptr) {
3939; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_2_2:
3940; GFX900:       ; %bb.0:
3941; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3942; GFX900-NEXT:    ;;#ASMSTART
3943; GFX900-NEXT:    ; def v[0:1]
3944; GFX900-NEXT:    ;;#ASMEND
3945; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3946; GFX900-NEXT:    ;;#ASMSTART
3947; GFX900-NEXT:    ; def v[2:3]
3948; GFX900-NEXT:    ;;#ASMEND
3949; GFX900-NEXT:    v_bfi_b32 v0, s4, v3, v2
3950; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3951; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3952; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
3953; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3954; GFX900-NEXT:    s_waitcnt vmcnt(0)
3955; GFX900-NEXT:    s_setpc_b64 s[30:31]
3956;
3957; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_2_2:
3958; GFX90A:       ; %bb.0:
3959; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3960; GFX90A-NEXT:    ;;#ASMSTART
3961; GFX90A-NEXT:    ; def v[0:1]
3962; GFX90A-NEXT:    ;;#ASMEND
3963; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3964; GFX90A-NEXT:    ;;#ASMSTART
3965; GFX90A-NEXT:    ; def v[2:3]
3966; GFX90A-NEXT:    ;;#ASMEND
3967; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v2
3968; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3969; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3970; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
3971; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3972; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3973; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3974;
3975; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_2_2:
3976; GFX940:       ; %bb.0:
3977; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3978; GFX940-NEXT:    ;;#ASMSTART
3979; GFX940-NEXT:    ; def v[0:1]
3980; GFX940-NEXT:    ;;#ASMEND
3981; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3982; GFX940-NEXT:    ;;#ASMSTART
3983; GFX940-NEXT:    ; def v[2:3]
3984; GFX940-NEXT:    ;;#ASMEND
3985; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3986; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v2
3987; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3988; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
3989; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3990; GFX940-NEXT:    s_waitcnt vmcnt(0)
3991; GFX940-NEXT:    s_setpc_b64 s[30:31]
3992  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3993  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3994  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3995  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3996  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
3997  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3998  ret void
3999}
4000
; Shuffle mask <5, 5, 2, 2>. Lanes 0-1 both read element 2 of %extract31
; (mask indices 3-5 address the second operand) and lanes 2-3 both read
; element 2 of %extract3. Both operands are the low three elements of opaque
; <4 x i16> values defined by inline asm; the shuffled <4 x i16> is stored
; to %ptr. The expected-output comments are autogenerated by
; utils/update_llc_test_checks.py.
4001define void @v_shuffle_v4i16_v3i16__5_5_2_2(ptr addrspace(1) inreg %ptr) {
4002; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_2:
4003; GFX900:       ; %bb.0:
4004; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4005; GFX900-NEXT:    ;;#ASMSTART
4006; GFX900-NEXT:    ; def v[0:1]
4007; GFX900-NEXT:    ;;#ASMEND
4008; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4009; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4010; GFX900-NEXT:    ;;#ASMSTART
4011; GFX900-NEXT:    ; def v[2:3]
4012; GFX900-NEXT:    ;;#ASMEND
4013; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4014; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
4015; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4016; GFX900-NEXT:    s_waitcnt vmcnt(0)
4017; GFX900-NEXT:    s_setpc_b64 s[30:31]
4018;
4019; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_2:
4020; GFX90A:       ; %bb.0:
4021; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4022; GFX90A-NEXT:    ;;#ASMSTART
4023; GFX90A-NEXT:    ; def v[0:1]
4024; GFX90A-NEXT:    ;;#ASMEND
4025; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4026; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4027; GFX90A-NEXT:    ;;#ASMSTART
4028; GFX90A-NEXT:    ; def v[2:3]
4029; GFX90A-NEXT:    ;;#ASMEND
4030; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
4031; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4032; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4033; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4034; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4035;
4036; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_2:
4037; GFX940:       ; %bb.0:
4038; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4039; GFX940-NEXT:    ;;#ASMSTART
4040; GFX940-NEXT:    ; def v[0:1]
4041; GFX940-NEXT:    ;;#ASMEND
4042; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4043; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4044; GFX940-NEXT:    ;;#ASMSTART
4045; GFX940-NEXT:    ; def v[2:3]
4046; GFX940-NEXT:    ;;#ASMEND
4047; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
4048; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4049; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4050; GFX940-NEXT:    s_waitcnt vmcnt(0)
4051; GFX940-NEXT:    s_setpc_b64 s[30:31]
4052  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4053  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4054  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4055  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4056  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
4057  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4058  ret void
4059}
4060
; Shuffle mask <5, 5, poison, 2>. Lanes 0-1 both read element 2 of
; %extract31 (mask indices 3-5 address the second operand), lane 2 is left
; as poison (the "u" in the function name), and lane 3 reads element 2 of
; %extract3. Both operands are the low three elements of opaque <4 x i16>
; values defined by inline asm; the shuffled <4 x i16> is stored to %ptr.
; The expected-output comments are autogenerated by
; utils/update_llc_test_checks.py.
4061define void @v_shuffle_v4i16_v3i16__5_5_u_2(ptr addrspace(1) inreg %ptr) {
4062; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_2:
4063; GFX900:       ; %bb.0:
4064; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4065; GFX900-NEXT:    ;;#ASMSTART
4066; GFX900-NEXT:    ; def v[0:1]
4067; GFX900-NEXT:    ;;#ASMEND
4068; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4069; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4070; GFX900-NEXT:    ;;#ASMSTART
4071; GFX900-NEXT:    ; def v[2:3]
4072; GFX900-NEXT:    ;;#ASMEND
4073; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
4074; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4075; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4076; GFX900-NEXT:    s_waitcnt vmcnt(0)
4077; GFX900-NEXT:    s_setpc_b64 s[30:31]
4078;
4079; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_2:
4080; GFX90A:       ; %bb.0:
4081; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4082; GFX90A-NEXT:    ;;#ASMSTART
4083; GFX90A-NEXT:    ; def v[0:1]
4084; GFX90A-NEXT:    ;;#ASMEND
4085; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4086; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4087; GFX90A-NEXT:    ;;#ASMSTART
4088; GFX90A-NEXT:    ; def v[2:3]
4089; GFX90A-NEXT:    ;;#ASMEND
4090; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4091; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4092; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4093; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4094; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4095;
4096; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_2:
4097; GFX940:       ; %bb.0:
4098; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4099; GFX940-NEXT:    ;;#ASMSTART
4100; GFX940-NEXT:    ; def v[0:1]
4101; GFX940-NEXT:    ;;#ASMEND
4102; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4103; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4104; GFX940-NEXT:    ;;#ASMSTART
4105; GFX940-NEXT:    ; def v[2:3]
4106; GFX940-NEXT:    ;;#ASMEND
4107; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4108; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4109; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4110; GFX940-NEXT:    s_waitcnt vmcnt(0)
4111; GFX940-NEXT:    s_setpc_b64 s[30:31]
4112  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4113  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4114  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4115  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4116  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
4117  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4118  ret void
4119}
4120
; Shuffle mask <5, 5, 0, 2>. Lanes 0-1 both read element 2 of %extract31
; (mask indices 3-5 address the second operand), lane 2 reads element 0 of
; %extract3, and lane 3 reads element 2 of %extract3. Both operands are the
; low three elements of opaque <4 x i16> values defined by inline asm; the
; shuffled <4 x i16> is stored to %ptr. The expected-output comments are
; autogenerated by utils/update_llc_test_checks.py.
4121define void @v_shuffle_v4i16_v3i16__5_5_0_2(ptr addrspace(1) inreg %ptr) {
4122; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_2:
4123; GFX900:       ; %bb.0:
4124; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4125; GFX900-NEXT:    ;;#ASMSTART
4126; GFX900-NEXT:    ; def v[0:1]
4127; GFX900-NEXT:    ;;#ASMEND
4128; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4129; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4130; GFX900-NEXT:    ;;#ASMSTART
4131; GFX900-NEXT:    ; def v[2:3]
4132; GFX900-NEXT:    ;;#ASMEND
4133; GFX900-NEXT:    v_perm_b32 v1, v1, v0, s4
4134; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
4135; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4136; GFX900-NEXT:    s_waitcnt vmcnt(0)
4137; GFX900-NEXT:    s_setpc_b64 s[30:31]
4138;
4139; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_2:
4140; GFX90A:       ; %bb.0:
4141; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4142; GFX90A-NEXT:    ;;#ASMSTART
4143; GFX90A-NEXT:    ; def v[0:1]
4144; GFX90A-NEXT:    ;;#ASMEND
4145; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4146; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4147; GFX90A-NEXT:    ;;#ASMSTART
4148; GFX90A-NEXT:    ; def v[2:3]
4149; GFX90A-NEXT:    ;;#ASMEND
4150; GFX90A-NEXT:    v_perm_b32 v1, v1, v0, s4
4151; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4152; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4153; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4154; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4155;
4156; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_2:
4157; GFX940:       ; %bb.0:
4158; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4159; GFX940-NEXT:    ;;#ASMSTART
4160; GFX940-NEXT:    ; def v[0:1]
4161; GFX940-NEXT:    ;;#ASMEND
4162; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4163; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4164; GFX940-NEXT:    ;;#ASMSTART
4165; GFX940-NEXT:    ; def v[2:3]
4166; GFX940-NEXT:    ;;#ASMEND
4167; GFX940-NEXT:    v_perm_b32 v1, v1, v0, s2
4168; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4169; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4170; GFX940-NEXT:    s_waitcnt vmcnt(0)
4171; GFX940-NEXT:    s_setpc_b64 s[30:31]
4172  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4173  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4174  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4175  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4176  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
4177  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4178  ret void
4179}
4180
; Shuffle mask <5, 5, 1, 2>. Lanes 0-1 both read element 2 of %extract31
; (mask indices 3-5 address the second operand), lane 2 reads element 1 of
; %extract3, and lane 3 reads element 2 of %extract3. Both operands are the
; low three elements of opaque <4 x i16> values defined by inline asm; the
; shuffled <4 x i16> is stored to %ptr. The expected-output comments are
; autogenerated by utils/update_llc_test_checks.py.
4181define void @v_shuffle_v4i16_v3i16__5_5_1_2(ptr addrspace(1) inreg %ptr) {
4182; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_2:
4183; GFX900:       ; %bb.0:
4184; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4185; GFX900-NEXT:    ;;#ASMSTART
4186; GFX900-NEXT:    ; def v[0:1]
4187; GFX900-NEXT:    ;;#ASMEND
4188; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4189; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4190; GFX900-NEXT:    ;;#ASMSTART
4191; GFX900-NEXT:    ; def v[2:3]
4192; GFX900-NEXT:    ;;#ASMEND
4193; GFX900-NEXT:    v_alignbit_b32 v1, v1, v0, 16
4194; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
4195; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4196; GFX900-NEXT:    s_waitcnt vmcnt(0)
4197; GFX900-NEXT:    s_setpc_b64 s[30:31]
4198;
4199; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_2:
4200; GFX90A:       ; %bb.0:
4201; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4202; GFX90A-NEXT:    ;;#ASMSTART
4203; GFX90A-NEXT:    ; def v[0:1]
4204; GFX90A-NEXT:    ;;#ASMEND
4205; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4206; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4207; GFX90A-NEXT:    ;;#ASMSTART
4208; GFX90A-NEXT:    ; def v[2:3]
4209; GFX90A-NEXT:    ;;#ASMEND
4210; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v0, 16
4211; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4212; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4213; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4214; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4215;
4216; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_2:
4217; GFX940:       ; %bb.0:
4218; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4219; GFX940-NEXT:    ;;#ASMSTART
4220; GFX940-NEXT:    ; def v[0:1]
4221; GFX940-NEXT:    ;;#ASMEND
4222; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4223; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4224; GFX940-NEXT:    ;;#ASMSTART
4225; GFX940-NEXT:    ; def v[2:3]
4226; GFX940-NEXT:    ;;#ASMEND
4227; GFX940-NEXT:    v_alignbit_b32 v1, v1, v0, 16
4228; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4229; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4230; GFX940-NEXT:    s_waitcnt vmcnt(0)
4231; GFX940-NEXT:    s_setpc_b64 s[30:31]
4232  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4233  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4234  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4235  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4236  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
4237  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4238  ret void
4239}
4240
; Shuffle mask <5, 5, 3, 2>. Mask indices 3-5 address the second operand,
; so lanes 0-1 both read element 2 of %extract31 and lane 2 reads element 0
; of %extract31; lane 3 reads element 2 of %extract3. Both operands are the
; low three elements of opaque <4 x i16> values defined by inline asm; the
; shuffled <4 x i16> is stored to %ptr. The expected-output comments are
; autogenerated by utils/update_llc_test_checks.py.
4241define void @v_shuffle_v4i16_v3i16__5_5_3_2(ptr addrspace(1) inreg %ptr) {
4242; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_2:
4243; GFX900:       ; %bb.0:
4244; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4245; GFX900-NEXT:    ;;#ASMSTART
4246; GFX900-NEXT:    ; def v[0:1]
4247; GFX900-NEXT:    ;;#ASMEND
4248; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4249; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4250; GFX900-NEXT:    ;;#ASMSTART
4251; GFX900-NEXT:    ; def v[2:3]
4252; GFX900-NEXT:    ;;#ASMEND
4253; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
4254; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
4255; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4256; GFX900-NEXT:    s_waitcnt vmcnt(0)
4257; GFX900-NEXT:    s_setpc_b64 s[30:31]
4258;
4259; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_2:
4260; GFX90A:       ; %bb.0:
4261; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4262; GFX90A-NEXT:    ;;#ASMSTART
4263; GFX90A-NEXT:    ; def v[0:1]
4264; GFX90A-NEXT:    ;;#ASMEND
4265; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4266; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4267; GFX90A-NEXT:    ;;#ASMSTART
4268; GFX90A-NEXT:    ; def v[2:3]
4269; GFX90A-NEXT:    ;;#ASMEND
4270; GFX90A-NEXT:    v_perm_b32 v1, v1, v2, s4
4271; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4272; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4273; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4274; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4275;
4276; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_2:
4277; GFX940:       ; %bb.0:
4278; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4279; GFX940-NEXT:    ;;#ASMSTART
4280; GFX940-NEXT:    ; def v[0:1]
4281; GFX940-NEXT:    ;;#ASMEND
4282; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4283; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4284; GFX940-NEXT:    ;;#ASMSTART
4285; GFX940-NEXT:    ; def v[2:3]
4286; GFX940-NEXT:    ;;#ASMEND
4287; GFX940-NEXT:    s_nop 0
4288; GFX940-NEXT:    v_perm_b32 v1, v1, v2, s2
4289; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4290; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4291; GFX940-NEXT:    s_waitcnt vmcnt(0)
4292; GFX940-NEXT:    s_setpc_b64 s[30:31]
4293  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4294  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4295  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4296  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4297  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
4298  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4299  ret void
4300}
4301
; Shuffle mask <5, 5, 4, 2>. Mask indices 3-5 address the second operand,
; so lanes 0-1 both read element 2 of %extract31 and lane 2 reads element 1
; of %extract31; lane 3 reads element 2 of %extract3. Both operands are the
; low three elements of opaque <4 x i16> values defined by inline asm; the
; shuffled <4 x i16> is stored to %ptr. The expected-output comments are
; autogenerated by utils/update_llc_test_checks.py.
4302define void @v_shuffle_v4i16_v3i16__5_5_4_2(ptr addrspace(1) inreg %ptr) {
4303; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_2:
4304; GFX900:       ; %bb.0:
4305; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4306; GFX900-NEXT:    ;;#ASMSTART
4307; GFX900-NEXT:    ; def v[0:1]
4308; GFX900-NEXT:    ;;#ASMEND
4309; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4310; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4311; GFX900-NEXT:    ;;#ASMSTART
4312; GFX900-NEXT:    ; def v[2:3]
4313; GFX900-NEXT:    ;;#ASMEND
4314; GFX900-NEXT:    v_alignbit_b32 v1, v1, v2, 16
4315; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
4316; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4317; GFX900-NEXT:    s_waitcnt vmcnt(0)
4318; GFX900-NEXT:    s_setpc_b64 s[30:31]
4319;
4320; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_2:
4321; GFX90A:       ; %bb.0:
4322; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4323; GFX90A-NEXT:    ;;#ASMSTART
4324; GFX90A-NEXT:    ; def v[0:1]
4325; GFX90A-NEXT:    ;;#ASMEND
4326; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4327; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4328; GFX90A-NEXT:    ;;#ASMSTART
4329; GFX90A-NEXT:    ; def v[2:3]
4330; GFX90A-NEXT:    ;;#ASMEND
4331; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v2, 16
4332; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4333; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4334; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4335; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4336;
4337; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_2:
4338; GFX940:       ; %bb.0:
4339; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4340; GFX940-NEXT:    ;;#ASMSTART
4341; GFX940-NEXT:    ; def v[0:1]
4342; GFX940-NEXT:    ;;#ASMEND
4343; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4344; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4345; GFX940-NEXT:    ;;#ASMSTART
4346; GFX940-NEXT:    ; def v[2:3]
4347; GFX940-NEXT:    ;;#ASMEND
4348; GFX940-NEXT:    s_nop 0
4349; GFX940-NEXT:    v_alignbit_b32 v1, v1, v2, 16
4350; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4351; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4352; GFX940-NEXT:    s_waitcnt vmcnt(0)
4353; GFX940-NEXT:    s_setpc_b64 s[30:31]
4354  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4355  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4356  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4357  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4358  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
4359  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4360  ret void
4361}
4362
; Shuffle mask <poison, 3, 3, 3> with a single asm-defined source; the
; second shuffle operand is poison. Lane 0 is poison and lanes 1-3 use mask
; index 3, which addresses element 0 of the poison operand. The expected
; output, shared by all three targets, is only the entry wait and the
; return: no asm definition and no store appear. Presumably the store is
; dropped because every stored lane is poison (not confirmed here).
; The expected-output comments are autogenerated by
; utils/update_llc_test_checks.py.
4363define void @v_shuffle_v4i16_v3i16__u_3_3_3(ptr addrspace(1) inreg %ptr) {
4364; GFX9-LABEL: v_shuffle_v4i16_v3i16__u_3_3_3:
4365; GFX9:       ; %bb.0:
4366; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4367; GFX9-NEXT:    s_setpc_b64 s[30:31]
4368  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4369  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4370  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
4371  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4372  ret void
4373}
4374
; Shuffle mask <0, 3, 3, 3> with a single asm-defined source; the second
; shuffle operand is poison. Lane 0 reads element 0 of %extract3 and lanes
; 1-3 use mask index 3, which addresses element 0 of the poison operand.
; The expected output for every target stores the asm-defined register pair
; directly, with no permute or shift instruction in between. The
; expected-output comments are autogenerated by
; utils/update_llc_test_checks.py.
4375define void @v_shuffle_v4i16_v3i16__0_3_3_3(ptr addrspace(1) inreg %ptr) {
4376; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_3_3_3:
4377; GFX900:       ; %bb.0:
4378; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4379; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4380; GFX900-NEXT:    ;;#ASMSTART
4381; GFX900-NEXT:    ; def v[0:1]
4382; GFX900-NEXT:    ;;#ASMEND
4383; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4384; GFX900-NEXT:    s_waitcnt vmcnt(0)
4385; GFX900-NEXT:    s_setpc_b64 s[30:31]
4386;
4387; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_3_3_3:
4388; GFX90A:       ; %bb.0:
4389; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4390; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4391; GFX90A-NEXT:    ;;#ASMSTART
4392; GFX90A-NEXT:    ; def v[0:1]
4393; GFX90A-NEXT:    ;;#ASMEND
4394; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4395; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4396; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4397;
4398; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_3_3_3:
4399; GFX940:       ; %bb.0:
4400; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4401; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4402; GFX940-NEXT:    ;;#ASMSTART
4403; GFX940-NEXT:    ; def v[0:1]
4404; GFX940-NEXT:    ;;#ASMEND
4405; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
4406; GFX940-NEXT:    s_waitcnt vmcnt(0)
4407; GFX940-NEXT:    s_setpc_b64 s[30:31]
4408  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4409  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4410  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
4411  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4412  ret void
4413}
4414
; Single-source shuffle, mask <1, 3, 3, 3>.
; Result lane 0 is lane 1 of %extract3; lanes 1..3 index the poison second
; operand. The expected output rewrites v0 with a single
; v_alignbit_b32 v0, <sgpr>, v0, 16 before storing v[0:1].
; NOTE(review): the scalar operand of v_alignbit_b32 (s4, or s0 on gfx940) is
; not written by any instruction in the expected sequence; presumably it only
; feeds the poison half of the first dword -- confirm.
4415define void @v_shuffle_v4i16_v3i16__1_3_3_3(ptr addrspace(1) inreg %ptr) {
4416; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_3_3_3:
4417; GFX900:       ; %bb.0:
4418; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4419; GFX900-NEXT:    ;;#ASMSTART
4420; GFX900-NEXT:    ; def v[0:1]
4421; GFX900-NEXT:    ;;#ASMEND
4422; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4423; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
4424; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4425; GFX900-NEXT:    s_waitcnt vmcnt(0)
4426; GFX900-NEXT:    s_setpc_b64 s[30:31]
4427;
4428; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_3_3_3:
4429; GFX90A:       ; %bb.0:
4430; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4431; GFX90A-NEXT:    ;;#ASMSTART
4432; GFX90A-NEXT:    ; def v[0:1]
4433; GFX90A-NEXT:    ;;#ASMEND
4434; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4435; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
4436; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4437; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4438; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4439;
4440; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_3_3_3:
4441; GFX940:       ; %bb.0:
4442; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4443; GFX940-NEXT:    ;;#ASMSTART
4444; GFX940-NEXT:    ; def v[0:1]
4445; GFX940-NEXT:    ;;#ASMEND
4446; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4447; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
4448; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
4449; GFX940-NEXT:    s_waitcnt vmcnt(0)
4450; GFX940-NEXT:    s_setpc_b64 s[30:31]
4451  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4452  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4453  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
4454  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4455  ret void
4456}
4457
; Single-source shuffle, mask <2, 3, 3, 3>.
; Result lane 0 is lane 2 of %extract3; lanes 1..3 index the poison second
; operand. On every target the expected output copies v1 (the second register
; of the asm-defined pair) into v0 with v_mov_b32 and then stores v[0:1].
4458define void @v_shuffle_v4i16_v3i16__2_3_3_3(ptr addrspace(1) inreg %ptr) {
4459; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3:
4460; GFX900:       ; %bb.0:
4461; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4462; GFX900-NEXT:    ;;#ASMSTART
4463; GFX900-NEXT:    ; def v[0:1]
4464; GFX900-NEXT:    ;;#ASMEND
4465; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4466; GFX900-NEXT:    v_mov_b32_e32 v0, v1
4467; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4468; GFX900-NEXT:    s_waitcnt vmcnt(0)
4469; GFX900-NEXT:    s_setpc_b64 s[30:31]
4470;
4471; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3:
4472; GFX90A:       ; %bb.0:
4473; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4474; GFX90A-NEXT:    ;;#ASMSTART
4475; GFX90A-NEXT:    ; def v[0:1]
4476; GFX90A-NEXT:    ;;#ASMEND
4477; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4478; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
4479; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4480; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4481; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4482;
4483; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_3_3_3:
4484; GFX940:       ; %bb.0:
4485; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4486; GFX940-NEXT:    ;;#ASMSTART
4487; GFX940-NEXT:    ; def v[0:1]
4488; GFX940-NEXT:    ;;#ASMEND
4489; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4490; GFX940-NEXT:    v_mov_b32_e32 v0, v1
4491; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
4492; GFX940-NEXT:    s_waitcnt vmcnt(0)
4493; GFX940-NEXT:    s_setpc_b64 s[30:31]
4494  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4495  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4496  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
4497  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4498  ret void
4499}
4500
; Single-source shuffle, mask <3, 3, 3, 3>.
; All four mask indices address lane 0 of the second shufflevector operand,
; which is poison in this function, so no result lane reads %extract3.
; The assertions shared by all three RUN lines expect only the entry s_waitcnt
; followed by the return: no asm def and no store.
4501define void @v_shuffle_v4i16_v3i16__3_3_3_3(ptr addrspace(1) inreg %ptr) {
4502; GFX9-LABEL: v_shuffle_v4i16_v3i16__3_3_3_3:
4503; GFX9:       ; %bb.0:
4504; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4505; GFX9-NEXT:    s_setpc_b64 s[30:31]
4506  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4507  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4508  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4509  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4510  ret void
4511}
4512
; Two-source shuffle, mask <4, 3, 3, 3>.
; Indices 3..5 address %extract31, so the result is %extract31 lanes 1, 0, 0, 0
; and %extract3 (hence %vec0) is never selected. The expected output contains a
; single asm def block.
; Expected lowering on every target: v_perm_b32 v1, v0, v0 with the selector
; constant 0x5040100 for the second dword, then v_alignbit_b32 v0, v0, v0, 16
; for the first dword, then a store of v[0:1].
4513define void @v_shuffle_v4i16_v3i16__4_3_3_3(ptr addrspace(1) inreg %ptr) {
4514; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_3_3_3:
4515; GFX900:       ; %bb.0:
4516; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4517; GFX900-NEXT:    ;;#ASMSTART
4518; GFX900-NEXT:    ; def v[0:1]
4519; GFX900-NEXT:    ;;#ASMEND
4520; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4521; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4522; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
4523; GFX900-NEXT:    v_alignbit_b32 v0, v0, v0, 16
4524; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4525; GFX900-NEXT:    s_waitcnt vmcnt(0)
4526; GFX900-NEXT:    s_setpc_b64 s[30:31]
4527;
4528; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_3_3_3:
4529; GFX90A:       ; %bb.0:
4530; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4531; GFX90A-NEXT:    ;;#ASMSTART
4532; GFX90A-NEXT:    ; def v[0:1]
4533; GFX90A-NEXT:    ;;#ASMEND
4534; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4535; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4536; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
4537; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v0, 16
4538; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4539; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4540; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4541;
4542; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_3_3_3:
4543; GFX940:       ; %bb.0:
4544; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4545; GFX940-NEXT:    ;;#ASMSTART
4546; GFX940-NEXT:    ; def v[0:1]
4547; GFX940-NEXT:    ;;#ASMEND
4548; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4549; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4550; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
4551; GFX940-NEXT:    v_alignbit_b32 v0, v0, v0, 16
4552; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
4553; GFX940-NEXT:    s_waitcnt vmcnt(0)
4554; GFX940-NEXT:    s_setpc_b64 s[30:31]
4555  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4556  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4557  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4558  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4559  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
4560  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4561  ret void
4562}
4563
; Two-source shuffle, mask <5, 3, 3, 3>.
; Indices 3..5 address %extract31, so the result is %extract31 lanes 2, 0, 0, 0
; and %extract3 (hence %vec0) is never selected. The expected output contains a
; single asm def block.
; Expected lowering: two v_perm_b32 with selector 0x5040100, one per result
; dword. The per-target sequences differ in scheduling and registers: gfx900
; places the asm block first and stores v[1:2]; gfx90a and gfx940 materialize
; the constants first and store v[2:3]; gfx940 also expects an s_nop 0 right
; after the asm block.
4564define void @v_shuffle_v4i16_v3i16__5_3_3_3(ptr addrspace(1) inreg %ptr) {
4565; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_3_3:
4566; GFX900:       ; %bb.0:
4567; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4568; GFX900-NEXT:    ;;#ASMSTART
4569; GFX900-NEXT:    ; def v[0:1]
4570; GFX900-NEXT:    ;;#ASMEND
4571; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4572; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4573; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
4574; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
4575; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
4576; GFX900-NEXT:    s_waitcnt vmcnt(0)
4577; GFX900-NEXT:    s_setpc_b64 s[30:31]
4578;
4579; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_3_3:
4580; GFX90A:       ; %bb.0:
4581; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4582; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4583; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4584; GFX90A-NEXT:    ;;#ASMSTART
4585; GFX90A-NEXT:    ; def v[0:1]
4586; GFX90A-NEXT:    ;;#ASMEND
4587; GFX90A-NEXT:    v_perm_b32 v2, v0, v1, s4
4588; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
4589; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4590; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4591; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4592;
4593; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_3_3:
4594; GFX940:       ; %bb.0:
4595; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4596; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4597; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4598; GFX940-NEXT:    ;;#ASMSTART
4599; GFX940-NEXT:    ; def v[0:1]
4600; GFX940-NEXT:    ;;#ASMEND
4601; GFX940-NEXT:    s_nop 0
4602; GFX940-NEXT:    v_perm_b32 v2, v0, v1, s2
4603; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
4604; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4605; GFX940-NEXT:    s_waitcnt vmcnt(0)
4606; GFX940-NEXT:    s_setpc_b64 s[30:31]
4607  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4608  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4609  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4610  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4611  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
4612  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4613  ret void
4614}
4615
; Two-source shuffle, mask <5, poison, 3, 3>.
; Result lane 0 is %extract31 lane 2, lane 1 is poison, lanes 2..3 are
; %extract31 lane 0; %extract3 (hence %vec0) is never selected and the expected
; output contains a single asm def block.
; Expected lowering: one v_perm_b32 (selector 0x5040100) builds the second
; dword. gfx900 stores v[1:2], reusing v1 straight from the asm def; gfx90a and
; gfx940 first copy v1 into v2 and store v[2:3].
; NOTE(review): the extra copy on gfx90a/gfx940 is presumably due to a register
; pair alignment requirement on those targets -- confirm.
4616define void @v_shuffle_v4i16_v3i16__5_u_3_3(ptr addrspace(1) inreg %ptr) {
4617; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_3_3:
4618; GFX900:       ; %bb.0:
4619; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4620; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4621; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4622; GFX900-NEXT:    ;;#ASMSTART
4623; GFX900-NEXT:    ; def v[0:1]
4624; GFX900-NEXT:    ;;#ASMEND
4625; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
4626; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
4627; GFX900-NEXT:    s_waitcnt vmcnt(0)
4628; GFX900-NEXT:    s_setpc_b64 s[30:31]
4629;
4630; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_3_3:
4631; GFX90A:       ; %bb.0:
4632; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4633; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4634; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4635; GFX90A-NEXT:    ;;#ASMSTART
4636; GFX90A-NEXT:    ; def v[0:1]
4637; GFX90A-NEXT:    ;;#ASMEND
4638; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
4639; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
4640; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4641; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4642; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4643;
4644; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_3_3:
4645; GFX940:       ; %bb.0:
4646; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4647; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4648; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4649; GFX940-NEXT:    ;;#ASMSTART
4650; GFX940-NEXT:    ; def v[0:1]
4651; GFX940-NEXT:    ;;#ASMEND
4652; GFX940-NEXT:    s_nop 0
4653; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
4654; GFX940-NEXT:    v_mov_b32_e32 v2, v1
4655; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4656; GFX940-NEXT:    s_waitcnt vmcnt(0)
4657; GFX940-NEXT:    s_setpc_b64 s[30:31]
4658  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4659  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4660  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4661  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4662  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
4663  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4664  ret void
4665}
4666
; Two-source shuffle, mask <5, 0, 3, 3>.
; Result lane 0 is %extract31 lane 2, lane 1 is %extract3 lane 0, lanes 2..3
; are %extract31 lane 0. Both sources are selected, so the expected output
; contains two asm def blocks.
; Expected lowering: two v_perm_b32 with selector 0x5040100, one per result
; dword, then a store of v[0:1]. On gfx900 the second def is v[1:2], which
; overlaps v1 of the first def; the instructions that follow read only v0 from
; the first def. gfx90a and gfx940 use the disjoint pair v[2:3] instead.
4667define void @v_shuffle_v4i16_v3i16__5_0_3_3(ptr addrspace(1) inreg %ptr) {
4668; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_3_3:
4669; GFX900:       ; %bb.0:
4670; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4671; GFX900-NEXT:    ;;#ASMSTART
4672; GFX900-NEXT:    ; def v[0:1]
4673; GFX900-NEXT:    ;;#ASMEND
4674; GFX900-NEXT:    ;;#ASMSTART
4675; GFX900-NEXT:    ; def v[1:2]
4676; GFX900-NEXT:    ;;#ASMEND
4677; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4678; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4679; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
4680; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4681; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
4682; GFX900-NEXT:    s_waitcnt vmcnt(0)
4683; GFX900-NEXT:    s_setpc_b64 s[30:31]
4684;
4685; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_3_3:
4686; GFX90A:       ; %bb.0:
4687; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4688; GFX90A-NEXT:    ;;#ASMSTART
4689; GFX90A-NEXT:    ; def v[0:1]
4690; GFX90A-NEXT:    ;;#ASMEND
4691; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4692; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4693; GFX90A-NEXT:    ;;#ASMSTART
4694; GFX90A-NEXT:    ; def v[2:3]
4695; GFX90A-NEXT:    ;;#ASMEND
4696; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
4697; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
4698; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4699; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4700; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4701;
4702; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_3_3:
4703; GFX940:       ; %bb.0:
4704; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4705; GFX940-NEXT:    ;;#ASMSTART
4706; GFX940-NEXT:    ; def v[0:1]
4707; GFX940-NEXT:    ;;#ASMEND
4708; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4709; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4710; GFX940-NEXT:    ;;#ASMSTART
4711; GFX940-NEXT:    ; def v[2:3]
4712; GFX940-NEXT:    ;;#ASMEND
4713; GFX940-NEXT:    s_nop 0
4714; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
4715; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
4716; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4717; GFX940-NEXT:    s_waitcnt vmcnt(0)
4718; GFX940-NEXT:    s_setpc_b64 s[30:31]
4719  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4720  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4721  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4722  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4723  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
4724  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4725  ret void
4726}
4727
; Two-source shuffle, mask <5, 1, 3, 3>.
; Result lane 0 is %extract31 lane 2, lane 1 is %extract3 lane 1, lanes 2..3
; are %extract31 lane 0. Both sources are selected, so the expected output
; contains two asm def blocks.
; Expected lowering: v_bfi_b32 with the mask constant 0xffff produces the first
; dword; the same scalar register is then reloaded with 0x5040100 and used by
; v_perm_b32 for the second dword; v[0:1] is stored. On gfx900 the second def
; is v[1:2], overlapping v1 of the first def; only v0 of the first def is read
; afterwards.
4728define void @v_shuffle_v4i16_v3i16__5_1_3_3(ptr addrspace(1) inreg %ptr) {
4729; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_3_3:
4730; GFX900:       ; %bb.0:
4731; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4732; GFX900-NEXT:    ;;#ASMSTART
4733; GFX900-NEXT:    ; def v[0:1]
4734; GFX900-NEXT:    ;;#ASMEND
4735; GFX900-NEXT:    s_mov_b32 s4, 0xffff
4736; GFX900-NEXT:    ;;#ASMSTART
4737; GFX900-NEXT:    ; def v[1:2]
4738; GFX900-NEXT:    ;;#ASMEND
4739; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
4740; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4741; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4742; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4743; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
4744; GFX900-NEXT:    s_waitcnt vmcnt(0)
4745; GFX900-NEXT:    s_setpc_b64 s[30:31]
4746;
4747; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_3_3:
4748; GFX90A:       ; %bb.0:
4749; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4750; GFX90A-NEXT:    ;;#ASMSTART
4751; GFX90A-NEXT:    ; def v[0:1]
4752; GFX90A-NEXT:    ;;#ASMEND
4753; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
4754; GFX90A-NEXT:    ;;#ASMSTART
4755; GFX90A-NEXT:    ; def v[2:3]
4756; GFX90A-NEXT:    ;;#ASMEND
4757; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
4758; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4759; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4760; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
4761; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4762; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4763; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4764;
4765; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_3_3:
4766; GFX940:       ; %bb.0:
4767; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4768; GFX940-NEXT:    ;;#ASMSTART
4769; GFX940-NEXT:    ; def v[0:1]
4770; GFX940-NEXT:    ;;#ASMEND
4771; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4772; GFX940-NEXT:    ;;#ASMSTART
4773; GFX940-NEXT:    ; def v[2:3]
4774; GFX940-NEXT:    ;;#ASMEND
4775; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4776; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
4777; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4778; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
4779; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4780; GFX940-NEXT:    s_waitcnt vmcnt(0)
4781; GFX940-NEXT:    s_setpc_b64 s[30:31]
4782  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4783  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4784  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4785  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4786  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
4787  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4788  ret void
4789}
4790
; Two-source shuffle, mask <5, 2, 3, 3>.
; Result lane 0 is %extract31 lane 2, lane 1 is %extract3 lane 2, lanes 2..3
; are %extract31 lane 0. Both sources are selected, so the expected output
; contains two asm def blocks (v[0:1] and v[2:3] on every target).
; Expected lowering: two v_perm_b32 with selector 0x5040100, one per result
; dword, then a store of v[0:1]. The three targets expect the same instruction
; order; they differ only in the scalar register holding the selector, the
; store address operands, and an s_nop 0 after the second asm block on gfx940.
4791define void @v_shuffle_v4i16_v3i16__5_2_3_3(ptr addrspace(1) inreg %ptr) {
4792; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_3_3:
4793; GFX900:       ; %bb.0:
4794; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4795; GFX900-NEXT:    ;;#ASMSTART
4796; GFX900-NEXT:    ; def v[0:1]
4797; GFX900-NEXT:    ;;#ASMEND
4798; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4799; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4800; GFX900-NEXT:    ;;#ASMSTART
4801; GFX900-NEXT:    ; def v[2:3]
4802; GFX900-NEXT:    ;;#ASMEND
4803; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
4804; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
4805; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4806; GFX900-NEXT:    s_waitcnt vmcnt(0)
4807; GFX900-NEXT:    s_setpc_b64 s[30:31]
4808;
4809; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_3_3:
4810; GFX90A:       ; %bb.0:
4811; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4812; GFX90A-NEXT:    ;;#ASMSTART
4813; GFX90A-NEXT:    ; def v[0:1]
4814; GFX90A-NEXT:    ;;#ASMEND
4815; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4816; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4817; GFX90A-NEXT:    ;;#ASMSTART
4818; GFX90A-NEXT:    ; def v[2:3]
4819; GFX90A-NEXT:    ;;#ASMEND
4820; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
4821; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
4822; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4823; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4824; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4825;
4826; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_3_3:
4827; GFX940:       ; %bb.0:
4828; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4829; GFX940-NEXT:    ;;#ASMSTART
4830; GFX940-NEXT:    ; def v[0:1]
4831; GFX940-NEXT:    ;;#ASMEND
4832; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4833; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4834; GFX940-NEXT:    ;;#ASMSTART
4835; GFX940-NEXT:    ; def v[2:3]
4836; GFX940-NEXT:    ;;#ASMEND
4837; GFX940-NEXT:    s_nop 0
4838; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
4839; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
4840; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4841; GFX940-NEXT:    s_waitcnt vmcnt(0)
4842; GFX940-NEXT:    s_setpc_b64 s[30:31]
4843  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4844  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4845  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4846  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4847  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
4848  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4849  ret void
4850}
4851
; Two-source shuffle, mask <5, 4, 3, 3>.
; Indices 3..5 address %extract31, so the result is %extract31 lanes 2, 1, 0, 0
; and %extract3 (hence %vec0) is never selected. The expected output contains a
; single asm def block.
; Expected lowering: v_bfi_b32 with the mask constant 0xffff produces the first
; dword from v1 and v0; the same scalar register is then reloaded with
; 0x5040100 and used by v_perm_b32 for the second dword. gfx900 stores v[1:2];
; gfx90a and gfx940 store v[2:3].
4852define void @v_shuffle_v4i16_v3i16__5_4_3_3(ptr addrspace(1) inreg %ptr) {
4853; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_3_3:
4854; GFX900:       ; %bb.0:
4855; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4856; GFX900-NEXT:    ;;#ASMSTART
4857; GFX900-NEXT:    ; def v[0:1]
4858; GFX900-NEXT:    ;;#ASMEND
4859; GFX900-NEXT:    s_mov_b32 s4, 0xffff
4860; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
4861; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4862; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4863; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
4864; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
4865; GFX900-NEXT:    s_waitcnt vmcnt(0)
4866; GFX900-NEXT:    s_setpc_b64 s[30:31]
4867;
4868; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_3_3:
4869; GFX90A:       ; %bb.0:
4870; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4871; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
4872; GFX90A-NEXT:    ;;#ASMSTART
4873; GFX90A-NEXT:    ; def v[0:1]
4874; GFX90A-NEXT:    ;;#ASMEND
4875; GFX90A-NEXT:    v_bfi_b32 v2, s4, v1, v0
4876; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4877; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4878; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
4879; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4880; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4881; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4882;
4883; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_3_3:
4884; GFX940:       ; %bb.0:
4885; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4886; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4887; GFX940-NEXT:    ;;#ASMSTART
4888; GFX940-NEXT:    ; def v[0:1]
4889; GFX940-NEXT:    ;;#ASMEND
4890; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4891; GFX940-NEXT:    v_bfi_b32 v2, s2, v1, v0
4892; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4893; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
4894; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4895; GFX940-NEXT:    s_waitcnt vmcnt(0)
4896; GFX940-NEXT:    s_setpc_b64 s[30:31]
4897  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4898  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4899  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4900  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4901  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
4902  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4903  ret void
4904}
4905
; Two-source shuffle, mask <5, 5, 3, 3>.
; Indices 3..5 address %extract31, so the result is %extract31 lanes 2, 2, 0, 0
; and %extract3 (hence %vec0) is never selected. The expected output contains a
; single asm def block.
; Expected lowering: two v_perm_b32 with selector 0x5040100, each taking the
; same register for both vector sources (v1, v1 for the first dword and v0, v0
; for the second). gfx900 stores v[1:2]; gfx90a and gfx940 store v[2:3], and
; gfx940 also expects an s_nop 0 right after the asm block.
4906define void @v_shuffle_v4i16_v3i16__5_5_3_3(ptr addrspace(1) inreg %ptr) {
4907; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_3:
4908; GFX900:       ; %bb.0:
4909; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4910; GFX900-NEXT:    ;;#ASMSTART
4911; GFX900-NEXT:    ; def v[0:1]
4912; GFX900-NEXT:    ;;#ASMEND
4913; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4914; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4915; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
4916; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4917; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
4918; GFX900-NEXT:    s_waitcnt vmcnt(0)
4919; GFX900-NEXT:    s_setpc_b64 s[30:31]
4920;
4921; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_3:
4922; GFX90A:       ; %bb.0:
4923; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4924; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4925; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4926; GFX90A-NEXT:    ;;#ASMSTART
4927; GFX90A-NEXT:    ; def v[0:1]
4928; GFX90A-NEXT:    ;;#ASMEND
4929; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
4930; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
4931; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4932; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4933; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4934;
4935; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_3:
4936; GFX940:       ; %bb.0:
4937; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4938; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4939; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4940; GFX940-NEXT:    ;;#ASMSTART
4941; GFX940-NEXT:    ; def v[0:1]
4942; GFX940-NEXT:    ;;#ASMEND
4943; GFX940-NEXT:    s_nop 0
4944; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
4945; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
4946; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4947; GFX940-NEXT:    s_waitcnt vmcnt(0)
4948; GFX940-NEXT:    s_setpc_b64 s[30:31]
4949  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4950  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4951  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4952  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4953  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
4954  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4955  ret void
4956}
4957
; Two-source shuffle, mask <5, 5, poison, 3>.
; Result lanes 0..1 are %extract31 lane 2, lane 2 is poison, lane 3 is
; %extract31 lane 0; %extract3 (hence %vec0) is never selected and the expected
; output contains a single asm def block.
; Expected lowering: v_perm_b32 (selector 0x5040100) on v1, v1 for the first
; dword and v_lshlrev_b32 by 16 of v0 for the second dword. gfx900 stores
; v[1:2]; gfx90a and gfx940 store v[2:3], and gfx940 also expects an s_nop 0
; right after the asm block.
4958define void @v_shuffle_v4i16_v3i16__5_5_u_3(ptr addrspace(1) inreg %ptr) {
4959; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_3:
4960; GFX900:       ; %bb.0:
4961; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4962; GFX900-NEXT:    ;;#ASMSTART
4963; GFX900-NEXT:    ; def v[0:1]
4964; GFX900-NEXT:    ;;#ASMEND
4965; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4966; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4967; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4968; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
4969; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
4970; GFX900-NEXT:    s_waitcnt vmcnt(0)
4971; GFX900-NEXT:    s_setpc_b64 s[30:31]
4972;
4973; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_3:
4974; GFX90A:       ; %bb.0:
4975; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4976; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4977; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4978; GFX90A-NEXT:    ;;#ASMSTART
4979; GFX90A-NEXT:    ; def v[0:1]
4980; GFX90A-NEXT:    ;;#ASMEND
4981; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
4982; GFX90A-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
4983; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4984; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4985; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4986;
4987; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_3:
4988; GFX940:       ; %bb.0:
4989; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4990; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4991; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4992; GFX940-NEXT:    ;;#ASMSTART
4993; GFX940-NEXT:    ; def v[0:1]
4994; GFX940-NEXT:    ;;#ASMEND
4995; GFX940-NEXT:    s_nop 0
4996; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
4997; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
4998; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4999; GFX940-NEXT:    s_waitcnt vmcnt(0)
5000; GFX940-NEXT:    s_setpc_b64 s[30:31]
5001  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5002  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5003  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5004  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5005  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
5006  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5007  ret void
5008}
5009
; Two-source shuffle, mask <5, 5, 0, 3>.
; Result lanes 0..1 are %extract31 lane 2, lane 2 is %extract3 lane 0, lane 3
; is %extract31 lane 0. Both sources are selected, so the expected output
; contains two asm def blocks.
; Expected lowering: two v_perm_b32 with selector 0x5040100; the second dword
; (v1) is produced first, then the first dword (v0), then v[0:1] is stored.
; On gfx900 the second def is v[1:2], overlapping v1 of the first def; only v0
; of the first def is read afterwards. gfx90a and gfx940 use v[2:3] instead.
5010define void @v_shuffle_v4i16_v3i16__5_5_0_3(ptr addrspace(1) inreg %ptr) {
5011; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_3:
5012; GFX900:       ; %bb.0:
5013; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5014; GFX900-NEXT:    ;;#ASMSTART
5015; GFX900-NEXT:    ; def v[0:1]
5016; GFX900-NEXT:    ;;#ASMEND
5017; GFX900-NEXT:    ;;#ASMSTART
5018; GFX900-NEXT:    ; def v[1:2]
5019; GFX900-NEXT:    ;;#ASMEND
5020; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5021; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5022; GFX900-NEXT:    v_perm_b32 v1, v1, v0, s4
5023; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
5024; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
5025; GFX900-NEXT:    s_waitcnt vmcnt(0)
5026; GFX900-NEXT:    s_setpc_b64 s[30:31]
5027;
5028; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_3:
5029; GFX90A:       ; %bb.0:
5030; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5031; GFX90A-NEXT:    ;;#ASMSTART
5032; GFX90A-NEXT:    ; def v[0:1]
5033; GFX90A-NEXT:    ;;#ASMEND
5034; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5035; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5036; GFX90A-NEXT:    ;;#ASMSTART
5037; GFX90A-NEXT:    ; def v[2:3]
5038; GFX90A-NEXT:    ;;#ASMEND
5039; GFX90A-NEXT:    v_perm_b32 v1, v2, v0, s4
5040; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5041; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5042; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5043; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5044;
5045; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_3:
5046; GFX940:       ; %bb.0:
5047; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5048; GFX940-NEXT:    ;;#ASMSTART
5049; GFX940-NEXT:    ; def v[0:1]
5050; GFX940-NEXT:    ;;#ASMEND
5051; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5052; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5053; GFX940-NEXT:    ;;#ASMSTART
5054; GFX940-NEXT:    ; def v[2:3]
5055; GFX940-NEXT:    ;;#ASMEND
5056; GFX940-NEXT:    s_nop 0
5057; GFX940-NEXT:    v_perm_b32 v1, v2, v0, s2
5058; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5059; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5060; GFX940-NEXT:    s_waitcnt vmcnt(0)
5061; GFX940-NEXT:    s_setpc_b64 s[30:31]
5062  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5063  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5064  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5065  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5066  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
5067  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5068  ret void
5069}
5070
; Two-source shuffle, mask <5, 5, 1, 3>.
; Result lanes 0..1 are %extract31 lane 2, lane 2 is %extract3 lane 1, lane 3
; is %extract31 lane 0. Both sources are selected, so the expected output
; contains two asm def blocks.
; Expected lowering: v_alignbit_b32 with shift 16 produces the second dword
; (v1), then v_perm_b32 with selector 0x5040100 produces the first dword (v0),
; then v[0:1] is stored. On gfx900 the second def is v[1:2], overlapping v1 of
; the first def; only v0 of the first def is read afterwards. gfx90a and gfx940
; use v[2:3] instead.
5071define void @v_shuffle_v4i16_v3i16__5_5_1_3(ptr addrspace(1) inreg %ptr) {
5072; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_3:
5073; GFX900:       ; %bb.0:
5074; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5075; GFX900-NEXT:    ;;#ASMSTART
5076; GFX900-NEXT:    ; def v[0:1]
5077; GFX900-NEXT:    ;;#ASMEND
5078; GFX900-NEXT:    ;;#ASMSTART
5079; GFX900-NEXT:    ; def v[1:2]
5080; GFX900-NEXT:    ;;#ASMEND
5081; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5082; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5083; GFX900-NEXT:    v_alignbit_b32 v1, v1, v0, 16
5084; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
5085; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
5086; GFX900-NEXT:    s_waitcnt vmcnt(0)
5087; GFX900-NEXT:    s_setpc_b64 s[30:31]
5088;
5089; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_3:
5090; GFX90A:       ; %bb.0:
5091; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5092; GFX90A-NEXT:    ;;#ASMSTART
5093; GFX90A-NEXT:    ; def v[0:1]
5094; GFX90A-NEXT:    ;;#ASMEND
5095; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5096; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5097; GFX90A-NEXT:    ;;#ASMSTART
5098; GFX90A-NEXT:    ; def v[2:3]
5099; GFX90A-NEXT:    ;;#ASMEND
5100; GFX90A-NEXT:    v_alignbit_b32 v1, v2, v0, 16
5101; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5102; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5103; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5104; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5105;
5106; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_3:
5107; GFX940:       ; %bb.0:
5108; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5109; GFX940-NEXT:    ;;#ASMSTART
5110; GFX940-NEXT:    ; def v[0:1]
5111; GFX940-NEXT:    ;;#ASMEND
5112; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5113; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5114; GFX940-NEXT:    ;;#ASMSTART
5115; GFX940-NEXT:    ; def v[2:3]
5116; GFX940-NEXT:    ;;#ASMEND
5117; GFX940-NEXT:    s_nop 0
5118; GFX940-NEXT:    v_alignbit_b32 v1, v2, v0, 16
5119; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5120; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5121; GFX940-NEXT:    s_waitcnt vmcnt(0)
5122; GFX940-NEXT:    s_setpc_b64 s[30:31]
5123  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5124  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5125  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5126  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5127  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
5128  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5129  ret void
5130}
5131
; Shuffle mask <5, 5, 2, 3> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). Result lanes: vec1[2], vec1[2], vec0[2], vec1[0].
; The <4 x i16> result is stored to %ptr.
5132define void @v_shuffle_v4i16_v3i16__5_5_2_3(ptr addrspace(1) inreg %ptr) {
5133; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_3:
5134; GFX900:       ; %bb.0:
5135; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5136; GFX900-NEXT:    ;;#ASMSTART
5137; GFX900-NEXT:    ; def v[0:1]
5138; GFX900-NEXT:    ;;#ASMEND
5139; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5140; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5141; GFX900-NEXT:    ;;#ASMSTART
5142; GFX900-NEXT:    ; def v[2:3]
5143; GFX900-NEXT:    ;;#ASMEND
5144; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
5145; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
5146; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5147; GFX900-NEXT:    s_waitcnt vmcnt(0)
5148; GFX900-NEXT:    s_setpc_b64 s[30:31]
5149;
5150; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_3:
5151; GFX90A:       ; %bb.0:
5152; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5153; GFX90A-NEXT:    ;;#ASMSTART
5154; GFX90A-NEXT:    ; def v[0:1]
5155; GFX90A-NEXT:    ;;#ASMEND
5156; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5157; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5158; GFX90A-NEXT:    ;;#ASMSTART
5159; GFX90A-NEXT:    ; def v[2:3]
5160; GFX90A-NEXT:    ;;#ASMEND
5161; GFX90A-NEXT:    v_perm_b32 v1, v2, v1, s4
5162; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5163; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5164; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5165; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5166;
5167; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_3:
5168; GFX940:       ; %bb.0:
5169; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5170; GFX940-NEXT:    ;;#ASMSTART
5171; GFX940-NEXT:    ; def v[0:1]
5172; GFX940-NEXT:    ;;#ASMEND
5173; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5174; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5175; GFX940-NEXT:    ;;#ASMSTART
5176; GFX940-NEXT:    ; def v[2:3]
5177; GFX940-NEXT:    ;;#ASMEND
5178; GFX940-NEXT:    s_nop 0
5179; GFX940-NEXT:    v_perm_b32 v1, v2, v1, s2
5180; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5181; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5182; GFX940-NEXT:    s_waitcnt vmcnt(0)
5183; GFX940-NEXT:    s_setpc_b64 s[30:31]
5184  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5185  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5186  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5187  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5188  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
5189  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5190  ret void
5191}
5192
; Shuffle mask <5, 5, 4, 3> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). Result lanes: vec1[2], vec1[2], vec1[1], vec1[0].
; Every selected lane comes from %vec1, and the expected output below contains a
; single inline-asm "def" block. The <4 x i16> result is stored to %ptr.
5193define void @v_shuffle_v4i16_v3i16__5_5_4_3(ptr addrspace(1) inreg %ptr) {
5194; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_3:
5195; GFX900:       ; %bb.0:
5196; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5197; GFX900-NEXT:    ;;#ASMSTART
5198; GFX900-NEXT:    ; def v[0:1]
5199; GFX900-NEXT:    ;;#ASMEND
5200; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5201; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5202; GFX900-NEXT:    v_alignbit_b32 v2, v0, v0, 16
5203; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5204; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
5205; GFX900-NEXT:    s_waitcnt vmcnt(0)
5206; GFX900-NEXT:    s_setpc_b64 s[30:31]
5207;
5208; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_3:
5209; GFX90A:       ; %bb.0:
5210; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5211; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5212; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5213; GFX90A-NEXT:    ;;#ASMSTART
5214; GFX90A-NEXT:    ; def v[0:1]
5215; GFX90A-NEXT:    ;;#ASMEND
5216; GFX90A-NEXT:    v_alignbit_b32 v3, v0, v0, 16
5217; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
5218; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5219; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5220; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5221;
5222; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_3:
5223; GFX940:       ; %bb.0:
5224; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5225; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5226; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5227; GFX940-NEXT:    ;;#ASMSTART
5228; GFX940-NEXT:    ; def v[0:1]
5229; GFX940-NEXT:    ;;#ASMEND
5230; GFX940-NEXT:    s_nop 0
5231; GFX940-NEXT:    v_alignbit_b32 v3, v0, v0, 16
5232; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
5233; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
5234; GFX940-NEXT:    s_waitcnt vmcnt(0)
5235; GFX940-NEXT:    s_setpc_b64 s[30:31]
5236  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5237  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5238  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5239  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5240  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
5241  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5242  ret void
5243}
5244
; Shuffle mask <poison, 4, 4, 4> over two <3 x i16> operands. Mask indices 0-2
; pick lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of
; %extract31 (lanes 0-2 of %vec1). Result lanes: poison, vec1[1], vec1[1],
; vec1[1]. No lane of %vec0 is selected, and the expected output below contains
; a single inline-asm "def" block. The <4 x i16> result is stored to %ptr.
5245define void @v_shuffle_v4i16_v3i16__u_4_4_4(ptr addrspace(1) inreg %ptr) {
5246; GFX900-LABEL: v_shuffle_v4i16_v3i16__u_4_4_4:
5247; GFX900:       ; %bb.0:
5248; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5249; GFX900-NEXT:    ;;#ASMSTART
5250; GFX900-NEXT:    ; def v[0:1]
5251; GFX900-NEXT:    ;;#ASMEND
5252; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5253; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5254; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
5255; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5256; GFX900-NEXT:    s_waitcnt vmcnt(0)
5257; GFX900-NEXT:    s_setpc_b64 s[30:31]
5258;
5259; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_4_4_4:
5260; GFX90A:       ; %bb.0:
5261; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5262; GFX90A-NEXT:    ;;#ASMSTART
5263; GFX90A-NEXT:    ; def v[0:1]
5264; GFX90A-NEXT:    ;;#ASMEND
5265; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5266; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5267; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
5268; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5269; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5270; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5271;
5272; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_4_4_4:
5273; GFX940:       ; %bb.0:
5274; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5275; GFX940-NEXT:    ;;#ASMSTART
5276; GFX940-NEXT:    ; def v[0:1]
5277; GFX940-NEXT:    ;;#ASMEND
5278; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5279; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5280; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
5281; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
5282; GFX940-NEXT:    s_waitcnt vmcnt(0)
5283; GFX940-NEXT:    s_setpc_b64 s[30:31]
5284  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5285  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5286  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5287  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5288  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
5289  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5290  ret void
5291}
5292
; Shuffle mask <0, 4, 4, 4> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). Result lanes: vec0[0], vec1[1], vec1[1], vec1[1].
; The <4 x i16> result is stored to %ptr.
5293define void @v_shuffle_v4i16_v3i16__0_4_4_4(ptr addrspace(1) inreg %ptr) {
5294; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_4_4_4:
5295; GFX900:       ; %bb.0:
5296; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5297; GFX900-NEXT:    ;;#ASMSTART
5298; GFX900-NEXT:    ; def v[0:1]
5299; GFX900-NEXT:    ;;#ASMEND
5300; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5301; GFX900-NEXT:    ;;#ASMSTART
5302; GFX900-NEXT:    ; def v[1:2]
5303; GFX900-NEXT:    ;;#ASMEND
5304; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
5305; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5306; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5307; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5308; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
5309; GFX900-NEXT:    s_waitcnt vmcnt(0)
5310; GFX900-NEXT:    s_setpc_b64 s[30:31]
5311;
5312; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_4_4_4:
5313; GFX90A:       ; %bb.0:
5314; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5315; GFX90A-NEXT:    ;;#ASMSTART
5316; GFX90A-NEXT:    ; def v[0:1]
5317; GFX90A-NEXT:    ;;#ASMEND
5318; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5319; GFX90A-NEXT:    ;;#ASMSTART
5320; GFX90A-NEXT:    ; def v[2:3]
5321; GFX90A-NEXT:    ;;#ASMEND
5322; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v2
5323; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5324; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5325; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
5326; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5327; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5328; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5329;
5330; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_4_4_4:
5331; GFX940:       ; %bb.0:
5332; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5333; GFX940-NEXT:    ;;#ASMSTART
5334; GFX940-NEXT:    ; def v[0:1]
5335; GFX940-NEXT:    ;;#ASMEND
5336; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5337; GFX940-NEXT:    ;;#ASMSTART
5338; GFX940-NEXT:    ; def v[2:3]
5339; GFX940-NEXT:    ;;#ASMEND
5340; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5341; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v2
5342; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5343; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
5344; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5345; GFX940-NEXT:    s_waitcnt vmcnt(0)
5346; GFX940-NEXT:    s_setpc_b64 s[30:31]
5347  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5348  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5349  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5350  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5351  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
5352  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5353  ret void
5354}
5355
; Shuffle mask <1, 4, 4, 4> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). Result lanes: vec0[1], vec1[1], vec1[1], vec1[1].
; The <4 x i16> result is stored to %ptr.
5356define void @v_shuffle_v4i16_v3i16__1_4_4_4(ptr addrspace(1) inreg %ptr) {
5357; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_4_4_4:
5358; GFX900:       ; %bb.0:
5359; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5360; GFX900-NEXT:    ;;#ASMSTART
5361; GFX900-NEXT:    ; def v[0:1]
5362; GFX900-NEXT:    ;;#ASMEND
5363; GFX900-NEXT:    ;;#ASMSTART
5364; GFX900-NEXT:    ; def v[1:2]
5365; GFX900-NEXT:    ;;#ASMEND
5366; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5367; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5368; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
5369; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5370; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
5371; GFX900-NEXT:    s_waitcnt vmcnt(0)
5372; GFX900-NEXT:    s_setpc_b64 s[30:31]
5373;
5374; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_4_4_4:
5375; GFX90A:       ; %bb.0:
5376; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5377; GFX90A-NEXT:    ;;#ASMSTART
5378; GFX90A-NEXT:    ; def v[0:1]
5379; GFX90A-NEXT:    ;;#ASMEND
5380; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5381; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5382; GFX90A-NEXT:    ;;#ASMSTART
5383; GFX90A-NEXT:    ; def v[2:3]
5384; GFX90A-NEXT:    ;;#ASMEND
5385; GFX90A-NEXT:    v_perm_b32 v0, v2, v0, s4
5386; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
5387; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5388; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5389; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5390;
5391; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_4_4_4:
5392; GFX940:       ; %bb.0:
5393; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5394; GFX940-NEXT:    ;;#ASMSTART
5395; GFX940-NEXT:    ; def v[0:1]
5396; GFX940-NEXT:    ;;#ASMEND
5397; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5398; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5399; GFX940-NEXT:    ;;#ASMSTART
5400; GFX940-NEXT:    ; def v[2:3]
5401; GFX940-NEXT:    ;;#ASMEND
5402; GFX940-NEXT:    s_nop 0
5403; GFX940-NEXT:    v_perm_b32 v0, v2, v0, s2
5404; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
5405; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5406; GFX940-NEXT:    s_waitcnt vmcnt(0)
5407; GFX940-NEXT:    s_setpc_b64 s[30:31]
5408  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5409  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5410  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5411  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5412  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
5413  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5414  ret void
5415}
5416
; Shuffle mask <2, 4, 4, 4> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). Result lanes: vec0[2], vec1[1], vec1[1], vec1[1].
; The <4 x i16> result is stored to %ptr.
5417define void @v_shuffle_v4i16_v3i16__2_4_4_4(ptr addrspace(1) inreg %ptr) {
5418; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_4_4_4:
5419; GFX900:       ; %bb.0:
5420; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5421; GFX900-NEXT:    ;;#ASMSTART
5422; GFX900-NEXT:    ; def v[0:1]
5423; GFX900-NEXT:    ;;#ASMEND
5424; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5425; GFX900-NEXT:    ;;#ASMSTART
5426; GFX900-NEXT:    ; def v[2:3]
5427; GFX900-NEXT:    ;;#ASMEND
5428; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v2
5429; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5430; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5431; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
5432; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5433; GFX900-NEXT:    s_waitcnt vmcnt(0)
5434; GFX900-NEXT:    s_setpc_b64 s[30:31]
5435;
5436; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_4_4_4:
5437; GFX90A:       ; %bb.0:
5438; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5439; GFX90A-NEXT:    ;;#ASMSTART
5440; GFX90A-NEXT:    ; def v[0:1]
5441; GFX90A-NEXT:    ;;#ASMEND
5442; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5443; GFX90A-NEXT:    ;;#ASMSTART
5444; GFX90A-NEXT:    ; def v[2:3]
5445; GFX90A-NEXT:    ;;#ASMEND
5446; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v2
5447; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5448; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5449; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
5450; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5451; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5452; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5453;
5454; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_4_4_4:
5455; GFX940:       ; %bb.0:
5456; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5457; GFX940-NEXT:    ;;#ASMSTART
5458; GFX940-NEXT:    ; def v[0:1]
5459; GFX940-NEXT:    ;;#ASMEND
5460; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5461; GFX940-NEXT:    ;;#ASMSTART
5462; GFX940-NEXT:    ; def v[2:3]
5463; GFX940-NEXT:    ;;#ASMEND
5464; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5465; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v2
5466; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5467; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
5468; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5469; GFX940-NEXT:    s_waitcnt vmcnt(0)
5470; GFX940-NEXT:    s_setpc_b64 s[30:31]
5471  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5472  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5473  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5474  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5475  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
5476  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5477  ret void
5478}
5479
; Shuffle mask <3, 4, 4, 4> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). Result lanes: vec1[0], vec1[1], vec1[1], vec1[1].
; Every selected lane comes from %vec1, and the expected output below contains a
; single inline-asm "def" block. The <4 x i16> result is stored to %ptr.
5480define void @v_shuffle_v4i16_v3i16__3_4_4_4(ptr addrspace(1) inreg %ptr) {
5481; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_4_4_4:
5482; GFX900:       ; %bb.0:
5483; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5484; GFX900-NEXT:    ;;#ASMSTART
5485; GFX900-NEXT:    ; def v[0:1]
5486; GFX900-NEXT:    ;;#ASMEND
5487; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5488; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5489; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
5490; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5491; GFX900-NEXT:    s_waitcnt vmcnt(0)
5492; GFX900-NEXT:    s_setpc_b64 s[30:31]
5493;
5494; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_4_4_4:
5495; GFX90A:       ; %bb.0:
5496; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5497; GFX90A-NEXT:    ;;#ASMSTART
5498; GFX90A-NEXT:    ; def v[0:1]
5499; GFX90A-NEXT:    ;;#ASMEND
5500; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5501; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5502; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
5503; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5504; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5505; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5506;
5507; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_4_4_4:
5508; GFX940:       ; %bb.0:
5509; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5510; GFX940-NEXT:    ;;#ASMSTART
5511; GFX940-NEXT:    ; def v[0:1]
5512; GFX940-NEXT:    ;;#ASMEND
5513; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5514; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5515; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
5516; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
5517; GFX940-NEXT:    s_waitcnt vmcnt(0)
5518; GFX940-NEXT:    s_setpc_b64 s[30:31]
5519  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5520  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5521  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5522  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5523  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
5524  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5525  ret void
5526}
5527
; Shuffle mask <4, 4, 4, 4> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). All four result lanes are vec1[1] (a splat).
; Every selected lane comes from %vec1, and the expected output below contains a
; single inline-asm "def" block. The <4 x i16> result is stored to %ptr.
5528define void @v_shuffle_v4i16_v3i16__4_4_4_4(ptr addrspace(1) inreg %ptr) {
5529; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_4_4_4:
5530; GFX900:       ; %bb.0:
5531; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5532; GFX900-NEXT:    ;;#ASMSTART
5533; GFX900-NEXT:    ; def v[0:1]
5534; GFX900-NEXT:    ;;#ASMEND
5535; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5536; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
5537; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5538; GFX900-NEXT:    v_mov_b32_e32 v1, v0
5539; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5540; GFX900-NEXT:    s_waitcnt vmcnt(0)
5541; GFX900-NEXT:    s_setpc_b64 s[30:31]
5542;
5543; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_4_4_4:
5544; GFX90A:       ; %bb.0:
5545; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5546; GFX90A-NEXT:    ;;#ASMSTART
5547; GFX90A-NEXT:    ; def v[0:1]
5548; GFX90A-NEXT:    ;;#ASMEND
5549; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5550; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
5551; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5552; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
5553; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5554; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5555; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5556;
5557; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_4_4_4:
5558; GFX940:       ; %bb.0:
5559; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5560; GFX940-NEXT:    ;;#ASMSTART
5561; GFX940-NEXT:    ; def v[0:1]
5562; GFX940-NEXT:    ;;#ASMEND
5563; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5564; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
5565; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5566; GFX940-NEXT:    v_mov_b32_e32 v1, v0
5567; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
5568; GFX940-NEXT:    s_waitcnt vmcnt(0)
5569; GFX940-NEXT:    s_setpc_b64 s[30:31]
5570  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5571  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5572  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5573  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5574  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
5575  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5576  ret void
5577}
5578
; Shuffle mask <5, 4, 4, 4> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). Result lanes: vec1[2], vec1[1], vec1[1], vec1[1].
; Every selected lane comes from %vec1, and the expected output below contains a
; single inline-asm "def" block. The <4 x i16> result is stored to %ptr.
5579define void @v_shuffle_v4i16_v3i16__5_4_4_4(ptr addrspace(1) inreg %ptr) {
5580; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_4_4:
5581; GFX900:       ; %bb.0:
5582; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5583; GFX900-NEXT:    ;;#ASMSTART
5584; GFX900-NEXT:    ; def v[0:1]
5585; GFX900-NEXT:    ;;#ASMEND
5586; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5587; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
5588; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5589; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5590; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
5591; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
5592; GFX900-NEXT:    s_waitcnt vmcnt(0)
5593; GFX900-NEXT:    s_setpc_b64 s[30:31]
5594;
5595; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_4_4:
5596; GFX90A:       ; %bb.0:
5597; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5598; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5599; GFX90A-NEXT:    ;;#ASMSTART
5600; GFX90A-NEXT:    ; def v[0:1]
5601; GFX90A-NEXT:    ;;#ASMEND
5602; GFX90A-NEXT:    v_bfi_b32 v2, s4, v1, v0
5603; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5604; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5605; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
5606; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5607; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5608; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5609;
5610; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_4_4:
5611; GFX940:       ; %bb.0:
5612; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5613; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5614; GFX940-NEXT:    ;;#ASMSTART
5615; GFX940-NEXT:    ; def v[0:1]
5616; GFX940-NEXT:    ;;#ASMEND
5617; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5618; GFX940-NEXT:    v_bfi_b32 v2, s2, v1, v0
5619; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5620; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
5621; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
5622; GFX940-NEXT:    s_waitcnt vmcnt(0)
5623; GFX940-NEXT:    s_setpc_b64 s[30:31]
5624  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5625  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5626  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5627  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5628  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
5629  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5630  ret void
5631}
5632
; Shuffle mask <5, poison, 4, 4> over two <3 x i16> operands. Mask indices 0-2
; pick lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of
; %extract31 (lanes 0-2 of %vec1). Result lanes: vec1[2], poison, vec1[1],
; vec1[1]. No lane of %vec0 is selected, and the expected output below contains
; a single inline-asm "def" block. The <4 x i16> result is stored to %ptr.
5633define void @v_shuffle_v4i16_v3i16__5_u_4_4(ptr addrspace(1) inreg %ptr) {
5634; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_4_4:
5635; GFX900:       ; %bb.0:
5636; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5637; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5638; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5639; GFX900-NEXT:    ;;#ASMSTART
5640; GFX900-NEXT:    ; def v[0:1]
5641; GFX900-NEXT:    ;;#ASMEND
5642; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
5643; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
5644; GFX900-NEXT:    s_waitcnt vmcnt(0)
5645; GFX900-NEXT:    s_setpc_b64 s[30:31]
5646;
5647; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_4_4:
5648; GFX90A:       ; %bb.0:
5649; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5650; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5651; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5652; GFX90A-NEXT:    ;;#ASMSTART
5653; GFX90A-NEXT:    ; def v[0:1]
5654; GFX90A-NEXT:    ;;#ASMEND
5655; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
5656; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5657; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5658; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5659; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5660;
5661; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_4_4:
5662; GFX940:       ; %bb.0:
5663; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5664; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5665; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5666; GFX940-NEXT:    ;;#ASMSTART
5667; GFX940-NEXT:    ; def v[0:1]
5668; GFX940-NEXT:    ;;#ASMEND
5669; GFX940-NEXT:    s_nop 0
5670; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
5671; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5672; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
5673; GFX940-NEXT:    s_waitcnt vmcnt(0)
5674; GFX940-NEXT:    s_setpc_b64 s[30:31]
5675  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5676  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5677  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5678  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5679  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
5680  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5681  ret void
5682}
5683
; Shuffle mask <5, 0, 4, 4> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). Result lanes: vec1[2], vec0[0], vec1[1], vec1[1].
; The <4 x i16> result is stored to %ptr.
5684define void @v_shuffle_v4i16_v3i16__5_0_4_4(ptr addrspace(1) inreg %ptr) {
5685; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_4_4:
5686; GFX900:       ; %bb.0:
5687; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5688; GFX900-NEXT:    ;;#ASMSTART
5689; GFX900-NEXT:    ; def v[0:1]
5690; GFX900-NEXT:    ;;#ASMEND
5691; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5692; GFX900-NEXT:    ;;#ASMSTART
5693; GFX900-NEXT:    ; def v[1:2]
5694; GFX900-NEXT:    ;;#ASMEND
5695; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
5696; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5697; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5698; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5699; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
5700; GFX900-NEXT:    s_waitcnt vmcnt(0)
5701; GFX900-NEXT:    s_setpc_b64 s[30:31]
5702;
5703; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_4_4:
5704; GFX90A:       ; %bb.0:
5705; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5706; GFX90A-NEXT:    ;;#ASMSTART
5707; GFX90A-NEXT:    ; def v[0:1]
5708; GFX90A-NEXT:    ;;#ASMEND
5709; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5710; GFX90A-NEXT:    ;;#ASMSTART
5711; GFX90A-NEXT:    ; def v[2:3]
5712; GFX90A-NEXT:    ;;#ASMEND
5713; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
5714; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5715; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5716; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
5717; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5718; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5719; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5720;
5721; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_4_4:
5722; GFX940:       ; %bb.0:
5723; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5724; GFX940-NEXT:    ;;#ASMSTART
5725; GFX940-NEXT:    ; def v[0:1]
5726; GFX940-NEXT:    ;;#ASMEND
5727; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5728; GFX940-NEXT:    ;;#ASMSTART
5729; GFX940-NEXT:    ; def v[2:3]
5730; GFX940-NEXT:    ;;#ASMEND
5731; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5732; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
5733; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5734; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
5735; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5736; GFX940-NEXT:    s_waitcnt vmcnt(0)
5737; GFX940-NEXT:    s_setpc_b64 s[30:31]
5738  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5739  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5740  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5741  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5742  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
5743  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5744  ret void
5745}
5746
; Shuffle mask <5, 1, 4, 4> over two <3 x i16> operands. Mask indices 0-2 pick
; lanes of %extract3 (lanes 0-2 of %vec0); indices 3-5 pick lanes of %extract31
; (lanes 0-2 of %vec1). Result lanes: vec1[2], vec0[1], vec1[1], vec1[1].
; The <4 x i16> result is stored to %ptr.
5747define void @v_shuffle_v4i16_v3i16__5_1_4_4(ptr addrspace(1) inreg %ptr) {
5748; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_4_4:
5749; GFX900:       ; %bb.0:
5750; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5751; GFX900-NEXT:    ;;#ASMSTART
5752; GFX900-NEXT:    ; def v[0:1]
5753; GFX900-NEXT:    ;;#ASMEND
5754; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5755; GFX900-NEXT:    ;;#ASMSTART
5756; GFX900-NEXT:    ; def v[1:2]
5757; GFX900-NEXT:    ;;#ASMEND
5758; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
5759; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5760; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5761; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5762; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
5763; GFX900-NEXT:    s_waitcnt vmcnt(0)
5764; GFX900-NEXT:    s_setpc_b64 s[30:31]
5765;
5766; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_4_4:
5767; GFX90A:       ; %bb.0:
5768; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5769; GFX90A-NEXT:    ;;#ASMSTART
5770; GFX90A-NEXT:    ; def v[0:1]
5771; GFX90A-NEXT:    ;;#ASMEND
5772; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5773; GFX90A-NEXT:    ;;#ASMSTART
5774; GFX90A-NEXT:    ; def v[2:3]
5775; GFX90A-NEXT:    ;;#ASMEND
5776; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
5777; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5778; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5779; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
5780; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5781; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5782; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5783;
5784; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_4_4:
5785; GFX940:       ; %bb.0:
5786; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5787; GFX940-NEXT:    ;;#ASMSTART
5788; GFX940-NEXT:    ; def v[0:1]
5789; GFX940-NEXT:    ;;#ASMEND
5790; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5791; GFX940-NEXT:    ;;#ASMSTART
5792; GFX940-NEXT:    ; def v[2:3]
5793; GFX940-NEXT:    ;;#ASMEND
5794; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5795; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
5796; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5797; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
5798; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5799; GFX940-NEXT:    s_waitcnt vmcnt(0)
5800; GFX940-NEXT:    s_setpc_b64 s[30:31]
5801  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5802  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5803  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5804  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5805  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
5806  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5807  ret void
5808}
5809
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <5, 2, 4, 4> and store it.
; Mask indices 0-2 select from %extract3 (first three elements of %vec0) and
; indices 3-5 select from %extract31 (first three elements of %vec1), so the
; stored vector is { vec1[2], vec0[2], vec1[1], vec1[1] }.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
5810define void @v_shuffle_v4i16_v3i16__5_2_4_4(ptr addrspace(1) inreg %ptr) {
5811; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_4_4:
5812; GFX900:       ; %bb.0:
5813; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5814; GFX900-NEXT:    ;;#ASMSTART
5815; GFX900-NEXT:    ; def v[0:1]
5816; GFX900-NEXT:    ;;#ASMEND
5817; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5818; GFX900-NEXT:    ;;#ASMSTART
5819; GFX900-NEXT:    ; def v[2:3]
5820; GFX900-NEXT:    ;;#ASMEND
5821; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
5822; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5823; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5824; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
5825; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5826; GFX900-NEXT:    s_waitcnt vmcnt(0)
5827; GFX900-NEXT:    s_setpc_b64 s[30:31]
5828;
5829; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_4_4:
5830; GFX90A:       ; %bb.0:
5831; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5832; GFX90A-NEXT:    ;;#ASMSTART
5833; GFX90A-NEXT:    ; def v[0:1]
5834; GFX90A-NEXT:    ;;#ASMEND
5835; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5836; GFX90A-NEXT:    ;;#ASMSTART
5837; GFX90A-NEXT:    ; def v[2:3]
5838; GFX90A-NEXT:    ;;#ASMEND
5839; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
5840; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5841; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5842; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
5843; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5844; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5845; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5846;
5847; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_4_4:
5848; GFX940:       ; %bb.0:
5849; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5850; GFX940-NEXT:    ;;#ASMSTART
5851; GFX940-NEXT:    ; def v[0:1]
5852; GFX940-NEXT:    ;;#ASMEND
5853; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5854; GFX940-NEXT:    ;;#ASMSTART
5855; GFX940-NEXT:    ; def v[2:3]
5856; GFX940-NEXT:    ;;#ASMEND
5857; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5858; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
5859; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5860; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
5861; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5862; GFX940-NEXT:    s_waitcnt vmcnt(0)
5863; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
5864  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5865  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
5866  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5867  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: both operands are read.
5868  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
5869  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5870  ret void
5871}
5872
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <5, 3, 4, 4> and store it.
; Mask indices 0-2 select from %extract3 (from %vec0) and indices 3-5 select from
; %extract31 (from %vec1), so the stored vector is
; { vec1[2], vec1[0], vec1[1], vec1[1] }. Only the second operand is read, and
; the expected output for every target contains a single "; def".
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
5873define void @v_shuffle_v4i16_v3i16__5_3_4_4(ptr addrspace(1) inreg %ptr) {
5874; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_4_4:
5875; GFX900:       ; %bb.0:
5876; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5877; GFX900-NEXT:    ;;#ASMSTART
5878; GFX900-NEXT:    ; def v[0:1]
5879; GFX900-NEXT:    ;;#ASMEND
5880; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5881; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
5882; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5883; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5884; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
5885; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
5886; GFX900-NEXT:    s_waitcnt vmcnt(0)
5887; GFX900-NEXT:    s_setpc_b64 s[30:31]
5888;
5889; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_4_4:
5890; GFX90A:       ; %bb.0:
5891; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5892; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5893; GFX90A-NEXT:    ;;#ASMSTART
5894; GFX90A-NEXT:    ; def v[0:1]
5895; GFX90A-NEXT:    ;;#ASMEND
5896; GFX90A-NEXT:    v_perm_b32 v2, v0, v1, s4
5897; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5898; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5899; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
5900; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5901; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5902; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5903;
5904; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_4_4:
5905; GFX940:       ; %bb.0:
5906; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5907; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5908; GFX940-NEXT:    ;;#ASMSTART
5909; GFX940-NEXT:    ; def v[0:1]
5910; GFX940-NEXT:    ;;#ASMEND
5911; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5912; GFX940-NEXT:    v_perm_b32 v2, v0, v1, s2
5913; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5914; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
5915; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
5916; GFX940-NEXT:    s_waitcnt vmcnt(0)
5917; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
5918  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5919  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
5920  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5921  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: every mask index is >= 3, i.e. taken from %extract31.
5922  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
5923  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5924  ret void
5925}
5926
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <5, 5, 4, 4> and store it.
; Mask indices 0-2 select from %extract3 (from %vec0) and indices 3-5 select from
; %extract31 (from %vec1), so the stored vector is
; { vec1[2], vec1[2], vec1[1], vec1[1] }. Only the second operand is read, and
; the expected output for every target contains a single "; def".
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
5927define void @v_shuffle_v4i16_v3i16__5_5_4_4(ptr addrspace(1) inreg %ptr) {
5928; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_4:
5929; GFX900:       ; %bb.0:
5930; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5931; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5932; GFX900-NEXT:    ;;#ASMSTART
5933; GFX900-NEXT:    ; def v[0:1]
5934; GFX900-NEXT:    ;;#ASMEND
5935; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
5936; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5937; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5938; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5939; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
5940; GFX900-NEXT:    s_waitcnt vmcnt(0)
5941; GFX900-NEXT:    s_setpc_b64 s[30:31]
5942;
5943; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_4:
5944; GFX90A:       ; %bb.0:
5945; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5946; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5947; GFX90A-NEXT:    ;;#ASMSTART
5948; GFX90A-NEXT:    ; def v[0:1]
5949; GFX90A-NEXT:    ;;#ASMEND
5950; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
5951; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5952; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5953; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
5954; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5955; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5956; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5957;
5958; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_4:
5959; GFX940:       ; %bb.0:
5960; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5961; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5962; GFX940-NEXT:    ;;#ASMSTART
5963; GFX940-NEXT:    ; def v[0:1]
5964; GFX940-NEXT:    ;;#ASMEND
5965; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5966; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
5967; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5968; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
5969; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
5970; GFX940-NEXT:    s_waitcnt vmcnt(0)
5971; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
5972  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5973  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
5974  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5975  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: every mask index is >= 3, i.e. taken from %extract31.
5976  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
5977  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5978  ret void
5979}
5980
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <5, 5, poison, 4> and store it.
; Mask indices 0-2 select from %extract3 (from %vec0) and indices 3-5 select from
; %extract31 (from %vec1), so the stored vector is
; { vec1[2], vec1[2], <poison>, vec1[1] }. Only the second operand is read, and
; the expected output for every target contains a single "; def".
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
5981define void @v_shuffle_v4i16_v3i16__5_5_u_4(ptr addrspace(1) inreg %ptr) {
5982; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_4:
5983; GFX900:       ; %bb.0:
5984; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5985; GFX900-NEXT:    ;;#ASMSTART
5986; GFX900-NEXT:    ; def v[0:1]
5987; GFX900-NEXT:    ;;#ASMEND
5988; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5989; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5990; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5991; GFX900-NEXT:    v_mov_b32_e32 v2, v0
5992; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
5993; GFX900-NEXT:    s_waitcnt vmcnt(0)
5994; GFX900-NEXT:    s_setpc_b64 s[30:31]
5995;
5996; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_4:
5997; GFX90A:       ; %bb.0:
5998; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5999; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6000; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6001; GFX90A-NEXT:    ;;#ASMSTART
6002; GFX90A-NEXT:    ; def v[0:1]
6003; GFX90A-NEXT:    ;;#ASMEND
6004; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
6005; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
6006; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
6007; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6008; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6009;
6010; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_4:
6011; GFX940:       ; %bb.0:
6012; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6013; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6014; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6015; GFX940-NEXT:    ;;#ASMSTART
6016; GFX940-NEXT:    ; def v[0:1]
6017; GFX940-NEXT:    ;;#ASMEND
6018; GFX940-NEXT:    s_nop 0
6019; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
6020; GFX940-NEXT:    v_mov_b32_e32 v3, v0
6021; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
6022; GFX940-NEXT:    s_waitcnt vmcnt(0)
6023; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6024  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6025  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6026  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6027  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lane 2 is poison (the "u" in the function name); the
  ; remaining lanes all come from %extract31.
6028  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
6029  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6030  ret void
6031}
6032
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <5, 5, 0, 4> and store it.
; Mask indices 0-2 select from %extract3 (first three elements of %vec0) and
; indices 3-5 select from %extract31 (first three elements of %vec1), so the
; stored vector is { vec1[2], vec1[2], vec0[0], vec1[1] }.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
6033define void @v_shuffle_v4i16_v3i16__5_5_0_4(ptr addrspace(1) inreg %ptr) {
6034; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_4:
6035; GFX900:       ; %bb.0:
6036; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6037; GFX900-NEXT:    ;;#ASMSTART
6038; GFX900-NEXT:    ; def v[0:1]
6039; GFX900-NEXT:    ;;#ASMEND
6040; GFX900-NEXT:    ;;#ASMSTART
6041; GFX900-NEXT:    ; def v[1:2]
6042; GFX900-NEXT:    ;;#ASMEND
6043; GFX900-NEXT:    s_mov_b32 s4, 0xffff
6044; GFX900-NEXT:    v_bfi_b32 v1, s4, v0, v1
6045; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6046; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6047; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
6048; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
6049; GFX900-NEXT:    s_waitcnt vmcnt(0)
6050; GFX900-NEXT:    s_setpc_b64 s[30:31]
6051;
6052; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_4:
6053; GFX90A:       ; %bb.0:
6054; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6055; GFX90A-NEXT:    ;;#ASMSTART
6056; GFX90A-NEXT:    ; def v[0:1]
6057; GFX90A-NEXT:    ;;#ASMEND
6058; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
6059; GFX90A-NEXT:    ;;#ASMSTART
6060; GFX90A-NEXT:    ; def v[2:3]
6061; GFX90A-NEXT:    ;;#ASMEND
6062; GFX90A-NEXT:    v_bfi_b32 v1, s4, v0, v2
6063; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6064; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6065; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6066; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6067; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6068; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6069;
6070; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_4:
6071; GFX940:       ; %bb.0:
6072; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6073; GFX940-NEXT:    ;;#ASMSTART
6074; GFX940-NEXT:    ; def v[0:1]
6075; GFX940-NEXT:    ;;#ASMEND
6076; GFX940-NEXT:    s_mov_b32 s2, 0xffff
6077; GFX940-NEXT:    ;;#ASMSTART
6078; GFX940-NEXT:    ; def v[2:3]
6079; GFX940-NEXT:    ;;#ASMEND
6080; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6081; GFX940-NEXT:    v_bfi_b32 v1, s2, v0, v2
6082; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6083; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6084; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6085; GFX940-NEXT:    s_waitcnt vmcnt(0)
6086; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6087  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6088  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6089  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6090  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lane 2 comes from %extract3, the rest from %extract31.
6091  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
6092  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6093  ret void
6094}
6095
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <5, 5, 1, 4> and store it.
; Mask indices 0-2 select from %extract3 (first three elements of %vec0) and
; indices 3-5 select from %extract31 (first three elements of %vec1), so the
; stored vector is { vec1[2], vec1[2], vec0[1], vec1[1] }.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
6096define void @v_shuffle_v4i16_v3i16__5_5_1_4(ptr addrspace(1) inreg %ptr) {
6097; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_4:
6098; GFX900:       ; %bb.0:
6099; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6100; GFX900-NEXT:    ;;#ASMSTART
6101; GFX900-NEXT:    ; def v[0:1]
6102; GFX900-NEXT:    ;;#ASMEND
6103; GFX900-NEXT:    ;;#ASMSTART
6104; GFX900-NEXT:    ; def v[1:2]
6105; GFX900-NEXT:    ;;#ASMEND
6106; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6107; GFX900-NEXT:    v_perm_b32 v1, v1, v0, s4
6108; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6109; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6110; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
6111; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
6112; GFX900-NEXT:    s_waitcnt vmcnt(0)
6113; GFX900-NEXT:    s_setpc_b64 s[30:31]
6114;
6115; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_4:
6116; GFX90A:       ; %bb.0:
6117; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6118; GFX90A-NEXT:    ;;#ASMSTART
6119; GFX90A-NEXT:    ; def v[0:1]
6120; GFX90A-NEXT:    ;;#ASMEND
6121; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6122; GFX90A-NEXT:    ;;#ASMSTART
6123; GFX90A-NEXT:    ; def v[2:3]
6124; GFX90A-NEXT:    ;;#ASMEND
6125; GFX90A-NEXT:    v_perm_b32 v1, v2, v0, s4
6126; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6127; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6128; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6129; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6130; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6131; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6132;
6133; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_4:
6134; GFX940:       ; %bb.0:
6135; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6136; GFX940-NEXT:    ;;#ASMSTART
6137; GFX940-NEXT:    ; def v[0:1]
6138; GFX940-NEXT:    ;;#ASMEND
6139; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6140; GFX940-NEXT:    ;;#ASMSTART
6141; GFX940-NEXT:    ; def v[2:3]
6142; GFX940-NEXT:    ;;#ASMEND
6143; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6144; GFX940-NEXT:    v_perm_b32 v1, v2, v0, s2
6145; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6146; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6147; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6148; GFX940-NEXT:    s_waitcnt vmcnt(0)
6149; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6150  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6151  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6152  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6153  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lane 2 comes from %extract3, the rest from %extract31.
6154  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
6155  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6156  ret void
6157}
6158
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <5, 5, 2, 4> and store it.
; Mask indices 0-2 select from %extract3 (first three elements of %vec0) and
; indices 3-5 select from %extract31 (first three elements of %vec1), so the
; stored vector is { vec1[2], vec1[2], vec0[2], vec1[1] }.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
6159define void @v_shuffle_v4i16_v3i16__5_5_2_4(ptr addrspace(1) inreg %ptr) {
6160; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_4:
6161; GFX900:       ; %bb.0:
6162; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6163; GFX900-NEXT:    ;;#ASMSTART
6164; GFX900-NEXT:    ; def v[0:1]
6165; GFX900-NEXT:    ;;#ASMEND
6166; GFX900-NEXT:    s_mov_b32 s4, 0xffff
6167; GFX900-NEXT:    ;;#ASMSTART
6168; GFX900-NEXT:    ; def v[2:3]
6169; GFX900-NEXT:    ;;#ASMEND
6170; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v2
6171; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6172; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6173; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
6174; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6175; GFX900-NEXT:    s_waitcnt vmcnt(0)
6176; GFX900-NEXT:    s_setpc_b64 s[30:31]
6177;
6178; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_4:
6179; GFX90A:       ; %bb.0:
6180; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6181; GFX90A-NEXT:    ;;#ASMSTART
6182; GFX90A-NEXT:    ; def v[0:1]
6183; GFX90A-NEXT:    ;;#ASMEND
6184; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
6185; GFX90A-NEXT:    ;;#ASMSTART
6186; GFX90A-NEXT:    ; def v[2:3]
6187; GFX90A-NEXT:    ;;#ASMEND
6188; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v2
6189; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6190; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6191; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6192; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6193; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6194; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6195;
6196; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_4:
6197; GFX940:       ; %bb.0:
6198; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6199; GFX940-NEXT:    ;;#ASMSTART
6200; GFX940-NEXT:    ; def v[0:1]
6201; GFX940-NEXT:    ;;#ASMEND
6202; GFX940-NEXT:    s_mov_b32 s2, 0xffff
6203; GFX940-NEXT:    ;;#ASMSTART
6204; GFX940-NEXT:    ; def v[2:3]
6205; GFX940-NEXT:    ;;#ASMEND
6206; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6207; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v2
6208; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6209; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6210; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6211; GFX940-NEXT:    s_waitcnt vmcnt(0)
6212; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6213  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6214  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6215  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6216  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lane 2 comes from %extract3, the rest from %extract31.
6217  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
6218  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6219  ret void
6220}
6221
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <5, 5, 3, 4> and store it.
; Mask indices 0-2 select from %extract3 (from %vec0) and indices 3-5 select from
; %extract31 (from %vec1), so the stored vector is
; { vec1[2], vec1[2], vec1[0], vec1[1] }. Only the second operand is read, and
; the expected output for every target contains a single "; def".
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
6222define void @v_shuffle_v4i16_v3i16__5_5_3_4(ptr addrspace(1) inreg %ptr) {
6223; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_4:
6224; GFX900:       ; %bb.0:
6225; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6226; GFX900-NEXT:    ;;#ASMSTART
6227; GFX900-NEXT:    ; def v[0:1]
6228; GFX900-NEXT:    ;;#ASMEND
6229; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6230; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6231; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6232; GFX900-NEXT:    v_mov_b32_e32 v2, v0
6233; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
6234; GFX900-NEXT:    s_waitcnt vmcnt(0)
6235; GFX900-NEXT:    s_setpc_b64 s[30:31]
6236;
6237; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_4:
6238; GFX90A:       ; %bb.0:
6239; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6240; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6241; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6242; GFX90A-NEXT:    ;;#ASMSTART
6243; GFX90A-NEXT:    ; def v[0:1]
6244; GFX90A-NEXT:    ;;#ASMEND
6245; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
6246; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
6247; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
6248; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6249; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6250;
6251; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_4:
6252; GFX940:       ; %bb.0:
6253; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6254; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6255; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6256; GFX940-NEXT:    ;;#ASMSTART
6257; GFX940-NEXT:    ; def v[0:1]
6258; GFX940-NEXT:    ;;#ASMEND
6259; GFX940-NEXT:    s_nop 0
6260; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
6261; GFX940-NEXT:    v_mov_b32_e32 v3, v0
6262; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
6263; GFX940-NEXT:    s_waitcnt vmcnt(0)
6264; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6265  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6266  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6267  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6268  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: every mask index is >= 3, i.e. taken from %extract31.
6269  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
6270  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6271  ret void
6272}
6273
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <poison, 5, 5, 5> and store it.
; Mask indices 0-2 select from %extract3 (from %vec0) and indices 3-5 select from
; %extract31 (from %vec1), so the stored vector is
; { <poison>, vec1[2], vec1[2], vec1[2] }. Only the second operand is read, and
; the expected output for every target contains a single "; def".
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
6274define void @v_shuffle_v4i16_v3i16__u_5_5_5(ptr addrspace(1) inreg %ptr) {
6275; GFX900-LABEL: v_shuffle_v4i16_v3i16__u_5_5_5:
6276; GFX900:       ; %bb.0:
6277; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6278; GFX900-NEXT:    ;;#ASMSTART
6279; GFX900-NEXT:    ; def v[0:1]
6280; GFX900-NEXT:    ;;#ASMEND
6281; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6282; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6283; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
6284; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
6285; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
6286; GFX900-NEXT:    s_waitcnt vmcnt(0)
6287; GFX900-NEXT:    s_setpc_b64 s[30:31]
6288;
6289; GFX90A-LABEL: v_shuffle_v4i16_v3i16__u_5_5_5:
6290; GFX90A:       ; %bb.0:
6291; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6292; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6293; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6294; GFX90A-NEXT:    ;;#ASMSTART
6295; GFX90A-NEXT:    ; def v[0:1]
6296; GFX90A-NEXT:    ;;#ASMEND
6297; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
6298; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
6299; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
6300; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6301; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6302;
6303; GFX940-LABEL: v_shuffle_v4i16_v3i16__u_5_5_5:
6304; GFX940:       ; %bb.0:
6305; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6306; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6307; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6308; GFX940-NEXT:    ;;#ASMSTART
6309; GFX940-NEXT:    ; def v[0:1]
6310; GFX940-NEXT:    ;;#ASMEND
6311; GFX940-NEXT:    s_nop 0
6312; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
6313; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
6314; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
6315; GFX940-NEXT:    s_waitcnt vmcnt(0)
6316; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6317  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6318  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6319  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6320  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lane 0 is poison (the "u" in the function name); the
  ; remaining lanes all come from %extract31.
6321  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
6322  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6323  ret void
6324}
6325
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <0, 5, 5, 5> and store it.
; Mask indices 0-2 select from %extract3 (first three elements of %vec0) and
; indices 3-5 select from %extract31 (first three elements of %vec1), so the
; stored vector is { vec0[0], vec1[2], vec1[2], vec1[2] }.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
6326define void @v_shuffle_v4i16_v3i16__0_5_5_5(ptr addrspace(1) inreg %ptr) {
6327; GFX900-LABEL: v_shuffle_v4i16_v3i16__0_5_5_5:
6328; GFX900:       ; %bb.0:
6329; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6330; GFX900-NEXT:    ;;#ASMSTART
6331; GFX900-NEXT:    ; def v[0:1]
6332; GFX900-NEXT:    ;;#ASMEND
6333; GFX900-NEXT:    ;;#ASMSTART
6334; GFX900-NEXT:    ; def v[1:2]
6335; GFX900-NEXT:    ;;#ASMEND
6336; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6337; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6338; GFX900-NEXT:    v_perm_b32 v0, v2, v0, s4
6339; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
6340; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
6341; GFX900-NEXT:    s_waitcnt vmcnt(0)
6342; GFX900-NEXT:    s_setpc_b64 s[30:31]
6343;
6344; GFX90A-LABEL: v_shuffle_v4i16_v3i16__0_5_5_5:
6345; GFX90A:       ; %bb.0:
6346; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6347; GFX90A-NEXT:    ;;#ASMSTART
6348; GFX90A-NEXT:    ; def v[0:1]
6349; GFX90A-NEXT:    ;;#ASMEND
6350; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6351; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6352; GFX90A-NEXT:    ;;#ASMSTART
6353; GFX90A-NEXT:    ; def v[2:3]
6354; GFX90A-NEXT:    ;;#ASMEND
6355; GFX90A-NEXT:    v_perm_b32 v0, v3, v0, s4
6356; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
6357; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6358; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6359; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6360;
6361; GFX940-LABEL: v_shuffle_v4i16_v3i16__0_5_5_5:
6362; GFX940:       ; %bb.0:
6363; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6364; GFX940-NEXT:    ;;#ASMSTART
6365; GFX940-NEXT:    ; def v[0:1]
6366; GFX940-NEXT:    ;;#ASMEND
6367; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6368; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6369; GFX940-NEXT:    ;;#ASMSTART
6370; GFX940-NEXT:    ; def v[2:3]
6371; GFX940-NEXT:    ;;#ASMEND
6372; GFX940-NEXT:    s_nop 0
6373; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s2
6374; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
6375; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6376; GFX940-NEXT:    s_waitcnt vmcnt(0)
6377; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6378  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6379  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6380  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6381  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lane 0 comes from %extract3, the rest from %extract31.
6382  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
6383  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6384  ret void
6385}
6386
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <1, 5, 5, 5> and store it.
; Mask indices 0-2 select from %extract3 (first three elements of %vec0) and
; indices 3-5 select from %extract31 (first three elements of %vec1), so the
; stored vector is { vec0[1], vec1[2], vec1[2], vec1[2] }.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
6387define void @v_shuffle_v4i16_v3i16__1_5_5_5(ptr addrspace(1) inreg %ptr) {
6388; GFX900-LABEL: v_shuffle_v4i16_v3i16__1_5_5_5:
6389; GFX900:       ; %bb.0:
6390; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6391; GFX900-NEXT:    ;;#ASMSTART
6392; GFX900-NEXT:    ; def v[0:1]
6393; GFX900-NEXT:    ;;#ASMEND
6394; GFX900-NEXT:    ;;#ASMSTART
6395; GFX900-NEXT:    ; def v[1:2]
6396; GFX900-NEXT:    ;;#ASMEND
6397; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6398; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6399; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
6400; GFX900-NEXT:    v_alignbit_b32 v0, v2, v0, 16
6401; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
6402; GFX900-NEXT:    s_waitcnt vmcnt(0)
6403; GFX900-NEXT:    s_setpc_b64 s[30:31]
6404;
6405; GFX90A-LABEL: v_shuffle_v4i16_v3i16__1_5_5_5:
6406; GFX90A:       ; %bb.0:
6407; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6408; GFX90A-NEXT:    ;;#ASMSTART
6409; GFX90A-NEXT:    ; def v[0:1]
6410; GFX90A-NEXT:    ;;#ASMEND
6411; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6412; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6413; GFX90A-NEXT:    ;;#ASMSTART
6414; GFX90A-NEXT:    ; def v[2:3]
6415; GFX90A-NEXT:    ;;#ASMEND
6416; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
6417; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v0, 16
6418; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6419; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6420; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6421;
6422; GFX940-LABEL: v_shuffle_v4i16_v3i16__1_5_5_5:
6423; GFX940:       ; %bb.0:
6424; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6425; GFX940-NEXT:    ;;#ASMSTART
6426; GFX940-NEXT:    ; def v[0:1]
6427; GFX940-NEXT:    ;;#ASMEND
6428; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6429; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6430; GFX940-NEXT:    ;;#ASMSTART
6431; GFX940-NEXT:    ; def v[2:3]
6432; GFX940-NEXT:    ;;#ASMEND
6433; GFX940-NEXT:    s_nop 0
6434; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
6435; GFX940-NEXT:    v_alignbit_b32 v0, v3, v0, 16
6436; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6437; GFX940-NEXT:    s_waitcnt vmcnt(0)
6438; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6439  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6440  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6441  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6442  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lane 0 comes from %extract3, the rest from %extract31.
6443  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
6444  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6445  ret void
6446}
6447
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <2, 5, 5, 5> and store it.
; Mask indices 0-2 select from %extract3 (first three elements of %vec0) and
; indices 3-5 select from %extract31 (first three elements of %vec1), so the
; stored vector is { vec0[2], vec1[2], vec1[2], vec1[2] }.
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
6448define void @v_shuffle_v4i16_v3i16__2_5_5_5(ptr addrspace(1) inreg %ptr) {
6449; GFX900-LABEL: v_shuffle_v4i16_v3i16__2_5_5_5:
6450; GFX900:       ; %bb.0:
6451; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6452; GFX900-NEXT:    ;;#ASMSTART
6453; GFX900-NEXT:    ; def v[0:1]
6454; GFX900-NEXT:    ;;#ASMEND
6455; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6456; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6457; GFX900-NEXT:    ;;#ASMSTART
6458; GFX900-NEXT:    ; def v[2:3]
6459; GFX900-NEXT:    ;;#ASMEND
6460; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
6461; GFX900-NEXT:    v_perm_b32 v1, v3, v3, s4
6462; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6463; GFX900-NEXT:    s_waitcnt vmcnt(0)
6464; GFX900-NEXT:    s_setpc_b64 s[30:31]
6465;
6466; GFX90A-LABEL: v_shuffle_v4i16_v3i16__2_5_5_5:
6467; GFX90A:       ; %bb.0:
6468; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6469; GFX90A-NEXT:    ;;#ASMSTART
6470; GFX90A-NEXT:    ; def v[0:1]
6471; GFX90A-NEXT:    ;;#ASMEND
6472; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6473; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6474; GFX90A-NEXT:    ;;#ASMSTART
6475; GFX90A-NEXT:    ; def v[2:3]
6476; GFX90A-NEXT:    ;;#ASMEND
6477; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
6478; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
6479; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6480; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6481; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6482;
6483; GFX940-LABEL: v_shuffle_v4i16_v3i16__2_5_5_5:
6484; GFX940:       ; %bb.0:
6485; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6486; GFX940-NEXT:    ;;#ASMSTART
6487; GFX940-NEXT:    ; def v[0:1]
6488; GFX940-NEXT:    ;;#ASMEND
6489; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6490; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6491; GFX940-NEXT:    ;;#ASMSTART
6492; GFX940-NEXT:    ; def v[2:3]
6493; GFX940-NEXT:    ;;#ASMEND
6494; GFX940-NEXT:    s_nop 0
6495; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
6496; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
6497; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6498; GFX940-NEXT:    s_waitcnt vmcnt(0)
6499; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6500  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6501  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6502  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6503  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lane 0 comes from %extract3, the rest from %extract31.
6504  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
6505  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6506  ret void
6507}
6508
; Shuffle two <3 x i16> operands into a <4 x i16> with mask <3, 5, 5, 5> and store it.
; Mask indices 0-2 select from %extract3 (from %vec0) and indices 3-5 select from
; %extract31 (from %vec1), so the stored vector is
; { vec1[0], vec1[2], vec1[2], vec1[2] }. Only the second operand is read, and
; the expected output for every target contains a single "; def".
; The assertions below are autogenerated (see the NOTE on the first line of this
; file); regenerate them instead of editing by hand.
6509define void @v_shuffle_v4i16_v3i16__3_5_5_5(ptr addrspace(1) inreg %ptr) {
6510; GFX900-LABEL: v_shuffle_v4i16_v3i16__3_5_5_5:
6511; GFX900:       ; %bb.0:
6512; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6513; GFX900-NEXT:    ;;#ASMSTART
6514; GFX900-NEXT:    ; def v[0:1]
6515; GFX900-NEXT:    ;;#ASMEND
6516; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6517; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6518; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
6519; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6520; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
6521; GFX900-NEXT:    s_waitcnt vmcnt(0)
6522; GFX900-NEXT:    s_setpc_b64 s[30:31]
6523;
6524; GFX90A-LABEL: v_shuffle_v4i16_v3i16__3_5_5_5:
6525; GFX90A:       ; %bb.0:
6526; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6527; GFX90A-NEXT:    ;;#ASMSTART
6528; GFX90A-NEXT:    ; def v[0:1]
6529; GFX90A-NEXT:    ;;#ASMEND
6530; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6531; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6532; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
6533; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6534; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
6535; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6536; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6537;
6538; GFX940-LABEL: v_shuffle_v4i16_v3i16__3_5_5_5:
6539; GFX940:       ; %bb.0:
6540; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6541; GFX940-NEXT:    ;;#ASMSTART
6542; GFX940-NEXT:    ; def v[0:1]
6543; GFX940-NEXT:    ;;#ASMEND
6544; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6545; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6546; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
6547; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6548; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
6549; GFX940-NEXT:    s_waitcnt vmcnt(0)
6550; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Each inline asm produces an opaque <4 x i16> value (printed as "; def <regs>").
6551  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6552  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Narrow both sources to <3 x i16> by keeping elements 0, 1 and 2.
6553  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6554  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: every mask index is >= 3, i.e. taken from %extract31.
6555  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
6556  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6557  ret void
6558}
6559
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <4, 5, 5, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here every lane comes from %vec1: lane 0 = element 1, lanes 1-3 = element 2;
; %vec0 does not contribute to the stored value.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6560define void @v_shuffle_v4i16_v3i16__4_5_5_5(ptr addrspace(1) inreg %ptr) {
6561; GFX900-LABEL: v_shuffle_v4i16_v3i16__4_5_5_5:
6562; GFX900:       ; %bb.0:
6563; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6564; GFX900-NEXT:    ;;#ASMSTART
6565; GFX900-NEXT:    ; def v[0:1]
6566; GFX900-NEXT:    ;;#ASMEND
6567; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6568; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6569; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
6570; GFX900-NEXT:    v_alignbit_b32 v1, v1, v0, 16
6571; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
6572; GFX900-NEXT:    s_waitcnt vmcnt(0)
6573; GFX900-NEXT:    s_setpc_b64 s[30:31]
6574;
6575; GFX90A-LABEL: v_shuffle_v4i16_v3i16__4_5_5_5:
6576; GFX90A:       ; %bb.0:
6577; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6578; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6579; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6580; GFX90A-NEXT:    ;;#ASMSTART
6581; GFX90A-NEXT:    ; def v[0:1]
6582; GFX90A-NEXT:    ;;#ASMEND
6583; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
6584; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v0, 16
6585; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
6586; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6587; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6588;
6589; GFX940-LABEL: v_shuffle_v4i16_v3i16__4_5_5_5:
6590; GFX940:       ; %bb.0:
6591; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6592; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6593; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6594; GFX940-NEXT:    ;;#ASMSTART
6595; GFX940-NEXT:    ; def v[0:1]
6596; GFX940-NEXT:    ;;#ASMEND
6597; GFX940-NEXT:    s_nop 0
6598; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
6599; GFX940-NEXT:    v_alignbit_b32 v2, v1, v0, 16
6600; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
6601; GFX940-NEXT:    s_waitcnt vmcnt(0)
6602; GFX940-NEXT:    s_setpc_b64 s[30:31]
6603  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6604  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6605  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6606  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6607  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
6608  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6609  ret void
6610}
6611
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, poison, 5, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here lanes 0, 2 and 3 = %vec1 element 2 and lane 1 is a poison mask entry
; (the "u" in the function name); %vec0 does not contribute to the stored value.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6612define void @v_shuffle_v4i16_v3i16__5_u_5_5(ptr addrspace(1) inreg %ptr) {
6613; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_u_5_5:
6614; GFX900:       ; %bb.0:
6615; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6616; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6617; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6618; GFX900-NEXT:    ;;#ASMSTART
6619; GFX900-NEXT:    ; def v[0:1]
6620; GFX900-NEXT:    ;;#ASMEND
6621; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
6622; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
6623; GFX900-NEXT:    s_waitcnt vmcnt(0)
6624; GFX900-NEXT:    s_setpc_b64 s[30:31]
6625;
6626; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_u_5_5:
6627; GFX90A:       ; %bb.0:
6628; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6629; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6630; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6631; GFX90A-NEXT:    ;;#ASMSTART
6632; GFX90A-NEXT:    ; def v[0:1]
6633; GFX90A-NEXT:    ;;#ASMEND
6634; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
6635; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
6636; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
6637; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6638; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6639;
6640; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_u_5_5:
6641; GFX940:       ; %bb.0:
6642; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6643; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6644; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6645; GFX940-NEXT:    ;;#ASMSTART
6646; GFX940-NEXT:    ; def v[0:1]
6647; GFX940-NEXT:    ;;#ASMEND
6648; GFX940-NEXT:    s_nop 0
6649; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
6650; GFX940-NEXT:    v_mov_b32_e32 v2, v1
6651; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
6652; GFX940-NEXT:    s_waitcnt vmcnt(0)
6653; GFX940-NEXT:    s_setpc_b64 s[30:31]
6654  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6655  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6656  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6657  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6658  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
6659  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6660  ret void
6661}
6662
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 0, 5, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here lane 1 = %vec0 element 0 and lanes 0, 2 and 3 = %vec1 element 2, so both
; asm-defined inputs feed the stored value (two "def" regions in the CHECKs).
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6663define void @v_shuffle_v4i16_v3i16__5_0_5_5(ptr addrspace(1) inreg %ptr) {
6664; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_0_5_5:
6665; GFX900:       ; %bb.0:
6666; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6667; GFX900-NEXT:    ;;#ASMSTART
6668; GFX900-NEXT:    ; def v[0:1]
6669; GFX900-NEXT:    ;;#ASMEND
6670; GFX900-NEXT:    ;;#ASMSTART
6671; GFX900-NEXT:    ; def v[1:2]
6672; GFX900-NEXT:    ;;#ASMEND
6673; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6674; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6675; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
6676; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
6677; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
6678; GFX900-NEXT:    s_waitcnt vmcnt(0)
6679; GFX900-NEXT:    s_setpc_b64 s[30:31]
6680;
6681; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_0_5_5:
6682; GFX90A:       ; %bb.0:
6683; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6684; GFX90A-NEXT:    ;;#ASMSTART
6685; GFX90A-NEXT:    ; def v[0:1]
6686; GFX90A-NEXT:    ;;#ASMEND
6687; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6688; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6689; GFX90A-NEXT:    ;;#ASMSTART
6690; GFX90A-NEXT:    ; def v[2:3]
6691; GFX90A-NEXT:    ;;#ASMEND
6692; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
6693; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
6694; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6695; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6696; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6697;
6698; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_0_5_5:
6699; GFX940:       ; %bb.0:
6700; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6701; GFX940-NEXT:    ;;#ASMSTART
6702; GFX940-NEXT:    ; def v[0:1]
6703; GFX940-NEXT:    ;;#ASMEND
6704; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6705; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6706; GFX940-NEXT:    ;;#ASMSTART
6707; GFX940-NEXT:    ; def v[2:3]
6708; GFX940-NEXT:    ;;#ASMEND
6709; GFX940-NEXT:    s_nop 0
6710; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
6711; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
6712; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6713; GFX940-NEXT:    s_waitcnt vmcnt(0)
6714; GFX940-NEXT:    s_setpc_b64 s[30:31]
6715  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6716  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6717  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6718  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6719  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
6720  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6721  ret void
6722}
6723
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 1, 5, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here lane 1 = %vec0 element 1 and lanes 0, 2 and 3 = %vec1 element 2, so both
; asm-defined inputs feed the stored value (two "def" regions in the CHECKs).
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6724define void @v_shuffle_v4i16_v3i16__5_1_5_5(ptr addrspace(1) inreg %ptr) {
6725; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_1_5_5:
6726; GFX900:       ; %bb.0:
6727; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6728; GFX900-NEXT:    ;;#ASMSTART
6729; GFX900-NEXT:    ; def v[0:1]
6730; GFX900-NEXT:    ;;#ASMEND
6731; GFX900-NEXT:    s_mov_b32 s4, 0xffff
6732; GFX900-NEXT:    ;;#ASMSTART
6733; GFX900-NEXT:    ; def v[1:2]
6734; GFX900-NEXT:    ;;#ASMEND
6735; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
6736; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6737; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6738; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
6739; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
6740; GFX900-NEXT:    s_waitcnt vmcnt(0)
6741; GFX900-NEXT:    s_setpc_b64 s[30:31]
6742;
6743; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_1_5_5:
6744; GFX90A:       ; %bb.0:
6745; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6746; GFX90A-NEXT:    ;;#ASMSTART
6747; GFX90A-NEXT:    ; def v[0:1]
6748; GFX90A-NEXT:    ;;#ASMEND
6749; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
6750; GFX90A-NEXT:    ;;#ASMSTART
6751; GFX90A-NEXT:    ; def v[2:3]
6752; GFX90A-NEXT:    ;;#ASMEND
6753; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
6754; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6755; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6756; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
6757; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6758; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6759; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6760;
6761; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_1_5_5:
6762; GFX940:       ; %bb.0:
6763; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6764; GFX940-NEXT:    ;;#ASMSTART
6765; GFX940-NEXT:    ; def v[0:1]
6766; GFX940-NEXT:    ;;#ASMEND
6767; GFX940-NEXT:    s_mov_b32 s2, 0xffff
6768; GFX940-NEXT:    ;;#ASMSTART
6769; GFX940-NEXT:    ; def v[2:3]
6770; GFX940-NEXT:    ;;#ASMEND
6771; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6772; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
6773; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6774; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
6775; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6776; GFX940-NEXT:    s_waitcnt vmcnt(0)
6777; GFX940-NEXT:    s_setpc_b64 s[30:31]
6778  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6779  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6780  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6781  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6782  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
6783  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6784  ret void
6785}
6786
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 2, 5, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here lane 1 = %vec0 element 2 and lanes 0, 2 and 3 = %vec1 element 2, so both
; asm-defined inputs feed the stored value (two "def" regions in the CHECKs).
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6787define void @v_shuffle_v4i16_v3i16__5_2_5_5(ptr addrspace(1) inreg %ptr) {
6788; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_2_5_5:
6789; GFX900:       ; %bb.0:
6790; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6791; GFX900-NEXT:    ;;#ASMSTART
6792; GFX900-NEXT:    ; def v[0:1]
6793; GFX900-NEXT:    ;;#ASMEND
6794; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6795; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6796; GFX900-NEXT:    ;;#ASMSTART
6797; GFX900-NEXT:    ; def v[2:3]
6798; GFX900-NEXT:    ;;#ASMEND
6799; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
6800; GFX900-NEXT:    v_perm_b32 v1, v3, v3, s4
6801; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6802; GFX900-NEXT:    s_waitcnt vmcnt(0)
6803; GFX900-NEXT:    s_setpc_b64 s[30:31]
6804;
6805; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_2_5_5:
6806; GFX90A:       ; %bb.0:
6807; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6808; GFX90A-NEXT:    ;;#ASMSTART
6809; GFX90A-NEXT:    ; def v[0:1]
6810; GFX90A-NEXT:    ;;#ASMEND
6811; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6812; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6813; GFX90A-NEXT:    ;;#ASMSTART
6814; GFX90A-NEXT:    ; def v[2:3]
6815; GFX90A-NEXT:    ;;#ASMEND
6816; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
6817; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
6818; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6819; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6820; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6821;
6822; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_2_5_5:
6823; GFX940:       ; %bb.0:
6824; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6825; GFX940-NEXT:    ;;#ASMSTART
6826; GFX940-NEXT:    ; def v[0:1]
6827; GFX940-NEXT:    ;;#ASMEND
6828; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6829; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6830; GFX940-NEXT:    ;;#ASMSTART
6831; GFX940-NEXT:    ; def v[2:3]
6832; GFX940-NEXT:    ;;#ASMEND
6833; GFX940-NEXT:    s_nop 0
6834; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
6835; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
6836; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6837; GFX940-NEXT:    s_waitcnt vmcnt(0)
6838; GFX940-NEXT:    s_setpc_b64 s[30:31]
6839  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6840  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6841  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6842  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6843  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
6844  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6845  ret void
6846}
6847
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 3, 5, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here every lane comes from %vec1: lane 1 = element 0, lanes 0, 2 and 3 =
; element 2; %vec0 does not contribute to the stored value.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6848define void @v_shuffle_v4i16_v3i16__5_3_5_5(ptr addrspace(1) inreg %ptr) {
6849; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_3_5_5:
6850; GFX900:       ; %bb.0:
6851; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6852; GFX900-NEXT:    ;;#ASMSTART
6853; GFX900-NEXT:    ; def v[0:1]
6854; GFX900-NEXT:    ;;#ASMEND
6855; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6856; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6857; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
6858; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6859; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
6860; GFX900-NEXT:    s_waitcnt vmcnt(0)
6861; GFX900-NEXT:    s_setpc_b64 s[30:31]
6862;
6863; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_3_5_5:
6864; GFX90A:       ; %bb.0:
6865; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6866; GFX90A-NEXT:    ;;#ASMSTART
6867; GFX90A-NEXT:    ; def v[0:1]
6868; GFX90A-NEXT:    ;;#ASMEND
6869; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6870; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6871; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
6872; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6873; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
6874; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6875; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6876;
6877; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_3_5_5:
6878; GFX940:       ; %bb.0:
6879; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6880; GFX940-NEXT:    ;;#ASMSTART
6881; GFX940-NEXT:    ; def v[0:1]
6882; GFX940-NEXT:    ;;#ASMEND
6883; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6884; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6885; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
6886; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6887; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
6888; GFX940-NEXT:    s_waitcnt vmcnt(0)
6889; GFX940-NEXT:    s_setpc_b64 s[30:31]
6890  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6891  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6892  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6893  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6894  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
6895  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6896  ret void
6897}
6898
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 4, 5, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here every lane comes from %vec1: lane 1 = element 1, lanes 0, 2 and 3 =
; element 2; %vec0 does not contribute to the stored value.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6899define void @v_shuffle_v4i16_v3i16__5_4_5_5(ptr addrspace(1) inreg %ptr) {
6900; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_4_5_5:
6901; GFX900:       ; %bb.0:
6902; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6903; GFX900-NEXT:    ;;#ASMSTART
6904; GFX900-NEXT:    ; def v[0:1]
6905; GFX900-NEXT:    ;;#ASMEND
6906; GFX900-NEXT:    s_mov_b32 s4, 0xffff
6907; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v0
6908; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6909; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6910; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6911; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
6912; GFX900-NEXT:    s_waitcnt vmcnt(0)
6913; GFX900-NEXT:    s_setpc_b64 s[30:31]
6914;
6915; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_4_5_5:
6916; GFX90A:       ; %bb.0:
6917; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6918; GFX90A-NEXT:    ;;#ASMSTART
6919; GFX90A-NEXT:    ; def v[0:1]
6920; GFX90A-NEXT:    ;;#ASMEND
6921; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
6922; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v0
6923; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6924; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6925; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6926; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
6927; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6928; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6929;
6930; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_4_5_5:
6931; GFX940:       ; %bb.0:
6932; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6933; GFX940-NEXT:    ;;#ASMSTART
6934; GFX940-NEXT:    ; def v[0:1]
6935; GFX940-NEXT:    ;;#ASMEND
6936; GFX940-NEXT:    s_mov_b32 s2, 0xffff
6937; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v0
6938; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6939; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6940; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6941; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
6942; GFX940-NEXT:    s_waitcnt vmcnt(0)
6943; GFX940-NEXT:    s_setpc_b64 s[30:31]
6944  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6945  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6946  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6947  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6948  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
6949  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6950  ret void
6951}
6952
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 5, poison, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here lanes 0, 1 and 3 = %vec1 element 2 and lane 2 is a poison mask entry
; (the "u" in the function name); %vec0 does not contribute to the stored value.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6953define void @v_shuffle_v4i16_v3i16__5_5_u_5(ptr addrspace(1) inreg %ptr) {
6954; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_u_5:
6955; GFX900:       ; %bb.0:
6956; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6957; GFX900-NEXT:    ;;#ASMSTART
6958; GFX900-NEXT:    ; def v[0:1]
6959; GFX900-NEXT:    ;;#ASMEND
6960; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6961; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6962; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
6963; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
6964; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
6965; GFX900-NEXT:    s_waitcnt vmcnt(0)
6966; GFX900-NEXT:    s_setpc_b64 s[30:31]
6967;
6968; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_u_5:
6969; GFX90A:       ; %bb.0:
6970; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6971; GFX90A-NEXT:    ;;#ASMSTART
6972; GFX90A-NEXT:    ; def v[0:1]
6973; GFX90A-NEXT:    ;;#ASMEND
6974; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6975; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6976; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
6977; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
6978; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
6979; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6980; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6981;
6982; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_u_5:
6983; GFX940:       ; %bb.0:
6984; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6985; GFX940-NEXT:    ;;#ASMSTART
6986; GFX940-NEXT:    ; def v[0:1]
6987; GFX940-NEXT:    ;;#ASMEND
6988; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6989; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6990; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
6991; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
6992; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
6993; GFX940-NEXT:    s_waitcnt vmcnt(0)
6994; GFX940-NEXT:    s_setpc_b64 s[30:31]
6995  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6996  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6997  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6998  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6999  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
7000  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7001  ret void
7002}
7003
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 5, 0, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here lane 2 = %vec0 element 0 and lanes 0, 1 and 3 = %vec1 element 2, so both
; asm-defined inputs feed the stored value (two "def" regions in the CHECKs).
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
7004define void @v_shuffle_v4i16_v3i16__5_5_0_5(ptr addrspace(1) inreg %ptr) {
7005; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_0_5:
7006; GFX900:       ; %bb.0:
7007; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7008; GFX900-NEXT:    ;;#ASMSTART
7009; GFX900-NEXT:    ; def v[0:1]
7010; GFX900-NEXT:    ;;#ASMEND
7011; GFX900-NEXT:    ;;#ASMSTART
7012; GFX900-NEXT:    ; def v[1:2]
7013; GFX900-NEXT:    ;;#ASMEND
7014; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7015; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7016; GFX900-NEXT:    v_perm_b32 v1, v2, v0, s4
7017; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
7018; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
7019; GFX900-NEXT:    s_waitcnt vmcnt(0)
7020; GFX900-NEXT:    s_setpc_b64 s[30:31]
7021;
7022; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_0_5:
7023; GFX90A:       ; %bb.0:
7024; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7025; GFX90A-NEXT:    ;;#ASMSTART
7026; GFX90A-NEXT:    ; def v[0:1]
7027; GFX90A-NEXT:    ;;#ASMEND
7028; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7029; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7030; GFX90A-NEXT:    ;;#ASMSTART
7031; GFX90A-NEXT:    ; def v[2:3]
7032; GFX90A-NEXT:    ;;#ASMEND
7033; GFX90A-NEXT:    v_perm_b32 v1, v3, v0, s4
7034; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
7035; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7036; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7037; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7038;
7039; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_0_5:
7040; GFX940:       ; %bb.0:
7041; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7042; GFX940-NEXT:    ;;#ASMSTART
7043; GFX940-NEXT:    ; def v[0:1]
7044; GFX940-NEXT:    ;;#ASMEND
7045; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7046; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7047; GFX940-NEXT:    ;;#ASMSTART
7048; GFX940-NEXT:    ; def v[2:3]
7049; GFX940-NEXT:    ;;#ASMEND
7050; GFX940-NEXT:    s_nop 0
7051; GFX940-NEXT:    v_perm_b32 v1, v3, v0, s2
7052; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
7053; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
7054; GFX940-NEXT:    s_waitcnt vmcnt(0)
7055; GFX940-NEXT:    s_setpc_b64 s[30:31]
7056  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7057  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7058  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7059  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7060  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
7061  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7062  ret void
7063}
7064
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 5, 1, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here lane 2 = %vec0 element 1 and lanes 0, 1 and 3 = %vec1 element 2, so both
; asm-defined inputs feed the stored value (two "def" regions in the CHECKs).
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
7065define void @v_shuffle_v4i16_v3i16__5_5_1_5(ptr addrspace(1) inreg %ptr) {
7066; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_1_5:
7067; GFX900:       ; %bb.0:
7068; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7069; GFX900-NEXT:    ;;#ASMSTART
7070; GFX900-NEXT:    ; def v[0:1]
7071; GFX900-NEXT:    ;;#ASMEND
7072; GFX900-NEXT:    ;;#ASMSTART
7073; GFX900-NEXT:    ; def v[1:2]
7074; GFX900-NEXT:    ;;#ASMEND
7075; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7076; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7077; GFX900-NEXT:    v_alignbit_b32 v1, v2, v0, 16
7078; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
7079; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
7080; GFX900-NEXT:    s_waitcnt vmcnt(0)
7081; GFX900-NEXT:    s_setpc_b64 s[30:31]
7082;
7083; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_1_5:
7084; GFX90A:       ; %bb.0:
7085; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7086; GFX90A-NEXT:    ;;#ASMSTART
7087; GFX90A-NEXT:    ; def v[0:1]
7088; GFX90A-NEXT:    ;;#ASMEND
7089; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7090; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7091; GFX90A-NEXT:    ;;#ASMSTART
7092; GFX90A-NEXT:    ; def v[2:3]
7093; GFX90A-NEXT:    ;;#ASMEND
7094; GFX90A-NEXT:    v_alignbit_b32 v1, v3, v0, 16
7095; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
7096; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7097; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7098; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7099;
7100; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_1_5:
7101; GFX940:       ; %bb.0:
7102; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7103; GFX940-NEXT:    ;;#ASMSTART
7104; GFX940-NEXT:    ; def v[0:1]
7105; GFX940-NEXT:    ;;#ASMEND
7106; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7107; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7108; GFX940-NEXT:    ;;#ASMSTART
7109; GFX940-NEXT:    ; def v[2:3]
7110; GFX940-NEXT:    ;;#ASMEND
7111; GFX940-NEXT:    s_nop 0
7112; GFX940-NEXT:    v_alignbit_b32 v1, v3, v0, 16
7113; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
7114; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
7115; GFX940-NEXT:    s_waitcnt vmcnt(0)
7116; GFX940-NEXT:    s_setpc_b64 s[30:31]
7117  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7118  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7119  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7120  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7121  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
7122  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7123  ret void
7124}
7125
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 5, 2, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here lane 2 = %vec0 element 2 and lanes 0, 1 and 3 = %vec1 element 2, so both
; asm-defined inputs feed the stored value (two "def" regions in the CHECKs).
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
7126define void @v_shuffle_v4i16_v3i16__5_5_2_5(ptr addrspace(1) inreg %ptr) {
7127; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_2_5:
7128; GFX900:       ; %bb.0:
7129; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7130; GFX900-NEXT:    ;;#ASMSTART
7131; GFX900-NEXT:    ; def v[0:1]
7132; GFX900-NEXT:    ;;#ASMEND
7133; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7134; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7135; GFX900-NEXT:    ;;#ASMSTART
7136; GFX900-NEXT:    ; def v[2:3]
7137; GFX900-NEXT:    ;;#ASMEND
7138; GFX900-NEXT:    v_perm_b32 v1, v3, v1, s4
7139; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
7140; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7141; GFX900-NEXT:    s_waitcnt vmcnt(0)
7142; GFX900-NEXT:    s_setpc_b64 s[30:31]
7143;
7144; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_2_5:
7145; GFX90A:       ; %bb.0:
7146; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7147; GFX90A-NEXT:    ;;#ASMSTART
7148; GFX90A-NEXT:    ; def v[0:1]
7149; GFX90A-NEXT:    ;;#ASMEND
7150; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7151; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7152; GFX90A-NEXT:    ;;#ASMSTART
7153; GFX90A-NEXT:    ; def v[2:3]
7154; GFX90A-NEXT:    ;;#ASMEND
7155; GFX90A-NEXT:    v_perm_b32 v1, v3, v1, s4
7156; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
7157; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7158; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7159; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7160;
7161; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_2_5:
7162; GFX940:       ; %bb.0:
7163; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7164; GFX940-NEXT:    ;;#ASMSTART
7165; GFX940-NEXT:    ; def v[0:1]
7166; GFX940-NEXT:    ;;#ASMEND
7167; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7168; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7169; GFX940-NEXT:    ;;#ASMSTART
7170; GFX940-NEXT:    ; def v[2:3]
7171; GFX940-NEXT:    ;;#ASMEND
7172; GFX940-NEXT:    s_nop 0
7173; GFX940-NEXT:    v_perm_b32 v1, v3, v1, s2
7174; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
7175; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
7176; GFX940-NEXT:    s_waitcnt vmcnt(0)
7177; GFX940-NEXT:    s_setpc_b64 s[30:31]
7178  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7179  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7180  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7181  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7182  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
7183  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7184  ret void
7185}
7186
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 5, 3, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here every lane comes from %vec1: lane 2 = element 0, lanes 0, 1 and 3 =
; element 2; %vec0 does not contribute to the stored value.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
7187define void @v_shuffle_v4i16_v3i16__5_5_3_5(ptr addrspace(1) inreg %ptr) {
7188; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_3_5:
7189; GFX900:       ; %bb.0:
7190; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7191; GFX900-NEXT:    ;;#ASMSTART
7192; GFX900-NEXT:    ; def v[0:1]
7193; GFX900-NEXT:    ;;#ASMEND
7194; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7195; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7196; GFX900-NEXT:    v_perm_b32 v2, v1, v0, s4
7197; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
7198; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
7199; GFX900-NEXT:    s_waitcnt vmcnt(0)
7200; GFX900-NEXT:    s_setpc_b64 s[30:31]
7201;
7202; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_3_5:
7203; GFX90A:       ; %bb.0:
7204; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7205; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7206; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7207; GFX90A-NEXT:    ;;#ASMSTART
7208; GFX90A-NEXT:    ; def v[0:1]
7209; GFX90A-NEXT:    ;;#ASMEND
7210; GFX90A-NEXT:    v_perm_b32 v3, v1, v0, s4
7211; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
7212; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
7213; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7214; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7215;
7216; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_3_5:
7217; GFX940:       ; %bb.0:
7218; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7219; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7220; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7221; GFX940-NEXT:    ;;#ASMSTART
7222; GFX940-NEXT:    ; def v[0:1]
7223; GFX940-NEXT:    ;;#ASMEND
7224; GFX940-NEXT:    s_nop 0
7225; GFX940-NEXT:    v_perm_b32 v3, v1, v0, s2
7226; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
7227; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
7228; GFX940-NEXT:    s_waitcnt vmcnt(0)
7229; GFX940-NEXT:    s_setpc_b64 s[30:31]
7230  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7231  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7232  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7233  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7234  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
7235  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7236  ret void
7237}
7238
; Shuffle of two <3 x i16> operands into <4 x i16> with mask <5, 5, 4, 5>.
; Both operands are the low three elements of an opaque inline-asm "=v" value.
; Mask indices 0-2 select from %extract3 (%vec0), 3-5 from %extract31 (%vec1).
; Here every lane comes from %vec1: lane 2 = element 1, lanes 0, 1 and 3 =
; element 2; %vec0 does not contribute to the stored value.
; The CHECK lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
7239define void @v_shuffle_v4i16_v3i16__5_5_4_5(ptr addrspace(1) inreg %ptr) {
7240; GFX900-LABEL: v_shuffle_v4i16_v3i16__5_5_4_5:
7241; GFX900:       ; %bb.0:
7242; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7243; GFX900-NEXT:    ;;#ASMSTART
7244; GFX900-NEXT:    ; def v[0:1]
7245; GFX900-NEXT:    ;;#ASMEND
7246; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7247; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7248; GFX900-NEXT:    v_alignbit_b32 v2, v1, v0, 16
7249; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
7250; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
7251; GFX900-NEXT:    s_waitcnt vmcnt(0)
7252; GFX900-NEXT:    s_setpc_b64 s[30:31]
7253;
7254; GFX90A-LABEL: v_shuffle_v4i16_v3i16__5_5_4_5:
7255; GFX90A:       ; %bb.0:
7256; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7257; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7258; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7259; GFX90A-NEXT:    ;;#ASMSTART
7260; GFX90A-NEXT:    ; def v[0:1]
7261; GFX90A-NEXT:    ;;#ASMEND
7262; GFX90A-NEXT:    v_alignbit_b32 v3, v1, v0, 16
7263; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
7264; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
7265; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7266; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7267;
7268; GFX940-LABEL: v_shuffle_v4i16_v3i16__5_5_4_5:
7269; GFX940:       ; %bb.0:
7270; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7271; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7272; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7273; GFX940-NEXT:    ;;#ASMSTART
7274; GFX940-NEXT:    ; def v[0:1]
7275; GFX940-NEXT:    ;;#ASMEND
7276; GFX940-NEXT:    s_nop 0
7277; GFX940-NEXT:    v_alignbit_b32 v3, v1, v0, 16
7278; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
7279; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
7280; GFX940-NEXT:    s_waitcnt vmcnt(0)
7281; GFX940-NEXT:    s_setpc_b64 s[30:31]
7282  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7283  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7284  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7285  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7286  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
7287  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7288  ret void
7289}
7290
; SGPR case: the input comes from "=s" inline asm and the result is consumed by
; inline asm pinned to s[8:9].
; The whole shuffle mask is poison, so no lane of %vec0 is required; the
; expected output below contains only the "use" asm and no "def".
7291define void @s_shuffle_v4i16_v3i16__u_u_u_u() {
7292; GFX9-LABEL: s_shuffle_v4i16_v3i16__u_u_u_u:
7293; GFX9:       ; %bb.0:
7294; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7295; GFX9-NEXT:    ;;#ASMSTART
7296; GFX9-NEXT:    ; use s[8:9]
7297; GFX9-NEXT:    ;;#ASMEND
7298; GFX9-NEXT:    s_setpc_b64 s[30:31]
7299  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7300  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7301  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> poison
7302  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7303  ret void
7304}
7305
; SGPR case: the input comes from "=s" inline asm and the result is consumed by
; inline asm pinned to s[8:9].
; Mask <0, poison, poison, poison>: lane 0 takes %extract3[0]; lanes 1-3 are
; poison. The expected output has no instruction between the "def" and "use"
; asm other than the s_nop on gfx940.
7306define void @s_shuffle_v4i16_v3i16__0_u_u_u() {
7307; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_u_u_u:
7308; GFX900:       ; %bb.0:
7309; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7310; GFX900-NEXT:    ;;#ASMSTART
7311; GFX900-NEXT:    ; def s[8:9]
7312; GFX900-NEXT:    ;;#ASMEND
7313; GFX900-NEXT:    ;;#ASMSTART
7314; GFX900-NEXT:    ; use s[8:9]
7315; GFX900-NEXT:    ;;#ASMEND
7316; GFX900-NEXT:    s_setpc_b64 s[30:31]
7317;
7318; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_u_u_u:
7319; GFX90A:       ; %bb.0:
7320; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7321; GFX90A-NEXT:    ;;#ASMSTART
7322; GFX90A-NEXT:    ; def s[8:9]
7323; GFX90A-NEXT:    ;;#ASMEND
7324; GFX90A-NEXT:    ;;#ASMSTART
7325; GFX90A-NEXT:    ; use s[8:9]
7326; GFX90A-NEXT:    ;;#ASMEND
7327; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7328;
7329; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_u_u_u:
7330; GFX940:       ; %bb.0:
7331; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7332; GFX940-NEXT:    ;;#ASMSTART
7333; GFX940-NEXT:    ; def s[8:9]
7334; GFX940-NEXT:    ;;#ASMEND
7335; GFX940-NEXT:    s_nop 0
7336; GFX940-NEXT:    ;;#ASMSTART
7337; GFX940-NEXT:    ; use s[8:9]
7338; GFX940-NEXT:    ;;#ASMEND
7339; GFX940-NEXT:    s_setpc_b64 s[30:31]
7340  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7341  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7342  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
7343  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7344  ret void
7345}
7346
; SGPR case: the input comes from "=s" inline asm and the result is consumed by
; inline asm pinned to s[8:9].
; Mask <1, poison, poison, poison>: lane 0 takes %extract3[1]; lanes 1-3 are
; poison. The expected output moves the element into place with a single
; 16-bit right shift of the first input dword.
7347define void @s_shuffle_v4i16_v3i16__1_u_u_u() {
7348; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_u_u_u:
7349; GFX900:       ; %bb.0:
7350; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7351; GFX900-NEXT:    ;;#ASMSTART
7352; GFX900-NEXT:    ; def s[4:5]
7353; GFX900-NEXT:    ;;#ASMEND
7354; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
7355; GFX900-NEXT:    ;;#ASMSTART
7356; GFX900-NEXT:    ; use s[8:9]
7357; GFX900-NEXT:    ;;#ASMEND
7358; GFX900-NEXT:    s_setpc_b64 s[30:31]
7359;
7360; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_u_u_u:
7361; GFX90A:       ; %bb.0:
7362; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7363; GFX90A-NEXT:    ;;#ASMSTART
7364; GFX90A-NEXT:    ; def s[4:5]
7365; GFX90A-NEXT:    ;;#ASMEND
7366; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
7367; GFX90A-NEXT:    ;;#ASMSTART
7368; GFX90A-NEXT:    ; use s[8:9]
7369; GFX90A-NEXT:    ;;#ASMEND
7370; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7371;
7372; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_u_u_u:
7373; GFX940:       ; %bb.0:
7374; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7375; GFX940-NEXT:    ;;#ASMSTART
7376; GFX940-NEXT:    ; def s[0:1]
7377; GFX940-NEXT:    ;;#ASMEND
7378; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
7379; GFX940-NEXT:    ;;#ASMSTART
7380; GFX940-NEXT:    ; use s[8:9]
7381; GFX940-NEXT:    ;;#ASMEND
7382; GFX940-NEXT:    s_setpc_b64 s[30:31]
7383  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7384  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7385  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
7386  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7387  ret void
7388}
7389
; SGPR case: the input comes from "=s" inline asm and the result is consumed by
; inline asm pinned to s[8:9].
; Mask <2, poison, poison, poison>: lane 0 takes %extract3[2]; lanes 1-3 are
; poison. The expected output copies the second input dword into s8.
7390define void @s_shuffle_v4i16_v3i16__2_u_u_u() {
7391; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u:
7392; GFX900:       ; %bb.0:
7393; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7394; GFX900-NEXT:    ;;#ASMSTART
7395; GFX900-NEXT:    ; def s[4:5]
7396; GFX900-NEXT:    ;;#ASMEND
7397; GFX900-NEXT:    s_mov_b32 s8, s5
7398; GFX900-NEXT:    ;;#ASMSTART
7399; GFX900-NEXT:    ; use s[8:9]
7400; GFX900-NEXT:    ;;#ASMEND
7401; GFX900-NEXT:    s_setpc_b64 s[30:31]
7402;
7403; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u:
7404; GFX90A:       ; %bb.0:
7405; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7406; GFX90A-NEXT:    ;;#ASMSTART
7407; GFX90A-NEXT:    ; def s[4:5]
7408; GFX90A-NEXT:    ;;#ASMEND
7409; GFX90A-NEXT:    s_mov_b32 s8, s5
7410; GFX90A-NEXT:    ;;#ASMSTART
7411; GFX90A-NEXT:    ; use s[8:9]
7412; GFX90A-NEXT:    ;;#ASMEND
7413; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7414;
7415; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_u_u_u:
7416; GFX940:       ; %bb.0:
7417; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7418; GFX940-NEXT:    ;;#ASMSTART
7419; GFX940-NEXT:    ; def s[0:1]
7420; GFX940-NEXT:    ;;#ASMEND
7421; GFX940-NEXT:    s_mov_b32 s8, s1
7422; GFX940-NEXT:    ;;#ASMSTART
7423; GFX940-NEXT:    ; use s[8:9]
7424; GFX940-NEXT:    ;;#ASMEND
7425; GFX940-NEXT:    s_setpc_b64 s[30:31]
7426  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7427  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7428  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
7429  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7430  ret void
7431}
7432
; SGPR case: the input comes from "=s" inline asm and the result is consumed by
; inline asm pinned to s[8:9].
; Mask <3, poison, poison, poison>: index 3 addresses element 0 of the second
; shuffle operand, which is poison here, so no lane reads %vec0. The expected
; output contains only the "use" asm and no "def".
7433define void @s_shuffle_v4i16_v3i16__3_u_u_u() {
7434; GFX9-LABEL: s_shuffle_v4i16_v3i16__3_u_u_u:
7435; GFX9:       ; %bb.0:
7436; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7437; GFX9-NEXT:    ;;#ASMSTART
7438; GFX9-NEXT:    ; use s[8:9]
7439; GFX9-NEXT:    ;;#ASMEND
7440; GFX9-NEXT:    s_setpc_b64 s[30:31]
7441  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7442  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7443  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
7444  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7445  ret void
7446}
7447
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <4, poison, poison, poison> on two <3 x i16> operands (indices 3-5
; address the second operand): lane 0 takes %extract31[1]; lanes 1-3 are
; poison. No lane reads %extract3, and the expected output shows a single
; "def" followed by a 16-bit right shift.
7448define void @s_shuffle_v4i16_v3i16__4_u_u_u() {
7449; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_u_u_u:
7450; GFX900:       ; %bb.0:
7451; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7452; GFX900-NEXT:    ;;#ASMSTART
7453; GFX900-NEXT:    ; def s[4:5]
7454; GFX900-NEXT:    ;;#ASMEND
7455; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
7456; GFX900-NEXT:    ;;#ASMSTART
7457; GFX900-NEXT:    ; use s[8:9]
7458; GFX900-NEXT:    ;;#ASMEND
7459; GFX900-NEXT:    s_setpc_b64 s[30:31]
7460;
7461; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_u_u_u:
7462; GFX90A:       ; %bb.0:
7463; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7464; GFX90A-NEXT:    ;;#ASMSTART
7465; GFX90A-NEXT:    ; def s[4:5]
7466; GFX90A-NEXT:    ;;#ASMEND
7467; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
7468; GFX90A-NEXT:    ;;#ASMSTART
7469; GFX90A-NEXT:    ; use s[8:9]
7470; GFX90A-NEXT:    ;;#ASMEND
7471; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7472;
7473; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_u_u_u:
7474; GFX940:       ; %bb.0:
7475; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7476; GFX940-NEXT:    ;;#ASMSTART
7477; GFX940-NEXT:    ; def s[0:1]
7478; GFX940-NEXT:    ;;#ASMEND
7479; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
7480; GFX940-NEXT:    ;;#ASMSTART
7481; GFX940-NEXT:    ; use s[8:9]
7482; GFX940-NEXT:    ;;#ASMEND
7483; GFX940-NEXT:    s_setpc_b64 s[30:31]
7484  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7485  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7486  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7487  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7488  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
7489  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7490  ret void
7491}
7492
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, poison, poison, poison> on two <3 x i16> operands (indices 3-5
; address the second operand): lane 0 takes %extract31[2]; lanes 1-3 are
; poison. No lane reads %extract3, and the expected output shows a single
; "def" followed by a copy of its second dword into s8.
7493define void @s_shuffle_v4i16_v3i16__5_u_u_u() {
7494; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u:
7495; GFX900:       ; %bb.0:
7496; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7497; GFX900-NEXT:    ;;#ASMSTART
7498; GFX900-NEXT:    ; def s[4:5]
7499; GFX900-NEXT:    ;;#ASMEND
7500; GFX900-NEXT:    s_mov_b32 s8, s5
7501; GFX900-NEXT:    ;;#ASMSTART
7502; GFX900-NEXT:    ; use s[8:9]
7503; GFX900-NEXT:    ;;#ASMEND
7504; GFX900-NEXT:    s_setpc_b64 s[30:31]
7505;
7506; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u:
7507; GFX90A:       ; %bb.0:
7508; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7509; GFX90A-NEXT:    ;;#ASMSTART
7510; GFX90A-NEXT:    ; def s[4:5]
7511; GFX90A-NEXT:    ;;#ASMEND
7512; GFX90A-NEXT:    s_mov_b32 s8, s5
7513; GFX90A-NEXT:    ;;#ASMSTART
7514; GFX90A-NEXT:    ; use s[8:9]
7515; GFX90A-NEXT:    ;;#ASMEND
7516; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7517;
7518; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_u_u:
7519; GFX940:       ; %bb.0:
7520; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7521; GFX940-NEXT:    ;;#ASMSTART
7522; GFX940-NEXT:    ; def s[0:1]
7523; GFX940-NEXT:    ;;#ASMEND
7524; GFX940-NEXT:    s_mov_b32 s8, s1
7525; GFX940-NEXT:    ;;#ASMSTART
7526; GFX940-NEXT:    ; use s[8:9]
7527; GFX940-NEXT:    ;;#ASMEND
7528; GFX940-NEXT:    s_setpc_b64 s[30:31]
7529  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7530  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7531  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7532  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7533  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
7534  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7535  ret void
7536}
7537
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 0, poison, poison> on two <3 x i16> operands (indices 3-5 address
; the second operand): lane 0 takes %extract31[2], lane 1 takes %extract3[0];
; lanes 2-3 are poison. Both inputs are live, and the expected output combines
; them with one s_pack_ll_b32_b16.
7538define void @s_shuffle_v4i16_v3i16__5_0_u_u() {
7539; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_u_u:
7540; GFX900:       ; %bb.0:
7541; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7542; GFX900-NEXT:    ;;#ASMSTART
7543; GFX900-NEXT:    ; def s[4:5]
7544; GFX900-NEXT:    ;;#ASMEND
7545; GFX900-NEXT:    ;;#ASMSTART
7546; GFX900-NEXT:    ; def s[6:7]
7547; GFX900-NEXT:    ;;#ASMEND
7548; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
7549; GFX900-NEXT:    ;;#ASMSTART
7550; GFX900-NEXT:    ; use s[8:9]
7551; GFX900-NEXT:    ;;#ASMEND
7552; GFX900-NEXT:    s_setpc_b64 s[30:31]
7553;
7554; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_u_u:
7555; GFX90A:       ; %bb.0:
7556; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7557; GFX90A-NEXT:    ;;#ASMSTART
7558; GFX90A-NEXT:    ; def s[4:5]
7559; GFX90A-NEXT:    ;;#ASMEND
7560; GFX90A-NEXT:    ;;#ASMSTART
7561; GFX90A-NEXT:    ; def s[6:7]
7562; GFX90A-NEXT:    ;;#ASMEND
7563; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
7564; GFX90A-NEXT:    ;;#ASMSTART
7565; GFX90A-NEXT:    ; use s[8:9]
7566; GFX90A-NEXT:    ;;#ASMEND
7567; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7568;
7569; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_u_u:
7570; GFX940:       ; %bb.0:
7571; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7572; GFX940-NEXT:    ;;#ASMSTART
7573; GFX940-NEXT:    ; def s[0:1]
7574; GFX940-NEXT:    ;;#ASMEND
7575; GFX940-NEXT:    ;;#ASMSTART
7576; GFX940-NEXT:    ; def s[2:3]
7577; GFX940-NEXT:    ;;#ASMEND
7578; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
7579; GFX940-NEXT:    ;;#ASMSTART
7580; GFX940-NEXT:    ; use s[8:9]
7581; GFX940-NEXT:    ;;#ASMEND
7582; GFX940-NEXT:    s_setpc_b64 s[30:31]
7583  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7584  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7585  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7586  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7587  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
7588  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7589  ret void
7590}
7591
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 1, poison, poison> on two <3 x i16> operands (indices 3-5 address
; the second operand): lane 0 takes %extract31[2], lane 1 takes %extract3[1];
; lanes 2-3 are poison. Both inputs are live, and the expected output combines
; them with one s_pack_lh_b32_b16.
7592define void @s_shuffle_v4i16_v3i16__5_1_u_u() {
7593; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_u_u:
7594; GFX900:       ; %bb.0:
7595; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7596; GFX900-NEXT:    ;;#ASMSTART
7597; GFX900-NEXT:    ; def s[4:5]
7598; GFX900-NEXT:    ;;#ASMEND
7599; GFX900-NEXT:    ;;#ASMSTART
7600; GFX900-NEXT:    ; def s[6:7]
7601; GFX900-NEXT:    ;;#ASMEND
7602; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
7603; GFX900-NEXT:    ;;#ASMSTART
7604; GFX900-NEXT:    ; use s[8:9]
7605; GFX900-NEXT:    ;;#ASMEND
7606; GFX900-NEXT:    s_setpc_b64 s[30:31]
7607;
7608; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_u_u:
7609; GFX90A:       ; %bb.0:
7610; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7611; GFX90A-NEXT:    ;;#ASMSTART
7612; GFX90A-NEXT:    ; def s[4:5]
7613; GFX90A-NEXT:    ;;#ASMEND
7614; GFX90A-NEXT:    ;;#ASMSTART
7615; GFX90A-NEXT:    ; def s[6:7]
7616; GFX90A-NEXT:    ;;#ASMEND
7617; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
7618; GFX90A-NEXT:    ;;#ASMSTART
7619; GFX90A-NEXT:    ; use s[8:9]
7620; GFX90A-NEXT:    ;;#ASMEND
7621; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7622;
7623; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_u_u:
7624; GFX940:       ; %bb.0:
7625; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7626; GFX940-NEXT:    ;;#ASMSTART
7627; GFX940-NEXT:    ; def s[0:1]
7628; GFX940-NEXT:    ;;#ASMEND
7629; GFX940-NEXT:    ;;#ASMSTART
7630; GFX940-NEXT:    ; def s[2:3]
7631; GFX940-NEXT:    ;;#ASMEND
7632; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
7633; GFX940-NEXT:    ;;#ASMSTART
7634; GFX940-NEXT:    ; use s[8:9]
7635; GFX940-NEXT:    ;;#ASMEND
7636; GFX940-NEXT:    s_setpc_b64 s[30:31]
7637  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7638  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7639  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7640  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7641  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
7642  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7643  ret void
7644}
7645
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 2, poison, poison> on two <3 x i16> operands (indices 3-5 address
; the second operand): lane 0 takes %extract31[2], lane 1 takes %extract3[2];
; lanes 2-3 are poison. The expected output packs the second dword of each
; input with one s_pack_ll_b32_b16.
7646define void @s_shuffle_v4i16_v3i16__5_2_u_u() {
7647; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_u_u:
7648; GFX900:       ; %bb.0:
7649; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7650; GFX900-NEXT:    ;;#ASMSTART
7651; GFX900-NEXT:    ; def s[4:5]
7652; GFX900-NEXT:    ;;#ASMEND
7653; GFX900-NEXT:    ;;#ASMSTART
7654; GFX900-NEXT:    ; def s[6:7]
7655; GFX900-NEXT:    ;;#ASMEND
7656; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
7657; GFX900-NEXT:    ;;#ASMSTART
7658; GFX900-NEXT:    ; use s[8:9]
7659; GFX900-NEXT:    ;;#ASMEND
7660; GFX900-NEXT:    s_setpc_b64 s[30:31]
7661;
7662; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_u_u:
7663; GFX90A:       ; %bb.0:
7664; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7665; GFX90A-NEXT:    ;;#ASMSTART
7666; GFX90A-NEXT:    ; def s[4:5]
7667; GFX90A-NEXT:    ;;#ASMEND
7668; GFX90A-NEXT:    ;;#ASMSTART
7669; GFX90A-NEXT:    ; def s[6:7]
7670; GFX90A-NEXT:    ;;#ASMEND
7671; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
7672; GFX90A-NEXT:    ;;#ASMSTART
7673; GFX90A-NEXT:    ; use s[8:9]
7674; GFX90A-NEXT:    ;;#ASMEND
7675; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7676;
7677; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_u_u:
7678; GFX940:       ; %bb.0:
7679; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7680; GFX940-NEXT:    ;;#ASMSTART
7681; GFX940-NEXT:    ; def s[0:1]
7682; GFX940-NEXT:    ;;#ASMEND
7683; GFX940-NEXT:    ;;#ASMSTART
7684; GFX940-NEXT:    ; def s[2:3]
7685; GFX940-NEXT:    ;;#ASMEND
7686; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
7687; GFX940-NEXT:    ;;#ASMSTART
7688; GFX940-NEXT:    ; use s[8:9]
7689; GFX940-NEXT:    ;;#ASMEND
7690; GFX940-NEXT:    s_setpc_b64 s[30:31]
7691  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7692  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7693  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7694  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7695  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
7696  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7697  ret void
7698}
7699
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 3, poison, poison> on two <3 x i16> operands (indices 3-5 address
; the second operand): lane 0 takes %extract31[2], lane 1 takes %extract31[0];
; lanes 2-3 are poison. No lane reads %extract3, and the expected output shows
; a single "def" whose two dwords are packed with s_pack_ll_b32_b16.
7700define void @s_shuffle_v4i16_v3i16__5_3_u_u() {
7701; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_u_u:
7702; GFX900:       ; %bb.0:
7703; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7704; GFX900-NEXT:    ;;#ASMSTART
7705; GFX900-NEXT:    ; def s[4:5]
7706; GFX900-NEXT:    ;;#ASMEND
7707; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
7708; GFX900-NEXT:    ;;#ASMSTART
7709; GFX900-NEXT:    ; use s[8:9]
7710; GFX900-NEXT:    ;;#ASMEND
7711; GFX900-NEXT:    s_setpc_b64 s[30:31]
7712;
7713; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_u_u:
7714; GFX90A:       ; %bb.0:
7715; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7716; GFX90A-NEXT:    ;;#ASMSTART
7717; GFX90A-NEXT:    ; def s[4:5]
7718; GFX90A-NEXT:    ;;#ASMEND
7719; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
7720; GFX90A-NEXT:    ;;#ASMSTART
7721; GFX90A-NEXT:    ; use s[8:9]
7722; GFX90A-NEXT:    ;;#ASMEND
7723; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7724;
7725; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_u_u:
7726; GFX940:       ; %bb.0:
7727; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7728; GFX940-NEXT:    ;;#ASMSTART
7729; GFX940-NEXT:    ; def s[0:1]
7730; GFX940-NEXT:    ;;#ASMEND
7731; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
7732; GFX940-NEXT:    ;;#ASMSTART
7733; GFX940-NEXT:    ; use s[8:9]
7734; GFX940-NEXT:    ;;#ASMEND
7735; GFX940-NEXT:    s_setpc_b64 s[30:31]
7736  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7737  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7738  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7739  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7740  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
7741  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7742  ret void
7743}
7744
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 4, poison, poison> on two <3 x i16> operands (indices 3-5 address
; the second operand): lane 0 takes %extract31[2], lane 1 takes %extract31[1];
; lanes 2-3 are poison. No lane reads %extract3, and the expected output shows
; a single "def" whose two dwords are packed with s_pack_lh_b32_b16.
7745define void @s_shuffle_v4i16_v3i16__5_4_u_u() {
7746; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_u_u:
7747; GFX900:       ; %bb.0:
7748; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7749; GFX900-NEXT:    ;;#ASMSTART
7750; GFX900-NEXT:    ; def s[4:5]
7751; GFX900-NEXT:    ;;#ASMEND
7752; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
7753; GFX900-NEXT:    ;;#ASMSTART
7754; GFX900-NEXT:    ; use s[8:9]
7755; GFX900-NEXT:    ;;#ASMEND
7756; GFX900-NEXT:    s_setpc_b64 s[30:31]
7757;
7758; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_u_u:
7759; GFX90A:       ; %bb.0:
7760; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7761; GFX90A-NEXT:    ;;#ASMSTART
7762; GFX90A-NEXT:    ; def s[4:5]
7763; GFX90A-NEXT:    ;;#ASMEND
7764; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
7765; GFX90A-NEXT:    ;;#ASMSTART
7766; GFX90A-NEXT:    ; use s[8:9]
7767; GFX90A-NEXT:    ;;#ASMEND
7768; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7769;
7770; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_u_u:
7771; GFX940:       ; %bb.0:
7772; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7773; GFX940-NEXT:    ;;#ASMSTART
7774; GFX940-NEXT:    ; def s[0:1]
7775; GFX940-NEXT:    ;;#ASMEND
7776; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
7777; GFX940-NEXT:    ;;#ASMSTART
7778; GFX940-NEXT:    ; use s[8:9]
7779; GFX940-NEXT:    ;;#ASMEND
7780; GFX940-NEXT:    s_setpc_b64 s[30:31]
7781  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7782  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7783  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7784  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7785  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
7786  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7787  ret void
7788}
7789
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 5, poison, poison> on two <3 x i16> operands (indices 3-5 address
; the second operand): lanes 0 and 1 both take %extract31[2]; lanes 2-3 are
; poison. No lane reads %extract3, and the expected output shows a single
; "def" whose second dword is packed with itself.
7790define void @s_shuffle_v4i16_v3i16__5_5_u_u() {
7791; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_u:
7792; GFX900:       ; %bb.0:
7793; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7794; GFX900-NEXT:    ;;#ASMSTART
7795; GFX900-NEXT:    ; def s[4:5]
7796; GFX900-NEXT:    ;;#ASMEND
7797; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
7798; GFX900-NEXT:    ;;#ASMSTART
7799; GFX900-NEXT:    ; use s[8:9]
7800; GFX900-NEXT:    ;;#ASMEND
7801; GFX900-NEXT:    s_setpc_b64 s[30:31]
7802;
7803; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_u:
7804; GFX90A:       ; %bb.0:
7805; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7806; GFX90A-NEXT:    ;;#ASMSTART
7807; GFX90A-NEXT:    ; def s[4:5]
7808; GFX90A-NEXT:    ;;#ASMEND
7809; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
7810; GFX90A-NEXT:    ;;#ASMSTART
7811; GFX90A-NEXT:    ; use s[8:9]
7812; GFX90A-NEXT:    ;;#ASMEND
7813; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7814;
7815; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_u:
7816; GFX940:       ; %bb.0:
7817; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7818; GFX940-NEXT:    ;;#ASMSTART
7819; GFX940-NEXT:    ; def s[0:1]
7820; GFX940-NEXT:    ;;#ASMEND
7821; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
7822; GFX940-NEXT:    ;;#ASMSTART
7823; GFX940-NEXT:    ; use s[8:9]
7824; GFX940-NEXT:    ;;#ASMEND
7825; GFX940-NEXT:    s_setpc_b64 s[30:31]
7826  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7827  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7828  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7829  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7830  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
7831  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7832  ret void
7833}
7834
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 5, 0, poison> on two <3 x i16> operands (indices 3-5 address the
; second operand): lanes 0 and 1 take %extract31[2], lane 2 takes %extract3[0];
; lane 3 is poison. The expected output builds s8 with a pack and s9 with a
; plain copy of the first dword of the other input.
7835define void @s_shuffle_v4i16_v3i16__5_5_0_u() {
7836; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_u:
7837; GFX900:       ; %bb.0:
7838; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7839; GFX900-NEXT:    ;;#ASMSTART
7840; GFX900-NEXT:    ; def s[4:5]
7841; GFX900-NEXT:    ;;#ASMEND
7842; GFX900-NEXT:    ;;#ASMSTART
7843; GFX900-NEXT:    ; def s[6:7]
7844; GFX900-NEXT:    ;;#ASMEND
7845; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
7846; GFX900-NEXT:    s_mov_b32 s9, s4
7847; GFX900-NEXT:    ;;#ASMSTART
7848; GFX900-NEXT:    ; use s[8:9]
7849; GFX900-NEXT:    ;;#ASMEND
7850; GFX900-NEXT:    s_setpc_b64 s[30:31]
7851;
7852; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_u:
7853; GFX90A:       ; %bb.0:
7854; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7855; GFX90A-NEXT:    ;;#ASMSTART
7856; GFX90A-NEXT:    ; def s[4:5]
7857; GFX90A-NEXT:    ;;#ASMEND
7858; GFX90A-NEXT:    ;;#ASMSTART
7859; GFX90A-NEXT:    ; def s[6:7]
7860; GFX90A-NEXT:    ;;#ASMEND
7861; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
7862; GFX90A-NEXT:    s_mov_b32 s9, s4
7863; GFX90A-NEXT:    ;;#ASMSTART
7864; GFX90A-NEXT:    ; use s[8:9]
7865; GFX90A-NEXT:    ;;#ASMEND
7866; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7867;
7868; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_u:
7869; GFX940:       ; %bb.0:
7870; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7871; GFX940-NEXT:    ;;#ASMSTART
7872; GFX940-NEXT:    ; def s[0:1]
7873; GFX940-NEXT:    ;;#ASMEND
7874; GFX940-NEXT:    ;;#ASMSTART
7875; GFX940-NEXT:    ; def s[2:3]
7876; GFX940-NEXT:    ;;#ASMEND
7877; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
7878; GFX940-NEXT:    s_mov_b32 s9, s0
7879; GFX940-NEXT:    ;;#ASMSTART
7880; GFX940-NEXT:    ; use s[8:9]
7881; GFX940-NEXT:    ;;#ASMEND
7882; GFX940-NEXT:    s_setpc_b64 s[30:31]
7883  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7884  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7885  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7886  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7887  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
7888  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7889  ret void
7890}
7891
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 5, 1, poison> on two <3 x i16> operands (indices 3-5 address the
; second operand): lanes 0 and 1 take %extract31[2], lane 2 takes %extract3[1];
; lane 3 is poison. The expected output builds s9 with a 16-bit right shift
; and s8 with a pack.
7892define void @s_shuffle_v4i16_v3i16__5_5_1_u() {
7893; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_u:
7894; GFX900:       ; %bb.0:
7895; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7896; GFX900-NEXT:    ;;#ASMSTART
7897; GFX900-NEXT:    ; def s[4:5]
7898; GFX900-NEXT:    ;;#ASMEND
7899; GFX900-NEXT:    ;;#ASMSTART
7900; GFX900-NEXT:    ; def s[6:7]
7901; GFX900-NEXT:    ;;#ASMEND
7902; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
7903; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
7904; GFX900-NEXT:    ;;#ASMSTART
7905; GFX900-NEXT:    ; use s[8:9]
7906; GFX900-NEXT:    ;;#ASMEND
7907; GFX900-NEXT:    s_setpc_b64 s[30:31]
7908;
7909; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_u:
7910; GFX90A:       ; %bb.0:
7911; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7912; GFX90A-NEXT:    ;;#ASMSTART
7913; GFX90A-NEXT:    ; def s[4:5]
7914; GFX90A-NEXT:    ;;#ASMEND
7915; GFX90A-NEXT:    ;;#ASMSTART
7916; GFX90A-NEXT:    ; def s[6:7]
7917; GFX90A-NEXT:    ;;#ASMEND
7918; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
7919; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
7920; GFX90A-NEXT:    ;;#ASMSTART
7921; GFX90A-NEXT:    ; use s[8:9]
7922; GFX90A-NEXT:    ;;#ASMEND
7923; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7924;
7925; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_u:
7926; GFX940:       ; %bb.0:
7927; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7928; GFX940-NEXT:    ;;#ASMSTART
7929; GFX940-NEXT:    ; def s[0:1]
7930; GFX940-NEXT:    ;;#ASMEND
7931; GFX940-NEXT:    ;;#ASMSTART
7932; GFX940-NEXT:    ; def s[2:3]
7933; GFX940-NEXT:    ;;#ASMEND
7934; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
7935; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
7936; GFX940-NEXT:    ;;#ASMSTART
7937; GFX940-NEXT:    ; use s[8:9]
7938; GFX940-NEXT:    ;;#ASMEND
7939; GFX940-NEXT:    s_setpc_b64 s[30:31]
7940  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7941  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7942  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7943  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7944  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
7945  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
7946  ret void
7947}
7948
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 5, 2, poison> on two <3 x i16> operands (indices 3-5 address the
; second operand): lanes 0 and 1 take %extract31[2], lane 2 takes %extract3[2];
; lane 3 is poison. In the expected output one "def" is emitted directly into
; s[8:9] and only s8 is rewritten afterwards, so s9 needs no extra instruction.
7949define void @s_shuffle_v4i16_v3i16__5_5_2_u() {
7950; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_u:
7951; GFX900:       ; %bb.0:
7952; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7953; GFX900-NEXT:    ;;#ASMSTART
7954; GFX900-NEXT:    ; def s[8:9]
7955; GFX900-NEXT:    ;;#ASMEND
7956; GFX900-NEXT:    ;;#ASMSTART
7957; GFX900-NEXT:    ; def s[4:5]
7958; GFX900-NEXT:    ;;#ASMEND
7959; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
7960; GFX900-NEXT:    ;;#ASMSTART
7961; GFX900-NEXT:    ; use s[8:9]
7962; GFX900-NEXT:    ;;#ASMEND
7963; GFX900-NEXT:    s_setpc_b64 s[30:31]
7964;
7965; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_u:
7966; GFX90A:       ; %bb.0:
7967; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7968; GFX90A-NEXT:    ;;#ASMSTART
7969; GFX90A-NEXT:    ; def s[8:9]
7970; GFX90A-NEXT:    ;;#ASMEND
7971; GFX90A-NEXT:    ;;#ASMSTART
7972; GFX90A-NEXT:    ; def s[4:5]
7973; GFX90A-NEXT:    ;;#ASMEND
7974; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
7975; GFX90A-NEXT:    ;;#ASMSTART
7976; GFX90A-NEXT:    ; use s[8:9]
7977; GFX90A-NEXT:    ;;#ASMEND
7978; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7979;
7980; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_u:
7981; GFX940:       ; %bb.0:
7982; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7983; GFX940-NEXT:    ;;#ASMSTART
7984; GFX940-NEXT:    ; def s[8:9]
7985; GFX940-NEXT:    ;;#ASMEND
7986; GFX940-NEXT:    ;;#ASMSTART
7987; GFX940-NEXT:    ; def s[0:1]
7988; GFX940-NEXT:    ;;#ASMEND
7989; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
7990; GFX940-NEXT:    ;;#ASMSTART
7991; GFX940-NEXT:    ; use s[8:9]
7992; GFX940-NEXT:    ;;#ASMEND
7993; GFX940-NEXT:    s_setpc_b64 s[30:31]
7994  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7995  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7996  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7997  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7998  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
7999  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8000  ret void
8001}
8002
; SGPR case: both inputs come from "=s" inline asm and the result is consumed
; by inline asm pinned to s[8:9].
; Mask <5, 5, 3, poison> on two <3 x i16> operands (indices 3-5 address the
; second operand): lanes 0 and 1 take %extract31[2], lane 2 takes
; %extract31[0]; lane 3 is poison. No lane reads %extract3, and the expected
; output shows a single "def" feeding both the pack into s8 and the copy
; into s9.
8003define void @s_shuffle_v4i16_v3i16__5_5_3_u() {
8004; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_u:
8005; GFX900:       ; %bb.0:
8006; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8007; GFX900-NEXT:    ;;#ASMSTART
8008; GFX900-NEXT:    ; def s[4:5]
8009; GFX900-NEXT:    ;;#ASMEND
8010; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8011; GFX900-NEXT:    s_mov_b32 s9, s4
8012; GFX900-NEXT:    ;;#ASMSTART
8013; GFX900-NEXT:    ; use s[8:9]
8014; GFX900-NEXT:    ;;#ASMEND
8015; GFX900-NEXT:    s_setpc_b64 s[30:31]
8016;
8017; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_u:
8018; GFX90A:       ; %bb.0:
8019; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8020; GFX90A-NEXT:    ;;#ASMSTART
8021; GFX90A-NEXT:    ; def s[4:5]
8022; GFX90A-NEXT:    ;;#ASMEND
8023; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8024; GFX90A-NEXT:    s_mov_b32 s9, s4
8025; GFX90A-NEXT:    ;;#ASMSTART
8026; GFX90A-NEXT:    ; use s[8:9]
8027; GFX90A-NEXT:    ;;#ASMEND
8028; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8029;
8030; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_u:
8031; GFX940:       ; %bb.0:
8032; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8033; GFX940-NEXT:    ;;#ASMSTART
8034; GFX940-NEXT:    ; def s[0:1]
8035; GFX940-NEXT:    ;;#ASMEND
8036; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
8037; GFX940-NEXT:    s_mov_b32 s9, s0
8038; GFX940-NEXT:    ;;#ASMSTART
8039; GFX940-NEXT:    ; use s[8:9]
8040; GFX940-NEXT:    ;;#ASMEND
8041; GFX940-NEXT:    s_setpc_b64 s[30:31]
8042  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8043  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8044  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8045  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8046  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
8047  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8048  ret void
8049}
8050
; Scalar-register ("=s") case, mask <5, 5, 4, poison>. All defined lanes read
; the second operand (%extract31 elements 2, 2, 1); lane 3 is poison and
; %vec0/%extract3 are never selected, so only one "; def" is expected.
; The checks expect a 16-bit right shift producing s9 and one
; s_pack_ll_b32_b16 producing s8.
8051define void @s_shuffle_v4i16_v3i16__5_5_4_u() {
8052; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_u:
8053; GFX900:       ; %bb.0:
8054; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8055; GFX900-NEXT:    ;;#ASMSTART
8056; GFX900-NEXT:    ; def s[4:5]
8057; GFX900-NEXT:    ;;#ASMEND
8058; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
8059; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8060; GFX900-NEXT:    ;;#ASMSTART
8061; GFX900-NEXT:    ; use s[8:9]
8062; GFX900-NEXT:    ;;#ASMEND
8063; GFX900-NEXT:    s_setpc_b64 s[30:31]
8064;
8065; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_u:
8066; GFX90A:       ; %bb.0:
8067; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8068; GFX90A-NEXT:    ;;#ASMSTART
8069; GFX90A-NEXT:    ; def s[4:5]
8070; GFX90A-NEXT:    ;;#ASMEND
8071; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
8072; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8073; GFX90A-NEXT:    ;;#ASMSTART
8074; GFX90A-NEXT:    ; use s[8:9]
8075; GFX90A-NEXT:    ;;#ASMEND
8076; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8077;
8078; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_u:
8079; GFX940:       ; %bb.0:
8080; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8081; GFX940-NEXT:    ;;#ASMSTART
8082; GFX940-NEXT:    ; def s[0:1]
8083; GFX940-NEXT:    ;;#ASMEND
8084; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
8085; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
8086; GFX940-NEXT:    ;;#ASMSTART
8087; GFX940-NEXT:    ; use s[8:9]
8088; GFX940-NEXT:    ;;#ASMEND
8089; GFX940-NEXT:    s_setpc_b64 s[30:31]
8090  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8091  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8092  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8093  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8094  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
8095  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8096  ret void
8097}
8098
; Scalar-register ("=s") case, mask <5, 5, 5, poison>. Lanes 0-2 all read
; element 2 of the second operand (%extract31); lane 3 is poison and
; %vec0/%extract3 are never selected. All three run lines share one check
; body here: the single "; def" lands directly in s[8:9] and only s8 is
; rewritten with s_pack_ll_b32_b16 before the use.
8099define void @s_shuffle_v4i16_v3i16__5_5_5_u() {
8100; GFX9-LABEL: s_shuffle_v4i16_v3i16__5_5_5_u:
8101; GFX9:       ; %bb.0:
8102; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8103; GFX9-NEXT:    ;;#ASMSTART
8104; GFX9-NEXT:    ; def s[8:9]
8105; GFX9-NEXT:    ;;#ASMEND
8106; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
8107; GFX9-NEXT:    ;;#ASMSTART
8108; GFX9-NEXT:    ; use s[8:9]
8109; GFX9-NEXT:    ;;#ASMEND
8110; GFX9-NEXT:    s_setpc_b64 s[30:31]
8111  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8112  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8113  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8114  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8115  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
8116  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8117  ret void
8118}
8119
; Scalar-register ("=s") case, mask <5, 5, 5, 0>. Lanes 0-2 read element 2 of
; the second operand (%extract31) and lane 3 reads element 0 of the first
; operand (%extract3), so both asm defs stay live and two "; def" blocks are
; expected. Each result dword is built with one s_pack_ll_b32_b16.
8120define void @s_shuffle_v4i16_v3i16__5_5_5_0() {
8121; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_0:
8122; GFX900:       ; %bb.0:
8123; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8124; GFX900-NEXT:    ;;#ASMSTART
8125; GFX900-NEXT:    ; def s[4:5]
8126; GFX900-NEXT:    ;;#ASMEND
8127; GFX900-NEXT:    ;;#ASMSTART
8128; GFX900-NEXT:    ; def s[6:7]
8129; GFX900-NEXT:    ;;#ASMEND
8130; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s4
8131; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
8132; GFX900-NEXT:    ;;#ASMSTART
8133; GFX900-NEXT:    ; use s[8:9]
8134; GFX900-NEXT:    ;;#ASMEND
8135; GFX900-NEXT:    s_setpc_b64 s[30:31]
8136;
8137; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_0:
8138; GFX90A:       ; %bb.0:
8139; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8140; GFX90A-NEXT:    ;;#ASMSTART
8141; GFX90A-NEXT:    ; def s[4:5]
8142; GFX90A-NEXT:    ;;#ASMEND
8143; GFX90A-NEXT:    ;;#ASMSTART
8144; GFX90A-NEXT:    ; def s[6:7]
8145; GFX90A-NEXT:    ;;#ASMEND
8146; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s4
8147; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
8148; GFX90A-NEXT:    ;;#ASMSTART
8149; GFX90A-NEXT:    ; use s[8:9]
8150; GFX90A-NEXT:    ;;#ASMEND
8151; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8152;
8153; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_0:
8154; GFX940:       ; %bb.0:
8155; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8156; GFX940-NEXT:    ;;#ASMSTART
8157; GFX940-NEXT:    ; def s[0:1]
8158; GFX940-NEXT:    ;;#ASMEND
8159; GFX940-NEXT:    ;;#ASMSTART
8160; GFX940-NEXT:    ; def s[2:3]
8161; GFX940-NEXT:    ;;#ASMEND
8162; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s0
8163; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
8164; GFX940-NEXT:    ;;#ASMSTART
8165; GFX940-NEXT:    ; use s[8:9]
8166; GFX940-NEXT:    ;;#ASMEND
8167; GFX940-NEXT:    s_setpc_b64 s[30:31]
8168  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8169  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8170  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8171  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8172  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 0>
8173  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8174  ret void
8175}
8176
; Scalar-register ("=s") case, mask <5, 5, 5, 1>. Lanes 0-2 read element 2 of
; the second operand (%extract31) and lane 3 reads element 1 of the first
; operand (%extract3), so both asm defs are expected. The checks expect
; s_pack_lh_b32_b16 for s9 (differs from the __5_5_5_0 variant, which uses
; the _ll form) and s_pack_ll_b32_b16 for s8.
8177define void @s_shuffle_v4i16_v3i16__5_5_5_1() {
8178; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_1:
8179; GFX900:       ; %bb.0:
8180; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8181; GFX900-NEXT:    ;;#ASMSTART
8182; GFX900-NEXT:    ; def s[4:5]
8183; GFX900-NEXT:    ;;#ASMEND
8184; GFX900-NEXT:    ;;#ASMSTART
8185; GFX900-NEXT:    ; def s[6:7]
8186; GFX900-NEXT:    ;;#ASMEND
8187; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s7, s4
8188; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
8189; GFX900-NEXT:    ;;#ASMSTART
8190; GFX900-NEXT:    ; use s[8:9]
8191; GFX900-NEXT:    ;;#ASMEND
8192; GFX900-NEXT:    s_setpc_b64 s[30:31]
8193;
8194; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_1:
8195; GFX90A:       ; %bb.0:
8196; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8197; GFX90A-NEXT:    ;;#ASMSTART
8198; GFX90A-NEXT:    ; def s[4:5]
8199; GFX90A-NEXT:    ;;#ASMEND
8200; GFX90A-NEXT:    ;;#ASMSTART
8201; GFX90A-NEXT:    ; def s[6:7]
8202; GFX90A-NEXT:    ;;#ASMEND
8203; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s7, s4
8204; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
8205; GFX90A-NEXT:    ;;#ASMSTART
8206; GFX90A-NEXT:    ; use s[8:9]
8207; GFX90A-NEXT:    ;;#ASMEND
8208; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8209;
8210; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_1:
8211; GFX940:       ; %bb.0:
8212; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8213; GFX940-NEXT:    ;;#ASMSTART
8214; GFX940-NEXT:    ; def s[0:1]
8215; GFX940-NEXT:    ;;#ASMEND
8216; GFX940-NEXT:    ;;#ASMSTART
8217; GFX940-NEXT:    ; def s[2:3]
8218; GFX940-NEXT:    ;;#ASMEND
8219; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s3, s0
8220; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
8221; GFX940-NEXT:    ;;#ASMSTART
8222; GFX940-NEXT:    ; use s[8:9]
8223; GFX940-NEXT:    ;;#ASMEND
8224; GFX940-NEXT:    s_setpc_b64 s[30:31]
8225  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8226  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8227  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8228  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8229  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
8230  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8231  ret void
8232}
8233
; Scalar-register ("=s") case, mask <5, 5, 5, 2>. Lanes 0-2 read element 2 of
; the second operand (%extract31) and lane 3 reads element 2 of the first
; operand (%extract3), so both asm defs are expected. Both result dwords are
; built with s_pack_ll_b32_b16, each sourcing the second register of a
; def'd pair.
8234define void @s_shuffle_v4i16_v3i16__5_5_5_2() {
8235; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_2:
8236; GFX900:       ; %bb.0:
8237; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8238; GFX900-NEXT:    ;;#ASMSTART
8239; GFX900-NEXT:    ; def s[4:5]
8240; GFX900-NEXT:    ;;#ASMEND
8241; GFX900-NEXT:    ;;#ASMSTART
8242; GFX900-NEXT:    ; def s[6:7]
8243; GFX900-NEXT:    ;;#ASMEND
8244; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s5
8245; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
8246; GFX900-NEXT:    ;;#ASMSTART
8247; GFX900-NEXT:    ; use s[8:9]
8248; GFX900-NEXT:    ;;#ASMEND
8249; GFX900-NEXT:    s_setpc_b64 s[30:31]
8250;
8251; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_2:
8252; GFX90A:       ; %bb.0:
8253; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8254; GFX90A-NEXT:    ;;#ASMSTART
8255; GFX90A-NEXT:    ; def s[4:5]
8256; GFX90A-NEXT:    ;;#ASMEND
8257; GFX90A-NEXT:    ;;#ASMSTART
8258; GFX90A-NEXT:    ; def s[6:7]
8259; GFX90A-NEXT:    ;;#ASMEND
8260; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s5
8261; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
8262; GFX90A-NEXT:    ;;#ASMSTART
8263; GFX90A-NEXT:    ; use s[8:9]
8264; GFX90A-NEXT:    ;;#ASMEND
8265; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8266;
8267; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_2:
8268; GFX940:       ; %bb.0:
8269; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8270; GFX940-NEXT:    ;;#ASMSTART
8271; GFX940-NEXT:    ; def s[0:1]
8272; GFX940-NEXT:    ;;#ASMEND
8273; GFX940-NEXT:    ;;#ASMSTART
8274; GFX940-NEXT:    ; def s[2:3]
8275; GFX940-NEXT:    ;;#ASMEND
8276; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s1
8277; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
8278; GFX940-NEXT:    ;;#ASMSTART
8279; GFX940-NEXT:    ; use s[8:9]
8280; GFX940-NEXT:    ;;#ASMEND
8281; GFX940-NEXT:    s_setpc_b64 s[30:31]
8282  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8283  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8284  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8285  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8286  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
8287  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8288  ret void
8289}
8290
; Scalar-register ("=s") case, mask <5, 5, 5, 3>. Every lane reads the second
; operand (%extract31 elements 2, 2, 2, 0); %vec0/%extract3 are never
; selected, so only one "; def" is expected. Both result dwords are built
; with s_pack_ll_b32_b16 from that single register pair.
8291define void @s_shuffle_v4i16_v3i16__5_5_5_3() {
8292; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_3:
8293; GFX900:       ; %bb.0:
8294; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8295; GFX900-NEXT:    ;;#ASMSTART
8296; GFX900-NEXT:    ; def s[4:5]
8297; GFX900-NEXT:    ;;#ASMEND
8298; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
8299; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8300; GFX900-NEXT:    ;;#ASMSTART
8301; GFX900-NEXT:    ; use s[8:9]
8302; GFX900-NEXT:    ;;#ASMEND
8303; GFX900-NEXT:    s_setpc_b64 s[30:31]
8304;
8305; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_3:
8306; GFX90A:       ; %bb.0:
8307; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8308; GFX90A-NEXT:    ;;#ASMSTART
8309; GFX90A-NEXT:    ; def s[4:5]
8310; GFX90A-NEXT:    ;;#ASMEND
8311; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
8312; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8313; GFX90A-NEXT:    ;;#ASMSTART
8314; GFX90A-NEXT:    ; use s[8:9]
8315; GFX90A-NEXT:    ;;#ASMEND
8316; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8317;
8318; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_3:
8319; GFX940:       ; %bb.0:
8320; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8321; GFX940-NEXT:    ;;#ASMSTART
8322; GFX940-NEXT:    ; def s[0:1]
8323; GFX940-NEXT:    ;;#ASMEND
8324; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
8325; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
8326; GFX940-NEXT:    ;;#ASMSTART
8327; GFX940-NEXT:    ; use s[8:9]
8328; GFX940-NEXT:    ;;#ASMEND
8329; GFX940-NEXT:    s_setpc_b64 s[30:31]
8330  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8331  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8332  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8333  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8334  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
8335  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8336  ret void
8337}
8338
; Scalar-register ("=s") case, mask <5, 5, 5, 4>. Every lane reads the second
; operand (%extract31 elements 2, 2, 2, 1); %vec0/%extract3 are never
; selected, so only one "; def" is expected. The checks expect
; s_pack_lh_b32_b16 for s9 and s_pack_ll_b32_b16 for s8.
8339define void @s_shuffle_v4i16_v3i16__5_5_5_4() {
8340; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_4:
8341; GFX900:       ; %bb.0:
8342; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8343; GFX900-NEXT:    ;;#ASMSTART
8344; GFX900-NEXT:    ; def s[4:5]
8345; GFX900-NEXT:    ;;#ASMEND
8346; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s5, s4
8347; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8348; GFX900-NEXT:    ;;#ASMSTART
8349; GFX900-NEXT:    ; use s[8:9]
8350; GFX900-NEXT:    ;;#ASMEND
8351; GFX900-NEXT:    s_setpc_b64 s[30:31]
8352;
8353; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_4:
8354; GFX90A:       ; %bb.0:
8355; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8356; GFX90A-NEXT:    ;;#ASMSTART
8357; GFX90A-NEXT:    ; def s[4:5]
8358; GFX90A-NEXT:    ;;#ASMEND
8359; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s5, s4
8360; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8361; GFX90A-NEXT:    ;;#ASMSTART
8362; GFX90A-NEXT:    ; use s[8:9]
8363; GFX90A-NEXT:    ;;#ASMEND
8364; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8365;
8366; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_4:
8367; GFX940:       ; %bb.0:
8368; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8369; GFX940-NEXT:    ;;#ASMSTART
8370; GFX940-NEXT:    ; def s[0:1]
8371; GFX940-NEXT:    ;;#ASMEND
8372; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s1, s0
8373; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
8374; GFX940-NEXT:    ;;#ASMSTART
8375; GFX940-NEXT:    ; use s[8:9]
8376; GFX940-NEXT:    ;;#ASMEND
8377; GFX940-NEXT:    s_setpc_b64 s[30:31]
8378  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8379  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8380  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8381  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8382  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
8383  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8384  ret void
8385}
8386
; Scalar-register ("=s") case, mask <5, 5, 5, 5>: a splat of element 2 of the
; second operand (%extract31). %vec0/%extract3 are never selected, so only
; one "; def" is expected. The checks expect s8 to be packed once with
; s_pack_ll_b32_b16 and then copied into s9 with s_mov_b32.
8387define void @s_shuffle_v4i16_v3i16__5_5_5_5() {
8388; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_5_5:
8389; GFX900:       ; %bb.0:
8390; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8391; GFX900-NEXT:    ;;#ASMSTART
8392; GFX900-NEXT:    ; def s[4:5]
8393; GFX900-NEXT:    ;;#ASMEND
8394; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8395; GFX900-NEXT:    s_mov_b32 s9, s8
8396; GFX900-NEXT:    ;;#ASMSTART
8397; GFX900-NEXT:    ; use s[8:9]
8398; GFX900-NEXT:    ;;#ASMEND
8399; GFX900-NEXT:    s_setpc_b64 s[30:31]
8400;
8401; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_5_5:
8402; GFX90A:       ; %bb.0:
8403; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8404; GFX90A-NEXT:    ;;#ASMSTART
8405; GFX90A-NEXT:    ; def s[4:5]
8406; GFX90A-NEXT:    ;;#ASMEND
8407; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8408; GFX90A-NEXT:    s_mov_b32 s9, s8
8409; GFX90A-NEXT:    ;;#ASMSTART
8410; GFX90A-NEXT:    ; use s[8:9]
8411; GFX90A-NEXT:    ;;#ASMEND
8412; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8413;
8414; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_5_5:
8415; GFX940:       ; %bb.0:
8416; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8417; GFX940-NEXT:    ;;#ASMSTART
8418; GFX940-NEXT:    ; def s[0:1]
8419; GFX940-NEXT:    ;;#ASMEND
8420; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
8421; GFX940-NEXT:    s_mov_b32 s9, s8
8422; GFX940-NEXT:    ;;#ASMSTART
8423; GFX940-NEXT:    ; use s[8:9]
8424; GFX940-NEXT:    ;;#ASMEND
8425; GFX940-NEXT:    s_setpc_b64 s[30:31]
8426  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8427  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8428  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8429  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8430  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
8431  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8432  ret void
8433}
8434
; Scalar-register ("=s") single-input case: the second shuffle operand is
; poison and the mask is <poison, 0, 0, 0>, so lanes 1-3 all read element 0
; of %extract3 and lane 0 is unconstrained. The checks expect s9 from
; s_pack_ll_b32_b16 and s8 from a 16-bit left shift of the same source
; register.
8435define void @s_shuffle_v4i16_v3i16__u_0_0_0() {
8436; GFX900-LABEL: s_shuffle_v4i16_v3i16__u_0_0_0:
8437; GFX900:       ; %bb.0:
8438; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8439; GFX900-NEXT:    ;;#ASMSTART
8440; GFX900-NEXT:    ; def s[4:5]
8441; GFX900-NEXT:    ;;#ASMEND
8442; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8443; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
8444; GFX900-NEXT:    ;;#ASMSTART
8445; GFX900-NEXT:    ; use s[8:9]
8446; GFX900-NEXT:    ;;#ASMEND
8447; GFX900-NEXT:    s_setpc_b64 s[30:31]
8448;
8449; GFX90A-LABEL: s_shuffle_v4i16_v3i16__u_0_0_0:
8450; GFX90A:       ; %bb.0:
8451; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8452; GFX90A-NEXT:    ;;#ASMSTART
8453; GFX90A-NEXT:    ; def s[4:5]
8454; GFX90A-NEXT:    ;;#ASMEND
8455; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8456; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
8457; GFX90A-NEXT:    ;;#ASMSTART
8458; GFX90A-NEXT:    ; use s[8:9]
8459; GFX90A-NEXT:    ;;#ASMEND
8460; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8461;
8462; GFX940-LABEL: s_shuffle_v4i16_v3i16__u_0_0_0:
8463; GFX940:       ; %bb.0:
8464; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8465; GFX940-NEXT:    ;;#ASMSTART
8466; GFX940-NEXT:    ; def s[0:1]
8467; GFX940-NEXT:    ;;#ASMEND
8468; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
8469; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
8470; GFX940-NEXT:    ;;#ASMSTART
8471; GFX940-NEXT:    ; use s[8:9]
8472; GFX940-NEXT:    ;;#ASMEND
8473; GFX940-NEXT:    s_setpc_b64 s[30:31]
8474  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8475  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8476  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
8477  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8478  ret void
8479}
8480
; Scalar-register ("=s") single-input case: the second shuffle operand is
; poison and the zeroinitializer mask is <0, 0, 0, 0>, i.e. a splat of
; element 0 of %extract3. The checks expect s8 to be packed once with
; s_pack_ll_b32_b16 and then copied into s9 with s_mov_b32.
8481define void @s_shuffle_v4i16_v3i16__0_0_0_0() {
8482; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_0_0_0:
8483; GFX900:       ; %bb.0:
8484; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8485; GFX900-NEXT:    ;;#ASMSTART
8486; GFX900-NEXT:    ; def s[4:5]
8487; GFX900-NEXT:    ;;#ASMEND
8488; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8489; GFX900-NEXT:    s_mov_b32 s9, s8
8490; GFX900-NEXT:    ;;#ASMSTART
8491; GFX900-NEXT:    ; use s[8:9]
8492; GFX900-NEXT:    ;;#ASMEND
8493; GFX900-NEXT:    s_setpc_b64 s[30:31]
8494;
8495; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_0_0_0:
8496; GFX90A:       ; %bb.0:
8497; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8498; GFX90A-NEXT:    ;;#ASMSTART
8499; GFX90A-NEXT:    ; def s[4:5]
8500; GFX90A-NEXT:    ;;#ASMEND
8501; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8502; GFX90A-NEXT:    s_mov_b32 s9, s8
8503; GFX90A-NEXT:    ;;#ASMSTART
8504; GFX90A-NEXT:    ; use s[8:9]
8505; GFX90A-NEXT:    ;;#ASMEND
8506; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8507;
8508; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_0_0_0:
8509; GFX940:       ; %bb.0:
8510; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8511; GFX940-NEXT:    ;;#ASMSTART
8512; GFX940-NEXT:    ; def s[0:1]
8513; GFX940-NEXT:    ;;#ASMEND
8514; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
8515; GFX940-NEXT:    s_mov_b32 s9, s8
8516; GFX940-NEXT:    ;;#ASMSTART
8517; GFX940-NEXT:    ; use s[8:9]
8518; GFX940-NEXT:    ;;#ASMEND
8519; GFX940-NEXT:    s_setpc_b64 s[30:31]
8520  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8521  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8522  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> zeroinitializer
8523  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8524  ret void
8525}
8526
; Scalar-register ("=s") single-input case: the second shuffle operand is
; poison and the mask is <1, 0, 0, 0>, so lane 0 reads element 1 of %extract3
; and lanes 1-3 read element 0. The checks expect a 16-bit right shift
; (written over the second register of the def'd pair) followed by two
; s_pack_ll_b32_b16 instructions.
8527define void @s_shuffle_v4i16_v3i16__1_0_0_0() {
8528; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_0_0_0:
8529; GFX900:       ; %bb.0:
8530; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8531; GFX900-NEXT:    ;;#ASMSTART
8532; GFX900-NEXT:    ; def s[4:5]
8533; GFX900-NEXT:    ;;#ASMEND
8534; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
8535; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8536; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8537; GFX900-NEXT:    ;;#ASMSTART
8538; GFX900-NEXT:    ; use s[8:9]
8539; GFX900-NEXT:    ;;#ASMEND
8540; GFX900-NEXT:    s_setpc_b64 s[30:31]
8541;
8542; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_0_0_0:
8543; GFX90A:       ; %bb.0:
8544; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8545; GFX90A-NEXT:    ;;#ASMSTART
8546; GFX90A-NEXT:    ; def s[4:5]
8547; GFX90A-NEXT:    ;;#ASMEND
8548; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
8549; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8550; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8551; GFX90A-NEXT:    ;;#ASMSTART
8552; GFX90A-NEXT:    ; use s[8:9]
8553; GFX90A-NEXT:    ;;#ASMEND
8554; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8555;
8556; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_0_0_0:
8557; GFX940:       ; %bb.0:
8558; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8559; GFX940-NEXT:    ;;#ASMSTART
8560; GFX940-NEXT:    ; def s[0:1]
8561; GFX940-NEXT:    ;;#ASMEND
8562; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
8563; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8564; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
8565; GFX940-NEXT:    ;;#ASMSTART
8566; GFX940-NEXT:    ; use s[8:9]
8567; GFX940-NEXT:    ;;#ASMEND
8568; GFX940-NEXT:    s_setpc_b64 s[30:31]
8569  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8570  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8571  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
8572  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8573  ret void
8574}
8575
; Scalar-register ("=s") single-input case: the second shuffle operand is
; poison and the mask is <2, 0, 0, 0>, so lane 0 reads element 2 of %extract3
; and lanes 1-3 read element 0. The checks expect two s_pack_ll_b32_b16
; instructions and no shift.
8576define void @s_shuffle_v4i16_v3i16__2_0_0_0() {
8577; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_0_0_0:
8578; GFX900:       ; %bb.0:
8579; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8580; GFX900-NEXT:    ;;#ASMSTART
8581; GFX900-NEXT:    ; def s[4:5]
8582; GFX900-NEXT:    ;;#ASMEND
8583; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8584; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8585; GFX900-NEXT:    ;;#ASMSTART
8586; GFX900-NEXT:    ; use s[8:9]
8587; GFX900-NEXT:    ;;#ASMEND
8588; GFX900-NEXT:    s_setpc_b64 s[30:31]
8589;
8590; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_0_0_0:
8591; GFX90A:       ; %bb.0:
8592; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8593; GFX90A-NEXT:    ;;#ASMSTART
8594; GFX90A-NEXT:    ; def s[4:5]
8595; GFX90A-NEXT:    ;;#ASMEND
8596; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8597; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8598; GFX90A-NEXT:    ;;#ASMSTART
8599; GFX90A-NEXT:    ; use s[8:9]
8600; GFX90A-NEXT:    ;;#ASMEND
8601; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8602;
8603; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_0_0_0:
8604; GFX940:       ; %bb.0:
8605; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8606; GFX940-NEXT:    ;;#ASMSTART
8607; GFX940-NEXT:    ; def s[0:1]
8608; GFX940-NEXT:    ;;#ASMEND
8609; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8610; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
8611; GFX940-NEXT:    ;;#ASMSTART
8612; GFX940-NEXT:    ; use s[8:9]
8613; GFX940-NEXT:    ;;#ASMEND
8614; GFX940-NEXT:    s_setpc_b64 s[30:31]
8615  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8616  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8617  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
8618  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8619  ret void
8620}
8621
; Scalar-register ("=s") single-input case with mask <3, 0, 0, 0>. The second
; shuffle operand is poison here, and index 3 addresses that operand, so
; lane 0 carries no defined value; lanes 1-3 read element 0 of %extract3.
; The expected instructions are the same as those checked for
; @s_shuffle_v4i16_v3i16__u_0_0_0 (pack for s9, 16-bit left shift for s8).
8622define void @s_shuffle_v4i16_v3i16__3_0_0_0() {
8623; GFX900-LABEL: s_shuffle_v4i16_v3i16__3_0_0_0:
8624; GFX900:       ; %bb.0:
8625; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8626; GFX900-NEXT:    ;;#ASMSTART
8627; GFX900-NEXT:    ; def s[4:5]
8628; GFX900-NEXT:    ;;#ASMEND
8629; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8630; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
8631; GFX900-NEXT:    ;;#ASMSTART
8632; GFX900-NEXT:    ; use s[8:9]
8633; GFX900-NEXT:    ;;#ASMEND
8634; GFX900-NEXT:    s_setpc_b64 s[30:31]
8635;
8636; GFX90A-LABEL: s_shuffle_v4i16_v3i16__3_0_0_0:
8637; GFX90A:       ; %bb.0:
8638; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8639; GFX90A-NEXT:    ;;#ASMSTART
8640; GFX90A-NEXT:    ; def s[4:5]
8641; GFX90A-NEXT:    ;;#ASMEND
8642; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8643; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
8644; GFX90A-NEXT:    ;;#ASMSTART
8645; GFX90A-NEXT:    ; use s[8:9]
8646; GFX90A-NEXT:    ;;#ASMEND
8647; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8648;
8649; GFX940-LABEL: s_shuffle_v4i16_v3i16__3_0_0_0:
8650; GFX940:       ; %bb.0:
8651; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8652; GFX940-NEXT:    ;;#ASMSTART
8653; GFX940-NEXT:    ; def s[0:1]
8654; GFX940-NEXT:    ;;#ASMEND
8655; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
8656; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
8657; GFX940-NEXT:    ;;#ASMSTART
8658; GFX940-NEXT:    ; use s[8:9]
8659; GFX940-NEXT:    ;;#ASMEND
8660; GFX940-NEXT:    s_setpc_b64 s[30:31]
8661  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8662  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8663  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
8664  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8665  ret void
8666}
8667
; Scalar-register ("=s") two-input case, mask <4, 0, 0, 0>. Lane 0 reads
; element 1 of the second operand (%extract31) and lanes 1-3 read element 0
; of the first operand (%extract3), so both asm defs are expected. The checks
; expect a 16-bit right shift of the first register of the second def,
; followed by two s_pack_ll_b32_b16 instructions.
8668define void @s_shuffle_v4i16_v3i16__4_0_0_0() {
8669; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_0_0_0:
8670; GFX900:       ; %bb.0:
8671; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8672; GFX900-NEXT:    ;;#ASMSTART
8673; GFX900-NEXT:    ; def s[4:5]
8674; GFX900-NEXT:    ;;#ASMEND
8675; GFX900-NEXT:    ;;#ASMSTART
8676; GFX900-NEXT:    ; def s[6:7]
8677; GFX900-NEXT:    ;;#ASMEND
8678; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
8679; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8680; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8681; GFX900-NEXT:    ;;#ASMSTART
8682; GFX900-NEXT:    ; use s[8:9]
8683; GFX900-NEXT:    ;;#ASMEND
8684; GFX900-NEXT:    s_setpc_b64 s[30:31]
8685;
8686; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_0_0_0:
8687; GFX90A:       ; %bb.0:
8688; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8689; GFX90A-NEXT:    ;;#ASMSTART
8690; GFX90A-NEXT:    ; def s[4:5]
8691; GFX90A-NEXT:    ;;#ASMEND
8692; GFX90A-NEXT:    ;;#ASMSTART
8693; GFX90A-NEXT:    ; def s[6:7]
8694; GFX90A-NEXT:    ;;#ASMEND
8695; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
8696; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8697; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8698; GFX90A-NEXT:    ;;#ASMSTART
8699; GFX90A-NEXT:    ; use s[8:9]
8700; GFX90A-NEXT:    ;;#ASMEND
8701; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8702;
8703; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_0_0_0:
8704; GFX940:       ; %bb.0:
8705; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8706; GFX940-NEXT:    ;;#ASMSTART
8707; GFX940-NEXT:    ; def s[0:1]
8708; GFX940-NEXT:    ;;#ASMEND
8709; GFX940-NEXT:    ;;#ASMSTART
8710; GFX940-NEXT:    ; def s[2:3]
8711; GFX940-NEXT:    ;;#ASMEND
8712; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
8713; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8714; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
8715; GFX940-NEXT:    ;;#ASMSTART
8716; GFX940-NEXT:    ; use s[8:9]
8717; GFX940-NEXT:    ;;#ASMEND
8718; GFX940-NEXT:    s_setpc_b64 s[30:31]
8719  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8720  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8721  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8722  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8723  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
8724  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8725  ret void
8726}
8727
; Scalar-register ("=s") two-input case, mask <5, 0, 0, 0>. Lane 0 reads
; element 2 of the second operand (%extract31) and lanes 1-3 read element 0
; of the first operand (%extract3), so both asm defs are expected. The checks
; expect two s_pack_ll_b32_b16 instructions and no shift.
8728define void @s_shuffle_v4i16_v3i16__5_0_0_0() {
8729; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_0_0:
8730; GFX900:       ; %bb.0:
8731; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8732; GFX900-NEXT:    ;;#ASMSTART
8733; GFX900-NEXT:    ; def s[4:5]
8734; GFX900-NEXT:    ;;#ASMEND
8735; GFX900-NEXT:    ;;#ASMSTART
8736; GFX900-NEXT:    ; def s[6:7]
8737; GFX900-NEXT:    ;;#ASMEND
8738; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
8739; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8740; GFX900-NEXT:    ;;#ASMSTART
8741; GFX900-NEXT:    ; use s[8:9]
8742; GFX900-NEXT:    ;;#ASMEND
8743; GFX900-NEXT:    s_setpc_b64 s[30:31]
8744;
8745; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_0_0:
8746; GFX90A:       ; %bb.0:
8747; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8748; GFX90A-NEXT:    ;;#ASMSTART
8749; GFX90A-NEXT:    ; def s[4:5]
8750; GFX90A-NEXT:    ;;#ASMEND
8751; GFX90A-NEXT:    ;;#ASMSTART
8752; GFX90A-NEXT:    ; def s[6:7]
8753; GFX90A-NEXT:    ;;#ASMEND
8754; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
8755; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8756; GFX90A-NEXT:    ;;#ASMSTART
8757; GFX90A-NEXT:    ; use s[8:9]
8758; GFX90A-NEXT:    ;;#ASMEND
8759; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8760;
8761; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_0_0:
8762; GFX940:       ; %bb.0:
8763; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8764; GFX940-NEXT:    ;;#ASMSTART
8765; GFX940-NEXT:    ; def s[0:1]
8766; GFX940-NEXT:    ;;#ASMEND
8767; GFX940-NEXT:    ;;#ASMSTART
8768; GFX940-NEXT:    ; def s[2:3]
8769; GFX940-NEXT:    ;;#ASMEND
8770; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
8771; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
8772; GFX940-NEXT:    ;;#ASMSTART
8773; GFX940-NEXT:    ; use s[8:9]
8774; GFX940-NEXT:    ;;#ASMEND
8775; GFX940-NEXT:    s_setpc_b64 s[30:31]
8776  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8777  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8778  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8779  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8780  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
8781  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8782  ret void
8783}
8784
; Scalar-register ("=s") two-input case, mask <5, poison, 0, 0>. Lane 0 reads
; element 2 of the second operand (%extract31), lane 1 is poison, and lanes
; 2-3 read element 0 of the first operand (%extract3); both asm defs are
; expected. Because lane 1 is unconstrained, the checks expect s8 to be a
; plain s_mov_b32 copy rather than a pack; s9 uses s_pack_ll_b32_b16.
8785define void @s_shuffle_v4i16_v3i16__5_u_0_0() {
8786; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_0_0:
8787; GFX900:       ; %bb.0:
8788; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8789; GFX900-NEXT:    ;;#ASMSTART
8790; GFX900-NEXT:    ; def s[4:5]
8791; GFX900-NEXT:    ;;#ASMEND
8792; GFX900-NEXT:    ;;#ASMSTART
8793; GFX900-NEXT:    ; def s[6:7]
8794; GFX900-NEXT:    ;;#ASMEND
8795; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8796; GFX900-NEXT:    s_mov_b32 s8, s7
8797; GFX900-NEXT:    ;;#ASMSTART
8798; GFX900-NEXT:    ; use s[8:9]
8799; GFX900-NEXT:    ;;#ASMEND
8800; GFX900-NEXT:    s_setpc_b64 s[30:31]
8801;
8802; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_0_0:
8803; GFX90A:       ; %bb.0:
8804; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8805; GFX90A-NEXT:    ;;#ASMSTART
8806; GFX90A-NEXT:    ; def s[4:5]
8807; GFX90A-NEXT:    ;;#ASMEND
8808; GFX90A-NEXT:    ;;#ASMSTART
8809; GFX90A-NEXT:    ; def s[6:7]
8810; GFX90A-NEXT:    ;;#ASMEND
8811; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8812; GFX90A-NEXT:    s_mov_b32 s8, s7
8813; GFX90A-NEXT:    ;;#ASMSTART
8814; GFX90A-NEXT:    ; use s[8:9]
8815; GFX90A-NEXT:    ;;#ASMEND
8816; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8817;
8818; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_0_0:
8819; GFX940:       ; %bb.0:
8820; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8821; GFX940-NEXT:    ;;#ASMSTART
8822; GFX940-NEXT:    ; def s[0:1]
8823; GFX940-NEXT:    ;;#ASMEND
8824; GFX940-NEXT:    ;;#ASMSTART
8825; GFX940-NEXT:    ; def s[2:3]
8826; GFX940-NEXT:    ;;#ASMEND
8827; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
8828; GFX940-NEXT:    s_mov_b32 s8, s3
8829; GFX940-NEXT:    ;;#ASMSTART
8830; GFX940-NEXT:    ; use s[8:9]
8831; GFX940-NEXT:    ;;#ASMEND
8832; GFX940-NEXT:    s_setpc_b64 s[30:31]
8833  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8834  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8835  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8836  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8837  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
8838  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8839  ret void
8840}
8841
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 1, 0, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lane 0 is %vec1[2], lane 1 is %vec0[1]
; and lanes 2-3 are both %vec0[0]. The result is consumed in s[8:9]; the
; assertions expect s_pack_lh_b32_b16 for the low dword and
; s_pack_ll_b32_b16 for the high dword on every tested subtarget.
8842define void @s_shuffle_v4i16_v3i16__5_1_0_0() {
8843; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_0_0:
8844; GFX900:       ; %bb.0:
8845; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8846; GFX900-NEXT:    ;;#ASMSTART
8847; GFX900-NEXT:    ; def s[4:5]
8848; GFX900-NEXT:    ;;#ASMEND
8849; GFX900-NEXT:    ;;#ASMSTART
8850; GFX900-NEXT:    ; def s[6:7]
8851; GFX900-NEXT:    ;;#ASMEND
8852; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
8853; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8854; GFX900-NEXT:    ;;#ASMSTART
8855; GFX900-NEXT:    ; use s[8:9]
8856; GFX900-NEXT:    ;;#ASMEND
8857; GFX900-NEXT:    s_setpc_b64 s[30:31]
8858;
8859; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_0_0:
8860; GFX90A:       ; %bb.0:
8861; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8862; GFX90A-NEXT:    ;;#ASMSTART
8863; GFX90A-NEXT:    ; def s[4:5]
8864; GFX90A-NEXT:    ;;#ASMEND
8865; GFX90A-NEXT:    ;;#ASMSTART
8866; GFX90A-NEXT:    ; def s[6:7]
8867; GFX90A-NEXT:    ;;#ASMEND
8868; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
8869; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8870; GFX90A-NEXT:    ;;#ASMSTART
8871; GFX90A-NEXT:    ; use s[8:9]
8872; GFX90A-NEXT:    ;;#ASMEND
8873; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8874;
8875; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_0_0:
8876; GFX940:       ; %bb.0:
8877; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8878; GFX940-NEXT:    ;;#ASMSTART
8879; GFX940-NEXT:    ; def s[0:1]
8880; GFX940-NEXT:    ;;#ASMEND
8881; GFX940-NEXT:    ;;#ASMSTART
8882; GFX940-NEXT:    ; def s[2:3]
8883; GFX940-NEXT:    ;;#ASMEND
8884; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
8885; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
8886; GFX940-NEXT:    ;;#ASMSTART
8887; GFX940-NEXT:    ; use s[8:9]
8888; GFX940-NEXT:    ;;#ASMEND
8889; GFX940-NEXT:    s_setpc_b64 s[30:31]
8890  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8891  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8892  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8893  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8894  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
8895  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8896  ret void
8897}
8898
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 2, 0, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lane 0 is %vec1[2], lane 1 is %vec0[2]
; and lanes 2-3 are both %vec0[0]. The result is consumed in s[8:9]; the
; assertions expect two s_pack_ll_b32_b16 instructions on every tested
; subtarget.
8899define void @s_shuffle_v4i16_v3i16__5_2_0_0() {
8900; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_0_0:
8901; GFX900:       ; %bb.0:
8902; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8903; GFX900-NEXT:    ;;#ASMSTART
8904; GFX900-NEXT:    ; def s[4:5]
8905; GFX900-NEXT:    ;;#ASMEND
8906; GFX900-NEXT:    ;;#ASMSTART
8907; GFX900-NEXT:    ; def s[6:7]
8908; GFX900-NEXT:    ;;#ASMEND
8909; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
8910; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8911; GFX900-NEXT:    ;;#ASMSTART
8912; GFX900-NEXT:    ; use s[8:9]
8913; GFX900-NEXT:    ;;#ASMEND
8914; GFX900-NEXT:    s_setpc_b64 s[30:31]
8915;
8916; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_0_0:
8917; GFX90A:       ; %bb.0:
8918; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8919; GFX90A-NEXT:    ;;#ASMSTART
8920; GFX90A-NEXT:    ; def s[4:5]
8921; GFX90A-NEXT:    ;;#ASMEND
8922; GFX90A-NEXT:    ;;#ASMSTART
8923; GFX90A-NEXT:    ; def s[6:7]
8924; GFX90A-NEXT:    ;;#ASMEND
8925; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
8926; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8927; GFX90A-NEXT:    ;;#ASMSTART
8928; GFX90A-NEXT:    ; use s[8:9]
8929; GFX90A-NEXT:    ;;#ASMEND
8930; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8931;
8932; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_0_0:
8933; GFX940:       ; %bb.0:
8934; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8935; GFX940-NEXT:    ;;#ASMSTART
8936; GFX940-NEXT:    ; def s[0:1]
8937; GFX940-NEXT:    ;;#ASMEND
8938; GFX940-NEXT:    ;;#ASMSTART
8939; GFX940-NEXT:    ; def s[2:3]
8940; GFX940-NEXT:    ;;#ASMEND
8941; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
8942; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
8943; GFX940-NEXT:    ;;#ASMSTART
8944; GFX940-NEXT:    ; use s[8:9]
8945; GFX940-NEXT:    ;;#ASMEND
8946; GFX940-NEXT:    s_setpc_b64 s[30:31]
8947  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8948  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8949  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8950  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8951  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
8952  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
8953  ret void
8954}
8955
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 3, 0, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lane 0 is %vec1[2], lane 1 is %vec1[0]
; and lanes 2-3 are both %vec0[0]. The result is consumed in s[8:9]; the
; assertions expect two s_pack_ll_b32_b16 instructions on every tested
; subtarget.
8956define void @s_shuffle_v4i16_v3i16__5_3_0_0() {
8957; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_0_0:
8958; GFX900:       ; %bb.0:
8959; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8960; GFX900-NEXT:    ;;#ASMSTART
8961; GFX900-NEXT:    ; def s[4:5]
8962; GFX900-NEXT:    ;;#ASMEND
8963; GFX900-NEXT:    ;;#ASMSTART
8964; GFX900-NEXT:    ; def s[6:7]
8965; GFX900-NEXT:    ;;#ASMEND
8966; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
8967; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8968; GFX900-NEXT:    ;;#ASMSTART
8969; GFX900-NEXT:    ; use s[8:9]
8970; GFX900-NEXT:    ;;#ASMEND
8971; GFX900-NEXT:    s_setpc_b64 s[30:31]
8972;
8973; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_0_0:
8974; GFX90A:       ; %bb.0:
8975; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8976; GFX90A-NEXT:    ;;#ASMSTART
8977; GFX90A-NEXT:    ; def s[4:5]
8978; GFX90A-NEXT:    ;;#ASMEND
8979; GFX90A-NEXT:    ;;#ASMSTART
8980; GFX90A-NEXT:    ; def s[6:7]
8981; GFX90A-NEXT:    ;;#ASMEND
8982; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
8983; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
8984; GFX90A-NEXT:    ;;#ASMSTART
8985; GFX90A-NEXT:    ; use s[8:9]
8986; GFX90A-NEXT:    ;;#ASMEND
8987; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8988;
8989; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_0_0:
8990; GFX940:       ; %bb.0:
8991; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8992; GFX940-NEXT:    ;;#ASMSTART
8993; GFX940-NEXT:    ; def s[0:1]
8994; GFX940-NEXT:    ;;#ASMEND
8995; GFX940-NEXT:    ;;#ASMSTART
8996; GFX940-NEXT:    ; def s[2:3]
8997; GFX940-NEXT:    ;;#ASMEND
8998; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s2
8999; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
9000; GFX940-NEXT:    ;;#ASMSTART
9001; GFX940-NEXT:    ; use s[8:9]
9002; GFX940-NEXT:    ;;#ASMEND
9003; GFX940-NEXT:    s_setpc_b64 s[30:31]
9004  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9005  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9006  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9007  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9008  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
9009  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9010  ret void
9011}
9012
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 4, 0, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lane 0 is %vec1[2], lane 1 is %vec1[1]
; and lanes 2-3 are both %vec0[0]. The result is consumed in s[8:9]; the
; assertions expect s_pack_lh_b32_b16 for the low dword and
; s_pack_ll_b32_b16 for the high dword on every tested subtarget.
9013define void @s_shuffle_v4i16_v3i16__5_4_0_0() {
9014; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_0_0:
9015; GFX900:       ; %bb.0:
9016; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9017; GFX900-NEXT:    ;;#ASMSTART
9018; GFX900-NEXT:    ; def s[4:5]
9019; GFX900-NEXT:    ;;#ASMEND
9020; GFX900-NEXT:    ;;#ASMSTART
9021; GFX900-NEXT:    ; def s[6:7]
9022; GFX900-NEXT:    ;;#ASMEND
9023; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
9024; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
9025; GFX900-NEXT:    ;;#ASMSTART
9026; GFX900-NEXT:    ; use s[8:9]
9027; GFX900-NEXT:    ;;#ASMEND
9028; GFX900-NEXT:    s_setpc_b64 s[30:31]
9029;
9030; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_0_0:
9031; GFX90A:       ; %bb.0:
9032; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9033; GFX90A-NEXT:    ;;#ASMSTART
9034; GFX90A-NEXT:    ; def s[4:5]
9035; GFX90A-NEXT:    ;;#ASMEND
9036; GFX90A-NEXT:    ;;#ASMSTART
9037; GFX90A-NEXT:    ; def s[6:7]
9038; GFX90A-NEXT:    ;;#ASMEND
9039; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
9040; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
9041; GFX90A-NEXT:    ;;#ASMSTART
9042; GFX90A-NEXT:    ; use s[8:9]
9043; GFX90A-NEXT:    ;;#ASMEND
9044; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9045;
9046; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_0_0:
9047; GFX940:       ; %bb.0:
9048; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9049; GFX940-NEXT:    ;;#ASMSTART
9050; GFX940-NEXT:    ; def s[0:1]
9051; GFX940-NEXT:    ;;#ASMEND
9052; GFX940-NEXT:    ;;#ASMSTART
9053; GFX940-NEXT:    ; def s[2:3]
9054; GFX940-NEXT:    ;;#ASMEND
9055; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s2
9056; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
9057; GFX940-NEXT:    ;;#ASMSTART
9058; GFX940-NEXT:    ; use s[8:9]
9059; GFX940-NEXT:    ;;#ASMEND
9060; GFX940-NEXT:    s_setpc_b64 s[30:31]
9061  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9062  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9063  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9064  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9065  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
9066  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9067  ret void
9068}
9069
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 5, 0, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lanes 0-1 are both %vec1[2] and lanes 2-3
; are both %vec0[0]. The result is consumed in s[8:9]; the assertions expect
; one s_pack_ll_b32_b16 per output dword on every tested subtarget.
9070define void @s_shuffle_v4i16_v3i16__5_5_0_0() {
9071; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_0:
9072; GFX900:       ; %bb.0:
9073; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9074; GFX900-NEXT:    ;;#ASMSTART
9075; GFX900-NEXT:    ; def s[4:5]
9076; GFX900-NEXT:    ;;#ASMEND
9077; GFX900-NEXT:    ;;#ASMSTART
9078; GFX900-NEXT:    ; def s[6:7]
9079; GFX900-NEXT:    ;;#ASMEND
9080; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
9081; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9082; GFX900-NEXT:    ;;#ASMSTART
9083; GFX900-NEXT:    ; use s[8:9]
9084; GFX900-NEXT:    ;;#ASMEND
9085; GFX900-NEXT:    s_setpc_b64 s[30:31]
9086;
9087; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_0:
9088; GFX90A:       ; %bb.0:
9089; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9090; GFX90A-NEXT:    ;;#ASMSTART
9091; GFX90A-NEXT:    ; def s[4:5]
9092; GFX90A-NEXT:    ;;#ASMEND
9093; GFX90A-NEXT:    ;;#ASMSTART
9094; GFX90A-NEXT:    ; def s[6:7]
9095; GFX90A-NEXT:    ;;#ASMEND
9096; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
9097; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9098; GFX90A-NEXT:    ;;#ASMSTART
9099; GFX90A-NEXT:    ; use s[8:9]
9100; GFX90A-NEXT:    ;;#ASMEND
9101; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9102;
9103; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_0:
9104; GFX940:       ; %bb.0:
9105; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9106; GFX940-NEXT:    ;;#ASMSTART
9107; GFX940-NEXT:    ; def s[0:1]
9108; GFX940-NEXT:    ;;#ASMEND
9109; GFX940-NEXT:    ;;#ASMSTART
9110; GFX940-NEXT:    ; def s[2:3]
9111; GFX940-NEXT:    ;;#ASMEND
9112; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
9113; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
9114; GFX940-NEXT:    ;;#ASMSTART
9115; GFX940-NEXT:    ; use s[8:9]
9116; GFX940-NEXT:    ;;#ASMEND
9117; GFX940-NEXT:    s_setpc_b64 s[30:31]
9118  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9119  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9120  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9121  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9122  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
9123  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9124  ret void
9125}
9126
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 5, poison, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lanes 0-1 are both %vec1[2], lane 2 is
; unspecified and lane 3 is %vec0[0]. The result is consumed in s[8:9]; the
; assertions expect the high dword to be formed with s_lshl_b32 by 16 rather
; than a pack, and the low dword with s_pack_ll_b32_b16.
9127define void @s_shuffle_v4i16_v3i16__5_5_u_0() {
9128; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_0:
9129; GFX900:       ; %bb.0:
9130; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9131; GFX900-NEXT:    ;;#ASMSTART
9132; GFX900-NEXT:    ; def s[4:5]
9133; GFX900-NEXT:    ;;#ASMEND
9134; GFX900-NEXT:    ;;#ASMSTART
9135; GFX900-NEXT:    ; def s[6:7]
9136; GFX900-NEXT:    ;;#ASMEND
9137; GFX900-NEXT:    s_lshl_b32 s9, s4, 16
9138; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9139; GFX900-NEXT:    ;;#ASMSTART
9140; GFX900-NEXT:    ; use s[8:9]
9141; GFX900-NEXT:    ;;#ASMEND
9142; GFX900-NEXT:    s_setpc_b64 s[30:31]
9143;
9144; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_0:
9145; GFX90A:       ; %bb.0:
9146; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9147; GFX90A-NEXT:    ;;#ASMSTART
9148; GFX90A-NEXT:    ; def s[4:5]
9149; GFX90A-NEXT:    ;;#ASMEND
9150; GFX90A-NEXT:    ;;#ASMSTART
9151; GFX90A-NEXT:    ; def s[6:7]
9152; GFX90A-NEXT:    ;;#ASMEND
9153; GFX90A-NEXT:    s_lshl_b32 s9, s4, 16
9154; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9155; GFX90A-NEXT:    ;;#ASMSTART
9156; GFX90A-NEXT:    ; use s[8:9]
9157; GFX90A-NEXT:    ;;#ASMEND
9158; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9159;
9160; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_0:
9161; GFX940:       ; %bb.0:
9162; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9163; GFX940-NEXT:    ;;#ASMSTART
9164; GFX940-NEXT:    ; def s[0:1]
9165; GFX940-NEXT:    ;;#ASMEND
9166; GFX940-NEXT:    ;;#ASMSTART
9167; GFX940-NEXT:    ; def s[2:3]
9168; GFX940-NEXT:    ;;#ASMEND
9169; GFX940-NEXT:    s_lshl_b32 s9, s0, 16
9170; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
9171; GFX940-NEXT:    ;;#ASMSTART
9172; GFX940-NEXT:    ; use s[8:9]
9173; GFX940-NEXT:    ;;#ASMEND
9174; GFX940-NEXT:    s_setpc_b64 s[30:31]
9175  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9176  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9177  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9178  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9179  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
9180  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9181  ret void
9182}
9183
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 5, 1, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lanes 0-1 are both %vec1[2], lane 2 is
; %vec0[1] and lane 3 is %vec0[0]. The result is consumed in s[8:9]; the
; assertions expect an s_lshr_b32 by 16 (emitted between the two inline-asm
; defs) to bring %vec0[1] into the low half before the s_pack_ll_b32_b16
; that builds the high dword.
9184define void @s_shuffle_v4i16_v3i16__5_5_1_0() {
9185; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_0:
9186; GFX900:       ; %bb.0:
9187; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9188; GFX900-NEXT:    ;;#ASMSTART
9189; GFX900-NEXT:    ; def s[4:5]
9190; GFX900-NEXT:    ;;#ASMEND
9191; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
9192; GFX900-NEXT:    ;;#ASMSTART
9193; GFX900-NEXT:    ; def s[6:7]
9194; GFX900-NEXT:    ;;#ASMEND
9195; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
9196; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9197; GFX900-NEXT:    ;;#ASMSTART
9198; GFX900-NEXT:    ; use s[8:9]
9199; GFX900-NEXT:    ;;#ASMEND
9200; GFX900-NEXT:    s_setpc_b64 s[30:31]
9201;
9202; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_0:
9203; GFX90A:       ; %bb.0:
9204; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9205; GFX90A-NEXT:    ;;#ASMSTART
9206; GFX90A-NEXT:    ; def s[4:5]
9207; GFX90A-NEXT:    ;;#ASMEND
9208; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
9209; GFX90A-NEXT:    ;;#ASMSTART
9210; GFX90A-NEXT:    ; def s[6:7]
9211; GFX90A-NEXT:    ;;#ASMEND
9212; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
9213; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9214; GFX90A-NEXT:    ;;#ASMSTART
9215; GFX90A-NEXT:    ; use s[8:9]
9216; GFX90A-NEXT:    ;;#ASMEND
9217; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9218;
9219; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_0:
9220; GFX940:       ; %bb.0:
9221; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9222; GFX940-NEXT:    ;;#ASMSTART
9223; GFX940-NEXT:    ; def s[0:1]
9224; GFX940-NEXT:    ;;#ASMEND
9225; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
9226; GFX940-NEXT:    ;;#ASMSTART
9227; GFX940-NEXT:    ; def s[2:3]
9228; GFX940-NEXT:    ;;#ASMEND
9229; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
9230; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
9231; GFX940-NEXT:    ;;#ASMSTART
9232; GFX940-NEXT:    ; use s[8:9]
9233; GFX940-NEXT:    ;;#ASMEND
9234; GFX940-NEXT:    s_setpc_b64 s[30:31]
9235  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9236  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9237  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9238  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9239  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
9240  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9241  ret void
9242}
9243
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 5, 2, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lanes 0-1 are both %vec1[2], lane 2 is
; %vec0[2] and lane 3 is %vec0[0]. The result is consumed in s[8:9]; the
; assertions expect one s_pack_ll_b32_b16 per output dword on every tested
; subtarget.
9244define void @s_shuffle_v4i16_v3i16__5_5_2_0() {
9245; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_0:
9246; GFX900:       ; %bb.0:
9247; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9248; GFX900-NEXT:    ;;#ASMSTART
9249; GFX900-NEXT:    ; def s[4:5]
9250; GFX900-NEXT:    ;;#ASMEND
9251; GFX900-NEXT:    ;;#ASMSTART
9252; GFX900-NEXT:    ; def s[6:7]
9253; GFX900-NEXT:    ;;#ASMEND
9254; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
9255; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9256; GFX900-NEXT:    ;;#ASMSTART
9257; GFX900-NEXT:    ; use s[8:9]
9258; GFX900-NEXT:    ;;#ASMEND
9259; GFX900-NEXT:    s_setpc_b64 s[30:31]
9260;
9261; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_0:
9262; GFX90A:       ; %bb.0:
9263; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9264; GFX90A-NEXT:    ;;#ASMSTART
9265; GFX90A-NEXT:    ; def s[4:5]
9266; GFX90A-NEXT:    ;;#ASMEND
9267; GFX90A-NEXT:    ;;#ASMSTART
9268; GFX90A-NEXT:    ; def s[6:7]
9269; GFX90A-NEXT:    ;;#ASMEND
9270; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
9271; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9272; GFX90A-NEXT:    ;;#ASMSTART
9273; GFX90A-NEXT:    ; use s[8:9]
9274; GFX90A-NEXT:    ;;#ASMEND
9275; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9276;
9277; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_0:
9278; GFX940:       ; %bb.0:
9279; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9280; GFX940-NEXT:    ;;#ASMSTART
9281; GFX940-NEXT:    ; def s[0:1]
9282; GFX940-NEXT:    ;;#ASMEND
9283; GFX940-NEXT:    ;;#ASMSTART
9284; GFX940-NEXT:    ; def s[2:3]
9285; GFX940-NEXT:    ;;#ASMEND
9286; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
9287; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
9288; GFX940-NEXT:    ;;#ASMSTART
9289; GFX940-NEXT:    ; use s[8:9]
9290; GFX940-NEXT:    ;;#ASMEND
9291; GFX940-NEXT:    s_setpc_b64 s[30:31]
9292  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9293  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9294  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9295  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9296  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
9297  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9298  ret void
9299}
9300
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 5, 3, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lanes 0-1 are both %vec1[2], lane 2 is
; %vec1[0] and lane 3 is %vec0[0]. The result is consumed in s[8:9]; the
; assertions expect one s_pack_ll_b32_b16 per output dword on every tested
; subtarget.
9301define void @s_shuffle_v4i16_v3i16__5_5_3_0() {
9302; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_0:
9303; GFX900:       ; %bb.0:
9304; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9305; GFX900-NEXT:    ;;#ASMSTART
9306; GFX900-NEXT:    ; def s[4:5]
9307; GFX900-NEXT:    ;;#ASMEND
9308; GFX900-NEXT:    ;;#ASMSTART
9309; GFX900-NEXT:    ; def s[6:7]
9310; GFX900-NEXT:    ;;#ASMEND
9311; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
9312; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9313; GFX900-NEXT:    ;;#ASMSTART
9314; GFX900-NEXT:    ; use s[8:9]
9315; GFX900-NEXT:    ;;#ASMEND
9316; GFX900-NEXT:    s_setpc_b64 s[30:31]
9317;
9318; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_0:
9319; GFX90A:       ; %bb.0:
9320; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9321; GFX90A-NEXT:    ;;#ASMSTART
9322; GFX90A-NEXT:    ; def s[4:5]
9323; GFX90A-NEXT:    ;;#ASMEND
9324; GFX90A-NEXT:    ;;#ASMSTART
9325; GFX90A-NEXT:    ; def s[6:7]
9326; GFX90A-NEXT:    ;;#ASMEND
9327; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
9328; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9329; GFX90A-NEXT:    ;;#ASMSTART
9330; GFX90A-NEXT:    ; use s[8:9]
9331; GFX90A-NEXT:    ;;#ASMEND
9332; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9333;
9334; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_0:
9335; GFX940:       ; %bb.0:
9336; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9337; GFX940-NEXT:    ;;#ASMSTART
9338; GFX940-NEXT:    ; def s[0:1]
9339; GFX940-NEXT:    ;;#ASMEND
9340; GFX940-NEXT:    ;;#ASMSTART
9341; GFX940-NEXT:    ; def s[2:3]
9342; GFX940-NEXT:    ;;#ASMEND
9343; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s0
9344; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
9345; GFX940-NEXT:    ;;#ASMSTART
9346; GFX940-NEXT:    ; use s[8:9]
9347; GFX940-NEXT:    ;;#ASMEND
9348; GFX940-NEXT:    s_setpc_b64 s[30:31]
9349  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9350  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9351  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9352  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9353  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
9354  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9355  ret void
9356}
9357
; Scalar-register shuffle of two <3 x i16> inputs with mask <5, 5, 4, 0>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lanes 0-1 are both %vec1[2], lane 2 is
; %vec1[1] and lane 3 is %vec0[0]. The result is consumed in s[8:9]; the
; assertions expect an s_lshr_b32 by 16 to bring %vec1[1] into the low half
; before the s_pack_ll_b32_b16 that builds the high dword.
9358define void @s_shuffle_v4i16_v3i16__5_5_4_0() {
9359; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_0:
9360; GFX900:       ; %bb.0:
9361; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9362; GFX900-NEXT:    ;;#ASMSTART
9363; GFX900-NEXT:    ; def s[4:5]
9364; GFX900-NEXT:    ;;#ASMEND
9365; GFX900-NEXT:    ;;#ASMSTART
9366; GFX900-NEXT:    ; def s[6:7]
9367; GFX900-NEXT:    ;;#ASMEND
9368; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
9369; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
9370; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9371; GFX900-NEXT:    ;;#ASMSTART
9372; GFX900-NEXT:    ; use s[8:9]
9373; GFX900-NEXT:    ;;#ASMEND
9374; GFX900-NEXT:    s_setpc_b64 s[30:31]
9375;
9376; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_0:
9377; GFX90A:       ; %bb.0:
9378; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9379; GFX90A-NEXT:    ;;#ASMSTART
9380; GFX90A-NEXT:    ; def s[4:5]
9381; GFX90A-NEXT:    ;;#ASMEND
9382; GFX90A-NEXT:    ;;#ASMSTART
9383; GFX90A-NEXT:    ; def s[6:7]
9384; GFX90A-NEXT:    ;;#ASMEND
9385; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
9386; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
9387; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9388; GFX90A-NEXT:    ;;#ASMSTART
9389; GFX90A-NEXT:    ; use s[8:9]
9390; GFX90A-NEXT:    ;;#ASMEND
9391; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9392;
9393; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_0:
9394; GFX940:       ; %bb.0:
9395; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9396; GFX940-NEXT:    ;;#ASMSTART
9397; GFX940-NEXT:    ; def s[0:1]
9398; GFX940-NEXT:    ;;#ASMEND
9399; GFX940-NEXT:    ;;#ASMSTART
9400; GFX940-NEXT:    ; def s[2:3]
9401; GFX940-NEXT:    ;;#ASMEND
9402; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
9403; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
9404; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
9405; GFX940-NEXT:    ;;#ASMSTART
9406; GFX940-NEXT:    ; use s[8:9]
9407; GFX940-NEXT:    ;;#ASMEND
9408; GFX940-NEXT:    s_setpc_b64 s[30:31]
9409  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9410  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9411  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9412  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9413  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
9414  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9415  ret void
9416}
9417
; Scalar-register shuffle of a single <3 x i16> input (second operand is
; poison) with mask <poison, 1, 1, 1>: lane 0 is unspecified and lanes 1-3
; are all %vec0[1]. The assertions use the prefix shared by all three RUN
; lines and expect the input to be defined directly in s[8:9], leaving the
; low dword untouched and rebuilding only the high dword with
; s_pack_hh_b32_b16.
9418define void @s_shuffle_v4i16_v3i16__u_1_1_1() {
9419; GFX9-LABEL: s_shuffle_v4i16_v3i16__u_1_1_1:
9420; GFX9:       ; %bb.0:
9421; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9422; GFX9-NEXT:    ;;#ASMSTART
9423; GFX9-NEXT:    ; def s[8:9]
9424; GFX9-NEXT:    ;;#ASMEND
9425; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
9426; GFX9-NEXT:    ;;#ASMSTART
9427; GFX9-NEXT:    ; use s[8:9]
9428; GFX9-NEXT:    ;;#ASMEND
9429; GFX9-NEXT:    s_setpc_b64 s[30:31]
9430  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9431  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9432  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
9433  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9434  ret void
9435}
9436
; Scalar-register shuffle of a single <3 x i16> input (second operand is
; poison) with mask <0, 1, 1, 1>: lanes 0-1 keep %vec0[0] and %vec0[1] in
; place and lanes 2-3 are both %vec0[1]. The assertions use the prefix shared
; by all three RUN lines and expect the input to be defined directly in
; s[8:9], leaving the low dword untouched and rebuilding only the high dword
; with s_pack_hh_b32_b16.
9437define void @s_shuffle_v4i16_v3i16__0_1_1_1() {
9438; GFX9-LABEL: s_shuffle_v4i16_v3i16__0_1_1_1:
9439; GFX9:       ; %bb.0:
9440; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9441; GFX9-NEXT:    ;;#ASMSTART
9442; GFX9-NEXT:    ; def s[8:9]
9443; GFX9-NEXT:    ;;#ASMEND
9444; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
9445; GFX9-NEXT:    ;;#ASMSTART
9446; GFX9-NEXT:    ; use s[8:9]
9447; GFX9-NEXT:    ;;#ASMEND
9448; GFX9-NEXT:    s_setpc_b64 s[30:31]
9449  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9450  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9451  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
9452  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9453  ret void
9454}
9455
; Scalar-register shuffle of a single <3 x i16> input (second operand is
; poison) with mask <1, 1, 1, 1>, i.e. a splat of %vec0[1] across all four
; lanes. The result is consumed in s[8:9]; the assertions expect one
; s_pack_hh_b32_b16 to build the low dword and an s_mov_b32 to duplicate it
; into the high dword on every tested subtarget.
9456define void @s_shuffle_v4i16_v3i16__1_1_1_1() {
9457; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_1_1_1:
9458; GFX900:       ; %bb.0:
9459; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9460; GFX900-NEXT:    ;;#ASMSTART
9461; GFX900-NEXT:    ; def s[4:5]
9462; GFX900-NEXT:    ;;#ASMEND
9463; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
9464; GFX900-NEXT:    s_mov_b32 s9, s8
9465; GFX900-NEXT:    ;;#ASMSTART
9466; GFX900-NEXT:    ; use s[8:9]
9467; GFX900-NEXT:    ;;#ASMEND
9468; GFX900-NEXT:    s_setpc_b64 s[30:31]
9469;
9470; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_1_1_1:
9471; GFX90A:       ; %bb.0:
9472; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9473; GFX90A-NEXT:    ;;#ASMSTART
9474; GFX90A-NEXT:    ; def s[4:5]
9475; GFX90A-NEXT:    ;;#ASMEND
9476; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
9477; GFX90A-NEXT:    s_mov_b32 s9, s8
9478; GFX90A-NEXT:    ;;#ASMSTART
9479; GFX90A-NEXT:    ; use s[8:9]
9480; GFX90A-NEXT:    ;;#ASMEND
9481; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9482;
9483; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_1_1_1:
9484; GFX940:       ; %bb.0:
9485; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9486; GFX940-NEXT:    ;;#ASMSTART
9487; GFX940-NEXT:    ; def s[0:1]
9488; GFX940-NEXT:    ;;#ASMEND
9489; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s0
9490; GFX940-NEXT:    s_mov_b32 s9, s8
9491; GFX940-NEXT:    ;;#ASMSTART
9492; GFX940-NEXT:    ; use s[8:9]
9493; GFX940-NEXT:    ;;#ASMEND
9494; GFX940-NEXT:    s_setpc_b64 s[30:31]
9495  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9496  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9497  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
9498  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9499  ret void
9500}
9501
; Scalar-register shuffle of a single <3 x i16> input (second operand is
; poison) with mask <2, 1, 1, 1>: lane 0 is %vec0[2] and lanes 1-3 are all
; %vec0[1]. The result is consumed in s[8:9]; the assertions expect
; s_pack_lh_b32_b16 for the low dword and s_pack_hh_b32_b16 for the high
; dword on every tested subtarget.
9502define void @s_shuffle_v4i16_v3i16__2_1_1_1() {
9503; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_1_1_1:
9504; GFX900:       ; %bb.0:
9505; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9506; GFX900-NEXT:    ;;#ASMSTART
9507; GFX900-NEXT:    ; def s[4:5]
9508; GFX900-NEXT:    ;;#ASMEND
9509; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
9510; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9511; GFX900-NEXT:    ;;#ASMSTART
9512; GFX900-NEXT:    ; use s[8:9]
9513; GFX900-NEXT:    ;;#ASMEND
9514; GFX900-NEXT:    s_setpc_b64 s[30:31]
9515;
9516; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_1_1_1:
9517; GFX90A:       ; %bb.0:
9518; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9519; GFX90A-NEXT:    ;;#ASMSTART
9520; GFX90A-NEXT:    ; def s[4:5]
9521; GFX90A-NEXT:    ;;#ASMEND
9522; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
9523; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9524; GFX90A-NEXT:    ;;#ASMSTART
9525; GFX90A-NEXT:    ; use s[8:9]
9526; GFX90A-NEXT:    ;;#ASMEND
9527; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9528;
9529; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_1_1_1:
9530; GFX940:       ; %bb.0:
9531; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9532; GFX940-NEXT:    ;;#ASMSTART
9533; GFX940-NEXT:    ; def s[0:1]
9534; GFX940-NEXT:    ;;#ASMEND
9535; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
9536; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
9537; GFX940-NEXT:    ;;#ASMSTART
9538; GFX940-NEXT:    ; use s[8:9]
9539; GFX940-NEXT:    ;;#ASMEND
9540; GFX940-NEXT:    s_setpc_b64 s[30:31]
9541  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9542  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9543  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
9544  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9545  ret void
9546}
9547
; Scalar-register shuffle of a single <3 x i16> input with mask <3, 1, 1, 1>.
; Mask index 3 selects element 0 of the second operand, which is poison here,
; so lane 0 is unspecified; lanes 1-3 are all %vec0[1]. The assertions use the
; prefix shared by all three RUN lines and expect the input to be defined
; directly in s[8:9], leaving the low dword untouched and rebuilding only the
; high dword with s_pack_hh_b32_b16.
9548define void @s_shuffle_v4i16_v3i16__3_1_1_1() {
9549; GFX9-LABEL: s_shuffle_v4i16_v3i16__3_1_1_1:
9550; GFX9:       ; %bb.0:
9551; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9552; GFX9-NEXT:    ;;#ASMSTART
9553; GFX9-NEXT:    ; def s[8:9]
9554; GFX9-NEXT:    ;;#ASMEND
9555; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
9556; GFX9-NEXT:    ;;#ASMSTART
9557; GFX9-NEXT:    ; use s[8:9]
9558; GFX9-NEXT:    ;;#ASMEND
9559; GFX9-NEXT:    s_setpc_b64 s[30:31]
9560  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9561  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9562  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
9563  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9564  ret void
9565}
9566
; Scalar-register shuffle of two <3 x i16> inputs with mask <4, 1, 1, 1>.
; Mask indices 0..2 select from %extract3 (taken from %vec0) and 3..5 from
; %extract31 (taken from %vec1), so lane 0 is %vec1[1] and lanes 1-3 are all
; %vec0[1]. The result is consumed in s[8:9]; the assertions expect one
; s_pack_hh_b32_b16 per output dword on every tested subtarget.
9567define void @s_shuffle_v4i16_v3i16__4_1_1_1() {
9568; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_1_1_1:
9569; GFX900:       ; %bb.0:
9570; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9571; GFX900-NEXT:    ;;#ASMSTART
9572; GFX900-NEXT:    ; def s[4:5]
9573; GFX900-NEXT:    ;;#ASMEND
9574; GFX900-NEXT:    ;;#ASMSTART
9575; GFX900-NEXT:    ; def s[6:7]
9576; GFX900-NEXT:    ;;#ASMEND
9577; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s6, s4
9578; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9579; GFX900-NEXT:    ;;#ASMSTART
9580; GFX900-NEXT:    ; use s[8:9]
9581; GFX900-NEXT:    ;;#ASMEND
9582; GFX900-NEXT:    s_setpc_b64 s[30:31]
9583;
9584; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_1_1_1:
9585; GFX90A:       ; %bb.0:
9586; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9587; GFX90A-NEXT:    ;;#ASMSTART
9588; GFX90A-NEXT:    ; def s[4:5]
9589; GFX90A-NEXT:    ;;#ASMEND
9590; GFX90A-NEXT:    ;;#ASMSTART
9591; GFX90A-NEXT:    ; def s[6:7]
9592; GFX90A-NEXT:    ;;#ASMEND
9593; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s6, s4
9594; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9595; GFX90A-NEXT:    ;;#ASMSTART
9596; GFX90A-NEXT:    ; use s[8:9]
9597; GFX90A-NEXT:    ;;#ASMEND
9598; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9599;
9600; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_1_1_1:
9601; GFX940:       ; %bb.0:
9602; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9603; GFX940-NEXT:    ;;#ASMSTART
9604; GFX940-NEXT:    ; def s[0:1]
9605; GFX940-NEXT:    ;;#ASMEND
9606; GFX940-NEXT:    ;;#ASMSTART
9607; GFX940-NEXT:    ; def s[2:3]
9608; GFX940-NEXT:    ;;#ASMEND
9609; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s2, s0
9610; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
9611; GFX940-NEXT:    ;;#ASMSTART
9612; GFX940-NEXT:    ; use s[8:9]
9613; GFX940-NEXT:    ;;#ASMEND
9614; GFX940-NEXT:    s_setpc_b64 s[30:31]
9615  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9616  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9617  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9618  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9619  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
9620  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9621  ret void
9622}
9623
; SGPR shuffle test, mask <5, 1, 1, 1>: result lane 0 is lane 2 of the second
; source (%extract31, mask index 5) and lanes 1-3 all repeat lane 1 of the
; first source (%extract3).
; The checks expect s8 from s_pack_lh_b32_b16 (second def's upper register
; with the first def's lower register) and s9 from s_pack_hh_b32_b16. The
; gfx900 and gfx90a sequences are identical; gfx940 differs only in the
; registers given to the two asm defs.
9624define void @s_shuffle_v4i16_v3i16__5_1_1_1() {
9625; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_1_1:
9626; GFX900:       ; %bb.0:
9627; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9628; GFX900-NEXT:    ;;#ASMSTART
9629; GFX900-NEXT:    ; def s[4:5]
9630; GFX900-NEXT:    ;;#ASMEND
9631; GFX900-NEXT:    ;;#ASMSTART
9632; GFX900-NEXT:    ; def s[6:7]
9633; GFX900-NEXT:    ;;#ASMEND
9634; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
9635; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9636; GFX900-NEXT:    ;;#ASMSTART
9637; GFX900-NEXT:    ; use s[8:9]
9638; GFX900-NEXT:    ;;#ASMEND
9639; GFX900-NEXT:    s_setpc_b64 s[30:31]
9640;
9641; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_1_1:
9642; GFX90A:       ; %bb.0:
9643; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9644; GFX90A-NEXT:    ;;#ASMSTART
9645; GFX90A-NEXT:    ; def s[4:5]
9646; GFX90A-NEXT:    ;;#ASMEND
9647; GFX90A-NEXT:    ;;#ASMSTART
9648; GFX90A-NEXT:    ; def s[6:7]
9649; GFX90A-NEXT:    ;;#ASMEND
9650; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
9651; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9652; GFX90A-NEXT:    ;;#ASMSTART
9653; GFX90A-NEXT:    ; use s[8:9]
9654; GFX90A-NEXT:    ;;#ASMEND
9655; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9656;
9657; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_1_1:
9658; GFX940:       ; %bb.0:
9659; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9660; GFX940-NEXT:    ;;#ASMSTART
9661; GFX940-NEXT:    ; def s[0:1]
9662; GFX940-NEXT:    ;;#ASMEND
9663; GFX940-NEXT:    ;;#ASMSTART
9664; GFX940-NEXT:    ; def s[2:3]
9665; GFX940-NEXT:    ;;#ASMEND
9666; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
9667; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
9668; GFX940-NEXT:    ;;#ASMSTART
9669; GFX940-NEXT:    ; use s[8:9]
9670; GFX940-NEXT:    ;;#ASMEND
9671; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
9672  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9673  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9674  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9675  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9676  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
9677  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9678  ret void
9679}
9680
; SGPR shuffle test, mask <5, poison, 1, 1>: result lane 0 is lane 2 of the
; second source (%extract31), lane 1 is left unspecified, and lanes 2-3 both
; repeat lane 1 of the first source (%extract3).
; With lane 1 unconstrained, the checks expect s8 to be a plain s_mov_b32 copy
; of the second def's upper register, while s9 comes from s_pack_hh_b32_b16.
; The gfx900 and gfx90a sequences are identical; gfx940 differs only in the
; registers given to the two asm defs.
9681define void @s_shuffle_v4i16_v3i16__5_u_1_1() {
9682; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_1_1:
9683; GFX900:       ; %bb.0:
9684; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9685; GFX900-NEXT:    ;;#ASMSTART
9686; GFX900-NEXT:    ; def s[4:5]
9687; GFX900-NEXT:    ;;#ASMEND
9688; GFX900-NEXT:    ;;#ASMSTART
9689; GFX900-NEXT:    ; def s[6:7]
9690; GFX900-NEXT:    ;;#ASMEND
9691; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9692; GFX900-NEXT:    s_mov_b32 s8, s7
9693; GFX900-NEXT:    ;;#ASMSTART
9694; GFX900-NEXT:    ; use s[8:9]
9695; GFX900-NEXT:    ;;#ASMEND
9696; GFX900-NEXT:    s_setpc_b64 s[30:31]
9697;
9698; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_1_1:
9699; GFX90A:       ; %bb.0:
9700; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9701; GFX90A-NEXT:    ;;#ASMSTART
9702; GFX90A-NEXT:    ; def s[4:5]
9703; GFX90A-NEXT:    ;;#ASMEND
9704; GFX90A-NEXT:    ;;#ASMSTART
9705; GFX90A-NEXT:    ; def s[6:7]
9706; GFX90A-NEXT:    ;;#ASMEND
9707; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9708; GFX90A-NEXT:    s_mov_b32 s8, s7
9709; GFX90A-NEXT:    ;;#ASMSTART
9710; GFX90A-NEXT:    ; use s[8:9]
9711; GFX90A-NEXT:    ;;#ASMEND
9712; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9713;
9714; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_1_1:
9715; GFX940:       ; %bb.0:
9716; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9717; GFX940-NEXT:    ;;#ASMSTART
9718; GFX940-NEXT:    ; def s[0:1]
9719; GFX940-NEXT:    ;;#ASMEND
9720; GFX940-NEXT:    ;;#ASMSTART
9721; GFX940-NEXT:    ; def s[2:3]
9722; GFX940-NEXT:    ;;#ASMEND
9723; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
9724; GFX940-NEXT:    s_mov_b32 s8, s3
9725; GFX940-NEXT:    ;;#ASMSTART
9726; GFX940-NEXT:    ; use s[8:9]
9727; GFX940-NEXT:    ;;#ASMEND
9728; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
9729  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9730  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9731  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9732  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9733  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
9734  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9735  ret void
9736}
9737
; SGPR shuffle test, mask <5, 0, 1, 1>: result lane 0 is lane 2 of the second
; source (%extract31), lane 1 is lane 0 of the first source (%extract3), and
; lanes 2-3 both repeat lane 1 of the first source.
; The checks expect s8 from s_pack_ll_b32_b16 (second def's upper register
; with the first def's lower register) and s9 from s_pack_hh_b32_b16. The
; gfx900 and gfx90a sequences are identical; gfx940 differs only in the
; registers given to the two asm defs.
9738define void @s_shuffle_v4i16_v3i16__5_0_1_1() {
9739; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_1_1:
9740; GFX900:       ; %bb.0:
9741; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9742; GFX900-NEXT:    ;;#ASMSTART
9743; GFX900-NEXT:    ; def s[4:5]
9744; GFX900-NEXT:    ;;#ASMEND
9745; GFX900-NEXT:    ;;#ASMSTART
9746; GFX900-NEXT:    ; def s[6:7]
9747; GFX900-NEXT:    ;;#ASMEND
9748; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
9749; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9750; GFX900-NEXT:    ;;#ASMSTART
9751; GFX900-NEXT:    ; use s[8:9]
9752; GFX900-NEXT:    ;;#ASMEND
9753; GFX900-NEXT:    s_setpc_b64 s[30:31]
9754;
9755; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_1_1:
9756; GFX90A:       ; %bb.0:
9757; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9758; GFX90A-NEXT:    ;;#ASMSTART
9759; GFX90A-NEXT:    ; def s[4:5]
9760; GFX90A-NEXT:    ;;#ASMEND
9761; GFX90A-NEXT:    ;;#ASMSTART
9762; GFX90A-NEXT:    ; def s[6:7]
9763; GFX90A-NEXT:    ;;#ASMEND
9764; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
9765; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9766; GFX90A-NEXT:    ;;#ASMSTART
9767; GFX90A-NEXT:    ; use s[8:9]
9768; GFX90A-NEXT:    ;;#ASMEND
9769; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9770;
9771; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_1_1:
9772; GFX940:       ; %bb.0:
9773; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9774; GFX940-NEXT:    ;;#ASMSTART
9775; GFX940-NEXT:    ; def s[0:1]
9776; GFX940-NEXT:    ;;#ASMEND
9777; GFX940-NEXT:    ;;#ASMSTART
9778; GFX940-NEXT:    ; def s[2:3]
9779; GFX940-NEXT:    ;;#ASMEND
9780; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
9781; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
9782; GFX940-NEXT:    ;;#ASMSTART
9783; GFX940-NEXT:    ; use s[8:9]
9784; GFX940-NEXT:    ;;#ASMEND
9785; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
9786  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9787  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9788  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9789  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9790  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
9791  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9792  ret void
9793}
9794
; SGPR shuffle test, mask <5, 2, 1, 1>: result lane 0 is lane 2 of the second
; source (%extract31), lane 1 is lane 2 of the first source (%extract3), and
; lanes 2-3 both repeat lane 1 of the first source.
; The checks expect s8 from s_pack_ll_b32_b16 over the upper registers of the
; second and first defs, and s9 from s_pack_hh_b32_b16. The gfx900 and gfx90a
; sequences are identical; gfx940 differs only in the registers given to the
; two asm defs.
9795define void @s_shuffle_v4i16_v3i16__5_2_1_1() {
9796; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_1_1:
9797; GFX900:       ; %bb.0:
9798; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9799; GFX900-NEXT:    ;;#ASMSTART
9800; GFX900-NEXT:    ; def s[4:5]
9801; GFX900-NEXT:    ;;#ASMEND
9802; GFX900-NEXT:    ;;#ASMSTART
9803; GFX900-NEXT:    ; def s[6:7]
9804; GFX900-NEXT:    ;;#ASMEND
9805; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
9806; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9807; GFX900-NEXT:    ;;#ASMSTART
9808; GFX900-NEXT:    ; use s[8:9]
9809; GFX900-NEXT:    ;;#ASMEND
9810; GFX900-NEXT:    s_setpc_b64 s[30:31]
9811;
9812; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_1_1:
9813; GFX90A:       ; %bb.0:
9814; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9815; GFX90A-NEXT:    ;;#ASMSTART
9816; GFX90A-NEXT:    ; def s[4:5]
9817; GFX90A-NEXT:    ;;#ASMEND
9818; GFX90A-NEXT:    ;;#ASMSTART
9819; GFX90A-NEXT:    ; def s[6:7]
9820; GFX90A-NEXT:    ;;#ASMEND
9821; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
9822; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9823; GFX90A-NEXT:    ;;#ASMSTART
9824; GFX90A-NEXT:    ; use s[8:9]
9825; GFX90A-NEXT:    ;;#ASMEND
9826; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9827;
9828; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_1_1:
9829; GFX940:       ; %bb.0:
9830; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9831; GFX940-NEXT:    ;;#ASMSTART
9832; GFX940-NEXT:    ; def s[0:1]
9833; GFX940-NEXT:    ;;#ASMEND
9834; GFX940-NEXT:    ;;#ASMSTART
9835; GFX940-NEXT:    ; def s[2:3]
9836; GFX940-NEXT:    ;;#ASMEND
9837; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
9838; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
9839; GFX940-NEXT:    ;;#ASMSTART
9840; GFX940-NEXT:    ; use s[8:9]
9841; GFX940-NEXT:    ;;#ASMEND
9842; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
9843  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9844  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9845  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9846  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9847  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
9848  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9849  ret void
9850}
9851
; SGPR shuffle test, mask <5, 3, 1, 1>: result lanes 0 and 1 are lanes 2 and 0
; of the second source (%extract31), and lanes 2-3 both repeat lane 1 of the
; first source (%extract3).
; The checks expect s8 from s_pack_ll_b32_b16 over the second def's upper and
; lower registers, and s9 from s_pack_hh_b32_b16 over the first def. The
; gfx900 and gfx90a sequences are identical; gfx940 differs only in the
; registers given to the two asm defs.
9852define void @s_shuffle_v4i16_v3i16__5_3_1_1() {
9853; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_1_1:
9854; GFX900:       ; %bb.0:
9855; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9856; GFX900-NEXT:    ;;#ASMSTART
9857; GFX900-NEXT:    ; def s[4:5]
9858; GFX900-NEXT:    ;;#ASMEND
9859; GFX900-NEXT:    ;;#ASMSTART
9860; GFX900-NEXT:    ; def s[6:7]
9861; GFX900-NEXT:    ;;#ASMEND
9862; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
9863; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9864; GFX900-NEXT:    ;;#ASMSTART
9865; GFX900-NEXT:    ; use s[8:9]
9866; GFX900-NEXT:    ;;#ASMEND
9867; GFX900-NEXT:    s_setpc_b64 s[30:31]
9868;
9869; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_1_1:
9870; GFX90A:       ; %bb.0:
9871; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9872; GFX90A-NEXT:    ;;#ASMSTART
9873; GFX90A-NEXT:    ; def s[4:5]
9874; GFX90A-NEXT:    ;;#ASMEND
9875; GFX90A-NEXT:    ;;#ASMSTART
9876; GFX90A-NEXT:    ; def s[6:7]
9877; GFX90A-NEXT:    ;;#ASMEND
9878; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
9879; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9880; GFX90A-NEXT:    ;;#ASMSTART
9881; GFX90A-NEXT:    ; use s[8:9]
9882; GFX90A-NEXT:    ;;#ASMEND
9883; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9884;
9885; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_1_1:
9886; GFX940:       ; %bb.0:
9887; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9888; GFX940-NEXT:    ;;#ASMSTART
9889; GFX940-NEXT:    ; def s[0:1]
9890; GFX940-NEXT:    ;;#ASMEND
9891; GFX940-NEXT:    ;;#ASMSTART
9892; GFX940-NEXT:    ; def s[2:3]
9893; GFX940-NEXT:    ;;#ASMEND
9894; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s2
9895; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
9896; GFX940-NEXT:    ;;#ASMSTART
9897; GFX940-NEXT:    ; use s[8:9]
9898; GFX940-NEXT:    ;;#ASMEND
9899; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
9900  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9901  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9902  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9903  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9904  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
9905  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9906  ret void
9907}
9908
; SGPR shuffle test, mask <5, 4, 1, 1>: result lanes 0 and 1 are lanes 2 and 1
; of the second source (%extract31), and lanes 2-3 both repeat lane 1 of the
; first source (%extract3).
; The checks expect s8 from s_pack_lh_b32_b16 over the second def's upper and
; lower registers, and s9 from s_pack_hh_b32_b16 over the first def. The
; gfx900 and gfx90a sequences are identical; gfx940 differs only in the
; registers given to the two asm defs.
9909define void @s_shuffle_v4i16_v3i16__5_4_1_1() {
9910; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_1_1:
9911; GFX900:       ; %bb.0:
9912; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9913; GFX900-NEXT:    ;;#ASMSTART
9914; GFX900-NEXT:    ; def s[4:5]
9915; GFX900-NEXT:    ;;#ASMEND
9916; GFX900-NEXT:    ;;#ASMSTART
9917; GFX900-NEXT:    ; def s[6:7]
9918; GFX900-NEXT:    ;;#ASMEND
9919; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
9920; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9921; GFX900-NEXT:    ;;#ASMSTART
9922; GFX900-NEXT:    ; use s[8:9]
9923; GFX900-NEXT:    ;;#ASMEND
9924; GFX900-NEXT:    s_setpc_b64 s[30:31]
9925;
9926; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_1_1:
9927; GFX90A:       ; %bb.0:
9928; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9929; GFX90A-NEXT:    ;;#ASMSTART
9930; GFX90A-NEXT:    ; def s[4:5]
9931; GFX90A-NEXT:    ;;#ASMEND
9932; GFX90A-NEXT:    ;;#ASMSTART
9933; GFX90A-NEXT:    ; def s[6:7]
9934; GFX90A-NEXT:    ;;#ASMEND
9935; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
9936; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9937; GFX90A-NEXT:    ;;#ASMSTART
9938; GFX90A-NEXT:    ; use s[8:9]
9939; GFX90A-NEXT:    ;;#ASMEND
9940; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9941;
9942; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_1_1:
9943; GFX940:       ; %bb.0:
9944; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9945; GFX940-NEXT:    ;;#ASMSTART
9946; GFX940-NEXT:    ; def s[0:1]
9947; GFX940-NEXT:    ;;#ASMEND
9948; GFX940-NEXT:    ;;#ASMSTART
9949; GFX940-NEXT:    ; def s[2:3]
9950; GFX940-NEXT:    ;;#ASMEND
9951; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s2
9952; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
9953; GFX940-NEXT:    ;;#ASMSTART
9954; GFX940-NEXT:    ; use s[8:9]
9955; GFX940-NEXT:    ;;#ASMEND
9956; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
9957  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9958  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9959  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9960  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
9961  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
9962  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
9963  ret void
9964}
9965
; SGPR shuffle test, mask <5, 5, 1, 1>: result lanes 0-1 both repeat lane 2 of
; the second source (%extract31) and lanes 2-3 both repeat lane 1 of the first
; source (%extract3).
; The checks expect s9 from s_pack_hh_b32_b16 over the first def's lower
; register, followed by s8 from s_pack_ll_b32_b16 over the second def's upper
; register. The gfx900 and gfx90a sequences are identical; gfx940 differs only
; in the registers given to the two asm defs.
9966define void @s_shuffle_v4i16_v3i16__5_5_1_1() {
9967; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_1:
9968; GFX900:       ; %bb.0:
9969; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9970; GFX900-NEXT:    ;;#ASMSTART
9971; GFX900-NEXT:    ; def s[4:5]
9972; GFX900-NEXT:    ;;#ASMEND
9973; GFX900-NEXT:    ;;#ASMSTART
9974; GFX900-NEXT:    ; def s[6:7]
9975; GFX900-NEXT:    ;;#ASMEND
9976; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9977; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9978; GFX900-NEXT:    ;;#ASMSTART
9979; GFX900-NEXT:    ; use s[8:9]
9980; GFX900-NEXT:    ;;#ASMEND
9981; GFX900-NEXT:    s_setpc_b64 s[30:31]
9982;
9983; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_1:
9984; GFX90A:       ; %bb.0:
9985; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9986; GFX90A-NEXT:    ;;#ASMSTART
9987; GFX90A-NEXT:    ; def s[4:5]
9988; GFX90A-NEXT:    ;;#ASMEND
9989; GFX90A-NEXT:    ;;#ASMSTART
9990; GFX90A-NEXT:    ; def s[6:7]
9991; GFX90A-NEXT:    ;;#ASMEND
9992; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
9993; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
9994; GFX90A-NEXT:    ;;#ASMSTART
9995; GFX90A-NEXT:    ; use s[8:9]
9996; GFX90A-NEXT:    ;;#ASMEND
9997; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9998;
9999; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_1:
10000; GFX940:       ; %bb.0:
10001; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10002; GFX940-NEXT:    ;;#ASMSTART
10003; GFX940-NEXT:    ; def s[0:1]
10004; GFX940-NEXT:    ;;#ASMEND
10005; GFX940-NEXT:    ;;#ASMSTART
10006; GFX940-NEXT:    ; def s[2:3]
10007; GFX940-NEXT:    ;;#ASMEND
10008; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
10009; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
10010; GFX940-NEXT:    ;;#ASMSTART
10011; GFX940-NEXT:    ; use s[8:9]
10012; GFX940-NEXT:    ;;#ASMEND
10013; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
10014  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10015  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10016  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10017  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10018  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
10019  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10020  ret void
10021}
10022
; SGPR shuffle test, mask <5, 5, poison, 1>: result lanes 0-1 both repeat lane
; 2 of the second source (%extract31), lane 2 is left unspecified, and lane 3
; is lane 1 of the first source (%extract3).
; With lane 2 unconstrained, the checks expect s9 to be a plain s_mov_b32 copy
; of the first def's lower register, while s8 comes from s_pack_ll_b32_b16.
; The gfx900 and gfx90a sequences are identical; gfx940 differs only in the
; registers given to the two asm defs.
10023define void @s_shuffle_v4i16_v3i16__5_5_u_1() {
10024; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_1:
10025; GFX900:       ; %bb.0:
10026; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10027; GFX900-NEXT:    ;;#ASMSTART
10028; GFX900-NEXT:    ; def s[4:5]
10029; GFX900-NEXT:    ;;#ASMEND
10030; GFX900-NEXT:    ;;#ASMSTART
10031; GFX900-NEXT:    ; def s[6:7]
10032; GFX900-NEXT:    ;;#ASMEND
10033; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10034; GFX900-NEXT:    s_mov_b32 s9, s4
10035; GFX900-NEXT:    ;;#ASMSTART
10036; GFX900-NEXT:    ; use s[8:9]
10037; GFX900-NEXT:    ;;#ASMEND
10038; GFX900-NEXT:    s_setpc_b64 s[30:31]
10039;
10040; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_1:
10041; GFX90A:       ; %bb.0:
10042; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10043; GFX90A-NEXT:    ;;#ASMSTART
10044; GFX90A-NEXT:    ; def s[4:5]
10045; GFX90A-NEXT:    ;;#ASMEND
10046; GFX90A-NEXT:    ;;#ASMSTART
10047; GFX90A-NEXT:    ; def s[6:7]
10048; GFX90A-NEXT:    ;;#ASMEND
10049; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10050; GFX90A-NEXT:    s_mov_b32 s9, s4
10051; GFX90A-NEXT:    ;;#ASMSTART
10052; GFX90A-NEXT:    ; use s[8:9]
10053; GFX90A-NEXT:    ;;#ASMEND
10054; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10055;
10056; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_1:
10057; GFX940:       ; %bb.0:
10058; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10059; GFX940-NEXT:    ;;#ASMSTART
10060; GFX940-NEXT:    ; def s[0:1]
10061; GFX940-NEXT:    ;;#ASMEND
10062; GFX940-NEXT:    ;;#ASMSTART
10063; GFX940-NEXT:    ; def s[2:3]
10064; GFX940-NEXT:    ;;#ASMEND
10065; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
10066; GFX940-NEXT:    s_mov_b32 s9, s0
10067; GFX940-NEXT:    ;;#ASMSTART
10068; GFX940-NEXT:    ; use s[8:9]
10069; GFX940-NEXT:    ;;#ASMEND
10070; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
10071  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10072  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10073  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10074  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10075  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
10076  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10077  ret void
10078}
10079
; SGPR shuffle test, mask <5, 5, 0, 1>: result lanes 0-1 both repeat lane 2 of
; the second source (%extract31), and lanes 2-3 are lanes 0-1 of the first
; source (%extract3) in their original order.
; Because lanes 2-3 keep the source order, the checks expect s9 to be a plain
; s_mov_b32 copy of the first def's lower register; s8 comes from
; s_pack_ll_b32_b16. The gfx900 and gfx90a sequences are identical; gfx940
; differs only in the registers given to the two asm defs.
10080define void @s_shuffle_v4i16_v3i16__5_5_0_1() {
10081; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_1:
10082; GFX900:       ; %bb.0:
10083; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10084; GFX900-NEXT:    ;;#ASMSTART
10085; GFX900-NEXT:    ; def s[4:5]
10086; GFX900-NEXT:    ;;#ASMEND
10087; GFX900-NEXT:    ;;#ASMSTART
10088; GFX900-NEXT:    ; def s[6:7]
10089; GFX900-NEXT:    ;;#ASMEND
10090; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10091; GFX900-NEXT:    s_mov_b32 s9, s4
10092; GFX900-NEXT:    ;;#ASMSTART
10093; GFX900-NEXT:    ; use s[8:9]
10094; GFX900-NEXT:    ;;#ASMEND
10095; GFX900-NEXT:    s_setpc_b64 s[30:31]
10096;
10097; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_1:
10098; GFX90A:       ; %bb.0:
10099; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10100; GFX90A-NEXT:    ;;#ASMSTART
10101; GFX90A-NEXT:    ; def s[4:5]
10102; GFX90A-NEXT:    ;;#ASMEND
10103; GFX90A-NEXT:    ;;#ASMSTART
10104; GFX90A-NEXT:    ; def s[6:7]
10105; GFX90A-NEXT:    ;;#ASMEND
10106; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10107; GFX90A-NEXT:    s_mov_b32 s9, s4
10108; GFX90A-NEXT:    ;;#ASMSTART
10109; GFX90A-NEXT:    ; use s[8:9]
10110; GFX90A-NEXT:    ;;#ASMEND
10111; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10112;
10113; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_1:
10114; GFX940:       ; %bb.0:
10115; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10116; GFX940-NEXT:    ;;#ASMSTART
10117; GFX940-NEXT:    ; def s[0:1]
10118; GFX940-NEXT:    ;;#ASMEND
10119; GFX940-NEXT:    ;;#ASMSTART
10120; GFX940-NEXT:    ; def s[2:3]
10121; GFX940-NEXT:    ;;#ASMEND
10122; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
10123; GFX940-NEXT:    s_mov_b32 s9, s0
10124; GFX940-NEXT:    ;;#ASMSTART
10125; GFX940-NEXT:    ; use s[8:9]
10126; GFX940-NEXT:    ;;#ASMEND
10127; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
10128  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10129  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10130  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10131  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10132  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
10133  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10134  ret void
10135}
10136
; SGPR shuffle test, mask <5, 5, 2, 1>: result lanes 0-1 both repeat lane 2 of
; the second source (%extract31), and lanes 2 and 3 are lanes 2 and 1 of the
; first source (%extract3).
; The checks expect s9 from s_pack_lh_b32_b16 over the first def's upper and
; lower registers, followed by s8 from s_pack_ll_b32_b16 over the second def's
; upper register. The gfx900 and gfx90a sequences are identical; gfx940
; differs only in the registers given to the two asm defs.
10137define void @s_shuffle_v4i16_v3i16__5_5_2_1() {
10138; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_1:
10139; GFX900:       ; %bb.0:
10140; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10141; GFX900-NEXT:    ;;#ASMSTART
10142; GFX900-NEXT:    ; def s[4:5]
10143; GFX900-NEXT:    ;;#ASMEND
10144; GFX900-NEXT:    ;;#ASMSTART
10145; GFX900-NEXT:    ; def s[6:7]
10146; GFX900-NEXT:    ;;#ASMEND
10147; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s5, s4
10148; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10149; GFX900-NEXT:    ;;#ASMSTART
10150; GFX900-NEXT:    ; use s[8:9]
10151; GFX900-NEXT:    ;;#ASMEND
10152; GFX900-NEXT:    s_setpc_b64 s[30:31]
10153;
10154; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_1:
10155; GFX90A:       ; %bb.0:
10156; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10157; GFX90A-NEXT:    ;;#ASMSTART
10158; GFX90A-NEXT:    ; def s[4:5]
10159; GFX90A-NEXT:    ;;#ASMEND
10160; GFX90A-NEXT:    ;;#ASMSTART
10161; GFX90A-NEXT:    ; def s[6:7]
10162; GFX90A-NEXT:    ;;#ASMEND
10163; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s5, s4
10164; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10165; GFX90A-NEXT:    ;;#ASMSTART
10166; GFX90A-NEXT:    ; use s[8:9]
10167; GFX90A-NEXT:    ;;#ASMEND
10168; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10169;
10170; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_1:
10171; GFX940:       ; %bb.0:
10172; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10173; GFX940-NEXT:    ;;#ASMSTART
10174; GFX940-NEXT:    ; def s[0:1]
10175; GFX940-NEXT:    ;;#ASMEND
10176; GFX940-NEXT:    ;;#ASMSTART
10177; GFX940-NEXT:    ; def s[2:3]
10178; GFX940-NEXT:    ;;#ASMEND
10179; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s1, s0
10180; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
10181; GFX940-NEXT:    ;;#ASMSTART
10182; GFX940-NEXT:    ; use s[8:9]
10183; GFX940-NEXT:    ;;#ASMEND
10184; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
10185  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10186  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10187  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10188  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10189  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
10190  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10191  ret void
10192}
10193
; SGPR shuffle test, mask <5, 5, 3, 1>: result lanes 0-1 both repeat lane 2 of
; the second source (%extract31), lane 2 is lane 0 of the second source, and
; lane 3 is lane 1 of the first source (%extract3).
; The checks expect s9 from s_pack_lh_b32_b16 (second def's lower register
; with the first def's lower register), followed by s8 from s_pack_ll_b32_b16
; over the second def's upper register. The gfx900 and gfx90a sequences are
; identical; gfx940 differs only in the registers given to the two asm defs.
10194define void @s_shuffle_v4i16_v3i16__5_5_3_1() {
10195; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_1:
10196; GFX900:       ; %bb.0:
10197; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10198; GFX900-NEXT:    ;;#ASMSTART
10199; GFX900-NEXT:    ; def s[4:5]
10200; GFX900-NEXT:    ;;#ASMEND
10201; GFX900-NEXT:    ;;#ASMSTART
10202; GFX900-NEXT:    ; def s[6:7]
10203; GFX900-NEXT:    ;;#ASMEND
10204; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s6, s4
10205; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10206; GFX900-NEXT:    ;;#ASMSTART
10207; GFX900-NEXT:    ; use s[8:9]
10208; GFX900-NEXT:    ;;#ASMEND
10209; GFX900-NEXT:    s_setpc_b64 s[30:31]
10210;
10211; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_1:
10212; GFX90A:       ; %bb.0:
10213; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10214; GFX90A-NEXT:    ;;#ASMSTART
10215; GFX90A-NEXT:    ; def s[4:5]
10216; GFX90A-NEXT:    ;;#ASMEND
10217; GFX90A-NEXT:    ;;#ASMSTART
10218; GFX90A-NEXT:    ; def s[6:7]
10219; GFX90A-NEXT:    ;;#ASMEND
10220; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s6, s4
10221; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10222; GFX90A-NEXT:    ;;#ASMSTART
10223; GFX90A-NEXT:    ; use s[8:9]
10224; GFX90A-NEXT:    ;;#ASMEND
10225; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10226;
10227; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_1:
10228; GFX940:       ; %bb.0:
10229; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10230; GFX940-NEXT:    ;;#ASMSTART
10231; GFX940-NEXT:    ; def s[0:1]
10232; GFX940-NEXT:    ;;#ASMEND
10233; GFX940-NEXT:    ;;#ASMSTART
10234; GFX940-NEXT:    ; def s[2:3]
10235; GFX940-NEXT:    ;;#ASMEND
10236; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s2, s0
10237; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
10238; GFX940-NEXT:    ;;#ASMSTART
10239; GFX940-NEXT:    ; use s[8:9]
10240; GFX940-NEXT:    ;;#ASMEND
10241; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
10242  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10243  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10244  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10245  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10246  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
10247  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10248  ret void
10249}
10250
; SGPR shuffle test, mask <5, 5, 4, 1>: result lanes 0-1 both repeat lane 2 of
; the second source (%extract31), lane 2 is lane 1 of the second source, and
; lane 3 is lane 1 of the first source (%extract3).
; The checks expect s9 from s_pack_hh_b32_b16 (second def's lower register
; with the first def's lower register), followed by s8 from s_pack_ll_b32_b16
; over the second def's upper register. The gfx900 and gfx90a sequences are
; identical; gfx940 differs only in the registers given to the two asm defs.
10251define void @s_shuffle_v4i16_v3i16__5_5_4_1() {
10252; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_1:
10253; GFX900:       ; %bb.0:
10254; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10255; GFX900-NEXT:    ;;#ASMSTART
10256; GFX900-NEXT:    ; def s[4:5]
10257; GFX900-NEXT:    ;;#ASMEND
10258; GFX900-NEXT:    ;;#ASMSTART
10259; GFX900-NEXT:    ; def s[6:7]
10260; GFX900-NEXT:    ;;#ASMEND
10261; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s4
10262; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10263; GFX900-NEXT:    ;;#ASMSTART
10264; GFX900-NEXT:    ; use s[8:9]
10265; GFX900-NEXT:    ;;#ASMEND
10266; GFX900-NEXT:    s_setpc_b64 s[30:31]
10267;
10268; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_1:
10269; GFX90A:       ; %bb.0:
10270; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10271; GFX90A-NEXT:    ;;#ASMSTART
10272; GFX90A-NEXT:    ; def s[4:5]
10273; GFX90A-NEXT:    ;;#ASMEND
10274; GFX90A-NEXT:    ;;#ASMSTART
10275; GFX90A-NEXT:    ; def s[6:7]
10276; GFX90A-NEXT:    ;;#ASMEND
10277; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s4
10278; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10279; GFX90A-NEXT:    ;;#ASMSTART
10280; GFX90A-NEXT:    ; use s[8:9]
10281; GFX90A-NEXT:    ;;#ASMEND
10282; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10283;
10284; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_1:
10285; GFX940:       ; %bb.0:
10286; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10287; GFX940-NEXT:    ;;#ASMSTART
10288; GFX940-NEXT:    ; def s[0:1]
10289; GFX940-NEXT:    ;;#ASMEND
10290; GFX940-NEXT:    ;;#ASMSTART
10291; GFX940-NEXT:    ; def s[2:3]
10292; GFX940-NEXT:    ;;#ASMEND
10293; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s0
10294; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
10295; GFX940-NEXT:    ;;#ASMSTART
10296; GFX940-NEXT:    ; use s[8:9]
10297; GFX940-NEXT:    ;;#ASMEND
10298; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of each asm-defined <4 x i16> value.
10299  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10300  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10301  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10302  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10303  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
10304  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10305  ret void
10306}
10307
; SGPR shuffle test, mask <poison, 2, 2, 2> with a poison second operand:
; result lane 0 is left unspecified and lanes 1-3 all repeat lane 2 of
; %extract3.
; The checks expect s9 from s_pack_ll_b32_b16 over the asm def's upper
; register, and s8 from an s_lshl_b32 by 16 of that same register (no pack is
; used for the dword whose lower lane is unconstrained). The gfx900 and gfx90a
; sequences are identical; gfx940 differs only in the register pair given to
; the asm def (s[0:1] instead of s[4:5]).
10308define void @s_shuffle_v4i16_v3i16__u_2_2_2() {
10309; GFX900-LABEL: s_shuffle_v4i16_v3i16__u_2_2_2:
10310; GFX900:       ; %bb.0:
10311; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10312; GFX900-NEXT:    ;;#ASMSTART
10313; GFX900-NEXT:    ; def s[4:5]
10314; GFX900-NEXT:    ;;#ASMEND
10315; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10316; GFX900-NEXT:    s_lshl_b32 s8, s5, 16
10317; GFX900-NEXT:    ;;#ASMSTART
10318; GFX900-NEXT:    ; use s[8:9]
10319; GFX900-NEXT:    ;;#ASMEND
10320; GFX900-NEXT:    s_setpc_b64 s[30:31]
10321;
10322; GFX90A-LABEL: s_shuffle_v4i16_v3i16__u_2_2_2:
10323; GFX90A:       ; %bb.0:
10324; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10325; GFX90A-NEXT:    ;;#ASMSTART
10326; GFX90A-NEXT:    ; def s[4:5]
10327; GFX90A-NEXT:    ;;#ASMEND
10328; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10329; GFX90A-NEXT:    s_lshl_b32 s8, s5, 16
10330; GFX90A-NEXT:    ;;#ASMSTART
10331; GFX90A-NEXT:    ; use s[8:9]
10332; GFX90A-NEXT:    ;;#ASMEND
10333; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10334;
10335; GFX940-LABEL: s_shuffle_v4i16_v3i16__u_2_2_2:
10336; GFX940:       ; %bb.0:
10337; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10338; GFX940-NEXT:    ;;#ASMSTART
10339; GFX940-NEXT:    ; def s[0:1]
10340; GFX940-NEXT:    ;;#ASMEND
10341; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10342; GFX940-NEXT:    s_lshl_b32 s8, s1, 16
10343; GFX940-NEXT:    ;;#ASMSTART
10344; GFX940-NEXT:    ; use s[8:9]
10345; GFX940-NEXT:    ;;#ASMEND
10346; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of the asm-defined <4 x i16> value.
10347  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10348  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10349  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
10350  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10351  ret void
10352}
10353
; SGPR shuffle test, mask <0, 2, 2, 2> with a poison second operand: result
; lane 0 is lane 0 of %extract3 and lanes 1-3 all repeat its lane 2.
; The checks expect two s_pack_ll_b32_b16 instructions: s8 from the asm def's
; lower and upper registers, s9 from the upper register twice. The gfx900 and
; gfx90a sequences are identical; gfx940 differs only in the register pair
; given to the asm def (s[0:1] instead of s[4:5]).
10354define void @s_shuffle_v4i16_v3i16__0_2_2_2() {
10355; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_2_2_2:
10356; GFX900:       ; %bb.0:
10357; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10358; GFX900-NEXT:    ;;#ASMSTART
10359; GFX900-NEXT:    ; def s[4:5]
10360; GFX900-NEXT:    ;;#ASMEND
10361; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
10362; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10363; GFX900-NEXT:    ;;#ASMSTART
10364; GFX900-NEXT:    ; use s[8:9]
10365; GFX900-NEXT:    ;;#ASMEND
10366; GFX900-NEXT:    s_setpc_b64 s[30:31]
10367;
10368; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_2_2_2:
10369; GFX90A:       ; %bb.0:
10370; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10371; GFX90A-NEXT:    ;;#ASMSTART
10372; GFX90A-NEXT:    ; def s[4:5]
10373; GFX90A-NEXT:    ;;#ASMEND
10374; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
10375; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10376; GFX90A-NEXT:    ;;#ASMSTART
10377; GFX90A-NEXT:    ; use s[8:9]
10378; GFX90A-NEXT:    ;;#ASMEND
10379; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10380;
10381; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_2_2_2:
10382; GFX940:       ; %bb.0:
10383; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10384; GFX940-NEXT:    ;;#ASMSTART
10385; GFX940-NEXT:    ; def s[0:1]
10386; GFX940-NEXT:    ;;#ASMEND
10387; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
10388; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10389; GFX940-NEXT:    ;;#ASMSTART
10390; GFX940-NEXT:    ; use s[8:9]
10391; GFX940-NEXT:    ;;#ASMEND
10392; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Keep only lanes 0-2 of the asm-defined <4 x i16> value.
10393  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10394  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10395  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
  ; The shuffled value is pinned to s[8:9] by the asm input constraint.
10396  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10397  ret void
10398}
10399
; Scalar-register case, shuffle mask <1, 2, 2, 2>.
; %vec0 comes from inline asm with an "=s" constraint; its lanes 0-2 form the
; <3 x i16> source. Result lane 0 takes source lane 1 and lanes 1-3 replicate
; source lane 2. The shuffled vector is consumed by inline asm pinned to s[8:9].
10400define void @s_shuffle_v4i16_v3i16__1_2_2_2() {
10401; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_2_2_2:
10402; GFX900:       ; %bb.0:
10403; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10404; GFX900-NEXT:    ;;#ASMSTART
10405; GFX900-NEXT:    ; def s[4:5]
10406; GFX900-NEXT:    ;;#ASMEND
10407; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
10408; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
10409; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10410; GFX900-NEXT:    ;;#ASMSTART
10411; GFX900-NEXT:    ; use s[8:9]
10412; GFX900-NEXT:    ;;#ASMEND
10413; GFX900-NEXT:    s_setpc_b64 s[30:31]
10414;
10415; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_2_2_2:
10416; GFX90A:       ; %bb.0:
10417; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10418; GFX90A-NEXT:    ;;#ASMSTART
10419; GFX90A-NEXT:    ; def s[4:5]
10420; GFX90A-NEXT:    ;;#ASMEND
10421; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
10422; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
10423; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10424; GFX90A-NEXT:    ;;#ASMSTART
10425; GFX90A-NEXT:    ; use s[8:9]
10426; GFX90A-NEXT:    ;;#ASMEND
10427; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10428;
10429; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_2_2_2:
10430; GFX940:       ; %bb.0:
10431; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10432; GFX940-NEXT:    ;;#ASMSTART
10433; GFX940-NEXT:    ; def s[0:1]
10434; GFX940-NEXT:    ;;#ASMEND
10435; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
10436; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
10437; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10438; GFX940-NEXT:    ;;#ASMSTART
10439; GFX940-NEXT:    ; use s[8:9]
10440; GFX940-NEXT:    ;;#ASMEND
10441; GFX940-NEXT:    s_setpc_b64 s[30:31]
10442  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10443  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10444  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
10445  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10446  ret void
10447}
10448
; Scalar-register case, shuffle mask <2, 2, 2, 2> (splat of source lane 2).
; %vec0 comes from inline asm with an "=s" constraint; its lanes 0-2 form the
; <3 x i16> source. All four result lanes replicate source lane 2. The shuffled
; vector is consumed by inline asm pinned to s[8:9].
10449define void @s_shuffle_v4i16_v3i16__2_2_2_2() {
10450; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_2_2_2:
10451; GFX900:       ; %bb.0:
10452; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10453; GFX900-NEXT:    ;;#ASMSTART
10454; GFX900-NEXT:    ; def s[4:5]
10455; GFX900-NEXT:    ;;#ASMEND
10456; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
10457; GFX900-NEXT:    s_mov_b32 s9, s8
10458; GFX900-NEXT:    ;;#ASMSTART
10459; GFX900-NEXT:    ; use s[8:9]
10460; GFX900-NEXT:    ;;#ASMEND
10461; GFX900-NEXT:    s_setpc_b64 s[30:31]
10462;
10463; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_2_2_2:
10464; GFX90A:       ; %bb.0:
10465; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10466; GFX90A-NEXT:    ;;#ASMSTART
10467; GFX90A-NEXT:    ; def s[4:5]
10468; GFX90A-NEXT:    ;;#ASMEND
10469; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
10470; GFX90A-NEXT:    s_mov_b32 s9, s8
10471; GFX90A-NEXT:    ;;#ASMSTART
10472; GFX90A-NEXT:    ; use s[8:9]
10473; GFX90A-NEXT:    ;;#ASMEND
10474; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10475;
10476; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_2_2_2:
10477; GFX940:       ; %bb.0:
10478; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10479; GFX940-NEXT:    ;;#ASMSTART
10480; GFX940-NEXT:    ; def s[0:1]
10481; GFX940-NEXT:    ;;#ASMEND
10482; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
10483; GFX940-NEXT:    s_mov_b32 s9, s8
10484; GFX940-NEXT:    ;;#ASMSTART
10485; GFX940-NEXT:    ; use s[8:9]
10486; GFX940-NEXT:    ;;#ASMEND
10487; GFX940-NEXT:    s_setpc_b64 s[30:31]
10488  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10489  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10490  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
10491  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10492  ret void
10493}
10494
; Scalar-register case, shuffle mask <3, 2, 2, 2>.
; %vec0 comes from inline asm with an "=s" constraint; its lanes 0-2 form the
; <3 x i16> source. Mask index 3 addresses lane 0 of the second shuffle operand,
; which is poison in this test; lanes 1-3 replicate source lane 2. The shuffled
; vector is consumed by inline asm pinned to s[8:9].
10495define void @s_shuffle_v4i16_v3i16__3_2_2_2() {
10496; GFX900-LABEL: s_shuffle_v4i16_v3i16__3_2_2_2:
10497; GFX900:       ; %bb.0:
10498; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10499; GFX900-NEXT:    ;;#ASMSTART
10500; GFX900-NEXT:    ; def s[4:5]
10501; GFX900-NEXT:    ;;#ASMEND
10502; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10503; GFX900-NEXT:    s_lshl_b32 s8, s5, 16
10504; GFX900-NEXT:    ;;#ASMSTART
10505; GFX900-NEXT:    ; use s[8:9]
10506; GFX900-NEXT:    ;;#ASMEND
10507; GFX900-NEXT:    s_setpc_b64 s[30:31]
10508;
10509; GFX90A-LABEL: s_shuffle_v4i16_v3i16__3_2_2_2:
10510; GFX90A:       ; %bb.0:
10511; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10512; GFX90A-NEXT:    ;;#ASMSTART
10513; GFX90A-NEXT:    ; def s[4:5]
10514; GFX90A-NEXT:    ;;#ASMEND
10515; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10516; GFX90A-NEXT:    s_lshl_b32 s8, s5, 16
10517; GFX90A-NEXT:    ;;#ASMSTART
10518; GFX90A-NEXT:    ; use s[8:9]
10519; GFX90A-NEXT:    ;;#ASMEND
10520; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10521;
10522; GFX940-LABEL: s_shuffle_v4i16_v3i16__3_2_2_2:
10523; GFX940:       ; %bb.0:
10524; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10525; GFX940-NEXT:    ;;#ASMSTART
10526; GFX940-NEXT:    ; def s[0:1]
10527; GFX940-NEXT:    ;;#ASMEND
10528; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10529; GFX940-NEXT:    s_lshl_b32 s8, s1, 16
10530; GFX940-NEXT:    ;;#ASMSTART
10531; GFX940-NEXT:    ; use s[8:9]
10532; GFX940-NEXT:    ;;#ASMEND
10533; GFX940-NEXT:    s_setpc_b64 s[30:31]
10534  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10535  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10536  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
10537  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10538  ret void
10539}
10540
; Scalar-register case with two sources, shuffle mask <4, 2, 2, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lane 0 is lane 1 of %extract31 and lanes 1-3 replicate
; lane 2 of %extract3. The result is consumed by inline asm pinned to s[8:9].
10541define void @s_shuffle_v4i16_v3i16__4_2_2_2() {
10542; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_2_2_2:
10543; GFX900:       ; %bb.0:
10544; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10545; GFX900-NEXT:    ;;#ASMSTART
10546; GFX900-NEXT:    ; def s[4:5]
10547; GFX900-NEXT:    ;;#ASMEND
10548; GFX900-NEXT:    ;;#ASMSTART
10549; GFX900-NEXT:    ; def s[6:7]
10550; GFX900-NEXT:    ;;#ASMEND
10551; GFX900-NEXT:    s_lshr_b32 s4, s6, 16
10552; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
10553; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10554; GFX900-NEXT:    ;;#ASMSTART
10555; GFX900-NEXT:    ; use s[8:9]
10556; GFX900-NEXT:    ;;#ASMEND
10557; GFX900-NEXT:    s_setpc_b64 s[30:31]
10558;
10559; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_2_2_2:
10560; GFX90A:       ; %bb.0:
10561; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10562; GFX90A-NEXT:    ;;#ASMSTART
10563; GFX90A-NEXT:    ; def s[4:5]
10564; GFX90A-NEXT:    ;;#ASMEND
10565; GFX90A-NEXT:    ;;#ASMSTART
10566; GFX90A-NEXT:    ; def s[6:7]
10567; GFX90A-NEXT:    ;;#ASMEND
10568; GFX90A-NEXT:    s_lshr_b32 s4, s6, 16
10569; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
10570; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10571; GFX90A-NEXT:    ;;#ASMSTART
10572; GFX90A-NEXT:    ; use s[8:9]
10573; GFX90A-NEXT:    ;;#ASMEND
10574; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10575;
10576; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_2_2_2:
10577; GFX940:       ; %bb.0:
10578; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10579; GFX940-NEXT:    ;;#ASMSTART
10580; GFX940-NEXT:    ; def s[0:1]
10581; GFX940-NEXT:    ;;#ASMEND
10582; GFX940-NEXT:    ;;#ASMSTART
10583; GFX940-NEXT:    ; def s[2:3]
10584; GFX940-NEXT:    ;;#ASMEND
10585; GFX940-NEXT:    s_lshr_b32 s0, s2, 16
10586; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
10587; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10588; GFX940-NEXT:    ;;#ASMSTART
10589; GFX940-NEXT:    ; use s[8:9]
10590; GFX940-NEXT:    ;;#ASMEND
10591; GFX940-NEXT:    s_setpc_b64 s[30:31]
10592  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10593  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10594  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10595  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10596  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
10597  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10598  ret void
10599}
10600
; Scalar-register case with two sources, shuffle mask <5, 2, 2, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lane 0 is lane 2 of %extract31 and lanes 1-3 replicate
; lane 2 of %extract3. The result is consumed by inline asm pinned to s[8:9].
10601define void @s_shuffle_v4i16_v3i16__5_2_2_2() {
10602; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_2_2:
10603; GFX900:       ; %bb.0:
10604; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10605; GFX900-NEXT:    ;;#ASMSTART
10606; GFX900-NEXT:    ; def s[4:5]
10607; GFX900-NEXT:    ;;#ASMEND
10608; GFX900-NEXT:    ;;#ASMSTART
10609; GFX900-NEXT:    ; def s[6:7]
10610; GFX900-NEXT:    ;;#ASMEND
10611; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
10612; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10613; GFX900-NEXT:    ;;#ASMSTART
10614; GFX900-NEXT:    ; use s[8:9]
10615; GFX900-NEXT:    ;;#ASMEND
10616; GFX900-NEXT:    s_setpc_b64 s[30:31]
10617;
10618; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_2_2:
10619; GFX90A:       ; %bb.0:
10620; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10621; GFX90A-NEXT:    ;;#ASMSTART
10622; GFX90A-NEXT:    ; def s[4:5]
10623; GFX90A-NEXT:    ;;#ASMEND
10624; GFX90A-NEXT:    ;;#ASMSTART
10625; GFX90A-NEXT:    ; def s[6:7]
10626; GFX90A-NEXT:    ;;#ASMEND
10627; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
10628; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10629; GFX90A-NEXT:    ;;#ASMSTART
10630; GFX90A-NEXT:    ; use s[8:9]
10631; GFX90A-NEXT:    ;;#ASMEND
10632; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10633;
10634; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_2_2:
10635; GFX940:       ; %bb.0:
10636; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10637; GFX940-NEXT:    ;;#ASMSTART
10638; GFX940-NEXT:    ; def s[0:1]
10639; GFX940-NEXT:    ;;#ASMEND
10640; GFX940-NEXT:    ;;#ASMSTART
10641; GFX940-NEXT:    ; def s[2:3]
10642; GFX940-NEXT:    ;;#ASMEND
10643; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
10644; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10645; GFX940-NEXT:    ;;#ASMSTART
10646; GFX940-NEXT:    ; use s[8:9]
10647; GFX940-NEXT:    ;;#ASMEND
10648; GFX940-NEXT:    s_setpc_b64 s[30:31]
10649  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10650  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10651  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10652  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10653  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
10654  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10655  ret void
10656}
10657
; Scalar-register case with two sources, shuffle mask <5, poison, 2, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lane 0 is lane 2 of %extract31, lane 1 is poison, and
; lanes 2-3 replicate lane 2 of %extract3. The result is consumed by inline asm
; pinned to s[8:9].
10658define void @s_shuffle_v4i16_v3i16__5_u_2_2() {
10659; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_2_2:
10660; GFX900:       ; %bb.0:
10661; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10662; GFX900-NEXT:    ;;#ASMSTART
10663; GFX900-NEXT:    ; def s[4:5]
10664; GFX900-NEXT:    ;;#ASMEND
10665; GFX900-NEXT:    ;;#ASMSTART
10666; GFX900-NEXT:    ; def s[6:7]
10667; GFX900-NEXT:    ;;#ASMEND
10668; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10669; GFX900-NEXT:    s_mov_b32 s8, s7
10670; GFX900-NEXT:    ;;#ASMSTART
10671; GFX900-NEXT:    ; use s[8:9]
10672; GFX900-NEXT:    ;;#ASMEND
10673; GFX900-NEXT:    s_setpc_b64 s[30:31]
10674;
10675; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_2_2:
10676; GFX90A:       ; %bb.0:
10677; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10678; GFX90A-NEXT:    ;;#ASMSTART
10679; GFX90A-NEXT:    ; def s[4:5]
10680; GFX90A-NEXT:    ;;#ASMEND
10681; GFX90A-NEXT:    ;;#ASMSTART
10682; GFX90A-NEXT:    ; def s[6:7]
10683; GFX90A-NEXT:    ;;#ASMEND
10684; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10685; GFX90A-NEXT:    s_mov_b32 s8, s7
10686; GFX90A-NEXT:    ;;#ASMSTART
10687; GFX90A-NEXT:    ; use s[8:9]
10688; GFX90A-NEXT:    ;;#ASMEND
10689; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10690;
10691; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_2_2:
10692; GFX940:       ; %bb.0:
10693; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10694; GFX940-NEXT:    ;;#ASMSTART
10695; GFX940-NEXT:    ; def s[0:1]
10696; GFX940-NEXT:    ;;#ASMEND
10697; GFX940-NEXT:    ;;#ASMSTART
10698; GFX940-NEXT:    ; def s[2:3]
10699; GFX940-NEXT:    ;;#ASMEND
10700; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10701; GFX940-NEXT:    s_mov_b32 s8, s3
10702; GFX940-NEXT:    ;;#ASMSTART
10703; GFX940-NEXT:    ; use s[8:9]
10704; GFX940-NEXT:    ;;#ASMEND
10705; GFX940-NEXT:    s_setpc_b64 s[30:31]
10706  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10707  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10708  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10709  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10710  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
10711  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10712  ret void
10713}
10714
; Scalar-register case with two sources, shuffle mask <5, 0, 2, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lane 0 is lane 2 of %extract31, lane 1 is lane 0 of
; %extract3, and lanes 2-3 replicate lane 2 of %extract3. The result is consumed
; by inline asm pinned to s[8:9].
10715define void @s_shuffle_v4i16_v3i16__5_0_2_2() {
10716; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_2_2:
10717; GFX900:       ; %bb.0:
10718; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10719; GFX900-NEXT:    ;;#ASMSTART
10720; GFX900-NEXT:    ; def s[4:5]
10721; GFX900-NEXT:    ;;#ASMEND
10722; GFX900-NEXT:    ;;#ASMSTART
10723; GFX900-NEXT:    ; def s[6:7]
10724; GFX900-NEXT:    ;;#ASMEND
10725; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
10726; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10727; GFX900-NEXT:    ;;#ASMSTART
10728; GFX900-NEXT:    ; use s[8:9]
10729; GFX900-NEXT:    ;;#ASMEND
10730; GFX900-NEXT:    s_setpc_b64 s[30:31]
10731;
10732; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_2_2:
10733; GFX90A:       ; %bb.0:
10734; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10735; GFX90A-NEXT:    ;;#ASMSTART
10736; GFX90A-NEXT:    ; def s[4:5]
10737; GFX90A-NEXT:    ;;#ASMEND
10738; GFX90A-NEXT:    ;;#ASMSTART
10739; GFX90A-NEXT:    ; def s[6:7]
10740; GFX90A-NEXT:    ;;#ASMEND
10741; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
10742; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10743; GFX90A-NEXT:    ;;#ASMSTART
10744; GFX90A-NEXT:    ; use s[8:9]
10745; GFX90A-NEXT:    ;;#ASMEND
10746; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10747;
10748; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_2_2:
10749; GFX940:       ; %bb.0:
10750; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10751; GFX940-NEXT:    ;;#ASMSTART
10752; GFX940-NEXT:    ; def s[0:1]
10753; GFX940-NEXT:    ;;#ASMEND
10754; GFX940-NEXT:    ;;#ASMSTART
10755; GFX940-NEXT:    ; def s[2:3]
10756; GFX940-NEXT:    ;;#ASMEND
10757; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
10758; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10759; GFX940-NEXT:    ;;#ASMSTART
10760; GFX940-NEXT:    ; use s[8:9]
10761; GFX940-NEXT:    ;;#ASMEND
10762; GFX940-NEXT:    s_setpc_b64 s[30:31]
10763  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10764  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10765  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10766  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10767  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
10768  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10769  ret void
10770}
10771
; Scalar-register case with two sources, shuffle mask <5, 1, 2, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lane 0 is lane 2 of %extract31, lane 1 is lane 1 of
; %extract3, and lanes 2-3 replicate lane 2 of %extract3. The result is consumed
; by inline asm pinned to s[8:9].
10772define void @s_shuffle_v4i16_v3i16__5_1_2_2() {
10773; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_2_2:
10774; GFX900:       ; %bb.0:
10775; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10776; GFX900-NEXT:    ;;#ASMSTART
10777; GFX900-NEXT:    ; def s[4:5]
10778; GFX900-NEXT:    ;;#ASMEND
10779; GFX900-NEXT:    ;;#ASMSTART
10780; GFX900-NEXT:    ; def s[6:7]
10781; GFX900-NEXT:    ;;#ASMEND
10782; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
10783; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10784; GFX900-NEXT:    ;;#ASMSTART
10785; GFX900-NEXT:    ; use s[8:9]
10786; GFX900-NEXT:    ;;#ASMEND
10787; GFX900-NEXT:    s_setpc_b64 s[30:31]
10788;
10789; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_2_2:
10790; GFX90A:       ; %bb.0:
10791; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10792; GFX90A-NEXT:    ;;#ASMSTART
10793; GFX90A-NEXT:    ; def s[4:5]
10794; GFX90A-NEXT:    ;;#ASMEND
10795; GFX90A-NEXT:    ;;#ASMSTART
10796; GFX90A-NEXT:    ; def s[6:7]
10797; GFX90A-NEXT:    ;;#ASMEND
10798; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
10799; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10800; GFX90A-NEXT:    ;;#ASMSTART
10801; GFX90A-NEXT:    ; use s[8:9]
10802; GFX90A-NEXT:    ;;#ASMEND
10803; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10804;
10805; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_2_2:
10806; GFX940:       ; %bb.0:
10807; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10808; GFX940-NEXT:    ;;#ASMSTART
10809; GFX940-NEXT:    ; def s[0:1]
10810; GFX940-NEXT:    ;;#ASMEND
10811; GFX940-NEXT:    ;;#ASMSTART
10812; GFX940-NEXT:    ; def s[2:3]
10813; GFX940-NEXT:    ;;#ASMEND
10814; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
10815; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10816; GFX940-NEXT:    ;;#ASMSTART
10817; GFX940-NEXT:    ; use s[8:9]
10818; GFX940-NEXT:    ;;#ASMEND
10819; GFX940-NEXT:    s_setpc_b64 s[30:31]
10820  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10821  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10822  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10823  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10824  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
10825  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10826  ret void
10827}
10828
; Scalar-register case with two sources, shuffle mask <5, 3, 2, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lane 0 is lane 2 of %extract31, lane 1 is lane 0 of
; %extract31, and lanes 2-3 replicate lane 2 of %extract3. The result is
; consumed by inline asm pinned to s[8:9].
10829define void @s_shuffle_v4i16_v3i16__5_3_2_2() {
10830; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_2_2:
10831; GFX900:       ; %bb.0:
10832; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10833; GFX900-NEXT:    ;;#ASMSTART
10834; GFX900-NEXT:    ; def s[4:5]
10835; GFX900-NEXT:    ;;#ASMEND
10836; GFX900-NEXT:    ;;#ASMSTART
10837; GFX900-NEXT:    ; def s[6:7]
10838; GFX900-NEXT:    ;;#ASMEND
10839; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
10840; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10841; GFX900-NEXT:    ;;#ASMSTART
10842; GFX900-NEXT:    ; use s[8:9]
10843; GFX900-NEXT:    ;;#ASMEND
10844; GFX900-NEXT:    s_setpc_b64 s[30:31]
10845;
10846; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_2_2:
10847; GFX90A:       ; %bb.0:
10848; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10849; GFX90A-NEXT:    ;;#ASMSTART
10850; GFX90A-NEXT:    ; def s[4:5]
10851; GFX90A-NEXT:    ;;#ASMEND
10852; GFX90A-NEXT:    ;;#ASMSTART
10853; GFX90A-NEXT:    ; def s[6:7]
10854; GFX90A-NEXT:    ;;#ASMEND
10855; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
10856; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10857; GFX90A-NEXT:    ;;#ASMSTART
10858; GFX90A-NEXT:    ; use s[8:9]
10859; GFX90A-NEXT:    ;;#ASMEND
10860; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10861;
10862; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_2_2:
10863; GFX940:       ; %bb.0:
10864; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10865; GFX940-NEXT:    ;;#ASMSTART
10866; GFX940-NEXT:    ; def s[0:1]
10867; GFX940-NEXT:    ;;#ASMEND
10868; GFX940-NEXT:    ;;#ASMSTART
10869; GFX940-NEXT:    ; def s[2:3]
10870; GFX940-NEXT:    ;;#ASMEND
10871; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s2
10872; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10873; GFX940-NEXT:    ;;#ASMSTART
10874; GFX940-NEXT:    ; use s[8:9]
10875; GFX940-NEXT:    ;;#ASMEND
10876; GFX940-NEXT:    s_setpc_b64 s[30:31]
10877  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10878  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10879  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10880  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10881  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
10882  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10883  ret void
10884}
10885
; Scalar-register case with two sources, shuffle mask <5, 4, 2, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lane 0 is lane 2 of %extract31, lane 1 is lane 1 of
; %extract31, and lanes 2-3 replicate lane 2 of %extract3. The result is
; consumed by inline asm pinned to s[8:9].
10886define void @s_shuffle_v4i16_v3i16__5_4_2_2() {
10887; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_2_2:
10888; GFX900:       ; %bb.0:
10889; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10890; GFX900-NEXT:    ;;#ASMSTART
10891; GFX900-NEXT:    ; def s[4:5]
10892; GFX900-NEXT:    ;;#ASMEND
10893; GFX900-NEXT:    ;;#ASMSTART
10894; GFX900-NEXT:    ; def s[6:7]
10895; GFX900-NEXT:    ;;#ASMEND
10896; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
10897; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10898; GFX900-NEXT:    ;;#ASMSTART
10899; GFX900-NEXT:    ; use s[8:9]
10900; GFX900-NEXT:    ;;#ASMEND
10901; GFX900-NEXT:    s_setpc_b64 s[30:31]
10902;
10903; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_2_2:
10904; GFX90A:       ; %bb.0:
10905; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10906; GFX90A-NEXT:    ;;#ASMSTART
10907; GFX90A-NEXT:    ; def s[4:5]
10908; GFX90A-NEXT:    ;;#ASMEND
10909; GFX90A-NEXT:    ;;#ASMSTART
10910; GFX90A-NEXT:    ; def s[6:7]
10911; GFX90A-NEXT:    ;;#ASMEND
10912; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
10913; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10914; GFX90A-NEXT:    ;;#ASMSTART
10915; GFX90A-NEXT:    ; use s[8:9]
10916; GFX90A-NEXT:    ;;#ASMEND
10917; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10918;
10919; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_2_2:
10920; GFX940:       ; %bb.0:
10921; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10922; GFX940-NEXT:    ;;#ASMSTART
10923; GFX940-NEXT:    ; def s[0:1]
10924; GFX940-NEXT:    ;;#ASMEND
10925; GFX940-NEXT:    ;;#ASMSTART
10926; GFX940-NEXT:    ; def s[2:3]
10927; GFX940-NEXT:    ;;#ASMEND
10928; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s2
10929; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10930; GFX940-NEXT:    ;;#ASMSTART
10931; GFX940-NEXT:    ; use s[8:9]
10932; GFX940-NEXT:    ;;#ASMEND
10933; GFX940-NEXT:    s_setpc_b64 s[30:31]
10934  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10935  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10936  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10937  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10938  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
10939  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10940  ret void
10941}
10942
; Scalar-register case with two sources, shuffle mask <5, 5, 2, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lanes 0-1 replicate lane 2 of %extract31 and lanes 2-3
; replicate lane 2 of %extract3. The result is consumed by inline asm pinned to
; s[8:9].
10943define void @s_shuffle_v4i16_v3i16__5_5_2_2() {
10944; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_2:
10945; GFX900:       ; %bb.0:
10946; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10947; GFX900-NEXT:    ;;#ASMSTART
10948; GFX900-NEXT:    ; def s[4:5]
10949; GFX900-NEXT:    ;;#ASMEND
10950; GFX900-NEXT:    ;;#ASMSTART
10951; GFX900-NEXT:    ; def s[6:7]
10952; GFX900-NEXT:    ;;#ASMEND
10953; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10954; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10955; GFX900-NEXT:    ;;#ASMSTART
10956; GFX900-NEXT:    ; use s[8:9]
10957; GFX900-NEXT:    ;;#ASMEND
10958; GFX900-NEXT:    s_setpc_b64 s[30:31]
10959;
10960; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_2:
10961; GFX90A:       ; %bb.0:
10962; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10963; GFX90A-NEXT:    ;;#ASMSTART
10964; GFX90A-NEXT:    ; def s[4:5]
10965; GFX90A-NEXT:    ;;#ASMEND
10966; GFX90A-NEXT:    ;;#ASMSTART
10967; GFX90A-NEXT:    ; def s[6:7]
10968; GFX90A-NEXT:    ;;#ASMEND
10969; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
10970; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
10971; GFX90A-NEXT:    ;;#ASMSTART
10972; GFX90A-NEXT:    ; use s[8:9]
10973; GFX90A-NEXT:    ;;#ASMEND
10974; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10975;
10976; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_2:
10977; GFX940:       ; %bb.0:
10978; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10979; GFX940-NEXT:    ;;#ASMSTART
10980; GFX940-NEXT:    ; def s[0:1]
10981; GFX940-NEXT:    ;;#ASMEND
10982; GFX940-NEXT:    ;;#ASMSTART
10983; GFX940-NEXT:    ; def s[2:3]
10984; GFX940-NEXT:    ;;#ASMEND
10985; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
10986; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
10987; GFX940-NEXT:    ;;#ASMSTART
10988; GFX940-NEXT:    ; use s[8:9]
10989; GFX940-NEXT:    ;;#ASMEND
10990; GFX940-NEXT:    s_setpc_b64 s[30:31]
10991  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10992  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10993  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10994  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
10995  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
10996  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
10997  ret void
10998}
10999
; Scalar-register case with two sources, shuffle mask <5, 5, poison, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lanes 0-1 replicate lane 2 of %extract31, lane 2 is
; poison, and lane 3 is lane 2 of %extract3. The result is consumed by inline
; asm pinned to s[8:9].
11000define void @s_shuffle_v4i16_v3i16__5_5_u_2() {
11001; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_2:
11002; GFX900:       ; %bb.0:
11003; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11004; GFX900-NEXT:    ;;#ASMSTART
11005; GFX900-NEXT:    ; def s[4:5]
11006; GFX900-NEXT:    ;;#ASMEND
11007; GFX900-NEXT:    ;;#ASMSTART
11008; GFX900-NEXT:    ; def s[6:7]
11009; GFX900-NEXT:    ;;#ASMEND
11010; GFX900-NEXT:    s_lshl_b32 s9, s5, 16
11011; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11012; GFX900-NEXT:    ;;#ASMSTART
11013; GFX900-NEXT:    ; use s[8:9]
11014; GFX900-NEXT:    ;;#ASMEND
11015; GFX900-NEXT:    s_setpc_b64 s[30:31]
11016;
11017; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_2:
11018; GFX90A:       ; %bb.0:
11019; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11020; GFX90A-NEXT:    ;;#ASMSTART
11021; GFX90A-NEXT:    ; def s[4:5]
11022; GFX90A-NEXT:    ;;#ASMEND
11023; GFX90A-NEXT:    ;;#ASMSTART
11024; GFX90A-NEXT:    ; def s[6:7]
11025; GFX90A-NEXT:    ;;#ASMEND
11026; GFX90A-NEXT:    s_lshl_b32 s9, s5, 16
11027; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11028; GFX90A-NEXT:    ;;#ASMSTART
11029; GFX90A-NEXT:    ; use s[8:9]
11030; GFX90A-NEXT:    ;;#ASMEND
11031; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11032;
11033; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_2:
11034; GFX940:       ; %bb.0:
11035; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11036; GFX940-NEXT:    ;;#ASMSTART
11037; GFX940-NEXT:    ; def s[0:1]
11038; GFX940-NEXT:    ;;#ASMEND
11039; GFX940-NEXT:    ;;#ASMSTART
11040; GFX940-NEXT:    ; def s[2:3]
11041; GFX940-NEXT:    ;;#ASMEND
11042; GFX940-NEXT:    s_lshl_b32 s9, s1, 16
11043; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
11044; GFX940-NEXT:    ;;#ASMSTART
11045; GFX940-NEXT:    ; use s[8:9]
11046; GFX940-NEXT:    ;;#ASMEND
11047; GFX940-NEXT:    s_setpc_b64 s[30:31]
11048  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11049  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11050  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11051  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11052  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
11053  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11054  ret void
11055}
11056
; Scalar-register case with two sources, shuffle mask <5, 5, 0, 2>.
; %vec0 and %vec1 each come from inline asm with an "=s" constraint and are
; narrowed to <3 x i16>. Mask indices 0-2 select from %extract3 and 3-5 from
; %extract31, so result lanes 0-1 replicate lane 2 of %extract31, lane 2 is
; lane 0 of %extract3, and lane 3 is lane 2 of %extract3. The result is consumed
; by inline asm pinned to s[8:9].
11057define void @s_shuffle_v4i16_v3i16__5_5_0_2() {
11058; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_2:
11059; GFX900:       ; %bb.0:
11060; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11061; GFX900-NEXT:    ;;#ASMSTART
11062; GFX900-NEXT:    ; def s[4:5]
11063; GFX900-NEXT:    ;;#ASMEND
11064; GFX900-NEXT:    ;;#ASMSTART
11065; GFX900-NEXT:    ; def s[6:7]
11066; GFX900-NEXT:    ;;#ASMEND
11067; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
11068; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11069; GFX900-NEXT:    ;;#ASMSTART
11070; GFX900-NEXT:    ; use s[8:9]
11071; GFX900-NEXT:    ;;#ASMEND
11072; GFX900-NEXT:    s_setpc_b64 s[30:31]
11073;
11074; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_2:
11075; GFX90A:       ; %bb.0:
11076; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11077; GFX90A-NEXT:    ;;#ASMSTART
11078; GFX90A-NEXT:    ; def s[4:5]
11079; GFX90A-NEXT:    ;;#ASMEND
11080; GFX90A-NEXT:    ;;#ASMSTART
11081; GFX90A-NEXT:    ; def s[6:7]
11082; GFX90A-NEXT:    ;;#ASMEND
11083; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
11084; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11085; GFX90A-NEXT:    ;;#ASMSTART
11086; GFX90A-NEXT:    ; use s[8:9]
11087; GFX90A-NEXT:    ;;#ASMEND
11088; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11089;
11090; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_2:
11091; GFX940:       ; %bb.0:
11092; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11093; GFX940-NEXT:    ;;#ASMSTART
11094; GFX940-NEXT:    ; def s[0:1]
11095; GFX940-NEXT:    ;;#ASMEND
11096; GFX940-NEXT:    ;;#ASMSTART
11097; GFX940-NEXT:    ; def s[2:3]
11098; GFX940-NEXT:    ;;#ASMEND
11099; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
11100; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
11101; GFX940-NEXT:    ;;#ASMSTART
11102; GFX940-NEXT:    ; use s[8:9]
11103; GFX940-NEXT:    ;;#ASMEND
11104; GFX940-NEXT:    s_setpc_b64 s[30:31]
11105  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11106  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11107  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11108  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11109  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
11110  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11111  ret void
11112}
11113
; SGPR variant with shuffle mask <5, 5, 1, 2>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec1[2], vec0[1], vec0[2] }.
11114define void @s_shuffle_v4i16_v3i16__5_5_1_2() {
11115; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_2:
11116; GFX900:       ; %bb.0:
11117; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11118; GFX900-NEXT:    ;;#ASMSTART
11119; GFX900-NEXT:    ; def s[4:5]
11120; GFX900-NEXT:    ;;#ASMEND
11121; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
11122; GFX900-NEXT:    ;;#ASMSTART
11123; GFX900-NEXT:    ; def s[6:7]
11124; GFX900-NEXT:    ;;#ASMEND
11125; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
11126; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11127; GFX900-NEXT:    ;;#ASMSTART
11128; GFX900-NEXT:    ; use s[8:9]
11129; GFX900-NEXT:    ;;#ASMEND
11130; GFX900-NEXT:    s_setpc_b64 s[30:31]
11131;
11132; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_2:
11133; GFX90A:       ; %bb.0:
11134; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11135; GFX90A-NEXT:    ;;#ASMSTART
11136; GFX90A-NEXT:    ; def s[4:5]
11137; GFX90A-NEXT:    ;;#ASMEND
11138; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
11139; GFX90A-NEXT:    ;;#ASMSTART
11140; GFX90A-NEXT:    ; def s[6:7]
11141; GFX90A-NEXT:    ;;#ASMEND
11142; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
11143; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11144; GFX90A-NEXT:    ;;#ASMSTART
11145; GFX90A-NEXT:    ; use s[8:9]
11146; GFX90A-NEXT:    ;;#ASMEND
11147; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11148;
11149; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_2:
11150; GFX940:       ; %bb.0:
11151; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11152; GFX940-NEXT:    ;;#ASMSTART
11153; GFX940-NEXT:    ; def s[0:1]
11154; GFX940-NEXT:    ;;#ASMEND
11155; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
11156; GFX940-NEXT:    ;;#ASMSTART
11157; GFX940-NEXT:    ; def s[2:3]
11158; GFX940-NEXT:    ;;#ASMEND
11159; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
11160; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
11161; GFX940-NEXT:    ;;#ASMSTART
11162; GFX940-NEXT:    ; use s[8:9]
11163; GFX940-NEXT:    ;;#ASMEND
11164; GFX940-NEXT:    s_setpc_b64 s[30:31]
11165  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11166  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11167  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11168  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11169  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
11170  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11171  ret void
11172}
11173
; SGPR variant with shuffle mask <5, 5, 3, 2>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec1[2], vec1[0], vec0[2] }.
11174define void @s_shuffle_v4i16_v3i16__5_5_3_2() {
11175; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_2:
11176; GFX900:       ; %bb.0:
11177; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11178; GFX900-NEXT:    ;;#ASMSTART
11179; GFX900-NEXT:    ; def s[4:5]
11180; GFX900-NEXT:    ;;#ASMEND
11181; GFX900-NEXT:    ;;#ASMSTART
11182; GFX900-NEXT:    ; def s[6:7]
11183; GFX900-NEXT:    ;;#ASMEND
11184; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s5
11185; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11186; GFX900-NEXT:    ;;#ASMSTART
11187; GFX900-NEXT:    ; use s[8:9]
11188; GFX900-NEXT:    ;;#ASMEND
11189; GFX900-NEXT:    s_setpc_b64 s[30:31]
11190;
11191; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_2:
11192; GFX90A:       ; %bb.0:
11193; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11194; GFX90A-NEXT:    ;;#ASMSTART
11195; GFX90A-NEXT:    ; def s[4:5]
11196; GFX90A-NEXT:    ;;#ASMEND
11197; GFX90A-NEXT:    ;;#ASMSTART
11198; GFX90A-NEXT:    ; def s[6:7]
11199; GFX90A-NEXT:    ;;#ASMEND
11200; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s5
11201; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11202; GFX90A-NEXT:    ;;#ASMSTART
11203; GFX90A-NEXT:    ; use s[8:9]
11204; GFX90A-NEXT:    ;;#ASMEND
11205; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11206;
11207; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_2:
11208; GFX940:       ; %bb.0:
11209; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11210; GFX940-NEXT:    ;;#ASMSTART
11211; GFX940-NEXT:    ; def s[0:1]
11212; GFX940-NEXT:    ;;#ASMEND
11213; GFX940-NEXT:    ;;#ASMSTART
11214; GFX940-NEXT:    ; def s[2:3]
11215; GFX940-NEXT:    ;;#ASMEND
11216; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s1
11217; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
11218; GFX940-NEXT:    ;;#ASMSTART
11219; GFX940-NEXT:    ; use s[8:9]
11220; GFX940-NEXT:    ;;#ASMEND
11221; GFX940-NEXT:    s_setpc_b64 s[30:31]
11222  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11223  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11224  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11225  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11226  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
11227  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11228  ret void
11229}
11230
; SGPR variant with shuffle mask <5, 5, 4, 2>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec1[2], vec1[1], vec0[2] }.
11231define void @s_shuffle_v4i16_v3i16__5_5_4_2() {
11232; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_2:
11233; GFX900:       ; %bb.0:
11234; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11235; GFX900-NEXT:    ;;#ASMSTART
11236; GFX900-NEXT:    ; def s[4:5]
11237; GFX900-NEXT:    ;;#ASMEND
11238; GFX900-NEXT:    ;;#ASMSTART
11239; GFX900-NEXT:    ; def s[6:7]
11240; GFX900-NEXT:    ;;#ASMEND
11241; GFX900-NEXT:    s_lshr_b32 s4, s6, 16
11242; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
11243; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11244; GFX900-NEXT:    ;;#ASMSTART
11245; GFX900-NEXT:    ; use s[8:9]
11246; GFX900-NEXT:    ;;#ASMEND
11247; GFX900-NEXT:    s_setpc_b64 s[30:31]
11248;
11249; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_2:
11250; GFX90A:       ; %bb.0:
11251; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11252; GFX90A-NEXT:    ;;#ASMSTART
11253; GFX90A-NEXT:    ; def s[4:5]
11254; GFX90A-NEXT:    ;;#ASMEND
11255; GFX90A-NEXT:    ;;#ASMSTART
11256; GFX90A-NEXT:    ; def s[6:7]
11257; GFX90A-NEXT:    ;;#ASMEND
11258; GFX90A-NEXT:    s_lshr_b32 s4, s6, 16
11259; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
11260; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11261; GFX90A-NEXT:    ;;#ASMSTART
11262; GFX90A-NEXT:    ; use s[8:9]
11263; GFX90A-NEXT:    ;;#ASMEND
11264; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11265;
11266; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_2:
11267; GFX940:       ; %bb.0:
11268; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11269; GFX940-NEXT:    ;;#ASMSTART
11270; GFX940-NEXT:    ; def s[0:1]
11271; GFX940-NEXT:    ;;#ASMEND
11272; GFX940-NEXT:    ;;#ASMSTART
11273; GFX940-NEXT:    ; def s[2:3]
11274; GFX940-NEXT:    ;;#ASMEND
11275; GFX940-NEXT:    s_lshr_b32 s0, s2, 16
11276; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
11277; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
11278; GFX940-NEXT:    ;;#ASMSTART
11279; GFX940-NEXT:    ; use s[8:9]
11280; GFX940-NEXT:    ;;#ASMEND
11281; GFX940-NEXT:    s_setpc_b64 s[30:31]
11282  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11283  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11284  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11285  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11286  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
11287  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11288  ret void
11289}
11290
; Single-source SGPR variant with shuffle mask <poison, 3, 3, 3>. The second
; shuffle operand is poison, so index 3 selects a poison lane and every result
; lane is poison. The expected output therefore has no def, only the use of
; s[8:9], and the same checks are shared by all three targets.
11291define void @s_shuffle_v4i16_v3i16__u_3_3_3() {
11292; GFX9-LABEL: s_shuffle_v4i16_v3i16__u_3_3_3:
11293; GFX9:       ; %bb.0:
11294; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11295; GFX9-NEXT:    ;;#ASMSTART
11296; GFX9-NEXT:    ; use s[8:9]
11297; GFX9-NEXT:    ;;#ASMEND
11298; GFX9-NEXT:    s_setpc_b64 s[30:31]
11299  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11300  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11301  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
11302  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11303  ret void
11304}
11305
; Single-source SGPR variant with shuffle mask <0, 3, 3, 3>. Lane 0 is vec0[0];
; the remaining lanes index the poison second operand. The expected output
; defines s[8:9] directly and then uses it; the gfx940 checks additionally
; expect an s_nop 0 between the def and the use.
11306define void @s_shuffle_v4i16_v3i16__0_3_3_3() {
11307; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_3_3_3:
11308; GFX900:       ; %bb.0:
11309; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11310; GFX900-NEXT:    ;;#ASMSTART
11311; GFX900-NEXT:    ; def s[8:9]
11312; GFX900-NEXT:    ;;#ASMEND
11313; GFX900-NEXT:    ;;#ASMSTART
11314; GFX900-NEXT:    ; use s[8:9]
11315; GFX900-NEXT:    ;;#ASMEND
11316; GFX900-NEXT:    s_setpc_b64 s[30:31]
11317;
11318; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_3_3_3:
11319; GFX90A:       ; %bb.0:
11320; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11321; GFX90A-NEXT:    ;;#ASMSTART
11322; GFX90A-NEXT:    ; def s[8:9]
11323; GFX90A-NEXT:    ;;#ASMEND
11324; GFX90A-NEXT:    ;;#ASMSTART
11325; GFX90A-NEXT:    ; use s[8:9]
11326; GFX90A-NEXT:    ;;#ASMEND
11327; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11328;
11329; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_3_3_3:
11330; GFX940:       ; %bb.0:
11331; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11332; GFX940-NEXT:    ;;#ASMSTART
11333; GFX940-NEXT:    ; def s[8:9]
11334; GFX940-NEXT:    ;;#ASMEND
11335; GFX940-NEXT:    s_nop 0
11336; GFX940-NEXT:    ;;#ASMSTART
11337; GFX940-NEXT:    ; use s[8:9]
11338; GFX940-NEXT:    ;;#ASMEND
11339; GFX940-NEXT:    s_setpc_b64 s[30:31]
11340  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11341  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11342  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
11343  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11344  ret void
11345}
11346
; Single-source SGPR variant with shuffle mask <1, 3, 3, 3>. Lane 0 is vec0[1];
; the remaining lanes index the poison second operand. The expected output
; moves the high half of the first source dword into s8 with a 16-bit right
; shift and leaves s9 unwritten.
11347define void @s_shuffle_v4i16_v3i16__1_3_3_3() {
11348; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_3_3_3:
11349; GFX900:       ; %bb.0:
11350; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11351; GFX900-NEXT:    ;;#ASMSTART
11352; GFX900-NEXT:    ; def s[4:5]
11353; GFX900-NEXT:    ;;#ASMEND
11354; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
11355; GFX900-NEXT:    ;;#ASMSTART
11356; GFX900-NEXT:    ; use s[8:9]
11357; GFX900-NEXT:    ;;#ASMEND
11358; GFX900-NEXT:    s_setpc_b64 s[30:31]
11359;
11360; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_3_3_3:
11361; GFX90A:       ; %bb.0:
11362; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11363; GFX90A-NEXT:    ;;#ASMSTART
11364; GFX90A-NEXT:    ; def s[4:5]
11365; GFX90A-NEXT:    ;;#ASMEND
11366; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
11367; GFX90A-NEXT:    ;;#ASMSTART
11368; GFX90A-NEXT:    ; use s[8:9]
11369; GFX90A-NEXT:    ;;#ASMEND
11370; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11371;
11372; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_3_3_3:
11373; GFX940:       ; %bb.0:
11374; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11375; GFX940-NEXT:    ;;#ASMSTART
11376; GFX940-NEXT:    ; def s[0:1]
11377; GFX940-NEXT:    ;;#ASMEND
11378; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
11379; GFX940-NEXT:    ;;#ASMSTART
11380; GFX940-NEXT:    ; use s[8:9]
11381; GFX940-NEXT:    ;;#ASMEND
11382; GFX940-NEXT:    s_setpc_b64 s[30:31]
11383  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11384  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11385  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
11386  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11387  ret void
11388}
11389
; Single-source SGPR variant with shuffle mask <2, 3, 3, 3>. Lane 0 is vec0[2];
; the remaining lanes index the poison second operand. The expected output
; copies the second source dword into s8 with a single s_mov_b32 and leaves s9
; unwritten.
11390define void @s_shuffle_v4i16_v3i16__2_3_3_3() {
11391; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3:
11392; GFX900:       ; %bb.0:
11393; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11394; GFX900-NEXT:    ;;#ASMSTART
11395; GFX900-NEXT:    ; def s[4:5]
11396; GFX900-NEXT:    ;;#ASMEND
11397; GFX900-NEXT:    s_mov_b32 s8, s5
11398; GFX900-NEXT:    ;;#ASMSTART
11399; GFX900-NEXT:    ; use s[8:9]
11400; GFX900-NEXT:    ;;#ASMEND
11401; GFX900-NEXT:    s_setpc_b64 s[30:31]
11402;
11403; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3:
11404; GFX90A:       ; %bb.0:
11405; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11406; GFX90A-NEXT:    ;;#ASMSTART
11407; GFX90A-NEXT:    ; def s[4:5]
11408; GFX90A-NEXT:    ;;#ASMEND
11409; GFX90A-NEXT:    s_mov_b32 s8, s5
11410; GFX90A-NEXT:    ;;#ASMSTART
11411; GFX90A-NEXT:    ; use s[8:9]
11412; GFX90A-NEXT:    ;;#ASMEND
11413; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11414;
11415; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_3_3_3:
11416; GFX940:       ; %bb.0:
11417; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11418; GFX940-NEXT:    ;;#ASMSTART
11419; GFX940-NEXT:    ; def s[0:1]
11420; GFX940-NEXT:    ;;#ASMEND
11421; GFX940-NEXT:    s_mov_b32 s8, s1
11422; GFX940-NEXT:    ;;#ASMSTART
11423; GFX940-NEXT:    ; use s[8:9]
11424; GFX940-NEXT:    ;;#ASMEND
11425; GFX940-NEXT:    s_setpc_b64 s[30:31]
11426  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11427  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11428  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
11429  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11430  ret void
11431}
11432
; Single-source SGPR variant with shuffle mask <3, 3, 3, 3>. The second shuffle
; operand is poison, so every lane selects a poison element. The expected
; output therefore has no def, only the use of s[8:9], and the same checks are
; shared by all three targets.
11433define void @s_shuffle_v4i16_v3i16__3_3_3_3() {
11434; GFX9-LABEL: s_shuffle_v4i16_v3i16__3_3_3_3:
11435; GFX9:       ; %bb.0:
11436; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11437; GFX9-NEXT:    ;;#ASMSTART
11438; GFX9-NEXT:    ; use s[8:9]
11439; GFX9-NEXT:    ;;#ASMEND
11440; GFX9-NEXT:    s_setpc_b64 s[30:31]
11441  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11442  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11443  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
11444  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11445  ret void
11446}
11447
; SGPR variant with shuffle mask <4, 3, 3, 3>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[1], vec1[0], vec1[0], vec1[0] }.
; No lane reads %vec0, and the expected output contains a single def.
11448define void @s_shuffle_v4i16_v3i16__4_3_3_3() {
11449; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_3_3_3:
11450; GFX900:       ; %bb.0:
11451; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11452; GFX900-NEXT:    ;;#ASMSTART
11453; GFX900-NEXT:    ; def s[4:5]
11454; GFX900-NEXT:    ;;#ASMEND
11455; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
11456; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11457; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11458; GFX900-NEXT:    ;;#ASMSTART
11459; GFX900-NEXT:    ; use s[8:9]
11460; GFX900-NEXT:    ;;#ASMEND
11461; GFX900-NEXT:    s_setpc_b64 s[30:31]
11462;
11463; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_3_3_3:
11464; GFX90A:       ; %bb.0:
11465; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11466; GFX90A-NEXT:    ;;#ASMSTART
11467; GFX90A-NEXT:    ; def s[4:5]
11468; GFX90A-NEXT:    ;;#ASMEND
11469; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
11470; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11471; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11472; GFX90A-NEXT:    ;;#ASMSTART
11473; GFX90A-NEXT:    ; use s[8:9]
11474; GFX90A-NEXT:    ;;#ASMEND
11475; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11476;
11477; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_3_3_3:
11478; GFX940:       ; %bb.0:
11479; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11480; GFX940-NEXT:    ;;#ASMSTART
11481; GFX940-NEXT:    ; def s[0:1]
11482; GFX940-NEXT:    ;;#ASMEND
11483; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
11484; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
11485; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
11486; GFX940-NEXT:    ;;#ASMSTART
11487; GFX940-NEXT:    ; use s[8:9]
11488; GFX940-NEXT:    ;;#ASMEND
11489; GFX940-NEXT:    s_setpc_b64 s[30:31]
11490  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11491  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11492  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11493  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11494  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
11495  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11496  ret void
11497}
11498
; SGPR variant with shuffle mask <5, 3, 3, 3>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec1[0], vec1[0], vec1[0] }.
; No lane reads %vec0, and the expected output contains a single def.
11499define void @s_shuffle_v4i16_v3i16__5_3_3_3() {
11500; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_3_3:
11501; GFX900:       ; %bb.0:
11502; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11503; GFX900-NEXT:    ;;#ASMSTART
11504; GFX900-NEXT:    ; def s[4:5]
11505; GFX900-NEXT:    ;;#ASMEND
11506; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11507; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11508; GFX900-NEXT:    ;;#ASMSTART
11509; GFX900-NEXT:    ; use s[8:9]
11510; GFX900-NEXT:    ;;#ASMEND
11511; GFX900-NEXT:    s_setpc_b64 s[30:31]
11512;
11513; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_3_3:
11514; GFX90A:       ; %bb.0:
11515; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11516; GFX90A-NEXT:    ;;#ASMSTART
11517; GFX90A-NEXT:    ; def s[4:5]
11518; GFX90A-NEXT:    ;;#ASMEND
11519; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11520; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11521; GFX90A-NEXT:    ;;#ASMSTART
11522; GFX90A-NEXT:    ; use s[8:9]
11523; GFX90A-NEXT:    ;;#ASMEND
11524; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11525;
11526; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_3_3:
11527; GFX940:       ; %bb.0:
11528; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11529; GFX940-NEXT:    ;;#ASMSTART
11530; GFX940-NEXT:    ; def s[0:1]
11531; GFX940-NEXT:    ;;#ASMEND
11532; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
11533; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
11534; GFX940-NEXT:    ;;#ASMSTART
11535; GFX940-NEXT:    ; use s[8:9]
11536; GFX940-NEXT:    ;;#ASMEND
11537; GFX940-NEXT:    s_setpc_b64 s[30:31]
11538  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11539  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11540  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11541  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11542  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
11543  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11544  ret void
11545}
11546
; SGPR variant with shuffle mask <5, poison, 3, 3>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], poison, vec1[0], vec1[0] }.
; No lane reads %vec0, and the expected output contains a single def.
11547define void @s_shuffle_v4i16_v3i16__5_u_3_3() {
11548; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_3_3:
11549; GFX900:       ; %bb.0:
11550; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11551; GFX900-NEXT:    ;;#ASMSTART
11552; GFX900-NEXT:    ; def s[4:5]
11553; GFX900-NEXT:    ;;#ASMEND
11554; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11555; GFX900-NEXT:    s_mov_b32 s8, s5
11556; GFX900-NEXT:    ;;#ASMSTART
11557; GFX900-NEXT:    ; use s[8:9]
11558; GFX900-NEXT:    ;;#ASMEND
11559; GFX900-NEXT:    s_setpc_b64 s[30:31]
11560;
11561; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_3_3:
11562; GFX90A:       ; %bb.0:
11563; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11564; GFX90A-NEXT:    ;;#ASMSTART
11565; GFX90A-NEXT:    ; def s[4:5]
11566; GFX90A-NEXT:    ;;#ASMEND
11567; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11568; GFX90A-NEXT:    s_mov_b32 s8, s5
11569; GFX90A-NEXT:    ;;#ASMSTART
11570; GFX90A-NEXT:    ; use s[8:9]
11571; GFX90A-NEXT:    ;;#ASMEND
11572; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11573;
11574; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_3_3:
11575; GFX940:       ; %bb.0:
11576; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11577; GFX940-NEXT:    ;;#ASMSTART
11578; GFX940-NEXT:    ; def s[0:1]
11579; GFX940-NEXT:    ;;#ASMEND
11580; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
11581; GFX940-NEXT:    s_mov_b32 s8, s1
11582; GFX940-NEXT:    ;;#ASMSTART
11583; GFX940-NEXT:    ; use s[8:9]
11584; GFX940-NEXT:    ;;#ASMEND
11585; GFX940-NEXT:    s_setpc_b64 s[30:31]
11586  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11587  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11588  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11589  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11590  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
11591  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11592  ret void
11593}
11594
; SGPR variant with shuffle mask <5, 0, 3, 3>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec0[0], vec1[0], vec1[0] }.
11595define void @s_shuffle_v4i16_v3i16__5_0_3_3() {
11596; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_3_3:
11597; GFX900:       ; %bb.0:
11598; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11599; GFX900-NEXT:    ;;#ASMSTART
11600; GFX900-NEXT:    ; def s[4:5]
11601; GFX900-NEXT:    ;;#ASMEND
11602; GFX900-NEXT:    ;;#ASMSTART
11603; GFX900-NEXT:    ; def s[6:7]
11604; GFX900-NEXT:    ;;#ASMEND
11605; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
11606; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
11607; GFX900-NEXT:    ;;#ASMSTART
11608; GFX900-NEXT:    ; use s[8:9]
11609; GFX900-NEXT:    ;;#ASMEND
11610; GFX900-NEXT:    s_setpc_b64 s[30:31]
11611;
11612; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_3_3:
11613; GFX90A:       ; %bb.0:
11614; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11615; GFX90A-NEXT:    ;;#ASMSTART
11616; GFX90A-NEXT:    ; def s[4:5]
11617; GFX90A-NEXT:    ;;#ASMEND
11618; GFX90A-NEXT:    ;;#ASMSTART
11619; GFX90A-NEXT:    ; def s[6:7]
11620; GFX90A-NEXT:    ;;#ASMEND
11621; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
11622; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
11623; GFX90A-NEXT:    ;;#ASMSTART
11624; GFX90A-NEXT:    ; use s[8:9]
11625; GFX90A-NEXT:    ;;#ASMEND
11626; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11627;
11628; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_3_3:
11629; GFX940:       ; %bb.0:
11630; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11631; GFX940-NEXT:    ;;#ASMSTART
11632; GFX940-NEXT:    ; def s[0:1]
11633; GFX940-NEXT:    ;;#ASMEND
11634; GFX940-NEXT:    ;;#ASMSTART
11635; GFX940-NEXT:    ; def s[2:3]
11636; GFX940-NEXT:    ;;#ASMEND
11637; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
11638; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s2
11639; GFX940-NEXT:    ;;#ASMSTART
11640; GFX940-NEXT:    ; use s[8:9]
11641; GFX940-NEXT:    ;;#ASMEND
11642; GFX940-NEXT:    s_setpc_b64 s[30:31]
11643  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11644  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11645  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11646  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11647  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
11648  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11649  ret void
11650}
11651
; SGPR variant with shuffle mask <5, 1, 3, 3>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec0[1], vec1[0], vec1[0] }.
; The expected output builds the low dword with s_pack_lh_b32_b16 rather than
; a separate shift followed by s_pack_ll_b32_b16.
11652define void @s_shuffle_v4i16_v3i16__5_1_3_3() {
11653; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_3_3:
11654; GFX900:       ; %bb.0:
11655; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11656; GFX900-NEXT:    ;;#ASMSTART
11657; GFX900-NEXT:    ; def s[4:5]
11658; GFX900-NEXT:    ;;#ASMEND
11659; GFX900-NEXT:    ;;#ASMSTART
11660; GFX900-NEXT:    ; def s[6:7]
11661; GFX900-NEXT:    ;;#ASMEND
11662; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
11663; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
11664; GFX900-NEXT:    ;;#ASMSTART
11665; GFX900-NEXT:    ; use s[8:9]
11666; GFX900-NEXT:    ;;#ASMEND
11667; GFX900-NEXT:    s_setpc_b64 s[30:31]
11668;
11669; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_3_3:
11670; GFX90A:       ; %bb.0:
11671; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11672; GFX90A-NEXT:    ;;#ASMSTART
11673; GFX90A-NEXT:    ; def s[4:5]
11674; GFX90A-NEXT:    ;;#ASMEND
11675; GFX90A-NEXT:    ;;#ASMSTART
11676; GFX90A-NEXT:    ; def s[6:7]
11677; GFX90A-NEXT:    ;;#ASMEND
11678; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
11679; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
11680; GFX90A-NEXT:    ;;#ASMSTART
11681; GFX90A-NEXT:    ; use s[8:9]
11682; GFX90A-NEXT:    ;;#ASMEND
11683; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11684;
11685; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_3_3:
11686; GFX940:       ; %bb.0:
11687; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11688; GFX940-NEXT:    ;;#ASMSTART
11689; GFX940-NEXT:    ; def s[0:1]
11690; GFX940-NEXT:    ;;#ASMEND
11691; GFX940-NEXT:    ;;#ASMSTART
11692; GFX940-NEXT:    ; def s[2:3]
11693; GFX940-NEXT:    ;;#ASMEND
11694; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
11695; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s2
11696; GFX940-NEXT:    ;;#ASMSTART
11697; GFX940-NEXT:    ; use s[8:9]
11698; GFX940-NEXT:    ;;#ASMEND
11699; GFX940-NEXT:    s_setpc_b64 s[30:31]
11700  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11701  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11702  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11703  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11704  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
11705  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11706  ret void
11707}
11708
; SGPR variant with shuffle mask <5, 2, 3, 3>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec0[2], vec1[0], vec1[0] }.
11709define void @s_shuffle_v4i16_v3i16__5_2_3_3() {
11710; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_3_3:
11711; GFX900:       ; %bb.0:
11712; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11713; GFX900-NEXT:    ;;#ASMSTART
11714; GFX900-NEXT:    ; def s[4:5]
11715; GFX900-NEXT:    ;;#ASMEND
11716; GFX900-NEXT:    ;;#ASMSTART
11717; GFX900-NEXT:    ; def s[6:7]
11718; GFX900-NEXT:    ;;#ASMEND
11719; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
11720; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
11721; GFX900-NEXT:    ;;#ASMSTART
11722; GFX900-NEXT:    ; use s[8:9]
11723; GFX900-NEXT:    ;;#ASMEND
11724; GFX900-NEXT:    s_setpc_b64 s[30:31]
11725;
11726; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_3_3:
11727; GFX90A:       ; %bb.0:
11728; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11729; GFX90A-NEXT:    ;;#ASMSTART
11730; GFX90A-NEXT:    ; def s[4:5]
11731; GFX90A-NEXT:    ;;#ASMEND
11732; GFX90A-NEXT:    ;;#ASMSTART
11733; GFX90A-NEXT:    ; def s[6:7]
11734; GFX90A-NEXT:    ;;#ASMEND
11735; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
11736; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
11737; GFX90A-NEXT:    ;;#ASMSTART
11738; GFX90A-NEXT:    ; use s[8:9]
11739; GFX90A-NEXT:    ;;#ASMEND
11740; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11741;
11742; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_3_3:
11743; GFX940:       ; %bb.0:
11744; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11745; GFX940-NEXT:    ;;#ASMSTART
11746; GFX940-NEXT:    ; def s[0:1]
11747; GFX940-NEXT:    ;;#ASMEND
11748; GFX940-NEXT:    ;;#ASMSTART
11749; GFX940-NEXT:    ; def s[2:3]
11750; GFX940-NEXT:    ;;#ASMEND
11751; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
11752; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s2
11753; GFX940-NEXT:    ;;#ASMSTART
11754; GFX940-NEXT:    ; use s[8:9]
11755; GFX940-NEXT:    ;;#ASMEND
11756; GFX940-NEXT:    s_setpc_b64 s[30:31]
11757  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11758  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11759  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11760  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11761  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
11762  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11763  ret void
11764}
11765
; SGPR variant with shuffle mask <5, 4, 3, 3>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec1[1], vec1[0], vec1[0] }.
; No lane reads %vec0, and the expected output contains a single def.
11766define void @s_shuffle_v4i16_v3i16__5_4_3_3() {
11767; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_3_3:
11768; GFX900:       ; %bb.0:
11769; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11770; GFX900-NEXT:    ;;#ASMSTART
11771; GFX900-NEXT:    ; def s[4:5]
11772; GFX900-NEXT:    ;;#ASMEND
11773; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
11774; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11775; GFX900-NEXT:    ;;#ASMSTART
11776; GFX900-NEXT:    ; use s[8:9]
11777; GFX900-NEXT:    ;;#ASMEND
11778; GFX900-NEXT:    s_setpc_b64 s[30:31]
11779;
11780; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_3_3:
11781; GFX90A:       ; %bb.0:
11782; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11783; GFX90A-NEXT:    ;;#ASMSTART
11784; GFX90A-NEXT:    ; def s[4:5]
11785; GFX90A-NEXT:    ;;#ASMEND
11786; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
11787; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11788; GFX90A-NEXT:    ;;#ASMSTART
11789; GFX90A-NEXT:    ; use s[8:9]
11790; GFX90A-NEXT:    ;;#ASMEND
11791; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11792;
11793; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_3_3:
11794; GFX940:       ; %bb.0:
11795; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11796; GFX940-NEXT:    ;;#ASMSTART
11797; GFX940-NEXT:    ; def s[0:1]
11798; GFX940-NEXT:    ;;#ASMEND
11799; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
11800; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
11801; GFX940-NEXT:    ;;#ASMSTART
11802; GFX940-NEXT:    ; use s[8:9]
11803; GFX940-NEXT:    ;;#ASMEND
11804; GFX940-NEXT:    s_setpc_b64 s[30:31]
11805  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11806  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11807  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11808  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11809  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
11810  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11811  ret void
11812}
11813
; SGPR variant with shuffle mask <5, 5, 3, 3>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec1[2], vec1[0], vec1[0] }.
; No lane reads %vec0, and the expected output contains a single def.
11814define void @s_shuffle_v4i16_v3i16__5_5_3_3() {
11815; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_3:
11816; GFX900:       ; %bb.0:
11817; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11818; GFX900-NEXT:    ;;#ASMSTART
11819; GFX900-NEXT:    ; def s[4:5]
11820; GFX900-NEXT:    ;;#ASMEND
11821; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11822; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
11823; GFX900-NEXT:    ;;#ASMSTART
11824; GFX900-NEXT:    ; use s[8:9]
11825; GFX900-NEXT:    ;;#ASMEND
11826; GFX900-NEXT:    s_setpc_b64 s[30:31]
11827;
11828; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_3:
11829; GFX90A:       ; %bb.0:
11830; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11831; GFX90A-NEXT:    ;;#ASMSTART
11832; GFX90A-NEXT:    ; def s[4:5]
11833; GFX90A-NEXT:    ;;#ASMEND
11834; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
11835; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
11836; GFX90A-NEXT:    ;;#ASMSTART
11837; GFX90A-NEXT:    ; use s[8:9]
11838; GFX90A-NEXT:    ;;#ASMEND
11839; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11840;
11841; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_3:
11842; GFX940:       ; %bb.0:
11843; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11844; GFX940-NEXT:    ;;#ASMSTART
11845; GFX940-NEXT:    ; def s[0:1]
11846; GFX940-NEXT:    ;;#ASMEND
11847; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
11848; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
11849; GFX940-NEXT:    ;;#ASMSTART
11850; GFX940-NEXT:    ; use s[8:9]
11851; GFX940-NEXT:    ;;#ASMEND
11852; GFX940-NEXT:    s_setpc_b64 s[30:31]
11853  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11854  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11855  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11856  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11857  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
11858  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11859  ret void
11860}
11861
; SGPR variant with shuffle mask <5, 5, poison, 3>. Mask indices 0-2 select from
; %extract3 (low three lanes of %vec0) and 3-5 from %extract31 (low three lanes
; of %vec1), so the result is { vec1[2], vec1[2], poison, vec1[0] }.
; No lane reads %vec0, and the expected output contains a single def. The high
; dword is formed with a 16-bit left shift, leaving the poison lane's bits
; zero.
11862define void @s_shuffle_v4i16_v3i16__5_5_u_3() {
11863; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_3:
11864; GFX900:       ; %bb.0:
11865; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11866; GFX900-NEXT:    ;;#ASMSTART
11867; GFX900-NEXT:    ; def s[4:5]
11868; GFX900-NEXT:    ;;#ASMEND
11869; GFX900-NEXT:    s_lshl_b32 s9, s4, 16
11870; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
11871; GFX900-NEXT:    ;;#ASMSTART
11872; GFX900-NEXT:    ; use s[8:9]
11873; GFX900-NEXT:    ;;#ASMEND
11874; GFX900-NEXT:    s_setpc_b64 s[30:31]
11875;
11876; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_3:
11877; GFX90A:       ; %bb.0:
11878; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11879; GFX90A-NEXT:    ;;#ASMSTART
11880; GFX90A-NEXT:    ; def s[4:5]
11881; GFX90A-NEXT:    ;;#ASMEND
11882; GFX90A-NEXT:    s_lshl_b32 s9, s4, 16
11883; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
11884; GFX90A-NEXT:    ;;#ASMSTART
11885; GFX90A-NEXT:    ; use s[8:9]
11886; GFX90A-NEXT:    ;;#ASMEND
11887; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11888;
11889; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_3:
11890; GFX940:       ; %bb.0:
11891; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11892; GFX940-NEXT:    ;;#ASMSTART
11893; GFX940-NEXT:    ; def s[0:1]
11894; GFX940-NEXT:    ;;#ASMEND
11895; GFX940-NEXT:    s_lshl_b32 s9, s0, 16
11896; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
11897; GFX940-NEXT:    ;;#ASMSTART
11898; GFX940-NEXT:    ; use s[8:9]
11899; GFX940-NEXT:    ;;#ASMEND
11900; GFX940-NEXT:    s_setpc_b64 s[30:31]
11901  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11902  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11903  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11904  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11905  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
11906  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11907  ret void
11908}
11909
; Mask <5, 5, 0, 3> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[2], vec1[2], vec0[0], vec1[0]>, consumed by an inline
; asm pinned to s[8:9]. Both inputs contribute, and the checks show two
; "; def" lines.
11910define void @s_shuffle_v4i16_v3i16__5_5_0_3() {
11911; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_3:
11912; GFX900:       ; %bb.0:
11913; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11914; GFX900-NEXT:    ;;#ASMSTART
11915; GFX900-NEXT:    ; def s[4:5]
11916; GFX900-NEXT:    ;;#ASMEND
11917; GFX900-NEXT:    ;;#ASMSTART
11918; GFX900-NEXT:    ; def s[6:7]
11919; GFX900-NEXT:    ;;#ASMEND
11920; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
11921; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11922; GFX900-NEXT:    ;;#ASMSTART
11923; GFX900-NEXT:    ; use s[8:9]
11924; GFX900-NEXT:    ;;#ASMEND
11925; GFX900-NEXT:    s_setpc_b64 s[30:31]
11926;
11927; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_3:
11928; GFX90A:       ; %bb.0:
11929; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11930; GFX90A-NEXT:    ;;#ASMSTART
11931; GFX90A-NEXT:    ; def s[4:5]
11932; GFX90A-NEXT:    ;;#ASMEND
11933; GFX90A-NEXT:    ;;#ASMSTART
11934; GFX90A-NEXT:    ; def s[6:7]
11935; GFX90A-NEXT:    ;;#ASMEND
11936; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
11937; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11938; GFX90A-NEXT:    ;;#ASMSTART
11939; GFX90A-NEXT:    ; use s[8:9]
11940; GFX90A-NEXT:    ;;#ASMEND
11941; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11942;
11943; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_3:
11944; GFX940:       ; %bb.0:
11945; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11946; GFX940-NEXT:    ;;#ASMSTART
11947; GFX940-NEXT:    ; def s[0:1]
11948; GFX940-NEXT:    ;;#ASMEND
11949; GFX940-NEXT:    ;;#ASMSTART
11950; GFX940-NEXT:    ; def s[2:3]
11951; GFX940-NEXT:    ;;#ASMEND
11952; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s2
11953; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
11954; GFX940-NEXT:    ;;#ASMSTART
11955; GFX940-NEXT:    ; use s[8:9]
11956; GFX940-NEXT:    ;;#ASMEND
11957; GFX940-NEXT:    s_setpc_b64 s[30:31]
11958  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11959  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11960  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11961  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
11962  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
11963  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
11964  ret void
11965}
11966
; Mask <5, 5, 1, 3> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[2], vec1[2], vec0[1], vec1[0]>, consumed by an inline
; asm pinned to s[8:9]. The checks show a 16-bit right shift of the first
; input's low dword before the pack that forms the high result dword.
11967define void @s_shuffle_v4i16_v3i16__5_5_1_3() {
11968; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_3:
11969; GFX900:       ; %bb.0:
11970; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11971; GFX900-NEXT:    ;;#ASMSTART
11972; GFX900-NEXT:    ; def s[4:5]
11973; GFX900-NEXT:    ;;#ASMEND
11974; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
11975; GFX900-NEXT:    ;;#ASMSTART
11976; GFX900-NEXT:    ; def s[6:7]
11977; GFX900-NEXT:    ;;#ASMEND
11978; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
11979; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11980; GFX900-NEXT:    ;;#ASMSTART
11981; GFX900-NEXT:    ; use s[8:9]
11982; GFX900-NEXT:    ;;#ASMEND
11983; GFX900-NEXT:    s_setpc_b64 s[30:31]
11984;
11985; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_3:
11986; GFX90A:       ; %bb.0:
11987; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11988; GFX90A-NEXT:    ;;#ASMSTART
11989; GFX90A-NEXT:    ; def s[4:5]
11990; GFX90A-NEXT:    ;;#ASMEND
11991; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
11992; GFX90A-NEXT:    ;;#ASMSTART
11993; GFX90A-NEXT:    ; def s[6:7]
11994; GFX90A-NEXT:    ;;#ASMEND
11995; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
11996; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
11997; GFX90A-NEXT:    ;;#ASMSTART
11998; GFX90A-NEXT:    ; use s[8:9]
11999; GFX90A-NEXT:    ;;#ASMEND
12000; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12001;
12002; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_3:
12003; GFX940:       ; %bb.0:
12004; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12005; GFX940-NEXT:    ;;#ASMSTART
12006; GFX940-NEXT:    ; def s[0:1]
12007; GFX940-NEXT:    ;;#ASMEND
12008; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
12009; GFX940-NEXT:    ;;#ASMSTART
12010; GFX940-NEXT:    ; def s[2:3]
12011; GFX940-NEXT:    ;;#ASMEND
12012; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s2
12013; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
12014; GFX940-NEXT:    ;;#ASMSTART
12015; GFX940-NEXT:    ; use s[8:9]
12016; GFX940-NEXT:    ;;#ASMEND
12017; GFX940-NEXT:    s_setpc_b64 s[30:31]
12018  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12019  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12020  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12021  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12022  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
12023  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12024  ret void
12025}
12026
; Mask <5, 5, 2, 3> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[2], vec1[2], vec0[2], vec1[0]>, consumed by an inline
; asm pinned to s[8:9]. Both inputs contribute, and the checks show two
; "; def" lines.
12027define void @s_shuffle_v4i16_v3i16__5_5_2_3() {
12028; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_3:
12029; GFX900:       ; %bb.0:
12030; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12031; GFX900-NEXT:    ;;#ASMSTART
12032; GFX900-NEXT:    ; def s[4:5]
12033; GFX900-NEXT:    ;;#ASMEND
12034; GFX900-NEXT:    ;;#ASMSTART
12035; GFX900-NEXT:    ; def s[6:7]
12036; GFX900-NEXT:    ;;#ASMEND
12037; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s6
12038; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
12039; GFX900-NEXT:    ;;#ASMSTART
12040; GFX900-NEXT:    ; use s[8:9]
12041; GFX900-NEXT:    ;;#ASMEND
12042; GFX900-NEXT:    s_setpc_b64 s[30:31]
12043;
12044; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_3:
12045; GFX90A:       ; %bb.0:
12046; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12047; GFX90A-NEXT:    ;;#ASMSTART
12048; GFX90A-NEXT:    ; def s[4:5]
12049; GFX90A-NEXT:    ;;#ASMEND
12050; GFX90A-NEXT:    ;;#ASMSTART
12051; GFX90A-NEXT:    ; def s[6:7]
12052; GFX90A-NEXT:    ;;#ASMEND
12053; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s6
12054; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
12055; GFX90A-NEXT:    ;;#ASMSTART
12056; GFX90A-NEXT:    ; use s[8:9]
12057; GFX90A-NEXT:    ;;#ASMEND
12058; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12059;
12060; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_3:
12061; GFX940:       ; %bb.0:
12062; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12063; GFX940-NEXT:    ;;#ASMSTART
12064; GFX940-NEXT:    ; def s[0:1]
12065; GFX940-NEXT:    ;;#ASMEND
12066; GFX940-NEXT:    ;;#ASMSTART
12067; GFX940-NEXT:    ; def s[2:3]
12068; GFX940-NEXT:    ;;#ASMEND
12069; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s2
12070; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
12071; GFX940-NEXT:    ;;#ASMSTART
12072; GFX940-NEXT:    ; use s[8:9]
12073; GFX940-NEXT:    ;;#ASMEND
12074; GFX940-NEXT:    s_setpc_b64 s[30:31]
12075  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12076  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12077  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12078  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12079  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
12080  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12081  ret void
12082}
12083
; Mask <5, 5, 4, 3> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[2], vec1[2], vec1[1], vec1[0]>, consumed by an inline
; asm pinned to s[8:9]. No lane of %vec0 reaches the result, and the checks
; below contain a single "; def" line.
12084define void @s_shuffle_v4i16_v3i16__5_5_4_3() {
12085; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_3:
12086; GFX900:       ; %bb.0:
12087; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12088; GFX900-NEXT:    ;;#ASMSTART
12089; GFX900-NEXT:    ; def s[4:5]
12090; GFX900-NEXT:    ;;#ASMEND
12091; GFX900-NEXT:    s_lshr_b32 s6, s4, 16
12092; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
12093; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
12094; GFX900-NEXT:    ;;#ASMSTART
12095; GFX900-NEXT:    ; use s[8:9]
12096; GFX900-NEXT:    ;;#ASMEND
12097; GFX900-NEXT:    s_setpc_b64 s[30:31]
12098;
12099; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_3:
12100; GFX90A:       ; %bb.0:
12101; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12102; GFX90A-NEXT:    ;;#ASMSTART
12103; GFX90A-NEXT:    ; def s[4:5]
12104; GFX90A-NEXT:    ;;#ASMEND
12105; GFX90A-NEXT:    s_lshr_b32 s6, s4, 16
12106; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
12107; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
12108; GFX90A-NEXT:    ;;#ASMSTART
12109; GFX90A-NEXT:    ; use s[8:9]
12110; GFX90A-NEXT:    ;;#ASMEND
12111; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12112;
12113; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_3:
12114; GFX940:       ; %bb.0:
12115; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12116; GFX940-NEXT:    ;;#ASMSTART
12117; GFX940-NEXT:    ; def s[0:1]
12118; GFX940-NEXT:    ;;#ASMEND
12119; GFX940-NEXT:    s_lshr_b32 s2, s0, 16
12120; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s0
12121; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
12122; GFX940-NEXT:    ;;#ASMSTART
12123; GFX940-NEXT:    ; use s[8:9]
12124; GFX940-NEXT:    ;;#ASMEND
12125; GFX940-NEXT:    s_setpc_b64 s[30:31]
12126  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12127  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12128  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12129  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12130  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
12131  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12132  ret void
12133}
12134
; Mask <poison, 4, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <poison, vec1[1], vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. One set of checks covers all three llc invocations; it
; shows the single input defined directly in s[8:9] with only the high dword
; rebuilt.
12135define void @s_shuffle_v4i16_v3i16__u_4_4_4() {
12136; GFX9-LABEL: s_shuffle_v4i16_v3i16__u_4_4_4:
12137; GFX9:       ; %bb.0:
12138; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12139; GFX9-NEXT:    ;;#ASMSTART
12140; GFX9-NEXT:    ; def s[8:9]
12141; GFX9-NEXT:    ;;#ASMEND
12142; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
12143; GFX9-NEXT:    ;;#ASMSTART
12144; GFX9-NEXT:    ; use s[8:9]
12145; GFX9-NEXT:    ;;#ASMEND
12146; GFX9-NEXT:    s_setpc_b64 s[30:31]
12147  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12148  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12149  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12150  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12151  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
12152  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12153  ret void
12154}
12155
; Mask <0, 4, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec0[0], vec1[1], vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. Both inputs contribute, and the checks show two
; "; def" lines.
12156define void @s_shuffle_v4i16_v3i16__0_4_4_4() {
12157; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_4_4_4:
12158; GFX900:       ; %bb.0:
12159; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12160; GFX900-NEXT:    ;;#ASMSTART
12161; GFX900-NEXT:    ; def s[4:5]
12162; GFX900-NEXT:    ;;#ASMEND
12163; GFX900-NEXT:    ;;#ASMSTART
12164; GFX900-NEXT:    ; def s[6:7]
12165; GFX900-NEXT:    ;;#ASMEND
12166; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s6
12167; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12168; GFX900-NEXT:    ;;#ASMSTART
12169; GFX900-NEXT:    ; use s[8:9]
12170; GFX900-NEXT:    ;;#ASMEND
12171; GFX900-NEXT:    s_setpc_b64 s[30:31]
12172;
12173; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_4_4_4:
12174; GFX90A:       ; %bb.0:
12175; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12176; GFX90A-NEXT:    ;;#ASMSTART
12177; GFX90A-NEXT:    ; def s[4:5]
12178; GFX90A-NEXT:    ;;#ASMEND
12179; GFX90A-NEXT:    ;;#ASMSTART
12180; GFX90A-NEXT:    ; def s[6:7]
12181; GFX90A-NEXT:    ;;#ASMEND
12182; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s6
12183; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12184; GFX90A-NEXT:    ;;#ASMSTART
12185; GFX90A-NEXT:    ; use s[8:9]
12186; GFX90A-NEXT:    ;;#ASMEND
12187; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12188;
12189; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_4_4_4:
12190; GFX940:       ; %bb.0:
12191; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12192; GFX940-NEXT:    ;;#ASMSTART
12193; GFX940-NEXT:    ; def s[0:1]
12194; GFX940-NEXT:    ;;#ASMEND
12195; GFX940-NEXT:    ;;#ASMSTART
12196; GFX940-NEXT:    ; def s[2:3]
12197; GFX940-NEXT:    ;;#ASMEND
12198; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s2
12199; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
12200; GFX940-NEXT:    ;;#ASMSTART
12201; GFX940-NEXT:    ; use s[8:9]
12202; GFX940-NEXT:    ;;#ASMEND
12203; GFX940-NEXT:    s_setpc_b64 s[30:31]
12204  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12205  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12206  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12207  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12208  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
12209  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12210  ret void
12211}
12212
; Mask <1, 4, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec0[1], vec1[1], vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. Both inputs contribute, and the checks show two
; "; def" lines.
12213define void @s_shuffle_v4i16_v3i16__1_4_4_4() {
12214; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_4_4_4:
12215; GFX900:       ; %bb.0:
12216; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12217; GFX900-NEXT:    ;;#ASMSTART
12218; GFX900-NEXT:    ; def s[4:5]
12219; GFX900-NEXT:    ;;#ASMEND
12220; GFX900-NEXT:    ;;#ASMSTART
12221; GFX900-NEXT:    ; def s[6:7]
12222; GFX900-NEXT:    ;;#ASMEND
12223; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s6
12224; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12225; GFX900-NEXT:    ;;#ASMSTART
12226; GFX900-NEXT:    ; use s[8:9]
12227; GFX900-NEXT:    ;;#ASMEND
12228; GFX900-NEXT:    s_setpc_b64 s[30:31]
12229;
12230; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_4_4_4:
12231; GFX90A:       ; %bb.0:
12232; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12233; GFX90A-NEXT:    ;;#ASMSTART
12234; GFX90A-NEXT:    ; def s[4:5]
12235; GFX90A-NEXT:    ;;#ASMEND
12236; GFX90A-NEXT:    ;;#ASMSTART
12237; GFX90A-NEXT:    ; def s[6:7]
12238; GFX90A-NEXT:    ;;#ASMEND
12239; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s6
12240; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12241; GFX90A-NEXT:    ;;#ASMSTART
12242; GFX90A-NEXT:    ; use s[8:9]
12243; GFX90A-NEXT:    ;;#ASMEND
12244; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12245;
12246; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_4_4_4:
12247; GFX940:       ; %bb.0:
12248; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12249; GFX940-NEXT:    ;;#ASMSTART
12250; GFX940-NEXT:    ; def s[0:1]
12251; GFX940-NEXT:    ;;#ASMEND
12252; GFX940-NEXT:    ;;#ASMSTART
12253; GFX940-NEXT:    ; def s[2:3]
12254; GFX940-NEXT:    ;;#ASMEND
12255; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s2
12256; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
12257; GFX940-NEXT:    ;;#ASMSTART
12258; GFX940-NEXT:    ; use s[8:9]
12259; GFX940-NEXT:    ;;#ASMEND
12260; GFX940-NEXT:    s_setpc_b64 s[30:31]
12261  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12262  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12263  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12264  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12265  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
12266  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12267  ret void
12268}
12269
; Mask <2, 4, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec0[2], vec1[1], vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. Both inputs contribute, and the checks show two
; "; def" lines.
12270define void @s_shuffle_v4i16_v3i16__2_4_4_4() {
12271; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_4_4_4:
12272; GFX900:       ; %bb.0:
12273; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12274; GFX900-NEXT:    ;;#ASMSTART
12275; GFX900-NEXT:    ; def s[4:5]
12276; GFX900-NEXT:    ;;#ASMEND
12277; GFX900-NEXT:    ;;#ASMSTART
12278; GFX900-NEXT:    ; def s[6:7]
12279; GFX900-NEXT:    ;;#ASMEND
12280; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s6
12281; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12282; GFX900-NEXT:    ;;#ASMSTART
12283; GFX900-NEXT:    ; use s[8:9]
12284; GFX900-NEXT:    ;;#ASMEND
12285; GFX900-NEXT:    s_setpc_b64 s[30:31]
12286;
12287; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_4_4_4:
12288; GFX90A:       ; %bb.0:
12289; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12290; GFX90A-NEXT:    ;;#ASMSTART
12291; GFX90A-NEXT:    ; def s[4:5]
12292; GFX90A-NEXT:    ;;#ASMEND
12293; GFX90A-NEXT:    ;;#ASMSTART
12294; GFX90A-NEXT:    ; def s[6:7]
12295; GFX90A-NEXT:    ;;#ASMEND
12296; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s6
12297; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12298; GFX90A-NEXT:    ;;#ASMSTART
12299; GFX90A-NEXT:    ; use s[8:9]
12300; GFX90A-NEXT:    ;;#ASMEND
12301; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12302;
12303; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_4_4_4:
12304; GFX940:       ; %bb.0:
12305; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12306; GFX940-NEXT:    ;;#ASMSTART
12307; GFX940-NEXT:    ; def s[0:1]
12308; GFX940-NEXT:    ;;#ASMEND
12309; GFX940-NEXT:    ;;#ASMSTART
12310; GFX940-NEXT:    ; def s[2:3]
12311; GFX940-NEXT:    ;;#ASMEND
12312; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s2
12313; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
12314; GFX940-NEXT:    ;;#ASMSTART
12315; GFX940-NEXT:    ; use s[8:9]
12316; GFX940-NEXT:    ;;#ASMEND
12317; GFX940-NEXT:    s_setpc_b64 s[30:31]
12318  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12319  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12320  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12321  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12322  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
12323  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12324  ret void
12325}
12326
; Mask <3, 4, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[0], vec1[1], vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. One set of checks covers all three llc invocations; it
; shows the single input defined directly in s[8:9] with only the high dword
; rebuilt.
12327define void @s_shuffle_v4i16_v3i16__3_4_4_4() {
12328; GFX9-LABEL: s_shuffle_v4i16_v3i16__3_4_4_4:
12329; GFX9:       ; %bb.0:
12330; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12331; GFX9-NEXT:    ;;#ASMSTART
12332; GFX9-NEXT:    ; def s[8:9]
12333; GFX9-NEXT:    ;;#ASMEND
12334; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
12335; GFX9-NEXT:    ;;#ASMSTART
12336; GFX9-NEXT:    ; use s[8:9]
12337; GFX9-NEXT:    ;;#ASMEND
12338; GFX9-NEXT:    s_setpc_b64 s[30:31]
12339  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12340  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12341  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12342  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12343  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
12344  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12345  ret void
12346}
12347
; Mask <4, 4, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is vec1[1] in every lane, consumed by an inline asm pinned to
; s[8:9]. The checks show one packed dword being built and then copied to the
; other half of the pair; no lane of %vec0 reaches the result.
12348define void @s_shuffle_v4i16_v3i16__4_4_4_4() {
12349; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_4_4_4:
12350; GFX900:       ; %bb.0:
12351; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12352; GFX900-NEXT:    ;;#ASMSTART
12353; GFX900-NEXT:    ; def s[4:5]
12354; GFX900-NEXT:    ;;#ASMEND
12355; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
12356; GFX900-NEXT:    s_mov_b32 s9, s8
12357; GFX900-NEXT:    ;;#ASMSTART
12358; GFX900-NEXT:    ; use s[8:9]
12359; GFX900-NEXT:    ;;#ASMEND
12360; GFX900-NEXT:    s_setpc_b64 s[30:31]
12361;
12362; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_4_4_4:
12363; GFX90A:       ; %bb.0:
12364; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12365; GFX90A-NEXT:    ;;#ASMSTART
12366; GFX90A-NEXT:    ; def s[4:5]
12367; GFX90A-NEXT:    ;;#ASMEND
12368; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
12369; GFX90A-NEXT:    s_mov_b32 s9, s8
12370; GFX90A-NEXT:    ;;#ASMSTART
12371; GFX90A-NEXT:    ; use s[8:9]
12372; GFX90A-NEXT:    ;;#ASMEND
12373; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12374;
12375; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_4_4_4:
12376; GFX940:       ; %bb.0:
12377; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12378; GFX940-NEXT:    ;;#ASMSTART
12379; GFX940-NEXT:    ; def s[0:1]
12380; GFX940-NEXT:    ;;#ASMEND
12381; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s0
12382; GFX940-NEXT:    s_mov_b32 s9, s8
12383; GFX940-NEXT:    ;;#ASMSTART
12384; GFX940-NEXT:    ; use s[8:9]
12385; GFX940-NEXT:    ;;#ASMEND
12386; GFX940-NEXT:    s_setpc_b64 s[30:31]
12387  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12388  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12389  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12390  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12391  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
12392  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12393  ret void
12394}
12395
; Mask <5, 4, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[2], vec1[1], vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. No lane of %vec0 reaches the result, and the checks
; below contain a single "; def" line.
12396define void @s_shuffle_v4i16_v3i16__5_4_4_4() {
12397; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_4_4:
12398; GFX900:       ; %bb.0:
12399; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12400; GFX900-NEXT:    ;;#ASMSTART
12401; GFX900-NEXT:    ; def s[4:5]
12402; GFX900-NEXT:    ;;#ASMEND
12403; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
12404; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
12405; GFX900-NEXT:    ;;#ASMSTART
12406; GFX900-NEXT:    ; use s[8:9]
12407; GFX900-NEXT:    ;;#ASMEND
12408; GFX900-NEXT:    s_setpc_b64 s[30:31]
12409;
12410; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_4_4:
12411; GFX90A:       ; %bb.0:
12412; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12413; GFX90A-NEXT:    ;;#ASMSTART
12414; GFX90A-NEXT:    ; def s[4:5]
12415; GFX90A-NEXT:    ;;#ASMEND
12416; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
12417; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
12418; GFX90A-NEXT:    ;;#ASMSTART
12419; GFX90A-NEXT:    ; use s[8:9]
12420; GFX90A-NEXT:    ;;#ASMEND
12421; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12422;
12423; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_4_4:
12424; GFX940:       ; %bb.0:
12425; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12426; GFX940-NEXT:    ;;#ASMSTART
12427; GFX940-NEXT:    ; def s[0:1]
12428; GFX940-NEXT:    ;;#ASMEND
12429; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
12430; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
12431; GFX940-NEXT:    ;;#ASMSTART
12432; GFX940-NEXT:    ; use s[8:9]
12433; GFX940-NEXT:    ;;#ASMEND
12434; GFX940-NEXT:    s_setpc_b64 s[30:31]
12435  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12436  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12437  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12438  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12439  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
12440  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12441  ret void
12442}
12443
; Mask <5, poison, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[2], poison, vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. The checks show the low result dword produced by a
; plain register move; no lane of %vec0 reaches the result.
12444define void @s_shuffle_v4i16_v3i16__5_u_4_4() {
12445; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_4_4:
12446; GFX900:       ; %bb.0:
12447; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12448; GFX900-NEXT:    ;;#ASMSTART
12449; GFX900-NEXT:    ; def s[4:5]
12450; GFX900-NEXT:    ;;#ASMEND
12451; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
12452; GFX900-NEXT:    s_mov_b32 s8, s5
12453; GFX900-NEXT:    ;;#ASMSTART
12454; GFX900-NEXT:    ; use s[8:9]
12455; GFX900-NEXT:    ;;#ASMEND
12456; GFX900-NEXT:    s_setpc_b64 s[30:31]
12457;
12458; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_4_4:
12459; GFX90A:       ; %bb.0:
12460; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12461; GFX90A-NEXT:    ;;#ASMSTART
12462; GFX90A-NEXT:    ; def s[4:5]
12463; GFX90A-NEXT:    ;;#ASMEND
12464; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
12465; GFX90A-NEXT:    s_mov_b32 s8, s5
12466; GFX90A-NEXT:    ;;#ASMSTART
12467; GFX90A-NEXT:    ; use s[8:9]
12468; GFX90A-NEXT:    ;;#ASMEND
12469; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12470;
12471; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_4_4:
12472; GFX940:       ; %bb.0:
12473; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12474; GFX940-NEXT:    ;;#ASMSTART
12475; GFX940-NEXT:    ; def s[0:1]
12476; GFX940-NEXT:    ;;#ASMEND
12477; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
12478; GFX940-NEXT:    s_mov_b32 s8, s1
12479; GFX940-NEXT:    ;;#ASMSTART
12480; GFX940-NEXT:    ; use s[8:9]
12481; GFX940-NEXT:    ;;#ASMEND
12482; GFX940-NEXT:    s_setpc_b64 s[30:31]
12483  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12484  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12485  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12486  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12487  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
12488  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12489  ret void
12490}
12491
; Mask <5, 0, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[2], vec0[0], vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. Both inputs contribute, and the checks show two
; "; def" lines.
12492define void @s_shuffle_v4i16_v3i16__5_0_4_4() {
12493; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_4_4:
12494; GFX900:       ; %bb.0:
12495; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12496; GFX900-NEXT:    ;;#ASMSTART
12497; GFX900-NEXT:    ; def s[4:5]
12498; GFX900-NEXT:    ;;#ASMEND
12499; GFX900-NEXT:    ;;#ASMSTART
12500; GFX900-NEXT:    ; def s[6:7]
12501; GFX900-NEXT:    ;;#ASMEND
12502; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
12503; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12504; GFX900-NEXT:    ;;#ASMSTART
12505; GFX900-NEXT:    ; use s[8:9]
12506; GFX900-NEXT:    ;;#ASMEND
12507; GFX900-NEXT:    s_setpc_b64 s[30:31]
12508;
12509; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_4_4:
12510; GFX90A:       ; %bb.0:
12511; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12512; GFX90A-NEXT:    ;;#ASMSTART
12513; GFX90A-NEXT:    ; def s[4:5]
12514; GFX90A-NEXT:    ;;#ASMEND
12515; GFX90A-NEXT:    ;;#ASMSTART
12516; GFX90A-NEXT:    ; def s[6:7]
12517; GFX90A-NEXT:    ;;#ASMEND
12518; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
12519; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12520; GFX90A-NEXT:    ;;#ASMSTART
12521; GFX90A-NEXT:    ; use s[8:9]
12522; GFX90A-NEXT:    ;;#ASMEND
12523; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12524;
12525; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_4_4:
12526; GFX940:       ; %bb.0:
12527; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12528; GFX940-NEXT:    ;;#ASMSTART
12529; GFX940-NEXT:    ; def s[0:1]
12530; GFX940-NEXT:    ;;#ASMEND
12531; GFX940-NEXT:    ;;#ASMSTART
12532; GFX940-NEXT:    ; def s[2:3]
12533; GFX940-NEXT:    ;;#ASMEND
12534; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
12535; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
12536; GFX940-NEXT:    ;;#ASMSTART
12537; GFX940-NEXT:    ; use s[8:9]
12538; GFX940-NEXT:    ;;#ASMEND
12539; GFX940-NEXT:    s_setpc_b64 s[30:31]
12540  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12541  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12542  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12543  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12544  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
12545  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12546  ret void
12547}
12548
; Mask <5, 1, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[2], vec0[1], vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. Both inputs contribute, and the checks show two
; "; def" lines.
12549define void @s_shuffle_v4i16_v3i16__5_1_4_4() {
12550; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_4_4:
12551; GFX900:       ; %bb.0:
12552; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12553; GFX900-NEXT:    ;;#ASMSTART
12554; GFX900-NEXT:    ; def s[4:5]
12555; GFX900-NEXT:    ;;#ASMEND
12556; GFX900-NEXT:    ;;#ASMSTART
12557; GFX900-NEXT:    ; def s[6:7]
12558; GFX900-NEXT:    ;;#ASMEND
12559; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
12560; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12561; GFX900-NEXT:    ;;#ASMSTART
12562; GFX900-NEXT:    ; use s[8:9]
12563; GFX900-NEXT:    ;;#ASMEND
12564; GFX900-NEXT:    s_setpc_b64 s[30:31]
12565;
12566; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_4_4:
12567; GFX90A:       ; %bb.0:
12568; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12569; GFX90A-NEXT:    ;;#ASMSTART
12570; GFX90A-NEXT:    ; def s[4:5]
12571; GFX90A-NEXT:    ;;#ASMEND
12572; GFX90A-NEXT:    ;;#ASMSTART
12573; GFX90A-NEXT:    ; def s[6:7]
12574; GFX90A-NEXT:    ;;#ASMEND
12575; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
12576; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12577; GFX90A-NEXT:    ;;#ASMSTART
12578; GFX90A-NEXT:    ; use s[8:9]
12579; GFX90A-NEXT:    ;;#ASMEND
12580; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12581;
12582; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_4_4:
12583; GFX940:       ; %bb.0:
12584; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12585; GFX940-NEXT:    ;;#ASMSTART
12586; GFX940-NEXT:    ; def s[0:1]
12587; GFX940-NEXT:    ;;#ASMEND
12588; GFX940-NEXT:    ;;#ASMSTART
12589; GFX940-NEXT:    ; def s[2:3]
12590; GFX940-NEXT:    ;;#ASMEND
12591; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
12592; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
12593; GFX940-NEXT:    ;;#ASMSTART
12594; GFX940-NEXT:    ; use s[8:9]
12595; GFX940-NEXT:    ;;#ASMEND
12596; GFX940-NEXT:    s_setpc_b64 s[30:31]
12597  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12598  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12599  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12600  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12601  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
12602  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12603  ret void
12604}
12605
; Mask <5, 2, 4, 4> over lanes 0-2 of two "=s" inline-asm <4 x i16> values.
; Mask indices 0-2 pick from %extract3 (%vec0) and 3-5 from %extract31 (%vec1),
; so the result is <vec1[2], vec0[2], vec1[1], vec1[1]>, consumed by an inline
; asm pinned to s[8:9]. Both inputs contribute, and the checks show two
; "; def" lines.
12606define void @s_shuffle_v4i16_v3i16__5_2_4_4() {
12607; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_4_4:
12608; GFX900:       ; %bb.0:
12609; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12610; GFX900-NEXT:    ;;#ASMSTART
12611; GFX900-NEXT:    ; def s[4:5]
12612; GFX900-NEXT:    ;;#ASMEND
12613; GFX900-NEXT:    ;;#ASMSTART
12614; GFX900-NEXT:    ; def s[6:7]
12615; GFX900-NEXT:    ;;#ASMEND
12616; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
12617; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12618; GFX900-NEXT:    ;;#ASMSTART
12619; GFX900-NEXT:    ; use s[8:9]
12620; GFX900-NEXT:    ;;#ASMEND
12621; GFX900-NEXT:    s_setpc_b64 s[30:31]
12622;
12623; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_4_4:
12624; GFX90A:       ; %bb.0:
12625; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12626; GFX90A-NEXT:    ;;#ASMSTART
12627; GFX90A-NEXT:    ; def s[4:5]
12628; GFX90A-NEXT:    ;;#ASMEND
12629; GFX90A-NEXT:    ;;#ASMSTART
12630; GFX90A-NEXT:    ; def s[6:7]
12631; GFX90A-NEXT:    ;;#ASMEND
12632; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
12633; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
12634; GFX90A-NEXT:    ;;#ASMSTART
12635; GFX90A-NEXT:    ; use s[8:9]
12636; GFX90A-NEXT:    ;;#ASMEND
12637; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12638;
12639; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_4_4:
12640; GFX940:       ; %bb.0:
12641; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12642; GFX940-NEXT:    ;;#ASMSTART
12643; GFX940-NEXT:    ; def s[0:1]
12644; GFX940-NEXT:    ;;#ASMEND
12645; GFX940-NEXT:    ;;#ASMSTART
12646; GFX940-NEXT:    ; def s[2:3]
12647; GFX940-NEXT:    ;;#ASMEND
12648; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
12649; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
12650; GFX940-NEXT:    ;;#ASMSTART
12651; GFX940-NEXT:    ; use s[8:9]
12652; GFX940-NEXT:    ;;#ASMEND
12653; GFX940-NEXT:    s_setpc_b64 s[30:31]
12654  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12655  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12656  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12657  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12658  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
12659  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12660  ret void
12661}
12662
; SGPR ("=s") case, shuffle mask <5, 3, 4, 4>.
; Every mask index is in the 3-5 range, so all result lanes come from
; %extract31: [%extract31[2], %extract31[0], %extract31[1], %extract31[1]].
; %vec0/%extract3 is never selected, and the checks contain a single
; "; def" asm block (presumably the unused, non-sideeffect asm is dropped
; as dead -- TODO confirm). Expected lowering: s_pack_ll_b32_b16 into s8
; followed by s_pack_hh_b32_b16 into s9.
12663define void @s_shuffle_v4i16_v3i16__5_3_4_4() {
12664; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_4_4:
12665; GFX900:       ; %bb.0:
12666; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12667; GFX900-NEXT:    ;;#ASMSTART
12668; GFX900-NEXT:    ; def s[4:5]
12669; GFX900-NEXT:    ;;#ASMEND
12670; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12671; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
12672; GFX900-NEXT:    ;;#ASMSTART
12673; GFX900-NEXT:    ; use s[8:9]
12674; GFX900-NEXT:    ;;#ASMEND
12675; GFX900-NEXT:    s_setpc_b64 s[30:31]
12676;
12677; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_4_4:
12678; GFX90A:       ; %bb.0:
12679; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12680; GFX90A-NEXT:    ;;#ASMSTART
12681; GFX90A-NEXT:    ; def s[4:5]
12682; GFX90A-NEXT:    ;;#ASMEND
12683; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12684; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
12685; GFX90A-NEXT:    ;;#ASMSTART
12686; GFX90A-NEXT:    ; use s[8:9]
12687; GFX90A-NEXT:    ;;#ASMEND
12688; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12689;
12690; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_4_4:
12691; GFX940:       ; %bb.0:
12692; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12693; GFX940-NEXT:    ;;#ASMSTART
12694; GFX940-NEXT:    ; def s[0:1]
12695; GFX940-NEXT:    ;;#ASMEND
12696; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12697; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
12698; GFX940-NEXT:    ;;#ASMSTART
12699; GFX940-NEXT:    ; use s[8:9]
12700; GFX940-NEXT:    ;;#ASMEND
12701; GFX940-NEXT:    s_setpc_b64 s[30:31]
12702  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12703  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12704  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12705  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12706  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
12707  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12708  ret void
12709}
12710
; SGPR ("=s") case, shuffle mask <5, 5, 4, 4>.
; All lanes come from %extract31 (mask indices 3-5):
; [%extract31[2], %extract31[2], %extract31[1], %extract31[1]].
; %vec0/%extract3 is never selected, and the checks contain a single
; "; def" asm block (presumably the unused asm is dropped as dead -- TODO
; confirm). Expected lowering: s_pack_hh_b32_b16 into s9, then
; s_pack_ll_b32_b16 into s8.
12711define void @s_shuffle_v4i16_v3i16__5_5_4_4() {
12712; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_4:
12713; GFX900:       ; %bb.0:
12714; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12715; GFX900-NEXT:    ;;#ASMSTART
12716; GFX900-NEXT:    ; def s[4:5]
12717; GFX900-NEXT:    ;;#ASMEND
12718; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
12719; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
12720; GFX900-NEXT:    ;;#ASMSTART
12721; GFX900-NEXT:    ; use s[8:9]
12722; GFX900-NEXT:    ;;#ASMEND
12723; GFX900-NEXT:    s_setpc_b64 s[30:31]
12724;
12725; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_4:
12726; GFX90A:       ; %bb.0:
12727; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12728; GFX90A-NEXT:    ;;#ASMSTART
12729; GFX90A-NEXT:    ; def s[4:5]
12730; GFX90A-NEXT:    ;;#ASMEND
12731; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
12732; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
12733; GFX90A-NEXT:    ;;#ASMSTART
12734; GFX90A-NEXT:    ; use s[8:9]
12735; GFX90A-NEXT:    ;;#ASMEND
12736; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12737;
12738; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_4:
12739; GFX940:       ; %bb.0:
12740; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12741; GFX940-NEXT:    ;;#ASMSTART
12742; GFX940-NEXT:    ; def s[0:1]
12743; GFX940-NEXT:    ;;#ASMEND
12744; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
12745; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
12746; GFX940-NEXT:    ;;#ASMSTART
12747; GFX940-NEXT:    ; use s[8:9]
12748; GFX940-NEXT:    ;;#ASMEND
12749; GFX940-NEXT:    s_setpc_b64 s[30:31]
12750  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12751  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12752  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12753  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12754  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
12755  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12756  ret void
12757}
12758
; SGPR ("=s") case, shuffle mask <5, 5, poison, 4>.
; Result lanes are [%extract31[2], %extract31[2], poison, %extract31[1]];
; lane 2 is left unspecified by the poison mask element.
; %vec0/%extract3 is never selected, and the checks contain a single
; "; def" asm block (presumably the unused asm is dropped as dead -- TODO
; confirm). Expected lowering: s_pack_ll_b32_b16 into s8, and a plain
; s_mov_b32 copy of the first def register into s9.
12759define void @s_shuffle_v4i16_v3i16__5_5_u_4() {
12760; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_4:
12761; GFX900:       ; %bb.0:
12762; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12763; GFX900-NEXT:    ;;#ASMSTART
12764; GFX900-NEXT:    ; def s[4:5]
12765; GFX900-NEXT:    ;;#ASMEND
12766; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
12767; GFX900-NEXT:    s_mov_b32 s9, s4
12768; GFX900-NEXT:    ;;#ASMSTART
12769; GFX900-NEXT:    ; use s[8:9]
12770; GFX900-NEXT:    ;;#ASMEND
12771; GFX900-NEXT:    s_setpc_b64 s[30:31]
12772;
12773; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_4:
12774; GFX90A:       ; %bb.0:
12775; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12776; GFX90A-NEXT:    ;;#ASMSTART
12777; GFX90A-NEXT:    ; def s[4:5]
12778; GFX90A-NEXT:    ;;#ASMEND
12779; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
12780; GFX90A-NEXT:    s_mov_b32 s9, s4
12781; GFX90A-NEXT:    ;;#ASMSTART
12782; GFX90A-NEXT:    ; use s[8:9]
12783; GFX90A-NEXT:    ;;#ASMEND
12784; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12785;
12786; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_4:
12787; GFX940:       ; %bb.0:
12788; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12789; GFX940-NEXT:    ;;#ASMSTART
12790; GFX940-NEXT:    ; def s[0:1]
12791; GFX940-NEXT:    ;;#ASMEND
12792; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
12793; GFX940-NEXT:    s_mov_b32 s9, s0
12794; GFX940-NEXT:    ;;#ASMSTART
12795; GFX940-NEXT:    ; use s[8:9]
12796; GFX940-NEXT:    ;;#ASMEND
12797; GFX940-NEXT:    s_setpc_b64 s[30:31]
12798  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12799  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12800  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12801  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12802  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
12803  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12804  ret void
12805}
12806
; SGPR ("=s") case, shuffle mask <5, 5, 0, 4>.
; Mask indices 0-2 select from %extract3 and 3-5 from %extract31, so the
; result lanes are [%extract31[2], %extract31[2], %extract3[0], %extract31[1]].
; Both asm defs are live. Expected lowering: s_pack_lh_b32_b16 into s9
; (mixing one register from each def) and s_pack_ll_b32_b16 into s8.
12807define void @s_shuffle_v4i16_v3i16__5_5_0_4() {
12808; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_4:
12809; GFX900:       ; %bb.0:
12810; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12811; GFX900-NEXT:    ;;#ASMSTART
12812; GFX900-NEXT:    ; def s[4:5]
12813; GFX900-NEXT:    ;;#ASMEND
12814; GFX900-NEXT:    ;;#ASMSTART
12815; GFX900-NEXT:    ; def s[6:7]
12816; GFX900-NEXT:    ;;#ASMEND
12817; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s4, s6
12818; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
12819; GFX900-NEXT:    ;;#ASMSTART
12820; GFX900-NEXT:    ; use s[8:9]
12821; GFX900-NEXT:    ;;#ASMEND
12822; GFX900-NEXT:    s_setpc_b64 s[30:31]
12823;
12824; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_4:
12825; GFX90A:       ; %bb.0:
12826; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12827; GFX90A-NEXT:    ;;#ASMSTART
12828; GFX90A-NEXT:    ; def s[4:5]
12829; GFX90A-NEXT:    ;;#ASMEND
12830; GFX90A-NEXT:    ;;#ASMSTART
12831; GFX90A-NEXT:    ; def s[6:7]
12832; GFX90A-NEXT:    ;;#ASMEND
12833; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s4, s6
12834; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
12835; GFX90A-NEXT:    ;;#ASMSTART
12836; GFX90A-NEXT:    ; use s[8:9]
12837; GFX90A-NEXT:    ;;#ASMEND
12838; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12839;
12840; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_4:
12841; GFX940:       ; %bb.0:
12842; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12843; GFX940-NEXT:    ;;#ASMSTART
12844; GFX940-NEXT:    ; def s[0:1]
12845; GFX940-NEXT:    ;;#ASMEND
12846; GFX940-NEXT:    ;;#ASMSTART
12847; GFX940-NEXT:    ; def s[2:3]
12848; GFX940-NEXT:    ;;#ASMEND
12849; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s0, s2
12850; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
12851; GFX940-NEXT:    ;;#ASMSTART
12852; GFX940-NEXT:    ; use s[8:9]
12853; GFX940-NEXT:    ;;#ASMEND
12854; GFX940-NEXT:    s_setpc_b64 s[30:31]
12855  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12856  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12857  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12858  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12859  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
12860  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12861  ret void
12862}
12863
; SGPR ("=s") case, shuffle mask <5, 5, 1, 4>.
; Mask indices 0-2 select from %extract3 and 3-5 from %extract31, so the
; result lanes are [%extract31[2], %extract31[2], %extract3[1], %extract31[1]].
; Both asm defs are live. Expected lowering: s_pack_hh_b32_b16 into s9
; (mixing one register from each def) and s_pack_ll_b32_b16 into s8.
12864define void @s_shuffle_v4i16_v3i16__5_5_1_4() {
12865; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_4:
12866; GFX900:       ; %bb.0:
12867; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12868; GFX900-NEXT:    ;;#ASMSTART
12869; GFX900-NEXT:    ; def s[4:5]
12870; GFX900-NEXT:    ;;#ASMEND
12871; GFX900-NEXT:    ;;#ASMSTART
12872; GFX900-NEXT:    ; def s[6:7]
12873; GFX900-NEXT:    ;;#ASMEND
12874; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s6
12875; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
12876; GFX900-NEXT:    ;;#ASMSTART
12877; GFX900-NEXT:    ; use s[8:9]
12878; GFX900-NEXT:    ;;#ASMEND
12879; GFX900-NEXT:    s_setpc_b64 s[30:31]
12880;
12881; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_4:
12882; GFX90A:       ; %bb.0:
12883; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12884; GFX90A-NEXT:    ;;#ASMSTART
12885; GFX90A-NEXT:    ; def s[4:5]
12886; GFX90A-NEXT:    ;;#ASMEND
12887; GFX90A-NEXT:    ;;#ASMSTART
12888; GFX90A-NEXT:    ; def s[6:7]
12889; GFX90A-NEXT:    ;;#ASMEND
12890; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s6
12891; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
12892; GFX90A-NEXT:    ;;#ASMSTART
12893; GFX90A-NEXT:    ; use s[8:9]
12894; GFX90A-NEXT:    ;;#ASMEND
12895; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12896;
12897; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_4:
12898; GFX940:       ; %bb.0:
12899; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12900; GFX940-NEXT:    ;;#ASMSTART
12901; GFX940-NEXT:    ; def s[0:1]
12902; GFX940-NEXT:    ;;#ASMEND
12903; GFX940-NEXT:    ;;#ASMSTART
12904; GFX940-NEXT:    ; def s[2:3]
12905; GFX940-NEXT:    ;;#ASMEND
12906; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s2
12907; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
12908; GFX940-NEXT:    ;;#ASMSTART
12909; GFX940-NEXT:    ; use s[8:9]
12910; GFX940-NEXT:    ;;#ASMEND
12911; GFX940-NEXT:    s_setpc_b64 s[30:31]
12912  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12913  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12914  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12915  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12916  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
12917  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12918  ret void
12919}
12920
; SGPR ("=s") case, shuffle mask <5, 5, 2, 4>.
; Mask indices 0-2 select from %extract3 and 3-5 from %extract31, so the
; result lanes are [%extract31[2], %extract31[2], %extract3[2], %extract31[1]].
; Both asm defs are live. Expected lowering: s_pack_lh_b32_b16 into s9
; (mixing one register from each def) and s_pack_ll_b32_b16 into s8.
12921define void @s_shuffle_v4i16_v3i16__5_5_2_4() {
12922; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_4:
12923; GFX900:       ; %bb.0:
12924; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12925; GFX900-NEXT:    ;;#ASMSTART
12926; GFX900-NEXT:    ; def s[4:5]
12927; GFX900-NEXT:    ;;#ASMEND
12928; GFX900-NEXT:    ;;#ASMSTART
12929; GFX900-NEXT:    ; def s[6:7]
12930; GFX900-NEXT:    ;;#ASMEND
12931; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s5, s6
12932; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
12933; GFX900-NEXT:    ;;#ASMSTART
12934; GFX900-NEXT:    ; use s[8:9]
12935; GFX900-NEXT:    ;;#ASMEND
12936; GFX900-NEXT:    s_setpc_b64 s[30:31]
12937;
12938; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_4:
12939; GFX90A:       ; %bb.0:
12940; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12941; GFX90A-NEXT:    ;;#ASMSTART
12942; GFX90A-NEXT:    ; def s[4:5]
12943; GFX90A-NEXT:    ;;#ASMEND
12944; GFX90A-NEXT:    ;;#ASMSTART
12945; GFX90A-NEXT:    ; def s[6:7]
12946; GFX90A-NEXT:    ;;#ASMEND
12947; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s5, s6
12948; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
12949; GFX90A-NEXT:    ;;#ASMSTART
12950; GFX90A-NEXT:    ; use s[8:9]
12951; GFX90A-NEXT:    ;;#ASMEND
12952; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12953;
12954; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_4:
12955; GFX940:       ; %bb.0:
12956; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12957; GFX940-NEXT:    ;;#ASMSTART
12958; GFX940-NEXT:    ; def s[0:1]
12959; GFX940-NEXT:    ;;#ASMEND
12960; GFX940-NEXT:    ;;#ASMSTART
12961; GFX940-NEXT:    ; def s[2:3]
12962; GFX940-NEXT:    ;;#ASMEND
12963; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s1, s2
12964; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
12965; GFX940-NEXT:    ;;#ASMSTART
12966; GFX940-NEXT:    ; use s[8:9]
12967; GFX940-NEXT:    ;;#ASMEND
12968; GFX940-NEXT:    s_setpc_b64 s[30:31]
12969  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12970  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12971  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12972  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
12973  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
12974  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12975  ret void
12976}
12977
; SGPR ("=s") case, shuffle mask <5, 5, 3, 4>.
; All lanes come from %extract31 (mask indices 3-5):
; [%extract31[2], %extract31[2], %extract31[0], %extract31[1]].
; %vec0/%extract3 is never selected, and the checks contain a single
; "; def" asm block (presumably the unused asm is dropped as dead -- TODO
; confirm). Expected lowering: s_pack_ll_b32_b16 into s8, and a plain
; s_mov_b32 copy of the first def register into s9.
12978define void @s_shuffle_v4i16_v3i16__5_5_3_4() {
12979; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_4:
12980; GFX900:       ; %bb.0:
12981; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12982; GFX900-NEXT:    ;;#ASMSTART
12983; GFX900-NEXT:    ; def s[4:5]
12984; GFX900-NEXT:    ;;#ASMEND
12985; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
12986; GFX900-NEXT:    s_mov_b32 s9, s4
12987; GFX900-NEXT:    ;;#ASMSTART
12988; GFX900-NEXT:    ; use s[8:9]
12989; GFX900-NEXT:    ;;#ASMEND
12990; GFX900-NEXT:    s_setpc_b64 s[30:31]
12991;
12992; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_4:
12993; GFX90A:       ; %bb.0:
12994; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12995; GFX90A-NEXT:    ;;#ASMSTART
12996; GFX90A-NEXT:    ; def s[4:5]
12997; GFX90A-NEXT:    ;;#ASMEND
12998; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
12999; GFX90A-NEXT:    s_mov_b32 s9, s4
13000; GFX90A-NEXT:    ;;#ASMSTART
13001; GFX90A-NEXT:    ; use s[8:9]
13002; GFX90A-NEXT:    ;;#ASMEND
13003; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13004;
13005; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_4:
13006; GFX940:       ; %bb.0:
13007; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13008; GFX940-NEXT:    ;;#ASMSTART
13009; GFX940-NEXT:    ; def s[0:1]
13010; GFX940-NEXT:    ;;#ASMEND
13011; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
13012; GFX940-NEXT:    s_mov_b32 s9, s0
13013; GFX940-NEXT:    ;;#ASMSTART
13014; GFX940-NEXT:    ; use s[8:9]
13015; GFX940-NEXT:    ;;#ASMEND
13016; GFX940-NEXT:    s_setpc_b64 s[30:31]
13017  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13018  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13019  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13020  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13021  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
13022  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13023  ret void
13024}
13025
; SGPR ("=s") case, shuffle mask <poison, 5, 5, 5>.
; Result lanes are [poison, %extract31[2], %extract31[2], %extract31[2]];
; lane 0 is left unspecified by the poison mask element.
; %vec0/%extract3 is never selected, and the checks contain a single
; "; def" asm block (presumably the unused asm is dropped as dead -- TODO
; confirm). Expected lowering: s_pack_ll_b32_b16 into s9, and an
; s_lshl_b32 by 16 producing s8.
13026define void @s_shuffle_v4i16_v3i16__u_5_5_5() {
13027; GFX900-LABEL: s_shuffle_v4i16_v3i16__u_5_5_5:
13028; GFX900:       ; %bb.0:
13029; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13030; GFX900-NEXT:    ;;#ASMSTART
13031; GFX900-NEXT:    ; def s[4:5]
13032; GFX900-NEXT:    ;;#ASMEND
13033; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13034; GFX900-NEXT:    s_lshl_b32 s8, s5, 16
13035; GFX900-NEXT:    ;;#ASMSTART
13036; GFX900-NEXT:    ; use s[8:9]
13037; GFX900-NEXT:    ;;#ASMEND
13038; GFX900-NEXT:    s_setpc_b64 s[30:31]
13039;
13040; GFX90A-LABEL: s_shuffle_v4i16_v3i16__u_5_5_5:
13041; GFX90A:       ; %bb.0:
13042; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13043; GFX90A-NEXT:    ;;#ASMSTART
13044; GFX90A-NEXT:    ; def s[4:5]
13045; GFX90A-NEXT:    ;;#ASMEND
13046; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13047; GFX90A-NEXT:    s_lshl_b32 s8, s5, 16
13048; GFX90A-NEXT:    ;;#ASMSTART
13049; GFX90A-NEXT:    ; use s[8:9]
13050; GFX90A-NEXT:    ;;#ASMEND
13051; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13052;
13053; GFX940-LABEL: s_shuffle_v4i16_v3i16__u_5_5_5:
13054; GFX940:       ; %bb.0:
13055; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13056; GFX940-NEXT:    ;;#ASMSTART
13057; GFX940-NEXT:    ; def s[0:1]
13058; GFX940-NEXT:    ;;#ASMEND
13059; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
13060; GFX940-NEXT:    s_lshl_b32 s8, s1, 16
13061; GFX940-NEXT:    ;;#ASMSTART
13062; GFX940-NEXT:    ; use s[8:9]
13063; GFX940-NEXT:    ;;#ASMEND
13064; GFX940-NEXT:    s_setpc_b64 s[30:31]
13065  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13066  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13067  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13068  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13069  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
13070  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13071  ret void
13072}
13073
; SGPR ("=s") case, shuffle mask <0, 5, 5, 5>.
; Mask indices 0-2 select from %extract3 and 3-5 from %extract31, so the
; result lanes are [%extract3[0], %extract31[2], %extract31[2], %extract31[2]].
; Both asm defs are live. Expected lowering: two s_pack_ll_b32_b16
; instructions, the first (into s8) mixing one register from each def.
13074define void @s_shuffle_v4i16_v3i16__0_5_5_5() {
13075; GFX900-LABEL: s_shuffle_v4i16_v3i16__0_5_5_5:
13076; GFX900:       ; %bb.0:
13077; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13078; GFX900-NEXT:    ;;#ASMSTART
13079; GFX900-NEXT:    ; def s[4:5]
13080; GFX900-NEXT:    ;;#ASMEND
13081; GFX900-NEXT:    ;;#ASMSTART
13082; GFX900-NEXT:    ; def s[6:7]
13083; GFX900-NEXT:    ;;#ASMEND
13084; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
13085; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13086; GFX900-NEXT:    ;;#ASMSTART
13087; GFX900-NEXT:    ; use s[8:9]
13088; GFX900-NEXT:    ;;#ASMEND
13089; GFX900-NEXT:    s_setpc_b64 s[30:31]
13090;
13091; GFX90A-LABEL: s_shuffle_v4i16_v3i16__0_5_5_5:
13092; GFX90A:       ; %bb.0:
13093; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13094; GFX90A-NEXT:    ;;#ASMSTART
13095; GFX90A-NEXT:    ; def s[4:5]
13096; GFX90A-NEXT:    ;;#ASMEND
13097; GFX90A-NEXT:    ;;#ASMSTART
13098; GFX90A-NEXT:    ; def s[6:7]
13099; GFX90A-NEXT:    ;;#ASMEND
13100; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
13101; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13102; GFX90A-NEXT:    ;;#ASMSTART
13103; GFX90A-NEXT:    ; use s[8:9]
13104; GFX90A-NEXT:    ;;#ASMEND
13105; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13106;
13107; GFX940-LABEL: s_shuffle_v4i16_v3i16__0_5_5_5:
13108; GFX940:       ; %bb.0:
13109; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13110; GFX940-NEXT:    ;;#ASMSTART
13111; GFX940-NEXT:    ; def s[0:1]
13112; GFX940-NEXT:    ;;#ASMEND
13113; GFX940-NEXT:    ;;#ASMSTART
13114; GFX940-NEXT:    ; def s[2:3]
13115; GFX940-NEXT:    ;;#ASMEND
13116; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
13117; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
13118; GFX940-NEXT:    ;;#ASMSTART
13119; GFX940-NEXT:    ; use s[8:9]
13120; GFX940-NEXT:    ;;#ASMEND
13121; GFX940-NEXT:    s_setpc_b64 s[30:31]
13122  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13123  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13124  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13125  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13126  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
13127  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13128  ret void
13129}
13130
; SGPR ("=s") case, shuffle mask <1, 5, 5, 5>.
; Mask indices 0-2 select from %extract3 and 3-5 from %extract31, so the
; result lanes are [%extract3[1], %extract31[2], %extract31[2], %extract31[2]].
; Both asm defs are live. The checks expect an s_lshr_b32 by 16 on the
; first def's low register, emitted between the two "; def" asm blocks,
; followed by two s_pack_ll_b32_b16 instructions forming s8 and s9.
13131define void @s_shuffle_v4i16_v3i16__1_5_5_5() {
13132; GFX900-LABEL: s_shuffle_v4i16_v3i16__1_5_5_5:
13133; GFX900:       ; %bb.0:
13134; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13135; GFX900-NEXT:    ;;#ASMSTART
13136; GFX900-NEXT:    ; def s[4:5]
13137; GFX900-NEXT:    ;;#ASMEND
13138; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
13139; GFX900-NEXT:    ;;#ASMSTART
13140; GFX900-NEXT:    ; def s[6:7]
13141; GFX900-NEXT:    ;;#ASMEND
13142; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
13143; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13144; GFX900-NEXT:    ;;#ASMSTART
13145; GFX900-NEXT:    ; use s[8:9]
13146; GFX900-NEXT:    ;;#ASMEND
13147; GFX900-NEXT:    s_setpc_b64 s[30:31]
13148;
13149; GFX90A-LABEL: s_shuffle_v4i16_v3i16__1_5_5_5:
13150; GFX90A:       ; %bb.0:
13151; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13152; GFX90A-NEXT:    ;;#ASMSTART
13153; GFX90A-NEXT:    ; def s[4:5]
13154; GFX90A-NEXT:    ;;#ASMEND
13155; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
13156; GFX90A-NEXT:    ;;#ASMSTART
13157; GFX90A-NEXT:    ; def s[6:7]
13158; GFX90A-NEXT:    ;;#ASMEND
13159; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
13160; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13161; GFX90A-NEXT:    ;;#ASMSTART
13162; GFX90A-NEXT:    ; use s[8:9]
13163; GFX90A-NEXT:    ;;#ASMEND
13164; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13165;
13166; GFX940-LABEL: s_shuffle_v4i16_v3i16__1_5_5_5:
13167; GFX940:       ; %bb.0:
13168; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13169; GFX940-NEXT:    ;;#ASMSTART
13170; GFX940-NEXT:    ; def s[0:1]
13171; GFX940-NEXT:    ;;#ASMEND
13172; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
13173; GFX940-NEXT:    ;;#ASMSTART
13174; GFX940-NEXT:    ; def s[2:3]
13175; GFX940-NEXT:    ;;#ASMEND
13176; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
13177; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
13178; GFX940-NEXT:    ;;#ASMSTART
13179; GFX940-NEXT:    ; use s[8:9]
13180; GFX940-NEXT:    ;;#ASMEND
13181; GFX940-NEXT:    s_setpc_b64 s[30:31]
13182  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13183  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13184  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13185  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13186  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
13187  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13188  ret void
13189}
13190
; SGPR ("=s") case, shuffle mask <2, 5, 5, 5>.
; Mask indices 0-2 select from %extract3 and 3-5 from %extract31, so the
; result lanes are [%extract3[2], %extract31[2], %extract31[2], %extract31[2]].
; Both asm defs are live. Expected lowering: two s_pack_ll_b32_b16
; instructions, the first (into s8) mixing one register from each def.
13191define void @s_shuffle_v4i16_v3i16__2_5_5_5() {
13192; GFX900-LABEL: s_shuffle_v4i16_v3i16__2_5_5_5:
13193; GFX900:       ; %bb.0:
13194; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13195; GFX900-NEXT:    ;;#ASMSTART
13196; GFX900-NEXT:    ; def s[4:5]
13197; GFX900-NEXT:    ;;#ASMEND
13198; GFX900-NEXT:    ;;#ASMSTART
13199; GFX900-NEXT:    ; def s[6:7]
13200; GFX900-NEXT:    ;;#ASMEND
13201; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
13202; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13203; GFX900-NEXT:    ;;#ASMSTART
13204; GFX900-NEXT:    ; use s[8:9]
13205; GFX900-NEXT:    ;;#ASMEND
13206; GFX900-NEXT:    s_setpc_b64 s[30:31]
13207;
13208; GFX90A-LABEL: s_shuffle_v4i16_v3i16__2_5_5_5:
13209; GFX90A:       ; %bb.0:
13210; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13211; GFX90A-NEXT:    ;;#ASMSTART
13212; GFX90A-NEXT:    ; def s[4:5]
13213; GFX90A-NEXT:    ;;#ASMEND
13214; GFX90A-NEXT:    ;;#ASMSTART
13215; GFX90A-NEXT:    ; def s[6:7]
13216; GFX90A-NEXT:    ;;#ASMEND
13217; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
13218; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13219; GFX90A-NEXT:    ;;#ASMSTART
13220; GFX90A-NEXT:    ; use s[8:9]
13221; GFX90A-NEXT:    ;;#ASMEND
13222; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13223;
13224; GFX940-LABEL: s_shuffle_v4i16_v3i16__2_5_5_5:
13225; GFX940:       ; %bb.0:
13226; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13227; GFX940-NEXT:    ;;#ASMSTART
13228; GFX940-NEXT:    ; def s[0:1]
13229; GFX940-NEXT:    ;;#ASMEND
13230; GFX940-NEXT:    ;;#ASMSTART
13231; GFX940-NEXT:    ; def s[2:3]
13232; GFX940-NEXT:    ;;#ASMEND
13233; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s3
13234; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
13235; GFX940-NEXT:    ;;#ASMSTART
13236; GFX940-NEXT:    ; use s[8:9]
13237; GFX940-NEXT:    ;;#ASMEND
13238; GFX940-NEXT:    s_setpc_b64 s[30:31]
13239  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13240  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13241  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13242  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13243  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
13244  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13245  ret void
13246}
13247
; SGPR ("=s") case, shuffle mask <3, 5, 5, 5>.
; All lanes come from %extract31 (mask indices 3-5):
; [%extract31[0], %extract31[2], %extract31[2], %extract31[2]].
; %vec0/%extract3 is never selected, and the checks contain a single
; "; def" asm block (presumably the unused asm is dropped as dead -- TODO
; confirm). Expected lowering: two s_pack_ll_b32_b16 instructions forming
; s8 and s9.
13248define void @s_shuffle_v4i16_v3i16__3_5_5_5() {
13249; GFX900-LABEL: s_shuffle_v4i16_v3i16__3_5_5_5:
13250; GFX900:       ; %bb.0:
13251; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13252; GFX900-NEXT:    ;;#ASMSTART
13253; GFX900-NEXT:    ; def s[4:5]
13254; GFX900-NEXT:    ;;#ASMEND
13255; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
13256; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13257; GFX900-NEXT:    ;;#ASMSTART
13258; GFX900-NEXT:    ; use s[8:9]
13259; GFX900-NEXT:    ;;#ASMEND
13260; GFX900-NEXT:    s_setpc_b64 s[30:31]
13261;
13262; GFX90A-LABEL: s_shuffle_v4i16_v3i16__3_5_5_5:
13263; GFX90A:       ; %bb.0:
13264; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13265; GFX90A-NEXT:    ;;#ASMSTART
13266; GFX90A-NEXT:    ; def s[4:5]
13267; GFX90A-NEXT:    ;;#ASMEND
13268; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
13269; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13270; GFX90A-NEXT:    ;;#ASMSTART
13271; GFX90A-NEXT:    ; use s[8:9]
13272; GFX90A-NEXT:    ;;#ASMEND
13273; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13274;
13275; GFX940-LABEL: s_shuffle_v4i16_v3i16__3_5_5_5:
13276; GFX940:       ; %bb.0:
13277; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13278; GFX940-NEXT:    ;;#ASMSTART
13279; GFX940-NEXT:    ; def s[0:1]
13280; GFX940-NEXT:    ;;#ASMEND
13281; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
13282; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
13283; GFX940-NEXT:    ;;#ASMSTART
13284; GFX940-NEXT:    ; use s[8:9]
13285; GFX940-NEXT:    ;;#ASMEND
13286; GFX940-NEXT:    s_setpc_b64 s[30:31]
13287  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13288  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13289  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13290  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13291  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
13292  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13293  ret void
13294}
13295
; SGPR ("=s") case, shuffle mask <4, 5, 5, 5>.
; All lanes come from %extract31 (mask indices 3-5):
; [%extract31[1], %extract31[2], %extract31[2], %extract31[2]].
; %vec0/%extract3 is never selected, and the checks contain a single
; "; def" asm block (presumably the unused asm is dropped as dead -- TODO
; confirm). Expected lowering: an s_lshr_b32 by 16 on the def's low
; register, then two s_pack_ll_b32_b16 instructions forming s8 and s9.
13296define void @s_shuffle_v4i16_v3i16__4_5_5_5() {
13297; GFX900-LABEL: s_shuffle_v4i16_v3i16__4_5_5_5:
13298; GFX900:       ; %bb.0:
13299; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13300; GFX900-NEXT:    ;;#ASMSTART
13301; GFX900-NEXT:    ; def s[4:5]
13302; GFX900-NEXT:    ;;#ASMEND
13303; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
13304; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
13305; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13306; GFX900-NEXT:    ;;#ASMSTART
13307; GFX900-NEXT:    ; use s[8:9]
13308; GFX900-NEXT:    ;;#ASMEND
13309; GFX900-NEXT:    s_setpc_b64 s[30:31]
13310;
13311; GFX90A-LABEL: s_shuffle_v4i16_v3i16__4_5_5_5:
13312; GFX90A:       ; %bb.0:
13313; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13314; GFX90A-NEXT:    ;;#ASMSTART
13315; GFX90A-NEXT:    ; def s[4:5]
13316; GFX90A-NEXT:    ;;#ASMEND
13317; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
13318; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
13319; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13320; GFX90A-NEXT:    ;;#ASMSTART
13321; GFX90A-NEXT:    ; use s[8:9]
13322; GFX90A-NEXT:    ;;#ASMEND
13323; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13324;
13325; GFX940-LABEL: s_shuffle_v4i16_v3i16__4_5_5_5:
13326; GFX940:       ; %bb.0:
13327; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13328; GFX940-NEXT:    ;;#ASMSTART
13329; GFX940-NEXT:    ; def s[0:1]
13330; GFX940-NEXT:    ;;#ASMEND
13331; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
13332; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
13333; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
13334; GFX940-NEXT:    ;;#ASMSTART
13335; GFX940-NEXT:    ; use s[8:9]
13336; GFX940-NEXT:    ;;#ASMEND
13337; GFX940-NEXT:    s_setpc_b64 s[30:31]
13338  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13339  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13340  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13341  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13342  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
13343  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13344  ret void
13345}
13346
; SGPR ("=s") case, shuffle mask <5, poison, 5, 5>.
; Result lanes are [%extract31[2], poison, %extract31[2], %extract31[2]];
; lane 1 is left unspecified by the poison mask element.
; %vec0/%extract3 is never selected, and the checks contain a single
; "; def" asm block (presumably the unused asm is dropped as dead -- TODO
; confirm). Expected lowering: s_pack_ll_b32_b16 into s9, and a plain
; s_mov_b32 copy of the second def register into s8.
13347define void @s_shuffle_v4i16_v3i16__5_u_5_5() {
13348; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_u_5_5:
13349; GFX900:       ; %bb.0:
13350; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13351; GFX900-NEXT:    ;;#ASMSTART
13352; GFX900-NEXT:    ; def s[4:5]
13353; GFX900-NEXT:    ;;#ASMEND
13354; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13355; GFX900-NEXT:    s_mov_b32 s8, s5
13356; GFX900-NEXT:    ;;#ASMSTART
13357; GFX900-NEXT:    ; use s[8:9]
13358; GFX900-NEXT:    ;;#ASMEND
13359; GFX900-NEXT:    s_setpc_b64 s[30:31]
13360;
13361; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_u_5_5:
13362; GFX90A:       ; %bb.0:
13363; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13364; GFX90A-NEXT:    ;;#ASMSTART
13365; GFX90A-NEXT:    ; def s[4:5]
13366; GFX90A-NEXT:    ;;#ASMEND
13367; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13368; GFX90A-NEXT:    s_mov_b32 s8, s5
13369; GFX90A-NEXT:    ;;#ASMSTART
13370; GFX90A-NEXT:    ; use s[8:9]
13371; GFX90A-NEXT:    ;;#ASMEND
13372; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13373;
13374; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_u_5_5:
13375; GFX940:       ; %bb.0:
13376; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13377; GFX940-NEXT:    ;;#ASMSTART
13378; GFX940-NEXT:    ; def s[0:1]
13379; GFX940-NEXT:    ;;#ASMEND
13380; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
13381; GFX940-NEXT:    s_mov_b32 s8, s1
13382; GFX940-NEXT:    ;;#ASMSTART
13383; GFX940-NEXT:    ; use s[8:9]
13384; GFX940-NEXT:    ;;#ASMEND
13385; GFX940-NEXT:    s_setpc_b64 s[30:31]
13386  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13387  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13388  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13389  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13390  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
  ; The "{s[8:9]}" constraint forces the shuffled value into s[8:9] for the use marker.
13391  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13392  ret void
13393}
13394
; Shuffle mask <5, 0, 5, 5>. Lane 1 reads index 0 (element 0 of %extract3, built
; from %vec0); lanes 0, 2 and 3 read index 5 (element 2 of %extract31, built
; from %vec1). Both inputs are referenced, and the expected output below
; contains two "def" asm blocks. The result is handed to an inline-asm use
; pinned to s[8:9].
13395define void @s_shuffle_v4i16_v3i16__5_0_5_5() {
13396; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_0_5_5:
13397; GFX900:       ; %bb.0:
13398; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13399; GFX900-NEXT:    ;;#ASMSTART
13400; GFX900-NEXT:    ; def s[4:5]
13401; GFX900-NEXT:    ;;#ASMEND
13402; GFX900-NEXT:    ;;#ASMSTART
13403; GFX900-NEXT:    ; def s[6:7]
13404; GFX900-NEXT:    ;;#ASMEND
13405; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
13406; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13407; GFX900-NEXT:    ;;#ASMSTART
13408; GFX900-NEXT:    ; use s[8:9]
13409; GFX900-NEXT:    ;;#ASMEND
13410; GFX900-NEXT:    s_setpc_b64 s[30:31]
13411;
13412; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_0_5_5:
13413; GFX90A:       ; %bb.0:
13414; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13415; GFX90A-NEXT:    ;;#ASMSTART
13416; GFX90A-NEXT:    ; def s[4:5]
13417; GFX90A-NEXT:    ;;#ASMEND
13418; GFX90A-NEXT:    ;;#ASMSTART
13419; GFX90A-NEXT:    ; def s[6:7]
13420; GFX90A-NEXT:    ;;#ASMEND
13421; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
13422; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13423; GFX90A-NEXT:    ;;#ASMSTART
13424; GFX90A-NEXT:    ; use s[8:9]
13425; GFX90A-NEXT:    ;;#ASMEND
13426; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13427;
13428; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_0_5_5:
13429; GFX940:       ; %bb.0:
13430; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13431; GFX940-NEXT:    ;;#ASMSTART
13432; GFX940-NEXT:    ; def s[0:1]
13433; GFX940-NEXT:    ;;#ASMEND
13434; GFX940-NEXT:    ;;#ASMSTART
13435; GFX940-NEXT:    ; def s[2:3]
13436; GFX940-NEXT:    ;;#ASMEND
13437; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
13438; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
13439; GFX940-NEXT:    ;;#ASMSTART
13440; GFX940-NEXT:    ; use s[8:9]
13441; GFX940-NEXT:    ;;#ASMEND
13442; GFX940-NEXT:    s_setpc_b64 s[30:31]
13443  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13444  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13445  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13446  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13447  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
13448  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13449  ret void
13450}
13451
; Shuffle mask <5, 1, 5, 5>. Lane 1 reads index 1 (element 1 of %extract3, built
; from %vec0); lanes 0, 2 and 3 read index 5 (element 2 of %extract31, built
; from %vec1). The expected output below forms s8 with s_pack_lh_b32_b16 and
; s9 with s_pack_ll_b32_b16. The result is handed to an inline-asm use pinned
; to s[8:9].
13452define void @s_shuffle_v4i16_v3i16__5_1_5_5() {
13453; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_1_5_5:
13454; GFX900:       ; %bb.0:
13455; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13456; GFX900-NEXT:    ;;#ASMSTART
13457; GFX900-NEXT:    ; def s[4:5]
13458; GFX900-NEXT:    ;;#ASMEND
13459; GFX900-NEXT:    ;;#ASMSTART
13460; GFX900-NEXT:    ; def s[6:7]
13461; GFX900-NEXT:    ;;#ASMEND
13462; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
13463; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13464; GFX900-NEXT:    ;;#ASMSTART
13465; GFX900-NEXT:    ; use s[8:9]
13466; GFX900-NEXT:    ;;#ASMEND
13467; GFX900-NEXT:    s_setpc_b64 s[30:31]
13468;
13469; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_1_5_5:
13470; GFX90A:       ; %bb.0:
13471; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13472; GFX90A-NEXT:    ;;#ASMSTART
13473; GFX90A-NEXT:    ; def s[4:5]
13474; GFX90A-NEXT:    ;;#ASMEND
13475; GFX90A-NEXT:    ;;#ASMSTART
13476; GFX90A-NEXT:    ; def s[6:7]
13477; GFX90A-NEXT:    ;;#ASMEND
13478; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
13479; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13480; GFX90A-NEXT:    ;;#ASMSTART
13481; GFX90A-NEXT:    ; use s[8:9]
13482; GFX90A-NEXT:    ;;#ASMEND
13483; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13484;
13485; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_1_5_5:
13486; GFX940:       ; %bb.0:
13487; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13488; GFX940-NEXT:    ;;#ASMSTART
13489; GFX940-NEXT:    ; def s[0:1]
13490; GFX940-NEXT:    ;;#ASMEND
13491; GFX940-NEXT:    ;;#ASMSTART
13492; GFX940-NEXT:    ; def s[2:3]
13493; GFX940-NEXT:    ;;#ASMEND
13494; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
13495; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
13496; GFX940-NEXT:    ;;#ASMSTART
13497; GFX940-NEXT:    ; use s[8:9]
13498; GFX940-NEXT:    ;;#ASMEND
13499; GFX940-NEXT:    s_setpc_b64 s[30:31]
13500  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13501  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13502  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13503  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13504  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
13505  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13506  ret void
13507}
13508
; Shuffle mask <5, 2, 5, 5>. Lane 1 reads index 2 (element 2 of %extract3, built
; from %vec0); lanes 0, 2 and 3 read index 5 (element 2 of %extract31, built
; from %vec1). Both inputs are referenced, and the expected output below
; contains two "def" asm blocks. The result is handed to an inline-asm use
; pinned to s[8:9].
13509define void @s_shuffle_v4i16_v3i16__5_2_5_5() {
13510; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_2_5_5:
13511; GFX900:       ; %bb.0:
13512; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13513; GFX900-NEXT:    ;;#ASMSTART
13514; GFX900-NEXT:    ; def s[4:5]
13515; GFX900-NEXT:    ;;#ASMEND
13516; GFX900-NEXT:    ;;#ASMSTART
13517; GFX900-NEXT:    ; def s[6:7]
13518; GFX900-NEXT:    ;;#ASMEND
13519; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
13520; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13521; GFX900-NEXT:    ;;#ASMSTART
13522; GFX900-NEXT:    ; use s[8:9]
13523; GFX900-NEXT:    ;;#ASMEND
13524; GFX900-NEXT:    s_setpc_b64 s[30:31]
13525;
13526; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_2_5_5:
13527; GFX90A:       ; %bb.0:
13528; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13529; GFX90A-NEXT:    ;;#ASMSTART
13530; GFX90A-NEXT:    ; def s[4:5]
13531; GFX90A-NEXT:    ;;#ASMEND
13532; GFX90A-NEXT:    ;;#ASMSTART
13533; GFX90A-NEXT:    ; def s[6:7]
13534; GFX90A-NEXT:    ;;#ASMEND
13535; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
13536; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
13537; GFX90A-NEXT:    ;;#ASMSTART
13538; GFX90A-NEXT:    ; use s[8:9]
13539; GFX90A-NEXT:    ;;#ASMEND
13540; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13541;
13542; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_2_5_5:
13543; GFX940:       ; %bb.0:
13544; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13545; GFX940-NEXT:    ;;#ASMSTART
13546; GFX940-NEXT:    ; def s[0:1]
13547; GFX940-NEXT:    ;;#ASMEND
13548; GFX940-NEXT:    ;;#ASMSTART
13549; GFX940-NEXT:    ; def s[2:3]
13550; GFX940-NEXT:    ;;#ASMEND
13551; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
13552; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
13553; GFX940-NEXT:    ;;#ASMSTART
13554; GFX940-NEXT:    ; use s[8:9]
13555; GFX940-NEXT:    ;;#ASMEND
13556; GFX940-NEXT:    s_setpc_b64 s[30:31]
13557  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13558  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13559  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13560  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13561  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
13562  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13563  ret void
13564}
13565
; Shuffle mask <5, 3, 5, 5>. Every lane reads from %extract31 (built from
; %vec1): lane 1 reads index 3 (its element 0) and lanes 0, 2 and 3 read index 5
; (its element 2). No lane reads %extract3; the expected output below contains
; a single "def" asm block. The result is handed to an inline-asm use pinned to
; s[8:9].
13566define void @s_shuffle_v4i16_v3i16__5_3_5_5() {
13567; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_3_5_5:
13568; GFX900:       ; %bb.0:
13569; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13570; GFX900-NEXT:    ;;#ASMSTART
13571; GFX900-NEXT:    ; def s[4:5]
13572; GFX900-NEXT:    ;;#ASMEND
13573; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13574; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13575; GFX900-NEXT:    ;;#ASMSTART
13576; GFX900-NEXT:    ; use s[8:9]
13577; GFX900-NEXT:    ;;#ASMEND
13578; GFX900-NEXT:    s_setpc_b64 s[30:31]
13579;
13580; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_3_5_5:
13581; GFX90A:       ; %bb.0:
13582; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13583; GFX90A-NEXT:    ;;#ASMSTART
13584; GFX90A-NEXT:    ; def s[4:5]
13585; GFX90A-NEXT:    ;;#ASMEND
13586; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13587; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13588; GFX90A-NEXT:    ;;#ASMSTART
13589; GFX90A-NEXT:    ; use s[8:9]
13590; GFX90A-NEXT:    ;;#ASMEND
13591; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13592;
13593; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_3_5_5:
13594; GFX940:       ; %bb.0:
13595; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13596; GFX940-NEXT:    ;;#ASMSTART
13597; GFX940-NEXT:    ; def s[0:1]
13598; GFX940-NEXT:    ;;#ASMEND
13599; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13600; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
13601; GFX940-NEXT:    ;;#ASMSTART
13602; GFX940-NEXT:    ; use s[8:9]
13603; GFX940-NEXT:    ;;#ASMEND
13604; GFX940-NEXT:    s_setpc_b64 s[30:31]
13605  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13606  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13607  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13608  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13609  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
13610  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13611  ret void
13612}
13613
; Shuffle mask <5, 4, 5, 5>. Every lane reads from %extract31 (built from
; %vec1): lane 1 reads index 4 (its element 1) and lanes 0, 2 and 3 read index 5
; (its element 2). No lane reads %extract3; the expected output below contains
; a single "def" asm block and forms s8 with s_pack_lh_b32_b16. The result is
; handed to an inline-asm use pinned to s[8:9].
13614define void @s_shuffle_v4i16_v3i16__5_4_5_5() {
13615; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_4_5_5:
13616; GFX900:       ; %bb.0:
13617; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13618; GFX900-NEXT:    ;;#ASMSTART
13619; GFX900-NEXT:    ; def s[4:5]
13620; GFX900-NEXT:    ;;#ASMEND
13621; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
13622; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13623; GFX900-NEXT:    ;;#ASMSTART
13624; GFX900-NEXT:    ; use s[8:9]
13625; GFX900-NEXT:    ;;#ASMEND
13626; GFX900-NEXT:    s_setpc_b64 s[30:31]
13627;
13628; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_4_5_5:
13629; GFX90A:       ; %bb.0:
13630; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13631; GFX90A-NEXT:    ;;#ASMSTART
13632; GFX90A-NEXT:    ; def s[4:5]
13633; GFX90A-NEXT:    ;;#ASMEND
13634; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
13635; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
13636; GFX90A-NEXT:    ;;#ASMSTART
13637; GFX90A-NEXT:    ; use s[8:9]
13638; GFX90A-NEXT:    ;;#ASMEND
13639; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13640;
13641; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_4_5_5:
13642; GFX940:       ; %bb.0:
13643; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13644; GFX940-NEXT:    ;;#ASMSTART
13645; GFX940-NEXT:    ; def s[0:1]
13646; GFX940-NEXT:    ;;#ASMEND
13647; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
13648; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
13649; GFX940-NEXT:    ;;#ASMSTART
13650; GFX940-NEXT:    ; use s[8:9]
13651; GFX940-NEXT:    ;;#ASMEND
13652; GFX940-NEXT:    s_setpc_b64 s[30:31]
13653  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13654  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13655  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13656  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13657  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
13658  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13659  ret void
13660}
13661
; Shuffle mask <5, 5, poison, 5>. Lanes 0, 1 and 3 read index 5 (element 2 of
; %extract31, built from %vec1); lane 2 is poison. No lane reads %extract3; the
; expected output below contains a single "def" asm block and forms s9 with a
; 16-bit left shift. The result is handed to an inline-asm use pinned to
; s[8:9].
13662define void @s_shuffle_v4i16_v3i16__5_5_u_5() {
13663; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_u_5:
13664; GFX900:       ; %bb.0:
13665; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13666; GFX900-NEXT:    ;;#ASMSTART
13667; GFX900-NEXT:    ; def s[4:5]
13668; GFX900-NEXT:    ;;#ASMEND
13669; GFX900-NEXT:    s_lshl_b32 s9, s5, 16
13670; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
13671; GFX900-NEXT:    ;;#ASMSTART
13672; GFX900-NEXT:    ; use s[8:9]
13673; GFX900-NEXT:    ;;#ASMEND
13674; GFX900-NEXT:    s_setpc_b64 s[30:31]
13675;
13676; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_u_5:
13677; GFX90A:       ; %bb.0:
13678; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13679; GFX90A-NEXT:    ;;#ASMSTART
13680; GFX90A-NEXT:    ; def s[4:5]
13681; GFX90A-NEXT:    ;;#ASMEND
13682; GFX90A-NEXT:    s_lshl_b32 s9, s5, 16
13683; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
13684; GFX90A-NEXT:    ;;#ASMSTART
13685; GFX90A-NEXT:    ; use s[8:9]
13686; GFX90A-NEXT:    ;;#ASMEND
13687; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13688;
13689; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_u_5:
13690; GFX940:       ; %bb.0:
13691; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13692; GFX940-NEXT:    ;;#ASMSTART
13693; GFX940-NEXT:    ; def s[0:1]
13694; GFX940-NEXT:    ;;#ASMEND
13695; GFX940-NEXT:    s_lshl_b32 s9, s1, 16
13696; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
13697; GFX940-NEXT:    ;;#ASMSTART
13698; GFX940-NEXT:    ; use s[8:9]
13699; GFX940-NEXT:    ;;#ASMEND
13700; GFX940-NEXT:    s_setpc_b64 s[30:31]
13701  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13702  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13703  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13704  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13705  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
13706  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13707  ret void
13708}
13709
; Shuffle mask <5, 5, 0, 5>. Lane 2 reads index 0 (element 0 of %extract3, built
; from %vec0); lanes 0, 1 and 3 read index 5 (element 2 of %extract31, built
; from %vec1). Both inputs are referenced, and the expected output below
; contains two "def" asm blocks. The result is handed to an inline-asm use
; pinned to s[8:9].
13710define void @s_shuffle_v4i16_v3i16__5_5_0_5() {
13711; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_0_5:
13712; GFX900:       ; %bb.0:
13713; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13714; GFX900-NEXT:    ;;#ASMSTART
13715; GFX900-NEXT:    ; def s[4:5]
13716; GFX900-NEXT:    ;;#ASMEND
13717; GFX900-NEXT:    ;;#ASMSTART
13718; GFX900-NEXT:    ; def s[6:7]
13719; GFX900-NEXT:    ;;#ASMEND
13720; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
13721; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
13722; GFX900-NEXT:    ;;#ASMSTART
13723; GFX900-NEXT:    ; use s[8:9]
13724; GFX900-NEXT:    ;;#ASMEND
13725; GFX900-NEXT:    s_setpc_b64 s[30:31]
13726;
13727; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_0_5:
13728; GFX90A:       ; %bb.0:
13729; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13730; GFX90A-NEXT:    ;;#ASMSTART
13731; GFX90A-NEXT:    ; def s[4:5]
13732; GFX90A-NEXT:    ;;#ASMEND
13733; GFX90A-NEXT:    ;;#ASMSTART
13734; GFX90A-NEXT:    ; def s[6:7]
13735; GFX90A-NEXT:    ;;#ASMEND
13736; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
13737; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
13738; GFX90A-NEXT:    ;;#ASMSTART
13739; GFX90A-NEXT:    ; use s[8:9]
13740; GFX90A-NEXT:    ;;#ASMEND
13741; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13742;
13743; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_0_5:
13744; GFX940:       ; %bb.0:
13745; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13746; GFX940-NEXT:    ;;#ASMSTART
13747; GFX940-NEXT:    ; def s[0:1]
13748; GFX940-NEXT:    ;;#ASMEND
13749; GFX940-NEXT:    ;;#ASMSTART
13750; GFX940-NEXT:    ; def s[2:3]
13751; GFX940-NEXT:    ;;#ASMEND
13752; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s3
13753; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
13754; GFX940-NEXT:    ;;#ASMSTART
13755; GFX940-NEXT:    ; use s[8:9]
13756; GFX940-NEXT:    ;;#ASMEND
13757; GFX940-NEXT:    s_setpc_b64 s[30:31]
13758  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13759  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13760  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13761  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13762  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
13763  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13764  ret void
13765}
13766
; Shuffle mask <5, 5, 1, 5>. Lane 2 reads index 1 (element 1 of %extract3, built
; from %vec0); lanes 0, 1 and 3 read index 5 (element 2 of %extract31, built
; from %vec1). In the expected output below the first def's low register is
; shifted right by 16 before being packed into s9. The result is handed to an
; inline-asm use pinned to s[8:9].
13767define void @s_shuffle_v4i16_v3i16__5_5_1_5() {
13768; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_1_5:
13769; GFX900:       ; %bb.0:
13770; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13771; GFX900-NEXT:    ;;#ASMSTART
13772; GFX900-NEXT:    ; def s[4:5]
13773; GFX900-NEXT:    ;;#ASMEND
13774; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
13775; GFX900-NEXT:    ;;#ASMSTART
13776; GFX900-NEXT:    ; def s[6:7]
13777; GFX900-NEXT:    ;;#ASMEND
13778; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
13779; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
13780; GFX900-NEXT:    ;;#ASMSTART
13781; GFX900-NEXT:    ; use s[8:9]
13782; GFX900-NEXT:    ;;#ASMEND
13783; GFX900-NEXT:    s_setpc_b64 s[30:31]
13784;
13785; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_1_5:
13786; GFX90A:       ; %bb.0:
13787; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13788; GFX90A-NEXT:    ;;#ASMSTART
13789; GFX90A-NEXT:    ; def s[4:5]
13790; GFX90A-NEXT:    ;;#ASMEND
13791; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
13792; GFX90A-NEXT:    ;;#ASMSTART
13793; GFX90A-NEXT:    ; def s[6:7]
13794; GFX90A-NEXT:    ;;#ASMEND
13795; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
13796; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
13797; GFX90A-NEXT:    ;;#ASMSTART
13798; GFX90A-NEXT:    ; use s[8:9]
13799; GFX90A-NEXT:    ;;#ASMEND
13800; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13801;
13802; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_1_5:
13803; GFX940:       ; %bb.0:
13804; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13805; GFX940-NEXT:    ;;#ASMSTART
13806; GFX940-NEXT:    ; def s[0:1]
13807; GFX940-NEXT:    ;;#ASMEND
13808; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
13809; GFX940-NEXT:    ;;#ASMSTART
13810; GFX940-NEXT:    ; def s[2:3]
13811; GFX940-NEXT:    ;;#ASMEND
13812; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s3
13813; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
13814; GFX940-NEXT:    ;;#ASMSTART
13815; GFX940-NEXT:    ; use s[8:9]
13816; GFX940-NEXT:    ;;#ASMEND
13817; GFX940-NEXT:    s_setpc_b64 s[30:31]
13818  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13819  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13820  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13821  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13822  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
13823  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13824  ret void
13825}
13826
; Shuffle mask <5, 5, 2, 5>. Lane 2 reads index 2 (element 2 of %extract3, built
; from %vec0); lanes 0, 1 and 3 read index 5 (element 2 of %extract31, built
; from %vec1). Both inputs are referenced, and the expected output below
; contains two "def" asm blocks. The result is handed to an inline-asm use
; pinned to s[8:9].
13827define void @s_shuffle_v4i16_v3i16__5_5_2_5() {
13828; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_2_5:
13829; GFX900:       ; %bb.0:
13830; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13831; GFX900-NEXT:    ;;#ASMSTART
13832; GFX900-NEXT:    ; def s[4:5]
13833; GFX900-NEXT:    ;;#ASMEND
13834; GFX900-NEXT:    ;;#ASMSTART
13835; GFX900-NEXT:    ; def s[6:7]
13836; GFX900-NEXT:    ;;#ASMEND
13837; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s7
13838; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
13839; GFX900-NEXT:    ;;#ASMSTART
13840; GFX900-NEXT:    ; use s[8:9]
13841; GFX900-NEXT:    ;;#ASMEND
13842; GFX900-NEXT:    s_setpc_b64 s[30:31]
13843;
13844; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_2_5:
13845; GFX90A:       ; %bb.0:
13846; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13847; GFX90A-NEXT:    ;;#ASMSTART
13848; GFX90A-NEXT:    ; def s[4:5]
13849; GFX90A-NEXT:    ;;#ASMEND
13850; GFX90A-NEXT:    ;;#ASMSTART
13851; GFX90A-NEXT:    ; def s[6:7]
13852; GFX90A-NEXT:    ;;#ASMEND
13853; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s7
13854; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
13855; GFX90A-NEXT:    ;;#ASMSTART
13856; GFX90A-NEXT:    ; use s[8:9]
13857; GFX90A-NEXT:    ;;#ASMEND
13858; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13859;
13860; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_2_5:
13861; GFX940:       ; %bb.0:
13862; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13863; GFX940-NEXT:    ;;#ASMSTART
13864; GFX940-NEXT:    ; def s[0:1]
13865; GFX940-NEXT:    ;;#ASMEND
13866; GFX940-NEXT:    ;;#ASMSTART
13867; GFX940-NEXT:    ; def s[2:3]
13868; GFX940-NEXT:    ;;#ASMEND
13869; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s3
13870; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
13871; GFX940-NEXT:    ;;#ASMSTART
13872; GFX940-NEXT:    ; use s[8:9]
13873; GFX940-NEXT:    ;;#ASMEND
13874; GFX940-NEXT:    s_setpc_b64 s[30:31]
13875  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13876  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13877  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13878  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13879  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
13880  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13881  ret void
13882}
13883
; Shuffle mask <5, 5, 3, 5>. Every lane reads from %extract31 (built from
; %vec1): lane 2 reads index 3 (its element 0) and lanes 0, 1 and 3 read index 5
; (its element 2). No lane reads %extract3; the expected output below contains
; a single "def" asm block. The result is handed to an inline-asm use pinned to
; s[8:9].
13884define void @s_shuffle_v4i16_v3i16__5_5_3_5() {
13885; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_3_5:
13886; GFX900:       ; %bb.0:
13887; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13888; GFX900-NEXT:    ;;#ASMSTART
13889; GFX900-NEXT:    ; def s[4:5]
13890; GFX900-NEXT:    ;;#ASMEND
13891; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
13892; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
13893; GFX900-NEXT:    ;;#ASMSTART
13894; GFX900-NEXT:    ; use s[8:9]
13895; GFX900-NEXT:    ;;#ASMEND
13896; GFX900-NEXT:    s_setpc_b64 s[30:31]
13897;
13898; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_3_5:
13899; GFX90A:       ; %bb.0:
13900; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13901; GFX90A-NEXT:    ;;#ASMSTART
13902; GFX90A-NEXT:    ; def s[4:5]
13903; GFX90A-NEXT:    ;;#ASMEND
13904; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
13905; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
13906; GFX90A-NEXT:    ;;#ASMSTART
13907; GFX90A-NEXT:    ; use s[8:9]
13908; GFX90A-NEXT:    ;;#ASMEND
13909; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13910;
13911; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_3_5:
13912; GFX940:       ; %bb.0:
13913; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13914; GFX940-NEXT:    ;;#ASMSTART
13915; GFX940-NEXT:    ; def s[0:1]
13916; GFX940-NEXT:    ;;#ASMEND
13917; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
13918; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
13919; GFX940-NEXT:    ;;#ASMSTART
13920; GFX940-NEXT:    ; use s[8:9]
13921; GFX940-NEXT:    ;;#ASMEND
13922; GFX940-NEXT:    s_setpc_b64 s[30:31]
13923  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13924  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13925  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13926  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13927  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
13928  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13929  ret void
13930}
13931
; Shuffle mask <5, 5, 4, 5>. Every lane reads from %extract31 (built from
; %vec1): lane 2 reads index 4 (its element 1) and lanes 0, 1 and 3 read index 5
; (its element 2). No lane reads %extract3; the expected output below contains
; a single "def" asm block, whose low register is shifted right by 16 before
; being packed into s9. The result is handed to an inline-asm use pinned to
; s[8:9].
13932define void @s_shuffle_v4i16_v3i16__5_5_4_5() {
13933; GFX900-LABEL: s_shuffle_v4i16_v3i16__5_5_4_5:
13934; GFX900:       ; %bb.0:
13935; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13936; GFX900-NEXT:    ;;#ASMSTART
13937; GFX900-NEXT:    ; def s[4:5]
13938; GFX900-NEXT:    ;;#ASMEND
13939; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
13940; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
13941; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
13942; GFX900-NEXT:    ;;#ASMSTART
13943; GFX900-NEXT:    ; use s[8:9]
13944; GFX900-NEXT:    ;;#ASMEND
13945; GFX900-NEXT:    s_setpc_b64 s[30:31]
13946;
13947; GFX90A-LABEL: s_shuffle_v4i16_v3i16__5_5_4_5:
13948; GFX90A:       ; %bb.0:
13949; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13950; GFX90A-NEXT:    ;;#ASMSTART
13951; GFX90A-NEXT:    ; def s[4:5]
13952; GFX90A-NEXT:    ;;#ASMEND
13953; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
13954; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
13955; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
13956; GFX90A-NEXT:    ;;#ASMSTART
13957; GFX90A-NEXT:    ; use s[8:9]
13958; GFX90A-NEXT:    ;;#ASMEND
13959; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13960;
13961; GFX940-LABEL: s_shuffle_v4i16_v3i16__5_5_4_5:
13962; GFX940:       ; %bb.0:
13963; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13964; GFX940-NEXT:    ;;#ASMSTART
13965; GFX940-NEXT:    ; def s[0:1]
13966; GFX940-NEXT:    ;;#ASMEND
13967; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
13968; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
13969; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
13970; GFX940-NEXT:    ;;#ASMSTART
13971; GFX940-NEXT:    ; use s[8:9]
13972; GFX940-NEXT:    ;;#ASMEND
13973; GFX940-NEXT:    s_setpc_b64 s[30:31]
13974  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13975  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13976  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13977  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
13978  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
13979  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13980  ret void
13981}
13982;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
13983; GFX90APLUS: {{.*}}
13984