xref: /llvm-project/llvm/test/CodeGen/AMDGPU/shufflevector.v4i16.v4i16.ll (revision 585858aeb6247b3892218edb9d353c63f1c33186)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
5
6
; Shuffle with an all-poison mask: no element of %vec0 is selected for any
; result lane. The expected assembly for every target is just the entry wait
; and the return; neither the inline asm def nor the store appears in it.
7define void @v_shuffle_v4i16_v4i16__u_u_u_u(ptr addrspace(1) inreg %ptr) {
8; GFX9-LABEL: v_shuffle_v4i16_v4i16__u_u_u_u:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    s_setpc_b64 s[30:31]
12  %vec0 = call <4 x i16> asm "; def $0", "=v"()
13  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> poison
14  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
15  ret void
16}
17
; Mask <0, u, u, u>: result lane 0 is element 0 of %vec0, lanes 1-3 are poison.
; Element 0 is already in place, so the expected assembly stores the
; asm-defined register pair directly with no shuffle instruction in between.
18define void @v_shuffle_v4i16_v4i16__0_u_u_u(ptr addrspace(1) inreg %ptr) {
19; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_u_u_u:
20; GFX900:       ; %bb.0:
21; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX900-NEXT:    v_mov_b32_e32 v2, 0
23; GFX900-NEXT:    ;;#ASMSTART
24; GFX900-NEXT:    ; def v[0:1]
25; GFX900-NEXT:    ;;#ASMEND
26; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
27; GFX900-NEXT:    s_waitcnt vmcnt(0)
28; GFX900-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_u_u_u:
31; GFX90A:       ; %bb.0:
32; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
34; GFX90A-NEXT:    ;;#ASMSTART
35; GFX90A-NEXT:    ; def v[0:1]
36; GFX90A-NEXT:    ;;#ASMEND
37; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
38; GFX90A-NEXT:    s_waitcnt vmcnt(0)
39; GFX90A-NEXT:    s_setpc_b64 s[30:31]
40;
41; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_u_u_u:
42; GFX940:       ; %bb.0:
43; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GFX940-NEXT:    v_mov_b32_e32 v2, 0
45; GFX940-NEXT:    ;;#ASMSTART
46; GFX940-NEXT:    ; def v[0:1]
47; GFX940-NEXT:    ;;#ASMEND
48; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
49; GFX940-NEXT:    s_waitcnt vmcnt(0)
50; GFX940-NEXT:    s_setpc_b64 s[30:31]
51  %vec0 = call <4 x i16> asm "; def $0", "=v"()
52  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
53  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
54  ret void
55}
56
; Mask <1, u, u, u>: result lane 0 is element 1 of %vec0, lanes 1-3 are poison.
; The expected assembly contains one v_alignbit_b32 with shift amount 16 whose
; data operand is v0 (the first register of the asm-defined pair).
57define void @v_shuffle_v4i16_v4i16__1_u_u_u(ptr addrspace(1) inreg %ptr) {
58; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_u_u_u:
59; GFX900:       ; %bb.0:
60; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61; GFX900-NEXT:    ;;#ASMSTART
62; GFX900-NEXT:    ; def v[0:1]
63; GFX900-NEXT:    ;;#ASMEND
64; GFX900-NEXT:    v_mov_b32_e32 v2, 0
65; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
66; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
67; GFX900-NEXT:    s_waitcnt vmcnt(0)
68; GFX900-NEXT:    s_setpc_b64 s[30:31]
69;
70; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_u_u_u:
71; GFX90A:       ; %bb.0:
72; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73; GFX90A-NEXT:    ;;#ASMSTART
74; GFX90A-NEXT:    ; def v[0:1]
75; GFX90A-NEXT:    ;;#ASMEND
76; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
77; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
78; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
79; GFX90A-NEXT:    s_waitcnt vmcnt(0)
80; GFX90A-NEXT:    s_setpc_b64 s[30:31]
81;
82; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_u_u_u:
83; GFX940:       ; %bb.0:
84; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85; GFX940-NEXT:    ;;#ASMSTART
86; GFX940-NEXT:    ; def v[0:1]
87; GFX940-NEXT:    ;;#ASMEND
88; GFX940-NEXT:    v_mov_b32_e32 v2, 0
89; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
90; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
91; GFX940-NEXT:    s_waitcnt vmcnt(0)
92; GFX940-NEXT:    s_setpc_b64 s[30:31]
93  %vec0 = call <4 x i16> asm "; def $0", "=v"()
94  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
95  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
96  ret void
97}
98
; Mask <2, u, u, u>: result lane 0 is element 2 of %vec0, lanes 1-3 are poison.
; The expected assembly copies v1 into v0 with a single v_mov_b32 before the
; store; no 16-bit shift or permute is involved.
99define void @v_shuffle_v4i16_v4i16__2_u_u_u(ptr addrspace(1) inreg %ptr) {
100; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u:
101; GFX900:       ; %bb.0:
102; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX900-NEXT:    ;;#ASMSTART
104; GFX900-NEXT:    ; def v[0:1]
105; GFX900-NEXT:    ;;#ASMEND
106; GFX900-NEXT:    v_mov_b32_e32 v2, 0
107; GFX900-NEXT:    v_mov_b32_e32 v0, v1
108; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
109; GFX900-NEXT:    s_waitcnt vmcnt(0)
110; GFX900-NEXT:    s_setpc_b64 s[30:31]
111;
112; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u:
113; GFX90A:       ; %bb.0:
114; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; GFX90A-NEXT:    ;;#ASMSTART
116; GFX90A-NEXT:    ; def v[0:1]
117; GFX90A-NEXT:    ;;#ASMEND
118; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
119; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
120; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
121; GFX90A-NEXT:    s_waitcnt vmcnt(0)
122; GFX90A-NEXT:    s_setpc_b64 s[30:31]
123;
124; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_u_u_u:
125; GFX940:       ; %bb.0:
126; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX940-NEXT:    ;;#ASMSTART
128; GFX940-NEXT:    ; def v[0:1]
129; GFX940-NEXT:    ;;#ASMEND
130; GFX940-NEXT:    v_mov_b32_e32 v2, 0
131; GFX940-NEXT:    v_mov_b32_e32 v0, v1
132; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
133; GFX940-NEXT:    s_waitcnt vmcnt(0)
134; GFX940-NEXT:    s_setpc_b64 s[30:31]
135  %vec0 = call <4 x i16> asm "; def $0", "=v"()
136  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
137  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
138  ret void
139}
140
; Mask <3, u, u, u>: result lane 0 is element 3 of %vec0, lanes 1-3 are poison.
; The expected assembly contains one v_alignbit_b32 with shift amount 16 whose
; data operand is v1 (the second register of the asm-defined pair).
141define void @v_shuffle_v4i16_v4i16__3_u_u_u(ptr addrspace(1) inreg %ptr) {
142; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_u_u_u:
143; GFX900:       ; %bb.0:
144; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; GFX900-NEXT:    ;;#ASMSTART
146; GFX900-NEXT:    ; def v[0:1]
147; GFX900-NEXT:    ;;#ASMEND
148; GFX900-NEXT:    v_mov_b32_e32 v2, 0
149; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
150; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
151; GFX900-NEXT:    s_waitcnt vmcnt(0)
152; GFX900-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_u_u_u:
155; GFX90A:       ; %bb.0:
156; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX90A-NEXT:    ;;#ASMSTART
158; GFX90A-NEXT:    ; def v[0:1]
159; GFX90A-NEXT:    ;;#ASMEND
160; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
161; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
162; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
163; GFX90A-NEXT:    s_waitcnt vmcnt(0)
164; GFX90A-NEXT:    s_setpc_b64 s[30:31]
165;
166; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_u_u_u:
167; GFX940:       ; %bb.0:
168; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169; GFX940-NEXT:    ;;#ASMSTART
170; GFX940-NEXT:    ; def v[0:1]
171; GFX940-NEXT:    ;;#ASMEND
172; GFX940-NEXT:    v_mov_b32_e32 v2, 0
173; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
174; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
175; GFX940-NEXT:    s_waitcnt vmcnt(0)
176; GFX940-NEXT:    s_setpc_b64 s[30:31]
177  %vec0 = call <4 x i16> asm "; def $0", "=v"()
178  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
179  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
180  ret void
181}
182
; Mask <4, u, u, u>: index 4 addresses element 0 of the second shuffle operand,
; which is poison in this function (unlike the 5/6/7 variants, which pass
; %vec1). Every result lane is therefore poison, and the expected assembly is
; just the entry wait and the return.
183define void @v_shuffle_v4i16_v4i16__4_u_u_u(ptr addrspace(1) inreg %ptr) {
184; GFX9-LABEL: v_shuffle_v4i16_v4i16__4_u_u_u:
185; GFX9:       ; %bb.0:
186; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; GFX9-NEXT:    s_setpc_b64 s[30:31]
188  %vec0 = call <4 x i16> asm "; def $0", "=v"()
189  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
190  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
191  ret void
192}
193
; Mask <5, u, u, u>: indices 4-7 address the second operand, so result lane 0 is
; element 1 of %vec1; lanes 1-3 are poison. %vec0 is never selected, and the
; expected assembly shows a single inline asm def followed by one
; v_alignbit_b32 with shift amount 16 reading v0.
194define void @v_shuffle_v4i16_v4i16__5_u_u_u(ptr addrspace(1) inreg %ptr) {
195; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_u_u_u:
196; GFX900:       ; %bb.0:
197; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198; GFX900-NEXT:    ;;#ASMSTART
199; GFX900-NEXT:    ; def v[0:1]
200; GFX900-NEXT:    ;;#ASMEND
201; GFX900-NEXT:    v_mov_b32_e32 v2, 0
202; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
203; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
204; GFX900-NEXT:    s_waitcnt vmcnt(0)
205; GFX900-NEXT:    s_setpc_b64 s[30:31]
206;
207; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_u_u_u:
208; GFX90A:       ; %bb.0:
209; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210; GFX90A-NEXT:    ;;#ASMSTART
211; GFX90A-NEXT:    ; def v[0:1]
212; GFX90A-NEXT:    ;;#ASMEND
213; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
214; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
215; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
216; GFX90A-NEXT:    s_waitcnt vmcnt(0)
217; GFX90A-NEXT:    s_setpc_b64 s[30:31]
218;
219; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_u_u_u:
220; GFX940:       ; %bb.0:
221; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; GFX940-NEXT:    ;;#ASMSTART
223; GFX940-NEXT:    ; def v[0:1]
224; GFX940-NEXT:    ;;#ASMEND
225; GFX940-NEXT:    v_mov_b32_e32 v2, 0
226; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
227; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
228; GFX940-NEXT:    s_waitcnt vmcnt(0)
229; GFX940-NEXT:    s_setpc_b64 s[30:31]
230  %vec0 = call <4 x i16> asm "; def $0", "=v"()
231  %vec1 = call <4 x i16> asm "; def $0", "=v"()
232  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
233  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
234  ret void
235}
236
; Mask <6, u, u, u>: result lane 0 is element 2 of %vec1 (indices 4-7 address
; the second operand); lanes 1-3 are poison. %vec0 is never selected, and the
; expected assembly shows a single inline asm def followed by a v_mov_b32
; copying v1 into v0.
237define void @v_shuffle_v4i16_v4i16__6_u_u_u(ptr addrspace(1) inreg %ptr) {
238; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u:
239; GFX900:       ; %bb.0:
240; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241; GFX900-NEXT:    ;;#ASMSTART
242; GFX900-NEXT:    ; def v[0:1]
243; GFX900-NEXT:    ;;#ASMEND
244; GFX900-NEXT:    v_mov_b32_e32 v2, 0
245; GFX900-NEXT:    v_mov_b32_e32 v0, v1
246; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
247; GFX900-NEXT:    s_waitcnt vmcnt(0)
248; GFX900-NEXT:    s_setpc_b64 s[30:31]
249;
250; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u:
251; GFX90A:       ; %bb.0:
252; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253; GFX90A-NEXT:    ;;#ASMSTART
254; GFX90A-NEXT:    ; def v[0:1]
255; GFX90A-NEXT:    ;;#ASMEND
256; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
257; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
258; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
259; GFX90A-NEXT:    s_waitcnt vmcnt(0)
260; GFX90A-NEXT:    s_setpc_b64 s[30:31]
261;
262; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_u_u_u:
263; GFX940:       ; %bb.0:
264; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
265; GFX940-NEXT:    ;;#ASMSTART
266; GFX940-NEXT:    ; def v[0:1]
267; GFX940-NEXT:    ;;#ASMEND
268; GFX940-NEXT:    v_mov_b32_e32 v2, 0
269; GFX940-NEXT:    v_mov_b32_e32 v0, v1
270; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
271; GFX940-NEXT:    s_waitcnt vmcnt(0)
272; GFX940-NEXT:    s_setpc_b64 s[30:31]
273  %vec0 = call <4 x i16> asm "; def $0", "=v"()
274  %vec1 = call <4 x i16> asm "; def $0", "=v"()
275  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 poison, i32 poison, i32 poison>
276  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
277  ret void
278}
279
; Mask <7, u, u, u>: result lane 0 is element 3 of %vec1 (indices 4-7 address
; the second operand); lanes 1-3 are poison. %vec0 is never selected, and the
; expected assembly shows a single inline asm def followed by one
; v_alignbit_b32 with shift amount 16 reading v1.
280define void @v_shuffle_v4i16_v4i16__7_u_u_u(ptr addrspace(1) inreg %ptr) {
281; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_u_u:
282; GFX900:       ; %bb.0:
283; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
284; GFX900-NEXT:    ;;#ASMSTART
285; GFX900-NEXT:    ; def v[0:1]
286; GFX900-NEXT:    ;;#ASMEND
287; GFX900-NEXT:    v_mov_b32_e32 v2, 0
288; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
289; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
290; GFX900-NEXT:    s_waitcnt vmcnt(0)
291; GFX900-NEXT:    s_setpc_b64 s[30:31]
292;
293; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_u_u:
294; GFX90A:       ; %bb.0:
295; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296; GFX90A-NEXT:    ;;#ASMSTART
297; GFX90A-NEXT:    ; def v[0:1]
298; GFX90A-NEXT:    ;;#ASMEND
299; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
300; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
301; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
302; GFX90A-NEXT:    s_waitcnt vmcnt(0)
303; GFX90A-NEXT:    s_setpc_b64 s[30:31]
304;
305; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_u_u:
306; GFX940:       ; %bb.0:
307; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GFX940-NEXT:    ;;#ASMSTART
309; GFX940-NEXT:    ; def v[0:1]
310; GFX940-NEXT:    ;;#ASMEND
311; GFX940-NEXT:    v_mov_b32_e32 v2, 0
312; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
313; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
314; GFX940-NEXT:    s_waitcnt vmcnt(0)
315; GFX940-NEXT:    s_setpc_b64 s[30:31]
316  %vec0 = call <4 x i16> asm "; def $0", "=v"()
317  %vec1 = call <4 x i16> asm "; def $0", "=v"()
318  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 poison, i32 poison>
319  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
320  ret void
321}
322
; Mask <7, 0, u, u>: result lane 0 is element 3 of %vec1, lane 1 is element 0 of
; %vec0; lanes 2-3 are poison. Both inline asm defs appear in the expected
; assembly, and the two halves are combined by one v_alignbit_b32 with shift
; amount 16. The gfx940 output additionally has an s_nop 0 before that
; instruction.
323define void @v_shuffle_v4i16_v4i16__7_0_u_u(ptr addrspace(1) inreg %ptr) {
324; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_u_u:
325; GFX900:       ; %bb.0:
326; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327; GFX900-NEXT:    ;;#ASMSTART
328; GFX900-NEXT:    ; def v[0:1]
329; GFX900-NEXT:    ;;#ASMEND
330; GFX900-NEXT:    v_mov_b32_e32 v3, 0
331; GFX900-NEXT:    ;;#ASMSTART
332; GFX900-NEXT:    ; def v[1:2]
333; GFX900-NEXT:    ;;#ASMEND
334; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
335; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
336; GFX900-NEXT:    s_waitcnt vmcnt(0)
337; GFX900-NEXT:    s_setpc_b64 s[30:31]
338;
339; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_u_u:
340; GFX90A:       ; %bb.0:
341; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342; GFX90A-NEXT:    ;;#ASMSTART
343; GFX90A-NEXT:    ; def v[0:1]
344; GFX90A-NEXT:    ;;#ASMEND
345; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
346; GFX90A-NEXT:    ;;#ASMSTART
347; GFX90A-NEXT:    ; def v[2:3]
348; GFX90A-NEXT:    ;;#ASMEND
349; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
350; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
351; GFX90A-NEXT:    s_waitcnt vmcnt(0)
352; GFX90A-NEXT:    s_setpc_b64 s[30:31]
353;
354; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_u_u:
355; GFX940:       ; %bb.0:
356; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357; GFX940-NEXT:    ;;#ASMSTART
358; GFX940-NEXT:    ; def v[0:1]
359; GFX940-NEXT:    ;;#ASMEND
360; GFX940-NEXT:    v_mov_b32_e32 v4, 0
361; GFX940-NEXT:    ;;#ASMSTART
362; GFX940-NEXT:    ; def v[2:3]
363; GFX940-NEXT:    ;;#ASMEND
364; GFX940-NEXT:    s_nop 0
365; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
366; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
367; GFX940-NEXT:    s_waitcnt vmcnt(0)
368; GFX940-NEXT:    s_setpc_b64 s[30:31]
369  %vec0 = call <4 x i16> asm "; def $0", "=v"()
370  %vec1 = call <4 x i16> asm "; def $0", "=v"()
371  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 poison, i32 poison>
372  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
373  ret void
374}
375
; Mask <7, 1, u, u>: result lane 0 is element 3 of %vec1, lane 1 is element 1 of
; %vec0; lanes 2-3 are poison. Both inline asm defs appear in the expected
; assembly, and the halves are combined by one v_perm_b32 whose selector is the
; constant 0x7060302 materialized in an SGPR. The gfx940 output additionally
; has an s_nop 0 before the v_perm_b32.
376define void @v_shuffle_v4i16_v4i16__7_1_u_u(ptr addrspace(1) inreg %ptr) {
377; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_u_u:
378; GFX900:       ; %bb.0:
379; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
380; GFX900-NEXT:    ;;#ASMSTART
381; GFX900-NEXT:    ; def v[0:1]
382; GFX900-NEXT:    ;;#ASMEND
383; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
384; GFX900-NEXT:    v_mov_b32_e32 v3, 0
385; GFX900-NEXT:    ;;#ASMSTART
386; GFX900-NEXT:    ; def v[1:2]
387; GFX900-NEXT:    ;;#ASMEND
388; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
389; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
390; GFX900-NEXT:    s_waitcnt vmcnt(0)
391; GFX900-NEXT:    s_setpc_b64 s[30:31]
392;
393; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_u_u:
394; GFX90A:       ; %bb.0:
395; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396; GFX90A-NEXT:    ;;#ASMSTART
397; GFX90A-NEXT:    ; def v[0:1]
398; GFX90A-NEXT:    ;;#ASMEND
399; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
400; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
401; GFX90A-NEXT:    ;;#ASMSTART
402; GFX90A-NEXT:    ; def v[2:3]
403; GFX90A-NEXT:    ;;#ASMEND
404; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
405; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
406; GFX90A-NEXT:    s_waitcnt vmcnt(0)
407; GFX90A-NEXT:    s_setpc_b64 s[30:31]
408;
409; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_u_u:
410; GFX940:       ; %bb.0:
411; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412; GFX940-NEXT:    ;;#ASMSTART
413; GFX940-NEXT:    ; def v[0:1]
414; GFX940-NEXT:    ;;#ASMEND
415; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
416; GFX940-NEXT:    v_mov_b32_e32 v4, 0
417; GFX940-NEXT:    ;;#ASMSTART
418; GFX940-NEXT:    ; def v[2:3]
419; GFX940-NEXT:    ;;#ASMEND
420; GFX940-NEXT:    s_nop 0
421; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
422; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
423; GFX940-NEXT:    s_waitcnt vmcnt(0)
424; GFX940-NEXT:    s_setpc_b64 s[30:31]
425  %vec0 = call <4 x i16> asm "; def $0", "=v"()
426  %vec1 = call <4 x i16> asm "; def $0", "=v"()
427  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 poison, i32 poison>
428  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
429  ret void
430}
431
; Mask <7, 2, u, u>: result lane 0 is element 3 of %vec1, lane 1 is element 2 of
; %vec0; lanes 2-3 are poison. Both inline asm defs appear in the expected
; assembly, and the halves are combined by one v_alignbit_b32 with shift amount
; 16 reading v1 and v3. The gfx940 output additionally has an s_nop 0 before it.
432define void @v_shuffle_v4i16_v4i16__7_2_u_u(ptr addrspace(1) inreg %ptr) {
433; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_u_u:
434; GFX900:       ; %bb.0:
435; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
436; GFX900-NEXT:    ;;#ASMSTART
437; GFX900-NEXT:    ; def v[0:1]
438; GFX900-NEXT:    ;;#ASMEND
439; GFX900-NEXT:    v_mov_b32_e32 v4, 0
440; GFX900-NEXT:    ;;#ASMSTART
441; GFX900-NEXT:    ; def v[2:3]
442; GFX900-NEXT:    ;;#ASMEND
443; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
444; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
445; GFX900-NEXT:    s_waitcnt vmcnt(0)
446; GFX900-NEXT:    s_setpc_b64 s[30:31]
447;
448; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_u_u:
449; GFX90A:       ; %bb.0:
450; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
451; GFX90A-NEXT:    ;;#ASMSTART
452; GFX90A-NEXT:    ; def v[0:1]
453; GFX90A-NEXT:    ;;#ASMEND
454; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
455; GFX90A-NEXT:    ;;#ASMSTART
456; GFX90A-NEXT:    ; def v[2:3]
457; GFX90A-NEXT:    ;;#ASMEND
458; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
459; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
460; GFX90A-NEXT:    s_waitcnt vmcnt(0)
461; GFX90A-NEXT:    s_setpc_b64 s[30:31]
462;
463; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_u_u:
464; GFX940:       ; %bb.0:
465; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466; GFX940-NEXT:    ;;#ASMSTART
467; GFX940-NEXT:    ; def v[0:1]
468; GFX940-NEXT:    ;;#ASMEND
469; GFX940-NEXT:    v_mov_b32_e32 v4, 0
470; GFX940-NEXT:    ;;#ASMSTART
471; GFX940-NEXT:    ; def v[2:3]
472; GFX940-NEXT:    ;;#ASMEND
473; GFX940-NEXT:    s_nop 0
474; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
475; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
476; GFX940-NEXT:    s_waitcnt vmcnt(0)
477; GFX940-NEXT:    s_setpc_b64 s[30:31]
478  %vec0 = call <4 x i16> asm "; def $0", "=v"()
479  %vec1 = call <4 x i16> asm "; def $0", "=v"()
480  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 poison, i32 poison>
481  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
482  ret void
483}
484
; Mask <7, 3, u, u>: result lane 0 is element 3 of %vec1, lane 1 is element 3 of
; %vec0; lanes 2-3 are poison. Both inline asm defs appear in the expected
; assembly, and the halves are combined by one v_perm_b32 (selector constant
; 0x7060302) reading v1 and v3. The gfx940 output additionally has an s_nop 0
; before the v_perm_b32.
485define void @v_shuffle_v4i16_v4i16__7_3_u_u(ptr addrspace(1) inreg %ptr) {
486; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_u_u:
487; GFX900:       ; %bb.0:
488; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489; GFX900-NEXT:    ;;#ASMSTART
490; GFX900-NEXT:    ; def v[0:1]
491; GFX900-NEXT:    ;;#ASMEND
492; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
493; GFX900-NEXT:    v_mov_b32_e32 v4, 0
494; GFX900-NEXT:    ;;#ASMSTART
495; GFX900-NEXT:    ; def v[2:3]
496; GFX900-NEXT:    ;;#ASMEND
497; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
498; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
499; GFX900-NEXT:    s_waitcnt vmcnt(0)
500; GFX900-NEXT:    s_setpc_b64 s[30:31]
501;
502; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_u_u:
503; GFX90A:       ; %bb.0:
504; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
505; GFX90A-NEXT:    ;;#ASMSTART
506; GFX90A-NEXT:    ; def v[0:1]
507; GFX90A-NEXT:    ;;#ASMEND
508; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
509; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
510; GFX90A-NEXT:    ;;#ASMSTART
511; GFX90A-NEXT:    ; def v[2:3]
512; GFX90A-NEXT:    ;;#ASMEND
513; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
514; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
515; GFX90A-NEXT:    s_waitcnt vmcnt(0)
516; GFX90A-NEXT:    s_setpc_b64 s[30:31]
517;
518; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_u_u:
519; GFX940:       ; %bb.0:
520; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521; GFX940-NEXT:    ;;#ASMSTART
522; GFX940-NEXT:    ; def v[0:1]
523; GFX940-NEXT:    ;;#ASMEND
524; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
525; GFX940-NEXT:    v_mov_b32_e32 v4, 0
526; GFX940-NEXT:    ;;#ASMSTART
527; GFX940-NEXT:    ; def v[2:3]
528; GFX940-NEXT:    ;;#ASMEND
529; GFX940-NEXT:    s_nop 0
530; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
531; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
532; GFX940-NEXT:    s_waitcnt vmcnt(0)
533; GFX940-NEXT:    s_setpc_b64 s[30:31]
534  %vec0 = call <4 x i16> asm "; def $0", "=v"()
535  %vec1 = call <4 x i16> asm "; def $0", "=v"()
536  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 poison, i32 poison>
537  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
538  ret void
539}
540
; Mask <7, 4, u, u>: both selected elements come from %vec1 (element 3 into lane
; 0, element 0 into lane 1); lanes 2-3 are poison. %vec0 is never selected, so
; the expected assembly shows a single inline asm def, followed by one
; v_alignbit_b32 with shift amount 16 reading v0 and v1.
541define void @v_shuffle_v4i16_v4i16__7_4_u_u(ptr addrspace(1) inreg %ptr) {
542; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_u_u:
543; GFX900:       ; %bb.0:
544; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545; GFX900-NEXT:    ;;#ASMSTART
546; GFX900-NEXT:    ; def v[0:1]
547; GFX900-NEXT:    ;;#ASMEND
548; GFX900-NEXT:    v_mov_b32_e32 v2, 0
549; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
550; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
551; GFX900-NEXT:    s_waitcnt vmcnt(0)
552; GFX900-NEXT:    s_setpc_b64 s[30:31]
553;
554; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_u_u:
555; GFX90A:       ; %bb.0:
556; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
557; GFX90A-NEXT:    ;;#ASMSTART
558; GFX90A-NEXT:    ; def v[0:1]
559; GFX90A-NEXT:    ;;#ASMEND
560; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
561; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v1, 16
562; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
563; GFX90A-NEXT:    s_waitcnt vmcnt(0)
564; GFX90A-NEXT:    s_setpc_b64 s[30:31]
565;
566; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_u_u:
567; GFX940:       ; %bb.0:
568; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
569; GFX940-NEXT:    ;;#ASMSTART
570; GFX940-NEXT:    ; def v[0:1]
571; GFX940-NEXT:    ;;#ASMEND
572; GFX940-NEXT:    v_mov_b32_e32 v2, 0
573; GFX940-NEXT:    v_alignbit_b32 v0, v0, v1, 16
574; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
575; GFX940-NEXT:    s_waitcnt vmcnt(0)
576; GFX940-NEXT:    s_setpc_b64 s[30:31]
577  %vec0 = call <4 x i16> asm "; def $0", "=v"()
578  %vec1 = call <4 x i16> asm "; def $0", "=v"()
579  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 poison, i32 poison>
580  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
581  ret void
582}
583
; Mask <7, 5, u, u>: both selected elements come from %vec1 (element 3 into lane
; 0, element 1 into lane 1); lanes 2-3 are poison. %vec0 is never selected, so
; the expected assembly shows a single inline asm def, followed by one
; v_perm_b32 (selector constant 0x7060302) reading v0 and v1.
584define void @v_shuffle_v4i16_v4i16__7_5_u_u(ptr addrspace(1) inreg %ptr) {
585; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_u_u:
586; GFX900:       ; %bb.0:
587; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588; GFX900-NEXT:    ;;#ASMSTART
589; GFX900-NEXT:    ; def v[0:1]
590; GFX900-NEXT:    ;;#ASMEND
591; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
592; GFX900-NEXT:    v_mov_b32_e32 v2, 0
593; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
594; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
595; GFX900-NEXT:    s_waitcnt vmcnt(0)
596; GFX900-NEXT:    s_setpc_b64 s[30:31]
597;
598; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_u_u:
599; GFX90A:       ; %bb.0:
600; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
601; GFX90A-NEXT:    ;;#ASMSTART
602; GFX90A-NEXT:    ; def v[0:1]
603; GFX90A-NEXT:    ;;#ASMEND
604; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
605; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
606; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
607; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
608; GFX90A-NEXT:    s_waitcnt vmcnt(0)
609; GFX90A-NEXT:    s_setpc_b64 s[30:31]
610;
611; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_u_u:
612; GFX940:       ; %bb.0:
613; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
614; GFX940-NEXT:    ;;#ASMSTART
615; GFX940-NEXT:    ; def v[0:1]
616; GFX940-NEXT:    ;;#ASMEND
617; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
618; GFX940-NEXT:    v_mov_b32_e32 v2, 0
619; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
620; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
621; GFX940-NEXT:    s_waitcnt vmcnt(0)
622; GFX940-NEXT:    s_setpc_b64 s[30:31]
623  %vec0 = call <4 x i16> asm "; def $0", "=v"()
624  %vec1 = call <4 x i16> asm "; def $0", "=v"()
625  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 poison, i32 poison>
626  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
627  ret void
628}
629
; Mask <7, 6, u, u>: result lanes 0 and 1 are elements 3 and 2 of %vec1 (the two
; halves of its second dword, swapped); lanes 2-3 are poison. %vec0 is never
; selected. The expected assembly shows a single inline asm def, followed by one
; v_alignbit_b32 with shift amount 16 that uses v1 for both source operands.
630define void @v_shuffle_v4i16_v4i16__7_6_u_u(ptr addrspace(1) inreg %ptr) {
631; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_u_u:
632; GFX900:       ; %bb.0:
633; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
634; GFX900-NEXT:    ;;#ASMSTART
635; GFX900-NEXT:    ; def v[0:1]
636; GFX900-NEXT:    ;;#ASMEND
637; GFX900-NEXT:    v_mov_b32_e32 v2, 0
638; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
639; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
640; GFX900-NEXT:    s_waitcnt vmcnt(0)
641; GFX900-NEXT:    s_setpc_b64 s[30:31]
642;
643; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_u_u:
644; GFX90A:       ; %bb.0:
645; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; GFX90A-NEXT:    ;;#ASMSTART
647; GFX90A-NEXT:    ; def v[0:1]
648; GFX90A-NEXT:    ;;#ASMEND
649; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
650; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
651; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
652; GFX90A-NEXT:    s_waitcnt vmcnt(0)
653; GFX90A-NEXT:    s_setpc_b64 s[30:31]
654;
655; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_u_u:
656; GFX940:       ; %bb.0:
657; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658; GFX940-NEXT:    ;;#ASMSTART
659; GFX940-NEXT:    ; def v[0:1]
660; GFX940-NEXT:    ;;#ASMEND
661; GFX940-NEXT:    v_mov_b32_e32 v2, 0
662; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
663; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
664; GFX940-NEXT:    s_waitcnt vmcnt(0)
665; GFX940-NEXT:    s_setpc_b64 s[30:31]
666  %vec0 = call <4 x i16> asm "; def $0", "=v"()
667  %vec1 = call <4 x i16> asm "; def $0", "=v"()
668  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 poison, i32 poison>
669  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
670  ret void
671}
672
; Mask <7, 7, u, u>: element 3 of %vec1 is placed in both result lane 0 and lane
; 1; lanes 2-3 are poison. %vec0 is never selected. The expected assembly shows
; a single inline asm def, followed by one v_perm_b32 (selector constant
; 0x7060302) that uses v1 for both source operands.
673define void @v_shuffle_v4i16_v4i16__7_7_u_u(ptr addrspace(1) inreg %ptr) {
674; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_u:
675; GFX900:       ; %bb.0:
676; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
677; GFX900-NEXT:    ;;#ASMSTART
678; GFX900-NEXT:    ; def v[0:1]
679; GFX900-NEXT:    ;;#ASMEND
680; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
681; GFX900-NEXT:    v_mov_b32_e32 v2, 0
682; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
683; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
684; GFX900-NEXT:    s_waitcnt vmcnt(0)
685; GFX900-NEXT:    s_setpc_b64 s[30:31]
686;
687; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_u:
688; GFX90A:       ; %bb.0:
689; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
690; GFX90A-NEXT:    ;;#ASMSTART
691; GFX90A-NEXT:    ; def v[0:1]
692; GFX90A-NEXT:    ;;#ASMEND
693; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
694; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
695; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
696; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
697; GFX90A-NEXT:    s_waitcnt vmcnt(0)
698; GFX90A-NEXT:    s_setpc_b64 s[30:31]
699;
700; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_u:
701; GFX940:       ; %bb.0:
702; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
703; GFX940-NEXT:    ;;#ASMSTART
704; GFX940-NEXT:    ; def v[0:1]
705; GFX940-NEXT:    ;;#ASMEND
706; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
707; GFX940-NEXT:    v_mov_b32_e32 v2, 0
708; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
709; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
710; GFX940-NEXT:    s_waitcnt vmcnt(0)
711; GFX940-NEXT:    s_setpc_b64 s[30:31]
712  %vec0 = call <4 x i16> asm "; def $0", "=v"()
713  %vec1 = call <4 x i16> asm "; def $0", "=v"()
714  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 poison>
715  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
716  ret void
717}
718
; Mask <7, 7, 0, u>: result lanes 0 and 1 are both element 3 of %vec1, lane 2 is
; element 0 of %vec0, and lane 3 is poison. Both inline asm defs appear in the
; expected assembly. The first stored dword is built with a v_perm_b32
; (selector constant 0x7060302) using the same register for both sources; the
; second stored dword is a plain v_mov_b32 copy of v0.
719define void @v_shuffle_v4i16_v4i16__7_7_0_u(ptr addrspace(1) inreg %ptr) {
720; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_u:
721; GFX900:       ; %bb.0:
722; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723; GFX900-NEXT:    ;;#ASMSTART
724; GFX900-NEXT:    ; def v[0:1]
725; GFX900-NEXT:    ;;#ASMEND
726; GFX900-NEXT:    ;;#ASMSTART
727; GFX900-NEXT:    ; def v[1:2]
728; GFX900-NEXT:    ;;#ASMEND
729; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
730; GFX900-NEXT:    v_mov_b32_e32 v3, 0
731; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
732; GFX900-NEXT:    v_mov_b32_e32 v2, v0
733; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
734; GFX900-NEXT:    s_waitcnt vmcnt(0)
735; GFX900-NEXT:    s_setpc_b64 s[30:31]
736;
737; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_u:
738; GFX90A:       ; %bb.0:
739; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
740; GFX90A-NEXT:    ;;#ASMSTART
741; GFX90A-NEXT:    ; def v[2:3]
742; GFX90A-NEXT:    ;;#ASMEND
743; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
744; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
745; GFX90A-NEXT:    ;;#ASMSTART
746; GFX90A-NEXT:    ; def v[0:1]
747; GFX90A-NEXT:    ;;#ASMEND
748; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
749; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
750; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
751; GFX90A-NEXT:    s_waitcnt vmcnt(0)
752; GFX90A-NEXT:    s_setpc_b64 s[30:31]
753;
754; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_u:
755; GFX940:       ; %bb.0:
756; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
757; GFX940-NEXT:    ;;#ASMSTART
758; GFX940-NEXT:    ; def v[2:3]
759; GFX940-NEXT:    ;;#ASMEND
760; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
761; GFX940-NEXT:    v_mov_b32_e32 v4, 0
762; GFX940-NEXT:    ;;#ASMSTART
763; GFX940-NEXT:    ; def v[0:1]
764; GFX940-NEXT:    ;;#ASMEND
765; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
766; GFX940-NEXT:    v_mov_b32_e32 v3, v0
767; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
768; GFX940-NEXT:    s_waitcnt vmcnt(0)
769; GFX940-NEXT:    s_setpc_b64 s[30:31]
770  %vec0 = call <4 x i16> asm "; def $0", "=v"()
771  %vec1 = call <4 x i16> asm "; def $0", "=v"()
772  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 poison>
773  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
774  ret void
775}
776
; Shuffle test for mask <7, 7, 1, poison>: result lanes 0 and 1 both select
; mask index 7 (last element of %vec1), lane 2 selects index 1 (second element
; of %vec0) and lane 3 is poison. The 64-bit result is stored through %ptr.
; The expected code pairs a v_alignbit_b32 (shift 16, reading v0) with a
; v_perm_b32 using selector 0x7060302 on a single repeated source register.
; NOTE(review): the scalar source of v_alignbit_b32 (s4 or s0) is not written
; earlier in the expected output; presumably it stands in for the poison lane,
; so do not treat it as a bug without confirming.
; In the gfx900 expectation the two asm defs overlap in v1 (v[0:1], v[1:2]);
; only v0 and v2 are read afterwards.
777define void @v_shuffle_v4i16_v4i16__7_7_1_u(ptr addrspace(1) inreg %ptr) {
778; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_u:
779; GFX900:       ; %bb.0:
780; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
781; GFX900-NEXT:    ;;#ASMSTART
782; GFX900-NEXT:    ; def v[0:1]
783; GFX900-NEXT:    ;;#ASMEND
784; GFX900-NEXT:    ;;#ASMSTART
785; GFX900-NEXT:    ; def v[1:2]
786; GFX900-NEXT:    ;;#ASMEND
787; GFX900-NEXT:    v_alignbit_b32 v1, s4, v0, 16
788; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
789; GFX900-NEXT:    v_mov_b32_e32 v3, 0
790; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
791; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
792; GFX900-NEXT:    s_waitcnt vmcnt(0)
793; GFX900-NEXT:    s_setpc_b64 s[30:31]
794;
795; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_u:
796; GFX90A:       ; %bb.0:
797; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798; GFX90A-NEXT:    ;;#ASMSTART
799; GFX90A-NEXT:    ; def v[0:1]
800; GFX90A-NEXT:    ;;#ASMEND
801; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v0, 16
802; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
803; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
804; GFX90A-NEXT:    ;;#ASMSTART
805; GFX90A-NEXT:    ; def v[2:3]
806; GFX90A-NEXT:    ;;#ASMEND
807; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
808; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
809; GFX90A-NEXT:    s_waitcnt vmcnt(0)
810; GFX90A-NEXT:    s_setpc_b64 s[30:31]
811;
812; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_u:
813; GFX940:       ; %bb.0:
814; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815; GFX940-NEXT:    ;;#ASMSTART
816; GFX940-NEXT:    ; def v[0:1]
817; GFX940-NEXT:    ;;#ASMEND
818; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
819; GFX940-NEXT:    v_mov_b32_e32 v4, 0
820; GFX940-NEXT:    ;;#ASMSTART
821; GFX940-NEXT:    ; def v[2:3]
822; GFX940-NEXT:    ;;#ASMEND
823; GFX940-NEXT:    v_alignbit_b32 v1, s0, v0, 16
824; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
825; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
826; GFX940-NEXT:    s_waitcnt vmcnt(0)
827; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
828  %vec0 = call <4 x i16> asm "; def $0", "=v"()
829  %vec1 = call <4 x i16> asm "; def $0", "=v"()
830  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 poison>
831  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
832  ret void
833}
834
; Shuffle test for mask <7, 7, 2, poison>: result lanes 0 and 1 both select
; mask index 7 (last element of %vec1), lane 2 selects index 2 (third element
; of %vec0) and lane 3 is poison. The 64-bit result is stored through %ptr.
; The expected code needs only one v_perm_b32 (selector 0x7060302, same source
; register twice) writing v0; v1 from the v[0:1] asm def is stored unchanged
; as the second dword. The gfx940 expectation additionally has an s_nop 0
; before the v_perm_b32.
835define void @v_shuffle_v4i16_v4i16__7_7_2_u(ptr addrspace(1) inreg %ptr) {
836; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_u:
837; GFX900:       ; %bb.0:
838; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
839; GFX900-NEXT:    ;;#ASMSTART
840; GFX900-NEXT:    ; def v[0:1]
841; GFX900-NEXT:    ;;#ASMEND
842; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
843; GFX900-NEXT:    v_mov_b32_e32 v4, 0
844; GFX900-NEXT:    ;;#ASMSTART
845; GFX900-NEXT:    ; def v[2:3]
846; GFX900-NEXT:    ;;#ASMEND
847; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
848; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
849; GFX900-NEXT:    s_waitcnt vmcnt(0)
850; GFX900-NEXT:    s_setpc_b64 s[30:31]
851;
852; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_u:
853; GFX90A:       ; %bb.0:
854; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855; GFX90A-NEXT:    ;;#ASMSTART
856; GFX90A-NEXT:    ; def v[0:1]
857; GFX90A-NEXT:    ;;#ASMEND
858; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
859; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
860; GFX90A-NEXT:    ;;#ASMSTART
861; GFX90A-NEXT:    ; def v[2:3]
862; GFX90A-NEXT:    ;;#ASMEND
863; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
864; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
865; GFX90A-NEXT:    s_waitcnt vmcnt(0)
866; GFX90A-NEXT:    s_setpc_b64 s[30:31]
867;
868; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_u:
869; GFX940:       ; %bb.0:
870; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
871; GFX940-NEXT:    ;;#ASMSTART
872; GFX940-NEXT:    ; def v[0:1]
873; GFX940-NEXT:    ;;#ASMEND
874; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
875; GFX940-NEXT:    v_mov_b32_e32 v4, 0
876; GFX940-NEXT:    ;;#ASMSTART
877; GFX940-NEXT:    ; def v[2:3]
878; GFX940-NEXT:    ;;#ASMEND
879; GFX940-NEXT:    s_nop 0
880; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
881; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
882; GFX940-NEXT:    s_waitcnt vmcnt(0)
883; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
884  %vec0 = call <4 x i16> asm "; def $0", "=v"()
885  %vec1 = call <4 x i16> asm "; def $0", "=v"()
886  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 poison>
887  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
888  ret void
889}
890
; Shuffle test for mask <7, 7, 3, poison>: result lanes 0 and 1 both select
; mask index 7 (last element of %vec1), lane 2 selects index 3 (last element
; of %vec0) and lane 3 is poison. The 64-bit result is stored through %ptr.
; The expected code pairs a v_alignbit_b32 (shift 16, reading and rewriting
; v1) with a v_perm_b32 using selector 0x7060302 on v3 for both sources.
; NOTE(review): the scalar source of v_alignbit_b32 (s4 or s0) is not written
; earlier in the expected output; presumably it stands in for the poison lane,
; so do not treat it as a bug without confirming.
891define void @v_shuffle_v4i16_v4i16__7_7_3_u(ptr addrspace(1) inreg %ptr) {
892; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_u:
893; GFX900:       ; %bb.0:
894; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
895; GFX900-NEXT:    ;;#ASMSTART
896; GFX900-NEXT:    ; def v[0:1]
897; GFX900-NEXT:    ;;#ASMEND
898; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
899; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
900; GFX900-NEXT:    v_mov_b32_e32 v4, 0
901; GFX900-NEXT:    ;;#ASMSTART
902; GFX900-NEXT:    ; def v[2:3]
903; GFX900-NEXT:    ;;#ASMEND
904; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
905; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
906; GFX900-NEXT:    s_waitcnt vmcnt(0)
907; GFX900-NEXT:    s_setpc_b64 s[30:31]
908;
909; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_u:
910; GFX90A:       ; %bb.0:
911; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912; GFX90A-NEXT:    ;;#ASMSTART
913; GFX90A-NEXT:    ; def v[0:1]
914; GFX90A-NEXT:    ;;#ASMEND
915; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v1, 16
916; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
917; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
918; GFX90A-NEXT:    ;;#ASMSTART
919; GFX90A-NEXT:    ; def v[2:3]
920; GFX90A-NEXT:    ;;#ASMEND
921; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
922; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
923; GFX90A-NEXT:    s_waitcnt vmcnt(0)
924; GFX90A-NEXT:    s_setpc_b64 s[30:31]
925;
926; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_u:
927; GFX940:       ; %bb.0:
928; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
929; GFX940-NEXT:    ;;#ASMSTART
930; GFX940-NEXT:    ; def v[0:1]
931; GFX940-NEXT:    ;;#ASMEND
932; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
933; GFX940-NEXT:    v_mov_b32_e32 v4, 0
934; GFX940-NEXT:    ;;#ASMSTART
935; GFX940-NEXT:    ; def v[2:3]
936; GFX940-NEXT:    ;;#ASMEND
937; GFX940-NEXT:    v_alignbit_b32 v1, s0, v1, 16
938; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
939; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
940; GFX940-NEXT:    s_waitcnt vmcnt(0)
941; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
942  %vec0 = call <4 x i16> asm "; def $0", "=v"()
943  %vec1 = call <4 x i16> asm "; def $0", "=v"()
944  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 poison>
945  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
946  ret void
947}
948
; Shuffle test for mask <7, 7, 4, poison>: result lanes 0 and 1 both select
; mask index 7 (last element of %vec1), lane 2 selects index 4 (first element
; of %vec1) and lane 3 is poison, so every defined lane reads %vec1 only.
; The 64-bit result is stored through %ptr.
; The expected output shows a single asm def although the IR has two asm
; calls. It uses one v_perm_b32 (selector 0x7060302, v1 for both sources) and
; a v_mov_b32 copying v0 into the second dword of the stored pair.
949define void @v_shuffle_v4i16_v4i16__7_7_4_u(ptr addrspace(1) inreg %ptr) {
950; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_u:
951; GFX900:       ; %bb.0:
952; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
953; GFX900-NEXT:    ;;#ASMSTART
954; GFX900-NEXT:    ; def v[0:1]
955; GFX900-NEXT:    ;;#ASMEND
956; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
957; GFX900-NEXT:    v_mov_b32_e32 v3, 0
958; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
959; GFX900-NEXT:    v_mov_b32_e32 v2, v0
960; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
961; GFX900-NEXT:    s_waitcnt vmcnt(0)
962; GFX900-NEXT:    s_setpc_b64 s[30:31]
963;
964; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_u:
965; GFX90A:       ; %bb.0:
966; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
967; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
968; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
969; GFX90A-NEXT:    ;;#ASMSTART
970; GFX90A-NEXT:    ; def v[0:1]
971; GFX90A-NEXT:    ;;#ASMEND
972; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
973; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
974; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
975; GFX90A-NEXT:    s_waitcnt vmcnt(0)
976; GFX90A-NEXT:    s_setpc_b64 s[30:31]
977;
978; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_u:
979; GFX940:       ; %bb.0:
980; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
981; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
982; GFX940-NEXT:    v_mov_b32_e32 v4, 0
983; GFX940-NEXT:    ;;#ASMSTART
984; GFX940-NEXT:    ; def v[0:1]
985; GFX940-NEXT:    ;;#ASMEND
986; GFX940-NEXT:    s_nop 0
987; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
988; GFX940-NEXT:    v_mov_b32_e32 v3, v0
989; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
990; GFX940-NEXT:    s_waitcnt vmcnt(0)
991; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
992  %vec0 = call <4 x i16> asm "; def $0", "=v"()
993  %vec1 = call <4 x i16> asm "; def $0", "=v"()
994  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 poison>
995  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
996  ret void
997}
998
; Shuffle test for mask <7, 7, 5, poison>: result lanes 0 and 1 both select
; mask index 7 (last element of %vec1), lane 2 selects index 5 (second element
; of %vec1) and lane 3 is poison, so every defined lane reads %vec1 only.
; The 64-bit result is stored through %ptr.
; The expected output shows a single asm def although the IR has two asm
; calls. It pairs a v_alignbit_b32 (shift 16, reading v0) with a v_perm_b32
; using selector 0x7060302 and v1 for both sources.
; NOTE(review): the scalar source of v_alignbit_b32 (s4 or s0) is not written
; earlier in the expected output; presumably it stands in for the poison lane,
; so do not treat it as a bug without confirming.
999define void @v_shuffle_v4i16_v4i16__7_7_5_u(ptr addrspace(1) inreg %ptr) {
1000; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_u:
1001; GFX900:       ; %bb.0:
1002; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1003; GFX900-NEXT:    ;;#ASMSTART
1004; GFX900-NEXT:    ; def v[0:1]
1005; GFX900-NEXT:    ;;#ASMEND
1006; GFX900-NEXT:    v_alignbit_b32 v2, s4, v0, 16
1007; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1008; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1009; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1010; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1011; GFX900-NEXT:    s_waitcnt vmcnt(0)
1012; GFX900-NEXT:    s_setpc_b64 s[30:31]
1013;
1014; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_u:
1015; GFX90A:       ; %bb.0:
1016; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1017; GFX90A-NEXT:    ;;#ASMSTART
1018; GFX90A-NEXT:    ; def v[0:1]
1019; GFX90A-NEXT:    ;;#ASMEND
1020; GFX90A-NEXT:    v_alignbit_b32 v3, s4, v0, 16
1021; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1022; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1023; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
1024; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1025; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1026; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1027;
1028; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_u:
1029; GFX940:       ; %bb.0:
1030; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1031; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1032; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1033; GFX940-NEXT:    ;;#ASMSTART
1034; GFX940-NEXT:    ; def v[0:1]
1035; GFX940-NEXT:    ;;#ASMEND
1036; GFX940-NEXT:    s_nop 0
1037; GFX940-NEXT:    v_alignbit_b32 v3, s0, v0, 16
1038; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
1039; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1040; GFX940-NEXT:    s_waitcnt vmcnt(0)
1041; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1042  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1043  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1044  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 poison>
1045  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1046  ret void
1047}
1048
; Shuffle test for mask <7, 7, 6, poison>: result lanes 0 and 1 both select
; mask index 7 (last element of %vec1), lane 2 selects index 6 (third element
; of %vec1) and lane 3 is poison, so every defined lane reads %vec1 only.
; The 64-bit result is stored through %ptr.
; The expected output shows a single asm def although the IR has two asm
; calls. It needs only one v_perm_b32 (selector 0x7060302, v1 for both
; sources) writing v0; v1 is stored unchanged as the second dword.
1049define void @v_shuffle_v4i16_v4i16__7_7_6_u(ptr addrspace(1) inreg %ptr) {
1050; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_u:
1051; GFX900:       ; %bb.0:
1052; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1053; GFX900-NEXT:    ;;#ASMSTART
1054; GFX900-NEXT:    ; def v[0:1]
1055; GFX900-NEXT:    ;;#ASMEND
1056; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1057; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1058; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
1059; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1060; GFX900-NEXT:    s_waitcnt vmcnt(0)
1061; GFX900-NEXT:    s_setpc_b64 s[30:31]
1062;
1063; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_u:
1064; GFX90A:       ; %bb.0:
1065; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1066; GFX90A-NEXT:    ;;#ASMSTART
1067; GFX90A-NEXT:    ; def v[0:1]
1068; GFX90A-NEXT:    ;;#ASMEND
1069; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1070; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1071; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
1072; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1073; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1074; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1075;
1076; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_u:
1077; GFX940:       ; %bb.0:
1078; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1079; GFX940-NEXT:    ;;#ASMSTART
1080; GFX940-NEXT:    ; def v[0:1]
1081; GFX940-NEXT:    ;;#ASMEND
1082; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1083; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1084; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
1085; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1086; GFX940-NEXT:    s_waitcnt vmcnt(0)
1087; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1088  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1089  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1090  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
1091  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1092  ret void
1093}
1094
; Shuffle test for mask <7, 7, 7, poison>: result lanes 0, 1 and 2 all select
; mask index 7 (last element of %vec1) and lane 3 is poison, so every defined
; lane reads %vec1 only. The 64-bit result is stored through %ptr.
; The expected output shows a single asm def although the IR has two asm
; calls. It pairs a v_alignbit_b32 (shift 16, reading v1) with a v_perm_b32
; using selector 0x7060302 and v1 for both sources.
; NOTE(review): the scalar source of v_alignbit_b32 (s4 or s0) is not written
; earlier in the expected output; presumably it stands in for the poison lane,
; so do not treat it as a bug without confirming.
1095define void @v_shuffle_v4i16_v4i16__7_7_7_u(ptr addrspace(1) inreg %ptr) {
1096; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_u:
1097; GFX900:       ; %bb.0:
1098; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1099; GFX900-NEXT:    ;;#ASMSTART
1100; GFX900-NEXT:    ; def v[0:1]
1101; GFX900-NEXT:    ;;#ASMEND
1102; GFX900-NEXT:    v_alignbit_b32 v2, s4, v1, 16
1103; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1104; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1105; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1106; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1107; GFX900-NEXT:    s_waitcnt vmcnt(0)
1108; GFX900-NEXT:    s_setpc_b64 s[30:31]
1109;
1110; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_u:
1111; GFX90A:       ; %bb.0:
1112; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1113; GFX90A-NEXT:    ;;#ASMSTART
1114; GFX90A-NEXT:    ; def v[0:1]
1115; GFX90A-NEXT:    ;;#ASMEND
1116; GFX90A-NEXT:    v_alignbit_b32 v3, s4, v1, 16
1117; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1118; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1119; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
1120; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1121; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1122; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1123;
1124; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_u:
1125; GFX940:       ; %bb.0:
1126; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1127; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1128; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1129; GFX940-NEXT:    ;;#ASMSTART
1130; GFX940-NEXT:    ; def v[0:1]
1131; GFX940-NEXT:    ;;#ASMEND
1132; GFX940-NEXT:    s_nop 0
1133; GFX940-NEXT:    v_alignbit_b32 v3, s0, v1, 16
1134; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
1135; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1136; GFX940-NEXT:    s_waitcnt vmcnt(0)
1137; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1138  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1139  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1140  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 poison>
1141  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1142  ret void
1143}
1144
; Shuffle test for mask <7, 7, 7, 0>: result lanes 0, 1 and 2 all select mask
; index 7 (last element of %vec1) and lane 3 selects index 0 (first element of
; %vec0). The 64-bit result is stored through %ptr.
; The expected code pairs a v_alignbit_b32 (shift 16, sources v0 and the
; register read by the perm) with a v_perm_b32 using selector 0x7060302 on a
; single repeated source register.
; In the gfx900 expectation the two asm defs overlap in v1 (v[0:1], v[1:2]);
; only v0 and v2 are read afterwards.
1145define void @v_shuffle_v4i16_v4i16__7_7_7_0(ptr addrspace(1) inreg %ptr) {
1146; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_0:
1147; GFX900:       ; %bb.0:
1148; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1149; GFX900-NEXT:    ;;#ASMSTART
1150; GFX900-NEXT:    ; def v[0:1]
1151; GFX900-NEXT:    ;;#ASMEND
1152; GFX900-NEXT:    ;;#ASMSTART
1153; GFX900-NEXT:    ; def v[1:2]
1154; GFX900-NEXT:    ;;#ASMEND
1155; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1156; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1157; GFX900-NEXT:    v_alignbit_b32 v1, v0, v2, 16
1158; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
1159; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
1160; GFX900-NEXT:    s_waitcnt vmcnt(0)
1161; GFX900-NEXT:    s_setpc_b64 s[30:31]
1162;
1163; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_0:
1164; GFX90A:       ; %bb.0:
1165; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1166; GFX90A-NEXT:    ;;#ASMSTART
1167; GFX90A-NEXT:    ; def v[0:1]
1168; GFX90A-NEXT:    ;;#ASMEND
1169; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1170; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1171; GFX90A-NEXT:    ;;#ASMSTART
1172; GFX90A-NEXT:    ; def v[2:3]
1173; GFX90A-NEXT:    ;;#ASMEND
1174; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v3, 16
1175; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
1176; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1177; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1178; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1179;
1180; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_0:
1181; GFX940:       ; %bb.0:
1182; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1183; GFX940-NEXT:    ;;#ASMSTART
1184; GFX940-NEXT:    ; def v[0:1]
1185; GFX940-NEXT:    ;;#ASMEND
1186; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1187; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1188; GFX940-NEXT:    ;;#ASMSTART
1189; GFX940-NEXT:    ; def v[2:3]
1190; GFX940-NEXT:    ;;#ASMEND
1191; GFX940-NEXT:    s_nop 0
1192; GFX940-NEXT:    v_alignbit_b32 v1, v0, v3, 16
1193; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
1194; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1195; GFX940-NEXT:    s_waitcnt vmcnt(0)
1196; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1197  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1198  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1199  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 0>
1200  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1201  ret void
1202}
1203
; Shuffle test for mask <7, 7, 7, 1>: result lanes 0, 1 and 2 all select mask
; index 7 (last element of %vec1) and lane 3 selects index 1 (second element
; of %vec0). The 64-bit result is stored through %ptr.
; The expected code is two v_perm_b32 instructions sharing selector 0x7060302:
; one combines v0 with the register holding the tail of the other asm def, the
; other uses that register for both sources.
; In the gfx900 expectation the two asm defs overlap in v1 (v[0:1], v[1:2]);
; only v0 and v2 are read afterwards.
1204define void @v_shuffle_v4i16_v4i16__7_7_7_1(ptr addrspace(1) inreg %ptr) {
1205; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_1:
1206; GFX900:       ; %bb.0:
1207; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1208; GFX900-NEXT:    ;;#ASMSTART
1209; GFX900-NEXT:    ; def v[0:1]
1210; GFX900-NEXT:    ;;#ASMEND
1211; GFX900-NEXT:    ;;#ASMSTART
1212; GFX900-NEXT:    ; def v[1:2]
1213; GFX900-NEXT:    ;;#ASMEND
1214; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1215; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1216; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
1217; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
1218; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
1219; GFX900-NEXT:    s_waitcnt vmcnt(0)
1220; GFX900-NEXT:    s_setpc_b64 s[30:31]
1221;
1222; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_1:
1223; GFX90A:       ; %bb.0:
1224; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1225; GFX90A-NEXT:    ;;#ASMSTART
1226; GFX90A-NEXT:    ; def v[0:1]
1227; GFX90A-NEXT:    ;;#ASMEND
1228; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1229; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1230; GFX90A-NEXT:    ;;#ASMSTART
1231; GFX90A-NEXT:    ; def v[2:3]
1232; GFX90A-NEXT:    ;;#ASMEND
1233; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
1234; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
1235; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1236; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1237; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1238;
1239; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_1:
1240; GFX940:       ; %bb.0:
1241; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1242; GFX940-NEXT:    ;;#ASMSTART
1243; GFX940-NEXT:    ; def v[0:1]
1244; GFX940-NEXT:    ;;#ASMEND
1245; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1246; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1247; GFX940-NEXT:    ;;#ASMSTART
1248; GFX940-NEXT:    ; def v[2:3]
1249; GFX940-NEXT:    ;;#ASMEND
1250; GFX940-NEXT:    s_nop 0
1251; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
1252; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
1253; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1254; GFX940-NEXT:    s_waitcnt vmcnt(0)
1255; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1256  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1257  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1258  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 1>
1259  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1260  ret void
1261}
1262
; Shuffle test for mask <7, 7, 7, 2>: result lanes 0, 1 and 2 all select mask
; index 7 (last element of %vec1) and lane 3 selects index 2 (third element of
; %vec0). The 64-bit result is stored through %ptr.
; The expected code is a v_perm_b32 (selector 0x7060302, v3 for both sources)
; writing v0, followed by a v_alignbit_b32 (shift 16, sources v1 and v3)
; writing v1. The gfx940 expectation additionally has an s_nop 0 before them.
1263define void @v_shuffle_v4i16_v4i16__7_7_7_2(ptr addrspace(1) inreg %ptr) {
1264; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_2:
1265; GFX900:       ; %bb.0:
1266; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1267; GFX900-NEXT:    ;;#ASMSTART
1268; GFX900-NEXT:    ; def v[0:1]
1269; GFX900-NEXT:    ;;#ASMEND
1270; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1271; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1272; GFX900-NEXT:    ;;#ASMSTART
1273; GFX900-NEXT:    ; def v[2:3]
1274; GFX900-NEXT:    ;;#ASMEND
1275; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
1276; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
1277; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1278; GFX900-NEXT:    s_waitcnt vmcnt(0)
1279; GFX900-NEXT:    s_setpc_b64 s[30:31]
1280;
1281; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_2:
1282; GFX90A:       ; %bb.0:
1283; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1284; GFX90A-NEXT:    ;;#ASMSTART
1285; GFX90A-NEXT:    ; def v[0:1]
1286; GFX90A-NEXT:    ;;#ASMEND
1287; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1288; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1289; GFX90A-NEXT:    ;;#ASMSTART
1290; GFX90A-NEXT:    ; def v[2:3]
1291; GFX90A-NEXT:    ;;#ASMEND
1292; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
1293; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v3, 16
1294; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1295; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1296; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1297;
1298; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_2:
1299; GFX940:       ; %bb.0:
1300; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1301; GFX940-NEXT:    ;;#ASMSTART
1302; GFX940-NEXT:    ; def v[0:1]
1303; GFX940-NEXT:    ;;#ASMEND
1304; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1305; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1306; GFX940-NEXT:    ;;#ASMSTART
1307; GFX940-NEXT:    ; def v[2:3]
1308; GFX940-NEXT:    ;;#ASMEND
1309; GFX940-NEXT:    s_nop 0
1310; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
1311; GFX940-NEXT:    v_alignbit_b32 v1, v1, v3, 16
1312; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1313; GFX940-NEXT:    s_waitcnt vmcnt(0)
1314; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1315  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1316  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1317  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 2>
1318  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1319  ret void
1320}
1321
; Shuffle test for mask <7, 7, 7, 3>: result lanes 0, 1 and 2 all select mask
; index 7 (last element of %vec1) and lane 3 selects index 3 (last element of
; %vec0). The 64-bit result is stored through %ptr.
; The expected code is two v_perm_b32 instructions sharing selector 0x7060302:
; one with sources v1 and v3 writing v1, one with v3 for both sources writing
; v0. The gfx940 expectation additionally has an s_nop 0 before them.
1322define void @v_shuffle_v4i16_v4i16__7_7_7_3(ptr addrspace(1) inreg %ptr) {
1323; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_3:
1324; GFX900:       ; %bb.0:
1325; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1326; GFX900-NEXT:    ;;#ASMSTART
1327; GFX900-NEXT:    ; def v[0:1]
1328; GFX900-NEXT:    ;;#ASMEND
1329; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1330; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1331; GFX900-NEXT:    ;;#ASMSTART
1332; GFX900-NEXT:    ; def v[2:3]
1333; GFX900-NEXT:    ;;#ASMEND
1334; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
1335; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
1336; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1337; GFX900-NEXT:    s_waitcnt vmcnt(0)
1338; GFX900-NEXT:    s_setpc_b64 s[30:31]
1339;
1340; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_3:
1341; GFX90A:       ; %bb.0:
1342; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343; GFX90A-NEXT:    ;;#ASMSTART
1344; GFX90A-NEXT:    ; def v[0:1]
1345; GFX90A-NEXT:    ;;#ASMEND
1346; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1347; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1348; GFX90A-NEXT:    ;;#ASMSTART
1349; GFX90A-NEXT:    ; def v[2:3]
1350; GFX90A-NEXT:    ;;#ASMEND
1351; GFX90A-NEXT:    v_perm_b32 v1, v1, v3, s4
1352; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
1353; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1354; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1355; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1356;
1357; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_3:
1358; GFX940:       ; %bb.0:
1359; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1360; GFX940-NEXT:    ;;#ASMSTART
1361; GFX940-NEXT:    ; def v[0:1]
1362; GFX940-NEXT:    ;;#ASMEND
1363; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1364; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1365; GFX940-NEXT:    ;;#ASMSTART
1366; GFX940-NEXT:    ; def v[2:3]
1367; GFX940-NEXT:    ;;#ASMEND
1368; GFX940-NEXT:    s_nop 0
1369; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s2
1370; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
1371; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1372; GFX940-NEXT:    s_waitcnt vmcnt(0)
1373; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1374  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1375  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1376  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 3>
1377  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1378  ret void
1379}
1380
; Shuffle test for mask <7, 7, 7, 4>: result lanes 0, 1 and 2 all select mask
; index 7 (last element of %vec1) and lane 3 selects index 4 (first element of
; %vec1), so every lane reads %vec1 only. The 64-bit result is stored through
; %ptr.
; The expected output shows a single asm def although the IR has two asm
; calls. It pairs a v_alignbit_b32 (shift 16, sources v0 and v1) with a
; v_perm_b32 using selector 0x7060302 and v1 for both sources.
1381define void @v_shuffle_v4i16_v4i16__7_7_7_4(ptr addrspace(1) inreg %ptr) {
1382; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_4:
1383; GFX900:       ; %bb.0:
1384; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1385; GFX900-NEXT:    ;;#ASMSTART
1386; GFX900-NEXT:    ; def v[0:1]
1387; GFX900-NEXT:    ;;#ASMEND
1388; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1389; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1390; GFX900-NEXT:    v_alignbit_b32 v2, v0, v1, 16
1391; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1392; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1393; GFX900-NEXT:    s_waitcnt vmcnt(0)
1394; GFX900-NEXT:    s_setpc_b64 s[30:31]
1395;
1396; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_4:
1397; GFX90A:       ; %bb.0:
1398; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1399; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1400; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1401; GFX90A-NEXT:    ;;#ASMSTART
1402; GFX90A-NEXT:    ; def v[0:1]
1403; GFX90A-NEXT:    ;;#ASMEND
1404; GFX90A-NEXT:    v_alignbit_b32 v3, v0, v1, 16
1405; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
1406; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1407; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1408; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1409;
1410; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_4:
1411; GFX940:       ; %bb.0:
1412; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1413; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1414; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1415; GFX940-NEXT:    ;;#ASMSTART
1416; GFX940-NEXT:    ; def v[0:1]
1417; GFX940-NEXT:    ;;#ASMEND
1418; GFX940-NEXT:    s_nop 0
1419; GFX940-NEXT:    v_alignbit_b32 v3, v0, v1, 16
1420; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
1421; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1422; GFX940-NEXT:    s_waitcnt vmcnt(0)
1423; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1424  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1425  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1426  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 4>
1427  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1428  ret void
1429}
1430
; Shuffle test for mask <7, 7, 7, 5>: result lanes 0, 1 and 2 all select mask
; index 7 (last element of %vec1) and lane 3 selects index 5 (second element
; of %vec1), so every lane reads %vec1 only. The 64-bit result is stored
; through %ptr.
; The expected output shows a single asm def although the IR has two asm
; calls. It is two v_perm_b32 instructions sharing selector 0x7060302: one
; with sources v0 and v1, one with v1 for both sources.
1431define void @v_shuffle_v4i16_v4i16__7_7_7_5(ptr addrspace(1) inreg %ptr) {
1432; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_5:
1433; GFX900:       ; %bb.0:
1434; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1435; GFX900-NEXT:    ;;#ASMSTART
1436; GFX900-NEXT:    ; def v[0:1]
1437; GFX900-NEXT:    ;;#ASMEND
1438; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1439; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1440; GFX900-NEXT:    v_perm_b32 v2, v0, v1, s4
1441; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1442; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1443; GFX900-NEXT:    s_waitcnt vmcnt(0)
1444; GFX900-NEXT:    s_setpc_b64 s[30:31]
1445;
1446; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_5:
1447; GFX90A:       ; %bb.0:
1448; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1449; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1450; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1451; GFX90A-NEXT:    ;;#ASMSTART
1452; GFX90A-NEXT:    ; def v[0:1]
1453; GFX90A-NEXT:    ;;#ASMEND
1454; GFX90A-NEXT:    v_perm_b32 v3, v0, v1, s4
1455; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
1456; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1457; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1458; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1459;
1460; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_5:
1461; GFX940:       ; %bb.0:
1462; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1463; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1464; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1465; GFX940-NEXT:    ;;#ASMSTART
1466; GFX940-NEXT:    ; def v[0:1]
1467; GFX940-NEXT:    ;;#ASMEND
1468; GFX940-NEXT:    s_nop 0
1469; GFX940-NEXT:    v_perm_b32 v3, v0, v1, s2
1470; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
1471; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1472; GFX940-NEXT:    s_waitcnt vmcnt(0)
1473; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1474  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1475  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1476  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 5>
1477  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1478  ret void
1479}
1480
; Shuffle test for mask <7, 7, 7, 6>: result lanes 0, 1 and 2 all select mask
; index 7 (last element of %vec1) and lane 3 selects index 6 (third element of
; %vec1), so every lane reads %vec1 only. The 64-bit result is stored through
; %ptr.
; The expected output shows a single asm def although the IR has two asm
; calls. It is a v_perm_b32 (selector 0x7060302, v1 for both sources) writing
; v0, followed by a v_alignbit_b32 with v1 as both sources and shift 16
; rewriting v1 in place.
1481define void @v_shuffle_v4i16_v4i16__7_7_7_6(ptr addrspace(1) inreg %ptr) {
1482; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_6:
1483; GFX900:       ; %bb.0:
1484; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1485; GFX900-NEXT:    ;;#ASMSTART
1486; GFX900-NEXT:    ; def v[0:1]
1487; GFX900-NEXT:    ;;#ASMEND
1488; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1489; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1490; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
1491; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
1492; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1493; GFX900-NEXT:    s_waitcnt vmcnt(0)
1494; GFX900-NEXT:    s_setpc_b64 s[30:31]
1495;
1496; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_6:
1497; GFX90A:       ; %bb.0:
1498; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1499; GFX90A-NEXT:    ;;#ASMSTART
1500; GFX90A-NEXT:    ; def v[0:1]
1501; GFX90A-NEXT:    ;;#ASMEND
1502; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1503; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1504; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
1505; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v1, 16
1506; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1507; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1508; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1509;
1510; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_6:
1511; GFX940:       ; %bb.0:
1512; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1513; GFX940-NEXT:    ;;#ASMSTART
1514; GFX940-NEXT:    ; def v[0:1]
1515; GFX940-NEXT:    ;;#ASMEND
1516; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1517; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1518; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
1519; GFX940-NEXT:    v_alignbit_b32 v1, v1, v1, 16
1520; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1521; GFX940-NEXT:    s_waitcnt vmcnt(0)
1522; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both shuffle operands are inline-asm results ("=v" output constraint), not
  ; function arguments; presumably this keeps them opaque to folding - confirm.
1523  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1524  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1525  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 6>
1526  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1527  ret void
1528}
1529
; Codegen test: shufflevector of two <4 x i16> values with mask <7, 7, 7, 7>,
; i.e. every result lane takes element 3 of %vec1 (mask indices 4-7 address
; the second operand). The shuffled vector is stored through %ptr.
; The expectations build one dword with v_perm_b32 and copy it into the second
; dword with v_mov_b32.
; NOTE(review): only one "def" asm block is expected although the IR contains
; two asm calls -- presumably the unused %vec0 def is dropped; confirm.
1530define void @v_shuffle_v4i16_v4i16__7_7_7_7(ptr addrspace(1) inreg %ptr) {
1531; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_7_7:
1532; GFX900:       ; %bb.0:
1533; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1534; GFX900-NEXT:    ;;#ASMSTART
1535; GFX900-NEXT:    ; def v[0:1]
1536; GFX900-NEXT:    ;;#ASMEND
1537; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1538; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
1539; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1540; GFX900-NEXT:    v_mov_b32_e32 v1, v0
1541; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1542; GFX900-NEXT:    s_waitcnt vmcnt(0)
1543; GFX900-NEXT:    s_setpc_b64 s[30:31]
1544;
1545; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_7_7:
1546; GFX90A:       ; %bb.0:
1547; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1548; GFX90A-NEXT:    ;;#ASMSTART
1549; GFX90A-NEXT:    ; def v[0:1]
1550; GFX90A-NEXT:    ;;#ASMEND
1551; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1552; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
1553; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1554; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
1555; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1556; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1557; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1558;
1559; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_7_7:
1560; GFX940:       ; %bb.0:
1561; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1562; GFX940-NEXT:    ;;#ASMSTART
1563; GFX940-NEXT:    ; def v[0:1]
1564; GFX940-NEXT:    ;;#ASMEND
1565; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1566; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
1567; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1568; GFX940-NEXT:    v_mov_b32_e32 v1, v0
1569; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1570; GFX940-NEXT:    s_waitcnt vmcnt(0)
1571; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from inline asm with a VGPR output constraint ("=v").
1572  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1573  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1574  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
1575  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1576  ret void
1577}
1578
; Codegen test: single-input shufflevector of <4 x i16> with mask
; <poison, 0, 0, 0> (the "u" in the function name marks the poison lane).
; Lanes 1-3 all take element 0 of %vec0; the result is stored through %ptr.
; The expectations form the upper dword with v_perm_b32 and the lower dword
; with a left shift by 16, which moves element 0 into the lane-1 position.
1579define void @v_shuffle_v4i16_v4i16__u_0_0_0(ptr addrspace(1) inreg %ptr) {
1580; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_0_0_0:
1581; GFX900:       ; %bb.0:
1582; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1583; GFX900-NEXT:    ;;#ASMSTART
1584; GFX900-NEXT:    ; def v[0:1]
1585; GFX900-NEXT:    ;;#ASMEND
1586; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1587; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1588; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1589; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1590; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1591; GFX900-NEXT:    s_waitcnt vmcnt(0)
1592; GFX900-NEXT:    s_setpc_b64 s[30:31]
1593;
1594; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_0_0_0:
1595; GFX90A:       ; %bb.0:
1596; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1597; GFX90A-NEXT:    ;;#ASMSTART
1598; GFX90A-NEXT:    ; def v[0:1]
1599; GFX90A-NEXT:    ;;#ASMEND
1600; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1601; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1602; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1603; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1604; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1605; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1606; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1607;
1608; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_0_0_0:
1609; GFX940:       ; %bb.0:
1610; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1611; GFX940-NEXT:    ;;#ASMSTART
1612; GFX940-NEXT:    ; def v[0:1]
1613; GFX940-NEXT:    ;;#ASMEND
1614; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1615; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1616; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1617; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1618; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1619; GFX940-NEXT:    s_waitcnt vmcnt(0)
1620; GFX940-NEXT:    s_setpc_b64 s[30:31]
; The input comes from inline asm with a VGPR output constraint ("=v"); the
; second shuffle operand is poison.
1621  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1622  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
1623  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1624  ret void
1625}
1626
; Codegen test: single-input shufflevector of <4 x i16> with an all-zero mask
; (zeroinitializer), i.e. element 0 of %vec0 is broadcast to all four lanes.
; The result is stored through %ptr.
; The expectations build one dword with v_perm_b32 and copy it into the second
; dword with v_mov_b32.
1627define void @v_shuffle_v4i16_v4i16__0_0_0_0(ptr addrspace(1) inreg %ptr) {
1628; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_0_0_0:
1629; GFX900:       ; %bb.0:
1630; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1631; GFX900-NEXT:    ;;#ASMSTART
1632; GFX900-NEXT:    ; def v[0:1]
1633; GFX900-NEXT:    ;;#ASMEND
1634; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1635; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
1636; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1637; GFX900-NEXT:    v_mov_b32_e32 v1, v0
1638; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1639; GFX900-NEXT:    s_waitcnt vmcnt(0)
1640; GFX900-NEXT:    s_setpc_b64 s[30:31]
1641;
1642; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_0_0_0:
1643; GFX90A:       ; %bb.0:
1644; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1645; GFX90A-NEXT:    ;;#ASMSTART
1646; GFX90A-NEXT:    ; def v[0:1]
1647; GFX90A-NEXT:    ;;#ASMEND
1648; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1649; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
1650; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1651; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
1652; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1653; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1654; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1655;
1656; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_0_0_0:
1657; GFX940:       ; %bb.0:
1658; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1659; GFX940-NEXT:    ;;#ASMSTART
1660; GFX940-NEXT:    ; def v[0:1]
1661; GFX940-NEXT:    ;;#ASMEND
1662; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1663; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
1664; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1665; GFX940-NEXT:    v_mov_b32_e32 v1, v0
1666; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1667; GFX940-NEXT:    s_waitcnt vmcnt(0)
1668; GFX940-NEXT:    s_setpc_b64 s[30:31]
; The input comes from inline asm with a VGPR output constraint ("=v"); the
; second shuffle operand is poison.
1669  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1670  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> zeroinitializer
1671  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1672  ret void
1673}
1674
; Codegen test: single-input shufflevector of <4 x i16> with mask <1, 0, 0, 0>.
; The first two lanes are elements 1 and 0 of %vec0 (swapped), lanes 2-3 both
; take element 0. The result is stored through %ptr.
; The expectations form the upper dword with v_perm_b32 and the lower dword
; with v_alignbit_b32 of v0 with itself by 16 bits.
; NOTE(review): alignbit of a register with itself by 16 is assumed to act as a
; 16-bit rotate (half swap) -- confirm against the ISA manual.
1675define void @v_shuffle_v4i16_v4i16__1_0_0_0(ptr addrspace(1) inreg %ptr) {
1676; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_0_0_0:
1677; GFX900:       ; %bb.0:
1678; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1679; GFX900-NEXT:    ;;#ASMSTART
1680; GFX900-NEXT:    ; def v[0:1]
1681; GFX900-NEXT:    ;;#ASMEND
1682; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1683; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1684; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1685; GFX900-NEXT:    v_alignbit_b32 v0, v0, v0, 16
1686; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1687; GFX900-NEXT:    s_waitcnt vmcnt(0)
1688; GFX900-NEXT:    s_setpc_b64 s[30:31]
1689;
1690; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_0_0_0:
1691; GFX90A:       ; %bb.0:
1692; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693; GFX90A-NEXT:    ;;#ASMSTART
1694; GFX90A-NEXT:    ; def v[0:1]
1695; GFX90A-NEXT:    ;;#ASMEND
1696; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1697; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1698; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1699; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v0, 16
1700; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1701; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1702; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1703;
1704; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_0_0_0:
1705; GFX940:       ; %bb.0:
1706; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1707; GFX940-NEXT:    ;;#ASMSTART
1708; GFX940-NEXT:    ; def v[0:1]
1709; GFX940-NEXT:    ;;#ASMEND
1710; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1711; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1712; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1713; GFX940-NEXT:    v_alignbit_b32 v0, v0, v0, 16
1714; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1715; GFX940-NEXT:    s_waitcnt vmcnt(0)
1716; GFX940-NEXT:    s_setpc_b64 s[30:31]
; The input comes from inline asm with a VGPR output constraint ("=v"); the
; second shuffle operand is poison.
1717  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1718  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
1719  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1720  ret void
1721}
1722
; Codegen test: single-input shufflevector of <4 x i16> with mask <2, 0, 0, 0>.
; Lane 0 takes element 2 of %vec0 (which lives in the second dword of the
; input pair), lanes 1-3 take element 0. The result is stored through %ptr.
; The expectations use two v_perm_b32 with the same selector, one mixing v0
; with v1 and one using v0 twice.
; The gfx90a/gfx940 expectations write the result to a fresh register pair and
; the gfx940 ones contain an extra "s_nop 0" after the asm block.
1723define void @v_shuffle_v4i16_v4i16__2_0_0_0(ptr addrspace(1) inreg %ptr) {
1724; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_0_0_0:
1725; GFX900:       ; %bb.0:
1726; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1727; GFX900-NEXT:    ;;#ASMSTART
1728; GFX900-NEXT:    ; def v[0:1]
1729; GFX900-NEXT:    ;;#ASMEND
1730; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1731; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1732; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
1733; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1734; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1735; GFX900-NEXT:    s_waitcnt vmcnt(0)
1736; GFX900-NEXT:    s_setpc_b64 s[30:31]
1737;
1738; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_0_0_0:
1739; GFX90A:       ; %bb.0:
1740; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1741; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1742; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1743; GFX90A-NEXT:    ;;#ASMSTART
1744; GFX90A-NEXT:    ; def v[0:1]
1745; GFX90A-NEXT:    ;;#ASMEND
1746; GFX90A-NEXT:    v_perm_b32 v2, v0, v1, s4
1747; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
1748; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1749; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1750; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1751;
1752; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_0_0_0:
1753; GFX940:       ; %bb.0:
1754; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1755; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1756; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1757; GFX940-NEXT:    ;;#ASMSTART
1758; GFX940-NEXT:    ; def v[0:1]
1759; GFX940-NEXT:    ;;#ASMEND
1760; GFX940-NEXT:    s_nop 0
1761; GFX940-NEXT:    v_perm_b32 v2, v0, v1, s2
1762; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
1763; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1764; GFX940-NEXT:    s_waitcnt vmcnt(0)
1765; GFX940-NEXT:    s_setpc_b64 s[30:31]
; The input comes from inline asm with a VGPR output constraint ("=v"); the
; second shuffle operand is poison.
1766  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1767  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
1768  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1769  ret void
1770}
1771
; Codegen test: single-input shufflevector of <4 x i16> with mask <3, 0, 0, 0>.
; Lane 0 takes element 3 of %vec0, lanes 1-3 take element 0. The result is
; stored through %ptr.
; The expectations form the upper dword with v_perm_b32 (v0 with itself) and
; the lower dword with v_alignbit_b32 of v0 and v1 by 16 bits.
; The gfx90a/gfx940 expectations write the result to a fresh register pair and
; the gfx940 ones contain an extra "s_nop 0" after the asm block.
1772define void @v_shuffle_v4i16_v4i16__3_0_0_0(ptr addrspace(1) inreg %ptr) {
1773; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_0_0_0:
1774; GFX900:       ; %bb.0:
1775; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1776; GFX900-NEXT:    ;;#ASMSTART
1777; GFX900-NEXT:    ; def v[0:1]
1778; GFX900-NEXT:    ;;#ASMEND
1779; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1780; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1781; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1782; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1783; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1784; GFX900-NEXT:    s_waitcnt vmcnt(0)
1785; GFX900-NEXT:    s_setpc_b64 s[30:31]
1786;
1787; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_0_0_0:
1788; GFX90A:       ; %bb.0:
1789; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1790; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1791; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1792; GFX90A-NEXT:    ;;#ASMSTART
1793; GFX90A-NEXT:    ; def v[0:1]
1794; GFX90A-NEXT:    ;;#ASMEND
1795; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
1796; GFX90A-NEXT:    v_alignbit_b32 v2, v0, v1, 16
1797; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1798; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1799; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1800;
1801; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_0_0_0:
1802; GFX940:       ; %bb.0:
1803; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1805; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1806; GFX940-NEXT:    ;;#ASMSTART
1807; GFX940-NEXT:    ; def v[0:1]
1808; GFX940-NEXT:    ;;#ASMEND
1809; GFX940-NEXT:    s_nop 0
1810; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
1811; GFX940-NEXT:    v_alignbit_b32 v2, v0, v1, 16
1812; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1813; GFX940-NEXT:    s_waitcnt vmcnt(0)
1814; GFX940-NEXT:    s_setpc_b64 s[30:31]
; The input comes from inline asm with a VGPR output constraint ("=v"); the
; second shuffle operand is poison.
1815  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1816  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
1817  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1818  ret void
1819}
1820
; Codegen test: shufflevector of <4 x i16> with mask <4, 0, 0, 0> where the
; second operand is poison. Mask index 4 addresses element 0 of that poison
; operand, so only lanes 1-3 (element 0 of %vec0) carry defined data. The
; result is stored through %ptr.
; The expectations form the upper dword with v_perm_b32 and the lower dword
; with a left shift of v0 by 16.
1821define void @v_shuffle_v4i16_v4i16__4_0_0_0(ptr addrspace(1) inreg %ptr) {
1822; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_0_0_0:
1823; GFX900:       ; %bb.0:
1824; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1825; GFX900-NEXT:    ;;#ASMSTART
1826; GFX900-NEXT:    ; def v[0:1]
1827; GFX900-NEXT:    ;;#ASMEND
1828; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1829; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1830; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1831; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1832; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1833; GFX900-NEXT:    s_waitcnt vmcnt(0)
1834; GFX900-NEXT:    s_setpc_b64 s[30:31]
1835;
1836; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_0_0_0:
1837; GFX90A:       ; %bb.0:
1838; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1839; GFX90A-NEXT:    ;;#ASMSTART
1840; GFX90A-NEXT:    ; def v[0:1]
1841; GFX90A-NEXT:    ;;#ASMEND
1842; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1843; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1844; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1845; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1846; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
1847; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1848; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1849;
1850; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_0_0_0:
1851; GFX940:       ; %bb.0:
1852; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1853; GFX940-NEXT:    ;;#ASMSTART
1854; GFX940-NEXT:    ; def v[0:1]
1855; GFX940-NEXT:    ;;#ASMEND
1856; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1857; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1858; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1859; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
1860; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
1861; GFX940-NEXT:    s_waitcnt vmcnt(0)
1862; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Only one input is defined (inline asm, VGPR output constraint "=v"); the
; second shuffle operand is poison even though the mask references it.
1863  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1864  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
1865  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1866  ret void
1867}
1868
; Codegen test: shufflevector of two <4 x i16> values with mask <5, 0, 0, 0>.
; Lane 0 takes element 1 of %vec1 (mask indices 4-7 address the second
; operand), lanes 1-3 take element 0 of %vec0. The result is stored through
; %ptr.
; The expectations form the upper dword with v_perm_b32 (v0 with itself) and
; the lower dword with v_alignbit_b32 by 16 bits.
; NOTE(review): in the gfx900 expectations the two asm defs use overlapping
; register ranges (v[0:1] and v[1:2]) -- presumably legal because only v0 of
; the first def is read afterwards; confirm.
1869define void @v_shuffle_v4i16_v4i16__5_0_0_0(ptr addrspace(1) inreg %ptr) {
1870; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_0_0_0:
1871; GFX900:       ; %bb.0:
1872; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1873; GFX900-NEXT:    ;;#ASMSTART
1874; GFX900-NEXT:    ; def v[0:1]
1875; GFX900-NEXT:    ;;#ASMEND
1876; GFX900-NEXT:    ;;#ASMSTART
1877; GFX900-NEXT:    ; def v[1:2]
1878; GFX900-NEXT:    ;;#ASMEND
1879; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1880; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1881; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1882; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1883; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1884; GFX900-NEXT:    s_waitcnt vmcnt(0)
1885; GFX900-NEXT:    s_setpc_b64 s[30:31]
1886;
1887; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_0_0_0:
1888; GFX90A:       ; %bb.0:
1889; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1890; GFX90A-NEXT:    ;;#ASMSTART
1891; GFX90A-NEXT:    ; def v[0:1]
1892; GFX90A-NEXT:    ;;#ASMEND
1893; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1894; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1895; GFX90A-NEXT:    ;;#ASMSTART
1896; GFX90A-NEXT:    ; def v[2:3]
1897; GFX90A-NEXT:    ;;#ASMEND
1898; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1899; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v2, 16
1900; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
1901; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1902; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1903;
1904; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_0_0_0:
1905; GFX940:       ; %bb.0:
1906; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1907; GFX940-NEXT:    ;;#ASMSTART
1908; GFX940-NEXT:    ; def v[0:1]
1909; GFX940-NEXT:    ;;#ASMEND
1910; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1911; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1912; GFX940-NEXT:    ;;#ASMSTART
1913; GFX940-NEXT:    ; def v[2:3]
1914; GFX940-NEXT:    ;;#ASMEND
1915; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1916; GFX940-NEXT:    v_alignbit_b32 v0, v0, v2, 16
1917; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
1918; GFX940-NEXT:    s_waitcnt vmcnt(0)
1919; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from inline asm with a VGPR output constraint ("=v").
1920  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1921  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1922  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
1923  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1924  ret void
1925}
1926
; Codegen test: shufflevector of two <4 x i16> values with mask <6, 0, 0, 0>.
; Lane 0 takes element 2 of %vec1 (mask indices 4-7 address the second
; operand), lanes 1-3 take element 0 of %vec0. The result is stored through
; %ptr.
; The expectations use two v_perm_b32 with the same selector: one mixing v0
; with the second dword of the other input, one using v0 twice.
; NOTE(review): in the gfx900 expectations the two asm defs use overlapping
; register ranges (v[0:1] and v[1:2]) -- presumably legal because only part of
; each def is read afterwards; confirm. The gfx940 expectations contain an
; extra "s_nop 0" after the second asm block.
1927define void @v_shuffle_v4i16_v4i16__6_0_0_0(ptr addrspace(1) inreg %ptr) {
1928; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_0_0_0:
1929; GFX900:       ; %bb.0:
1930; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1931; GFX900-NEXT:    ;;#ASMSTART
1932; GFX900-NEXT:    ; def v[0:1]
1933; GFX900-NEXT:    ;;#ASMEND
1934; GFX900-NEXT:    ;;#ASMSTART
1935; GFX900-NEXT:    ; def v[1:2]
1936; GFX900-NEXT:    ;;#ASMEND
1937; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1938; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1939; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
1940; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
1941; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
1942; GFX900-NEXT:    s_waitcnt vmcnt(0)
1943; GFX900-NEXT:    s_setpc_b64 s[30:31]
1944;
1945; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_0_0_0:
1946; GFX90A:       ; %bb.0:
1947; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1948; GFX90A-NEXT:    ;;#ASMSTART
1949; GFX90A-NEXT:    ; def v[2:3]
1950; GFX90A-NEXT:    ;;#ASMEND
1951; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1952; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1953; GFX90A-NEXT:    ;;#ASMSTART
1954; GFX90A-NEXT:    ; def v[0:1]
1955; GFX90A-NEXT:    ;;#ASMEND
1956; GFX90A-NEXT:    v_perm_b32 v2, v0, v3, s4
1957; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
1958; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
1959; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1960; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1961;
1962; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_0_0_0:
1963; GFX940:       ; %bb.0:
1964; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1965; GFX940-NEXT:    ;;#ASMSTART
1966; GFX940-NEXT:    ; def v[2:3]
1967; GFX940-NEXT:    ;;#ASMEND
1968; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1969; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1970; GFX940-NEXT:    ;;#ASMSTART
1971; GFX940-NEXT:    ; def v[0:1]
1972; GFX940-NEXT:    ;;#ASMEND
1973; GFX940-NEXT:    s_nop 0
1974; GFX940-NEXT:    v_perm_b32 v2, v0, v3, s2
1975; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
1976; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
1977; GFX940-NEXT:    s_waitcnt vmcnt(0)
1978; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from inline asm with a VGPR output constraint ("=v").
1979  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1980  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1981  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 0, i32 0, i32 0>
1982  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1983  ret void
1984}
1985
; Codegen test: shufflevector of two <4 x i16> values with mask <7, 0, 0, 0>.
; Lane 0 takes element 3 of %vec1 (mask indices 4-7 address the second
; operand), lanes 1-3 take element 0 of %vec0. The result is stored through
; %ptr.
; The expectations form the upper dword with v_perm_b32 (v0 with itself) and
; the lower dword with v_alignbit_b32 of v0 and the second dword of the other
; input by 16 bits.
; NOTE(review): in the gfx900 expectations the two asm defs use overlapping
; register ranges (v[0:1] and v[1:2]) -- presumably legal because only part of
; each def is read afterwards; confirm.
1986define void @v_shuffle_v4i16_v4i16__7_0_0_0(ptr addrspace(1) inreg %ptr) {
1987; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_0_0:
1988; GFX900:       ; %bb.0:
1989; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1990; GFX900-NEXT:    ;;#ASMSTART
1991; GFX900-NEXT:    ; def v[0:1]
1992; GFX900-NEXT:    ;;#ASMEND
1993; GFX900-NEXT:    ;;#ASMSTART
1994; GFX900-NEXT:    ; def v[1:2]
1995; GFX900-NEXT:    ;;#ASMEND
1996; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1997; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1998; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1999; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
2000; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2001; GFX900-NEXT:    s_waitcnt vmcnt(0)
2002; GFX900-NEXT:    s_setpc_b64 s[30:31]
2003;
2004; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_0_0:
2005; GFX90A:       ; %bb.0:
2006; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007; GFX90A-NEXT:    ;;#ASMSTART
2008; GFX90A-NEXT:    ; def v[0:1]
2009; GFX90A-NEXT:    ;;#ASMEND
2010; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2011; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2012; GFX90A-NEXT:    ;;#ASMSTART
2013; GFX90A-NEXT:    ; def v[2:3]
2014; GFX90A-NEXT:    ;;#ASMEND
2015; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2016; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
2017; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2018; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2019; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2020;
2021; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_0_0:
2022; GFX940:       ; %bb.0:
2023; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2024; GFX940-NEXT:    ;;#ASMSTART
2025; GFX940-NEXT:    ; def v[0:1]
2026; GFX940-NEXT:    ;;#ASMEND
2027; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2028; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2029; GFX940-NEXT:    ;;#ASMSTART
2030; GFX940-NEXT:    ; def v[2:3]
2031; GFX940-NEXT:    ;;#ASMEND
2032; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2033; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
2034; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2035; GFX940-NEXT:    s_waitcnt vmcnt(0)
2036; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from inline asm with a VGPR output constraint ("=v").
2037  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2038  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2039  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 0, i32 0>
2040  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2041  ret void
2042}
2043
; Codegen test: shufflevector of two <4 x i16> values with mask
; <7, poison, 0, 0> (the "u" in the function name marks the poison lane).
; Lane 0 takes element 3 of %vec1, lane 1 is poison, lanes 2-3 take element 0
; of %vec0. The result is stored through %ptr.
; The expectations form the upper dword with v_perm_b32 (v0 with itself) and
; the lower dword with v_alignbit_b32 by 16 bits whose first source is a
; scalar register (s4 on gfx900/gfx90a, s0 on gfx940).
; NOTE(review): that scalar source presumably just fills the poison lane with
; an arbitrary value -- confirm.
2044define void @v_shuffle_v4i16_v4i16__7_u_0_0(ptr addrspace(1) inreg %ptr) {
2045; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_0_0:
2046; GFX900:       ; %bb.0:
2047; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2048; GFX900-NEXT:    ;;#ASMSTART
2049; GFX900-NEXT:    ; def v[0:1]
2050; GFX900-NEXT:    ;;#ASMEND
2051; GFX900-NEXT:    ;;#ASMSTART
2052; GFX900-NEXT:    ; def v[1:2]
2053; GFX900-NEXT:    ;;#ASMEND
2054; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2055; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2056; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2057; GFX900-NEXT:    v_alignbit_b32 v0, s4, v2, 16
2058; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2059; GFX900-NEXT:    s_waitcnt vmcnt(0)
2060; GFX900-NEXT:    s_setpc_b64 s[30:31]
2061;
2062; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_0_0:
2063; GFX90A:       ; %bb.0:
2064; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2065; GFX90A-NEXT:    ;;#ASMSTART
2066; GFX90A-NEXT:    ; def v[0:1]
2067; GFX90A-NEXT:    ;;#ASMEND
2068; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2069; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2070; GFX90A-NEXT:    ;;#ASMSTART
2071; GFX90A-NEXT:    ; def v[2:3]
2072; GFX90A-NEXT:    ;;#ASMEND
2073; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2074; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
2075; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2076; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2077; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2078;
2079; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_0_0:
2080; GFX940:       ; %bb.0:
2081; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2082; GFX940-NEXT:    ;;#ASMSTART
2083; GFX940-NEXT:    ; def v[0:1]
2084; GFX940-NEXT:    ;;#ASMEND
2085; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2086; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2087; GFX940-NEXT:    ;;#ASMSTART
2088; GFX940-NEXT:    ; def v[2:3]
2089; GFX940-NEXT:    ;;#ASMEND
2090; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2091; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
2092; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2093; GFX940-NEXT:    s_waitcnt vmcnt(0)
2094; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from inline asm with a VGPR output constraint ("=v").
2095  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2096  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2097  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 0, i32 0>
2098  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2099  ret void
2100}
2101
; Codegen test: shufflevector of two <4 x i16> values with mask <7, 1, 0, 0>.
; Lane 0 takes element 3 of %vec1, lane 1 takes element 1 of %vec0, lanes 2-3
; take element 0 of %vec0. The result is stored through %ptr.
; The expectations use two v_perm_b32 with different selectors; the scalar
; selector register is loaded with 0x7060302 first and reloaded with 0x5040100
; before the second permute.
; NOTE(review): 0x7060302 is assumed to pack the high halves of the two
; sources and 0x5040100 the low halves -- confirm against the ISA manual.
2102define void @v_shuffle_v4i16_v4i16__7_1_0_0(ptr addrspace(1) inreg %ptr) {
2103; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_0_0:
2104; GFX900:       ; %bb.0:
2105; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2106; GFX900-NEXT:    ;;#ASMSTART
2107; GFX900-NEXT:    ; def v[0:1]
2108; GFX900-NEXT:    ;;#ASMEND
2109; GFX900-NEXT:    ;;#ASMSTART
2110; GFX900-NEXT:    ; def v[1:2]
2111; GFX900-NEXT:    ;;#ASMEND
2112; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2113; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
2114; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2115; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2116; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2117; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
2118; GFX900-NEXT:    s_waitcnt vmcnt(0)
2119; GFX900-NEXT:    s_setpc_b64 s[30:31]
2120;
2121; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_0_0:
2122; GFX90A:       ; %bb.0:
2123; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2124; GFX90A-NEXT:    ;;#ASMSTART
2125; GFX90A-NEXT:    ; def v[2:3]
2126; GFX90A-NEXT:    ;;#ASMEND
2127; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2128; GFX90A-NEXT:    ;;#ASMSTART
2129; GFX90A-NEXT:    ; def v[0:1]
2130; GFX90A-NEXT:    ;;#ASMEND
2131; GFX90A-NEXT:    v_perm_b32 v2, v0, v3, s4
2132; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2133; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2134; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2135; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2136; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2137; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2138;
2139; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_0_0:
2140; GFX940:       ; %bb.0:
2141; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2142; GFX940-NEXT:    ;;#ASMSTART
2143; GFX940-NEXT:    ; def v[2:3]
2144; GFX940-NEXT:    ;;#ASMEND
2145; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2146; GFX940-NEXT:    ;;#ASMSTART
2147; GFX940-NEXT:    ; def v[0:1]
2148; GFX940-NEXT:    ;;#ASMEND
2149; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2150; GFX940-NEXT:    v_perm_b32 v2, v0, v3, s2
2151; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2152; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2153; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2154; GFX940-NEXT:    s_waitcnt vmcnt(0)
2155; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from inline asm with a VGPR output constraint ("=v").
2156  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2157  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2158  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 0, i32 0>
2159  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2160  ret void
2161}
2162
; Codegen test: shufflevector of two <4 x i16> values with mask <7, 2, 0, 0>.
; Lane 0 takes element 3 of %vec1, lane 1 takes element 2 of %vec0, lanes 2-3
; take element 0 of %vec0. The result is stored through %ptr.
; The expectations form the upper dword with v_perm_b32 (v0 with itself) and
; the lower dword with v_alignbit_b32 of v1 and v3 by 16 bits, i.e. from the
; second dword of each input pair.
; Here both asm defs get disjoint register pairs (v[0:1] and v[2:3]) on all
; three targets; the gfx940 expectations contain an extra "s_nop 0".
2163define void @v_shuffle_v4i16_v4i16__7_2_0_0(ptr addrspace(1) inreg %ptr) {
2164; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_0_0:
2165; GFX900:       ; %bb.0:
2166; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2167; GFX900-NEXT:    ;;#ASMSTART
2168; GFX900-NEXT:    ; def v[0:1]
2169; GFX900-NEXT:    ;;#ASMEND
2170; GFX900-NEXT:    ;;#ASMSTART
2171; GFX900-NEXT:    ; def v[2:3]
2172; GFX900-NEXT:    ;;#ASMEND
2173; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2174; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2175; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2176; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
2177; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
2178; GFX900-NEXT:    s_waitcnt vmcnt(0)
2179; GFX900-NEXT:    s_setpc_b64 s[30:31]
2180;
2181; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_0_0:
2182; GFX90A:       ; %bb.0:
2183; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2184; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2185; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
2186; GFX90A-NEXT:    ;;#ASMSTART
2187; GFX90A-NEXT:    ; def v[0:1]
2188; GFX90A-NEXT:    ;;#ASMEND
2189; GFX90A-NEXT:    ;;#ASMSTART
2190; GFX90A-NEXT:    ; def v[2:3]
2191; GFX90A-NEXT:    ;;#ASMEND
2192; GFX90A-NEXT:    v_perm_b32 v5, v0, v0, s4
2193; GFX90A-NEXT:    v_alignbit_b32 v4, v1, v3, 16
2194; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17]
2195; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2196; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2197;
2198; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_0_0:
2199; GFX940:       ; %bb.0:
2200; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2201; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2202; GFX940-NEXT:    v_mov_b32_e32 v6, 0
2203; GFX940-NEXT:    ;;#ASMSTART
2204; GFX940-NEXT:    ; def v[0:1]
2205; GFX940-NEXT:    ;;#ASMEND
2206; GFX940-NEXT:    ;;#ASMSTART
2207; GFX940-NEXT:    ; def v[2:3]
2208; GFX940-NEXT:    ;;#ASMEND
2209; GFX940-NEXT:    s_nop 0
2210; GFX940-NEXT:    v_perm_b32 v5, v0, v0, s2
2211; GFX940-NEXT:    v_alignbit_b32 v4, v1, v3, 16
2212; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
2213; GFX940-NEXT:    s_waitcnt vmcnt(0)
2214; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from inline asm with a VGPR output constraint ("=v").
2215  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2216  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2217  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 0, i32 0>
2218  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2219  ret void
2220}
2221
; Codegen test: shufflevector of two <4 x i16> values with mask <7, 3, 0, 0>.
; Lane 0 takes element 3 of %vec1, lane 1 takes element 3 of %vec0, lanes 2-3
; take element 0 of %vec0. The result is stored through %ptr.
; The expectations use two v_perm_b32 with different selectors; the scalar
; selector register is loaded with 0x7060302 first (permute of v1 and v3) and
; reloaded with 0x5040100 before the second permute (v0 with itself).
; NOTE(review): 0x7060302 is assumed to pack the high halves of the two
; sources and 0x5040100 the low halves -- confirm against the ISA manual.
2222define void @v_shuffle_v4i16_v4i16__7_3_0_0(ptr addrspace(1) inreg %ptr) {
2223; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_0_0:
2224; GFX900:       ; %bb.0:
2225; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2226; GFX900-NEXT:    ;;#ASMSTART
2227; GFX900-NEXT:    ; def v[0:1]
2228; GFX900-NEXT:    ;;#ASMEND
2229; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2230; GFX900-NEXT:    ;;#ASMSTART
2231; GFX900-NEXT:    ; def v[2:3]
2232; GFX900-NEXT:    ;;#ASMEND
2233; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
2234; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2235; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2236; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2237; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
2238; GFX900-NEXT:    s_waitcnt vmcnt(0)
2239; GFX900-NEXT:    s_setpc_b64 s[30:31]
2240;
2241; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_0_0:
2242; GFX90A:       ; %bb.0:
2243; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2244; GFX90A-NEXT:    ;;#ASMSTART
2245; GFX90A-NEXT:    ; def v[2:3]
2246; GFX90A-NEXT:    ;;#ASMEND
2247; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2248; GFX90A-NEXT:    ;;#ASMSTART
2249; GFX90A-NEXT:    ; def v[0:1]
2250; GFX90A-NEXT:    ;;#ASMEND
2251; GFX90A-NEXT:    v_perm_b32 v2, v1, v3, s4
2252; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2253; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2254; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2255; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2256; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2257; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2258;
2259; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_0_0:
2260; GFX940:       ; %bb.0:
2261; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2262; GFX940-NEXT:    ;;#ASMSTART
2263; GFX940-NEXT:    ; def v[2:3]
2264; GFX940-NEXT:    ;;#ASMEND
2265; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2266; GFX940-NEXT:    ;;#ASMSTART
2267; GFX940-NEXT:    ; def v[0:1]
2268; GFX940-NEXT:    ;;#ASMEND
2269; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2270; GFX940-NEXT:    v_perm_b32 v2, v1, v3, s2
2271; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2272; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2273; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2274; GFX940-NEXT:    s_waitcnt vmcnt(0)
2275; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from inline asm with a VGPR output constraint ("=v").
2276  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2277  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2278  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 0, i32 0>
2279  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2280  ret void
2281}
2282
; Two-input shuffle with mask <7, 4, 0, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are %vec1[3] and %vec1[0], and lanes
; 2-3 are both %vec0[0]. Both inputs come from opaque inline asm and the
; shuffled vector is stored to %ptr; the autogenerated assertions pin the
; code emitted for that sequence.
2283define void @v_shuffle_v4i16_v4i16__7_4_0_0(ptr addrspace(1) inreg %ptr) {
2284; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_0_0:
2285; GFX900:       ; %bb.0:
2286; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2287; GFX900-NEXT:    ;;#ASMSTART
2288; GFX900-NEXT:    ; def v[0:1]
2289; GFX900-NEXT:    ;;#ASMEND
2290; GFX900-NEXT:    ;;#ASMSTART
2291; GFX900-NEXT:    ; def v[1:2]
2292; GFX900-NEXT:    ;;#ASMEND
2293; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2294; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2295; GFX900-NEXT:    v_perm_b32 v3, v0, v0, s4
2296; GFX900-NEXT:    v_alignbit_b32 v2, v1, v2, 16
2297; GFX900-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2298; GFX900-NEXT:    s_waitcnt vmcnt(0)
2299; GFX900-NEXT:    s_setpc_b64 s[30:31]
2300;
2301; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_0_0:
2302; GFX90A:       ; %bb.0:
2303; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2304; GFX90A-NEXT:    ;;#ASMSTART
2305; GFX90A-NEXT:    ; def v[0:1]
2306; GFX90A-NEXT:    ;;#ASMEND
2307; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2308; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2309; GFX90A-NEXT:    ;;#ASMSTART
2310; GFX90A-NEXT:    ; def v[2:3]
2311; GFX90A-NEXT:    ;;#ASMEND
2312; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2313; GFX90A-NEXT:    v_alignbit_b32 v0, v2, v3, 16
2314; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2315; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2316; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2317;
2318; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_0_0:
2319; GFX940:       ; %bb.0:
2320; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2321; GFX940-NEXT:    ;;#ASMSTART
2322; GFX940-NEXT:    ; def v[0:1]
2323; GFX940-NEXT:    ;;#ASMEND
2324; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2325; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2326; GFX940-NEXT:    ;;#ASMSTART
2327; GFX940-NEXT:    ; def v[2:3]
2328; GFX940-NEXT:    ;;#ASMEND
2329; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2330; GFX940-NEXT:    v_alignbit_b32 v0, v2, v3, 16
2331; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2332; GFX940-NEXT:    s_waitcnt vmcnt(0)
2333; GFX940-NEXT:    s_setpc_b64 s[30:31]
2334  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2335  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2336  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 0, i32 0>
2337  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2338  ret void
2339}
2340
; Two-input shuffle with mask <7, 5, 0, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are %vec1[3] and %vec1[1], and lanes
; 2-3 are both %vec0[0]. Both inputs come from opaque inline asm and the
; shuffled vector is stored to %ptr; the autogenerated assertions pin the
; code emitted for that sequence.
2341define void @v_shuffle_v4i16_v4i16__7_5_0_0(ptr addrspace(1) inreg %ptr) {
2342; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_0_0:
2343; GFX900:       ; %bb.0:
2344; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2345; GFX900-NEXT:    ;;#ASMSTART
2346; GFX900-NEXT:    ; def v[0:1]
2347; GFX900-NEXT:    ;;#ASMEND
2348; GFX900-NEXT:    ;;#ASMSTART
2349; GFX900-NEXT:    ; def v[1:2]
2350; GFX900-NEXT:    ;;#ASMEND
2351; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2352; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
2353; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2354; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2355; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
2356; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
2357; GFX900-NEXT:    s_waitcnt vmcnt(0)
2358; GFX900-NEXT:    s_setpc_b64 s[30:31]
2359;
2360; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_0_0:
2361; GFX90A:       ; %bb.0:
2362; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2363; GFX90A-NEXT:    ;;#ASMSTART
2364; GFX90A-NEXT:    ; def v[2:3]
2365; GFX90A-NEXT:    ;;#ASMEND
2366; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2367; GFX90A-NEXT:    v_perm_b32 v2, v2, v3, s4
2368; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2369; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2370; GFX90A-NEXT:    ;;#ASMSTART
2371; GFX90A-NEXT:    ; def v[0:1]
2372; GFX90A-NEXT:    ;;#ASMEND
2373; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
2374; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2375; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2376; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2377;
2378; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_0_0:
2379; GFX940:       ; %bb.0:
2380; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2381; GFX940-NEXT:    ;;#ASMSTART
2382; GFX940-NEXT:    ; def v[2:3]
2383; GFX940-NEXT:    ;;#ASMEND
2384; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2385; GFX940-NEXT:    v_perm_b32 v2, v2, v3, s2
2386; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2387; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2388; GFX940-NEXT:    ;;#ASMSTART
2389; GFX940-NEXT:    ; def v[0:1]
2390; GFX940-NEXT:    ;;#ASMEND
2391; GFX940-NEXT:    s_nop 0
2392; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
2393; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2394; GFX940-NEXT:    s_waitcnt vmcnt(0)
2395; GFX940-NEXT:    s_setpc_b64 s[30:31]
2396  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2397  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2398  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 0, i32 0>
2399  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2400  ret void
2401}
2402
; Two-input shuffle with mask <7, 6, 0, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are %vec1[3] and %vec1[2], and lanes
; 2-3 are both %vec0[0]. Both inputs come from opaque inline asm and the
; shuffled vector is stored to %ptr; the autogenerated assertions pin the
; code emitted for that sequence.
2403define void @v_shuffle_v4i16_v4i16__7_6_0_0(ptr addrspace(1) inreg %ptr) {
2404; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_0_0:
2405; GFX900:       ; %bb.0:
2406; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2407; GFX900-NEXT:    ;;#ASMSTART
2408; GFX900-NEXT:    ; def v[0:1]
2409; GFX900-NEXT:    ;;#ASMEND
2410; GFX900-NEXT:    ;;#ASMSTART
2411; GFX900-NEXT:    ; def v[1:2]
2412; GFX900-NEXT:    ;;#ASMEND
2413; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2414; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2415; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2416; GFX900-NEXT:    v_alignbit_b32 v0, v2, v2, 16
2417; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2418; GFX900-NEXT:    s_waitcnt vmcnt(0)
2419; GFX900-NEXT:    s_setpc_b64 s[30:31]
2420;
2421; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_0_0:
2422; GFX90A:       ; %bb.0:
2423; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2424; GFX90A-NEXT:    ;;#ASMSTART
2425; GFX90A-NEXT:    ; def v[0:1]
2426; GFX90A-NEXT:    ;;#ASMEND
2427; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2428; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2429; GFX90A-NEXT:    ;;#ASMSTART
2430; GFX90A-NEXT:    ; def v[2:3]
2431; GFX90A-NEXT:    ;;#ASMEND
2432; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2433; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v3, 16
2434; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2435; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2436; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2437;
2438; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_0_0:
2439; GFX940:       ; %bb.0:
2440; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2441; GFX940-NEXT:    ;;#ASMSTART
2442; GFX940-NEXT:    ; def v[0:1]
2443; GFX940-NEXT:    ;;#ASMEND
2444; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2445; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2446; GFX940-NEXT:    ;;#ASMSTART
2447; GFX940-NEXT:    ; def v[2:3]
2448; GFX940-NEXT:    ;;#ASMEND
2449; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2450; GFX940-NEXT:    v_alignbit_b32 v0, v3, v3, 16
2451; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2452; GFX940-NEXT:    s_waitcnt vmcnt(0)
2453; GFX940-NEXT:    s_setpc_b64 s[30:31]
2454  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2455  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2456  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 0, i32 0>
2457  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2458  ret void
2459}
2460
; Two-input shuffle with mask <7, 7, 0, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are both %vec1[3] and lanes 2-3 are
; both %vec0[0]. Both inputs come from opaque inline asm and the shuffled
; vector is stored to %ptr; the autogenerated assertions pin the code
; emitted for that sequence.
2461define void @v_shuffle_v4i16_v4i16__7_7_0_0(ptr addrspace(1) inreg %ptr) {
2462; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_0:
2463; GFX900:       ; %bb.0:
2464; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2465; GFX900-NEXT:    ;;#ASMSTART
2466; GFX900-NEXT:    ; def v[0:1]
2467; GFX900-NEXT:    ;;#ASMEND
2468; GFX900-NEXT:    ;;#ASMSTART
2469; GFX900-NEXT:    ; def v[1:2]
2470; GFX900-NEXT:    ;;#ASMEND
2471; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2472; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2473; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2474; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2475; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
2476; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2477; GFX900-NEXT:    s_waitcnt vmcnt(0)
2478; GFX900-NEXT:    s_setpc_b64 s[30:31]
2479;
2480; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_0:
2481; GFX90A:       ; %bb.0:
2482; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2483; GFX90A-NEXT:    ;;#ASMSTART
2484; GFX90A-NEXT:    ; def v[0:1]
2485; GFX90A-NEXT:    ;;#ASMEND
2486; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2487; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2488; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2489; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2490; GFX90A-NEXT:    ;;#ASMSTART
2491; GFX90A-NEXT:    ; def v[2:3]
2492; GFX90A-NEXT:    ;;#ASMEND
2493; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2494; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2495; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2496; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2497;
2498; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_0:
2499; GFX940:       ; %bb.0:
2500; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2501; GFX940-NEXT:    ;;#ASMSTART
2502; GFX940-NEXT:    ; def v[0:1]
2503; GFX940-NEXT:    ;;#ASMEND
2504; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2505; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2506; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2507; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2508; GFX940-NEXT:    ;;#ASMSTART
2509; GFX940-NEXT:    ; def v[2:3]
2510; GFX940-NEXT:    ;;#ASMEND
2511; GFX940-NEXT:    s_nop 0
2512; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2513; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2514; GFX940-NEXT:    s_waitcnt vmcnt(0)
2515; GFX940-NEXT:    s_setpc_b64 s[30:31]
2516  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2517  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2518  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 0>
2519  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2520  ret void
2521}
2522
; Two-input shuffle with mask <7, 7, poison, 0>. Mask indices 4-7 address
; the second operand, so result lanes 0-1 are both %vec1[3], lane 2 is left
; unspecified by the poison mask element, and lane 3 is %vec0[0]. Both
; inputs come from opaque inline asm and the shuffled vector is stored to
; %ptr; the autogenerated assertions pin the code emitted for that sequence.
2523define void @v_shuffle_v4i16_v4i16__7_7_u_0(ptr addrspace(1) inreg %ptr) {
2524; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_0:
2525; GFX900:       ; %bb.0:
2526; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2527; GFX900-NEXT:    ;;#ASMSTART
2528; GFX900-NEXT:    ; def v[0:1]
2529; GFX900-NEXT:    ;;#ASMEND
2530; GFX900-NEXT:    ;;#ASMSTART
2531; GFX900-NEXT:    ; def v[1:2]
2532; GFX900-NEXT:    ;;#ASMEND
2533; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2534; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2535; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
2536; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
2537; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
2538; GFX900-NEXT:    s_waitcnt vmcnt(0)
2539; GFX900-NEXT:    s_setpc_b64 s[30:31]
2540;
2541; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_0:
2542; GFX90A:       ; %bb.0:
2543; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2544; GFX90A-NEXT:    ;;#ASMSTART
2545; GFX90A-NEXT:    ; def v[2:3]
2546; GFX90A-NEXT:    ;;#ASMEND
2547; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2548; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2549; GFX90A-NEXT:    ;;#ASMSTART
2550; GFX90A-NEXT:    ; def v[0:1]
2551; GFX90A-NEXT:    ;;#ASMEND
2552; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
2553; GFX90A-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
2554; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
2555; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2556; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2557;
2558; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_0:
2559; GFX940:       ; %bb.0:
2560; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2561; GFX940-NEXT:    ;;#ASMSTART
2562; GFX940-NEXT:    ; def v[2:3]
2563; GFX940-NEXT:    ;;#ASMEND
2564; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2565; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2566; GFX940-NEXT:    ;;#ASMSTART
2567; GFX940-NEXT:    ; def v[0:1]
2568; GFX940-NEXT:    ;;#ASMEND
2569; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
2570; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
2571; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
2572; GFX940-NEXT:    s_waitcnt vmcnt(0)
2573; GFX940-NEXT:    s_setpc_b64 s[30:31]
2574  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2575  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2576  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 0>
2577  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2578  ret void
2579}
2580
; Two-input shuffle with mask <7, 7, 1, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are both %vec1[3], lane 2 is %vec0[1]
; and lane 3 is %vec0[0]. Both inputs come from opaque inline asm and the
; shuffled vector is stored to %ptr; the autogenerated assertions pin the
; code emitted for that sequence.
2581define void @v_shuffle_v4i16_v4i16__7_7_1_0(ptr addrspace(1) inreg %ptr) {
2582; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_0:
2583; GFX900:       ; %bb.0:
2584; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2585; GFX900-NEXT:    ;;#ASMSTART
2586; GFX900-NEXT:    ; def v[0:1]
2587; GFX900-NEXT:    ;;#ASMEND
2588; GFX900-NEXT:    ;;#ASMSTART
2589; GFX900-NEXT:    ; def v[1:2]
2590; GFX900-NEXT:    ;;#ASMEND
2591; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2592; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2593; GFX900-NEXT:    v_alignbit_b32 v1, v0, v0, 16
2594; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
2595; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2596; GFX900-NEXT:    s_waitcnt vmcnt(0)
2597; GFX900-NEXT:    s_setpc_b64 s[30:31]
2598;
2599; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_0:
2600; GFX90A:       ; %bb.0:
2601; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2602; GFX90A-NEXT:    ;;#ASMSTART
2603; GFX90A-NEXT:    ; def v[0:1]
2604; GFX90A-NEXT:    ;;#ASMEND
2605; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2606; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2607; GFX90A-NEXT:    ;;#ASMSTART
2608; GFX90A-NEXT:    ; def v[2:3]
2609; GFX90A-NEXT:    ;;#ASMEND
2610; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v0, 16
2611; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2612; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2613; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2614; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2615;
2616; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_0:
2617; GFX940:       ; %bb.0:
2618; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2619; GFX940-NEXT:    ;;#ASMSTART
2620; GFX940-NEXT:    ; def v[0:1]
2621; GFX940-NEXT:    ;;#ASMEND
2622; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2623; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2624; GFX940-NEXT:    ;;#ASMSTART
2625; GFX940-NEXT:    ; def v[2:3]
2626; GFX940-NEXT:    ;;#ASMEND
2627; GFX940-NEXT:    v_alignbit_b32 v1, v0, v0, 16
2628; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2629; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2630; GFX940-NEXT:    s_waitcnt vmcnt(0)
2631; GFX940-NEXT:    s_setpc_b64 s[30:31]
2632  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2633  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2634  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 0>
2635  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2636  ret void
2637}
2638
; Two-input shuffle with mask <7, 7, 2, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are both %vec1[3], lane 2 is %vec0[2]
; and lane 3 is %vec0[0]. Both inputs come from opaque inline asm and the
; shuffled vector is stored to %ptr; the autogenerated assertions pin the
; code emitted for that sequence.
2639define void @v_shuffle_v4i16_v4i16__7_7_2_0(ptr addrspace(1) inreg %ptr) {
2640; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_0:
2641; GFX900:       ; %bb.0:
2642; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2643; GFX900-NEXT:    ;;#ASMSTART
2644; GFX900-NEXT:    ; def v[0:1]
2645; GFX900-NEXT:    ;;#ASMEND
2646; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2647; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
2648; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2649; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2650; GFX900-NEXT:    ;;#ASMSTART
2651; GFX900-NEXT:    ; def v[2:3]
2652; GFX900-NEXT:    ;;#ASMEND
2653; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
2654; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2655; GFX900-NEXT:    s_waitcnt vmcnt(0)
2656; GFX900-NEXT:    s_setpc_b64 s[30:31]
2657;
2658; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_0:
2659; GFX90A:       ; %bb.0:
2660; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2661; GFX90A-NEXT:    ;;#ASMSTART
2662; GFX90A-NEXT:    ; def v[0:1]
2663; GFX90A-NEXT:    ;;#ASMEND
2664; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2665; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
2666; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2667; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2668; GFX90A-NEXT:    ;;#ASMSTART
2669; GFX90A-NEXT:    ; def v[2:3]
2670; GFX90A-NEXT:    ;;#ASMEND
2671; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2672; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2673; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2674; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2675;
2676; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_0:
2677; GFX940:       ; %bb.0:
2678; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2679; GFX940-NEXT:    ;;#ASMSTART
2680; GFX940-NEXT:    ; def v[0:1]
2681; GFX940-NEXT:    ;;#ASMEND
2682; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2683; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
2684; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2685; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2686; GFX940-NEXT:    ;;#ASMSTART
2687; GFX940-NEXT:    ; def v[2:3]
2688; GFX940-NEXT:    ;;#ASMEND
2689; GFX940-NEXT:    s_nop 0
2690; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2691; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2692; GFX940-NEXT:    s_waitcnt vmcnt(0)
2693; GFX940-NEXT:    s_setpc_b64 s[30:31]
2694  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2695  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2696  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 0>
2697  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2698  ret void
2699}
2700
; Two-input shuffle with mask <7, 7, 3, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are both %vec1[3], lane 2 is %vec0[3]
; and lane 3 is %vec0[0]. Both inputs come from opaque inline asm and the
; shuffled vector is stored to %ptr; the autogenerated assertions pin the
; code emitted for that sequence.
2701define void @v_shuffle_v4i16_v4i16__7_7_3_0(ptr addrspace(1) inreg %ptr) {
2702; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_0:
2703; GFX900:       ; %bb.0:
2704; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2705; GFX900-NEXT:    ;;#ASMSTART
2706; GFX900-NEXT:    ; def v[0:1]
2707; GFX900-NEXT:    ;;#ASMEND
2708; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2709; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2710; GFX900-NEXT:    ;;#ASMSTART
2711; GFX900-NEXT:    ; def v[2:3]
2712; GFX900-NEXT:    ;;#ASMEND
2713; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
2714; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
2715; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2716; GFX900-NEXT:    s_waitcnt vmcnt(0)
2717; GFX900-NEXT:    s_setpc_b64 s[30:31]
2718;
2719; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_0:
2720; GFX90A:       ; %bb.0:
2721; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2722; GFX90A-NEXT:    ;;#ASMSTART
2723; GFX90A-NEXT:    ; def v[0:1]
2724; GFX90A-NEXT:    ;;#ASMEND
2725; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2726; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2727; GFX90A-NEXT:    ;;#ASMSTART
2728; GFX90A-NEXT:    ; def v[2:3]
2729; GFX90A-NEXT:    ;;#ASMEND
2730; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v1, 16
2731; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2732; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2733; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2734; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2735;
2736; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_0:
2737; GFX940:       ; %bb.0:
2738; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2739; GFX940-NEXT:    ;;#ASMSTART
2740; GFX940-NEXT:    ; def v[0:1]
2741; GFX940-NEXT:    ;;#ASMEND
2742; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2743; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2744; GFX940-NEXT:    ;;#ASMSTART
2745; GFX940-NEXT:    ; def v[2:3]
2746; GFX940-NEXT:    ;;#ASMEND
2747; GFX940-NEXT:    v_alignbit_b32 v1, v0, v1, 16
2748; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2749; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2750; GFX940-NEXT:    s_waitcnt vmcnt(0)
2751; GFX940-NEXT:    s_setpc_b64 s[30:31]
2752  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2753  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2754  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 0>
2755  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2756  ret void
2757}
2758
; Two-input shuffle with mask <7, 7, 4, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are both %vec1[3], lane 2 is %vec1[0]
; and lane 3 is %vec0[0]. Both inputs come from opaque inline asm and the
; shuffled vector is stored to %ptr; the autogenerated assertions pin the
; code emitted for that sequence.
2759define void @v_shuffle_v4i16_v4i16__7_7_4_0(ptr addrspace(1) inreg %ptr) {
2760; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_0:
2761; GFX900:       ; %bb.0:
2762; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2763; GFX900-NEXT:    ;;#ASMSTART
2764; GFX900-NEXT:    ; def v[0:1]
2765; GFX900-NEXT:    ;;#ASMEND
2766; GFX900-NEXT:    ;;#ASMSTART
2767; GFX900-NEXT:    ; def v[1:2]
2768; GFX900-NEXT:    ;;#ASMEND
2769; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2770; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
2771; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2772; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2773; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
2774; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2775; GFX900-NEXT:    s_waitcnt vmcnt(0)
2776; GFX900-NEXT:    s_setpc_b64 s[30:31]
2777;
2778; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_0:
2779; GFX90A:       ; %bb.0:
2780; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2781; GFX90A-NEXT:    ;;#ASMSTART
2782; GFX90A-NEXT:    ; def v[0:1]
2783; GFX90A-NEXT:    ;;#ASMEND
2784; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2785; GFX90A-NEXT:    ;;#ASMSTART
2786; GFX90A-NEXT:    ; def v[2:3]
2787; GFX90A-NEXT:    ;;#ASMEND
2788; GFX90A-NEXT:    v_perm_b32 v1, v0, v2, s4
2789; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2790; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2791; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2792; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2793; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2794; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2795;
2796; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_0:
2797; GFX940:       ; %bb.0:
2798; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2799; GFX940-NEXT:    ;;#ASMSTART
2800; GFX940-NEXT:    ; def v[0:1]
2801; GFX940-NEXT:    ;;#ASMEND
2802; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2803; GFX940-NEXT:    ;;#ASMSTART
2804; GFX940-NEXT:    ; def v[2:3]
2805; GFX940-NEXT:    ;;#ASMEND
2806; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2807; GFX940-NEXT:    v_perm_b32 v1, v0, v2, s2
2808; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2809; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2810; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2811; GFX940-NEXT:    s_waitcnt vmcnt(0)
2812; GFX940-NEXT:    s_setpc_b64 s[30:31]
2813  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2814  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2815  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 0>
2816  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2817  ret void
2818}
2819
; Two-input shuffle with mask <7, 7, 5, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are both %vec1[3], lane 2 is %vec1[1]
; and lane 3 is %vec0[0]. Both inputs come from opaque inline asm and the
; shuffled vector is stored to %ptr; the autogenerated assertions pin the
; code emitted for that sequence.
2820define void @v_shuffle_v4i16_v4i16__7_7_5_0(ptr addrspace(1) inreg %ptr) {
2821; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_0:
2822; GFX900:       ; %bb.0:
2823; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2824; GFX900-NEXT:    ;;#ASMSTART
2825; GFX900-NEXT:    ; def v[0:1]
2826; GFX900-NEXT:    ;;#ASMEND
2827; GFX900-NEXT:    ;;#ASMSTART
2828; GFX900-NEXT:    ; def v[1:2]
2829; GFX900-NEXT:    ;;#ASMEND
2830; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2831; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2832; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
2833; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
2834; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2835; GFX900-NEXT:    s_waitcnt vmcnt(0)
2836; GFX900-NEXT:    s_setpc_b64 s[30:31]
2837;
2838; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_0:
2839; GFX90A:       ; %bb.0:
2840; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2841; GFX90A-NEXT:    ;;#ASMSTART
2842; GFX90A-NEXT:    ; def v[0:1]
2843; GFX90A-NEXT:    ;;#ASMEND
2844; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2845; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2846; GFX90A-NEXT:    ;;#ASMSTART
2847; GFX90A-NEXT:    ; def v[2:3]
2848; GFX90A-NEXT:    ;;#ASMEND
2849; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v2, 16
2850; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2851; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2852; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2853; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2854;
2855; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_0:
2856; GFX940:       ; %bb.0:
2857; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2858; GFX940-NEXT:    ;;#ASMSTART
2859; GFX940-NEXT:    ; def v[0:1]
2860; GFX940-NEXT:    ;;#ASMEND
2861; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2862; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2863; GFX940-NEXT:    ;;#ASMSTART
2864; GFX940-NEXT:    ; def v[2:3]
2865; GFX940-NEXT:    ;;#ASMEND
2866; GFX940-NEXT:    s_nop 0
2867; GFX940-NEXT:    v_alignbit_b32 v1, v0, v2, 16
2868; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2869; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2870; GFX940-NEXT:    s_waitcnt vmcnt(0)
2871; GFX940-NEXT:    s_setpc_b64 s[30:31]
2872  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2873  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2874  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 0>
2875  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2876  ret void
2877}
2878
; Two-input shuffle with mask <7, 7, 6, 0>. Mask indices 4-7 address the
; second operand, so result lanes 0-1 are both %vec1[3], lane 2 is %vec1[2]
; and lane 3 is %vec0[0]. Both inputs come from opaque inline asm and the
; shuffled vector is stored to %ptr; the autogenerated assertions pin the
; code emitted for that sequence.
2879define void @v_shuffle_v4i16_v4i16__7_7_6_0(ptr addrspace(1) inreg %ptr) {
2880; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_0:
2881; GFX900:       ; %bb.0:
2882; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2883; GFX900-NEXT:    ;;#ASMSTART
2884; GFX900-NEXT:    ; def v[0:1]
2885; GFX900-NEXT:    ;;#ASMEND
2886; GFX900-NEXT:    ;;#ASMSTART
2887; GFX900-NEXT:    ; def v[1:2]
2888; GFX900-NEXT:    ;;#ASMEND
2889; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2890; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
2891; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2892; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2893; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
2894; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
2895; GFX900-NEXT:    s_waitcnt vmcnt(0)
2896; GFX900-NEXT:    s_setpc_b64 s[30:31]
2897;
2898; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_0:
2899; GFX90A:       ; %bb.0:
2900; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2901; GFX90A-NEXT:    ;;#ASMSTART
2902; GFX90A-NEXT:    ; def v[0:1]
2903; GFX90A-NEXT:    ;;#ASMEND
2904; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2905; GFX90A-NEXT:    ;;#ASMSTART
2906; GFX90A-NEXT:    ; def v[2:3]
2907; GFX90A-NEXT:    ;;#ASMEND
2908; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
2909; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2910; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2911; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
2912; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
2913; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2914; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2915;
2916; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_0:
2917; GFX940:       ; %bb.0:
2918; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2919; GFX940-NEXT:    ;;#ASMSTART
2920; GFX940-NEXT:    ; def v[0:1]
2921; GFX940-NEXT:    ;;#ASMEND
2922; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2923; GFX940-NEXT:    ;;#ASMSTART
2924; GFX940-NEXT:    ; def v[2:3]
2925; GFX940-NEXT:    ;;#ASMEND
2926; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2927; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
2928; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2929; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
2930; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
2931; GFX940-NEXT:    s_waitcnt vmcnt(0)
2932; GFX940-NEXT:    s_setpc_b64 s[30:31]
2933  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2934  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2935  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
2936  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2937  ret void
2938}
2939
; Single-input shuffle (second operand is poison) with mask
; <poison, 1, 1, 1>: result lane 0 is left unspecified by the poison mask
; element and lanes 1-3 all take %vec0[1]. The input comes from opaque
; inline asm and the shuffled vector is stored to %ptr; the autogenerated
; assertions pin the code emitted for that sequence.
2940define void @v_shuffle_v4i16_v4i16__u_1_1_1(ptr addrspace(1) inreg %ptr) {
2941; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_1_1_1:
2942; GFX900:       ; %bb.0:
2943; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2944; GFX900-NEXT:    ;;#ASMSTART
2945; GFX900-NEXT:    ; def v[0:1]
2946; GFX900-NEXT:    ;;#ASMEND
2947; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2948; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2949; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2950; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2951; GFX900-NEXT:    s_waitcnt vmcnt(0)
2952; GFX900-NEXT:    s_setpc_b64 s[30:31]
2953;
2954; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_1_1_1:
2955; GFX90A:       ; %bb.0:
2956; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2957; GFX90A-NEXT:    ;;#ASMSTART
2958; GFX90A-NEXT:    ; def v[0:1]
2959; GFX90A-NEXT:    ;;#ASMEND
2960; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2961; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2962; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2963; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2964; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2965; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2966;
2967; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_1_1_1:
2968; GFX940:       ; %bb.0:
2969; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2970; GFX940-NEXT:    ;;#ASMSTART
2971; GFX940-NEXT:    ; def v[0:1]
2972; GFX940-NEXT:    ;;#ASMEND
2973; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2974; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2975; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2976; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
2977; GFX940-NEXT:    s_waitcnt vmcnt(0)
2978; GFX940-NEXT:    s_setpc_b64 s[30:31]
2979  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2980  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
2981  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2982  ret void
2983}
2984
; Single-input shuffle (second operand is poison) with mask <0, 1, 1, 1>:
; result lane 0 is %vec0[0] and lanes 1-3 all take %vec0[1]. The input
; comes from opaque inline asm and the shuffled vector is stored to %ptr;
; the autogenerated assertions pin the code emitted for that sequence.
2985define void @v_shuffle_v4i16_v4i16__0_1_1_1(ptr addrspace(1) inreg %ptr) {
2986; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_1_1_1:
2987; GFX900:       ; %bb.0:
2988; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2989; GFX900-NEXT:    ;;#ASMSTART
2990; GFX900-NEXT:    ; def v[0:1]
2991; GFX900-NEXT:    ;;#ASMEND
2992; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2993; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2994; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2995; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
2996; GFX900-NEXT:    s_waitcnt vmcnt(0)
2997; GFX900-NEXT:    s_setpc_b64 s[30:31]
2998;
2999; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_1_1_1:
3000; GFX90A:       ; %bb.0:
3001; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3002; GFX90A-NEXT:    ;;#ASMSTART
3003; GFX90A-NEXT:    ; def v[0:1]
3004; GFX90A-NEXT:    ;;#ASMEND
3005; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3006; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3007; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
3008; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
3009; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3010; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3011;
3012; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_1_1_1:
3013; GFX940:       ; %bb.0:
3014; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3015; GFX940-NEXT:    ;;#ASMSTART
3016; GFX940-NEXT:    ; def v[0:1]
3017; GFX940-NEXT:    ;;#ASMEND
3018; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3019; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3020; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
3021; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3022; GFX940-NEXT:    s_waitcnt vmcnt(0)
3023; GFX940-NEXT:    s_setpc_b64 s[30:31]
3024  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3025  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
3026  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3027  ret void
3028}
3029
; Single-input shuffle (second operand is poison) with mask <1, 1, 1, 1>:
; every result lane takes %vec0[1], i.e. a splat of element 1. The input
; comes from opaque inline asm and the shuffled vector is stored to %ptr;
; the autogenerated assertions pin the code emitted for that sequence.
3030define void @v_shuffle_v4i16_v4i16__1_1_1_1(ptr addrspace(1) inreg %ptr) {
3031; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_1_1_1:
3032; GFX900:       ; %bb.0:
3033; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3034; GFX900-NEXT:    ;;#ASMSTART
3035; GFX900-NEXT:    ; def v[0:1]
3036; GFX900-NEXT:    ;;#ASMEND
3037; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3038; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
3039; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3040; GFX900-NEXT:    v_mov_b32_e32 v1, v0
3041; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
3042; GFX900-NEXT:    s_waitcnt vmcnt(0)
3043; GFX900-NEXT:    s_setpc_b64 s[30:31]
3044;
3045; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_1_1_1:
3046; GFX90A:       ; %bb.0:
3047; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3048; GFX90A-NEXT:    ;;#ASMSTART
3049; GFX90A-NEXT:    ; def v[0:1]
3050; GFX90A-NEXT:    ;;#ASMEND
3051; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3052; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
3053; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3054; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
3055; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
3056; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3057; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3058;
3059; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_1_1_1:
3060; GFX940:       ; %bb.0:
3061; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3062; GFX940-NEXT:    ;;#ASMSTART
3063; GFX940-NEXT:    ; def v[0:1]
3064; GFX940-NEXT:    ;;#ASMEND
3065; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3066; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
3067; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3068; GFX940-NEXT:    v_mov_b32_e32 v1, v0
3069; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3070; GFX940-NEXT:    s_waitcnt vmcnt(0)
3071; GFX940-NEXT:    s_setpc_b64 s[30:31]
3072  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3073  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
3074  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3075  ret void
3076}
3077
; Shuffle mask <2, 1, 1, 1>: lane 0 takes element 2 of %vec0, lanes 1-3 take
; element 1 of %vec0; the second shuffle operand is poison. The shuffled
; vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3078define void @v_shuffle_v4i16_v4i16__2_1_1_1(ptr addrspace(1) inreg %ptr) {
3079; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_1_1_1:
3080; GFX900:       ; %bb.0:
3081; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3082; GFX900-NEXT:    ;;#ASMSTART
3083; GFX900-NEXT:    ; def v[0:1]
3084; GFX900-NEXT:    ;;#ASMEND
3085; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3086; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
3087; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3088; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3089; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
3090; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3091; GFX900-NEXT:    s_waitcnt vmcnt(0)
3092; GFX900-NEXT:    s_setpc_b64 s[30:31]
3093;
3094; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_1_1_1:
3095; GFX90A:       ; %bb.0:
3096; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3097; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3098; GFX90A-NEXT:    ;;#ASMSTART
3099; GFX90A-NEXT:    ; def v[0:1]
3100; GFX90A-NEXT:    ;;#ASMEND
3101; GFX90A-NEXT:    v_bfi_b32 v2, s4, v1, v0
3102; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3103; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3104; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
3105; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3106; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3107; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3108;
3109; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_1_1_1:
3110; GFX940:       ; %bb.0:
3111; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3112; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3113; GFX940-NEXT:    ;;#ASMSTART
3114; GFX940-NEXT:    ; def v[0:1]
3115; GFX940-NEXT:    ;;#ASMEND
3116; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3117; GFX940-NEXT:    v_bfi_b32 v2, s2, v1, v0
3118; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3119; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
3120; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3121; GFX940-NEXT:    s_waitcnt vmcnt(0)
3122; GFX940-NEXT:    s_setpc_b64 s[30:31]
; The inline asm "; def $0" emits only an assembly comment; its "=v" output
; gives the shuffle a source value the compiler cannot see through.
3123  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3124  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
3125  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3126  ret void
3127}
3128
; Shuffle mask <3, 1, 1, 1>: lane 0 takes element 3 of %vec0, lanes 1-3 take
; element 1 of %vec0; the second shuffle operand is poison. The shuffled
; vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3129define void @v_shuffle_v4i16_v4i16__3_1_1_1(ptr addrspace(1) inreg %ptr) {
3130; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_1_1_1:
3131; GFX900:       ; %bb.0:
3132; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3133; GFX900-NEXT:    ;;#ASMSTART
3134; GFX900-NEXT:    ; def v[0:1]
3135; GFX900-NEXT:    ;;#ASMEND
3136; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3137; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3138; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
3139; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
3140; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3141; GFX900-NEXT:    s_waitcnt vmcnt(0)
3142; GFX900-NEXT:    s_setpc_b64 s[30:31]
3143;
3144; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_1_1_1:
3145; GFX90A:       ; %bb.0:
3146; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3147; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3148; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3149; GFX90A-NEXT:    ;;#ASMSTART
3150; GFX90A-NEXT:    ; def v[0:1]
3151; GFX90A-NEXT:    ;;#ASMEND
3152; GFX90A-NEXT:    v_perm_b32 v2, v0, v1, s4
3153; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
3154; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3155; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3156; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3157;
3158; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_1_1_1:
3159; GFX940:       ; %bb.0:
3160; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3161; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3162; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3163; GFX940-NEXT:    ;;#ASMSTART
3164; GFX940-NEXT:    ; def v[0:1]
3165; GFX940-NEXT:    ;;#ASMEND
3166; GFX940-NEXT:    s_nop 0
3167; GFX940-NEXT:    v_perm_b32 v2, v0, v1, s2
3168; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
3169; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3170; GFX940-NEXT:    s_waitcnt vmcnt(0)
3171; GFX940-NEXT:    s_setpc_b64 s[30:31]
; The inline asm "; def $0" emits only an assembly comment; its "=v" output
; gives the shuffle a source value the compiler cannot see through.
3172  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3173  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
3174  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3175  ret void
3176}
3177
; Shuffle mask <4, 1, 1, 1>: index 4 names element 0 of the second shuffle
; operand, which is poison in this function, so lane 0 has no defined value;
; lanes 1-3 take element 1 of %vec0. The shuffled vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3178define void @v_shuffle_v4i16_v4i16__4_1_1_1(ptr addrspace(1) inreg %ptr) {
3179; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_1_1_1:
3180; GFX900:       ; %bb.0:
3181; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3182; GFX900-NEXT:    ;;#ASMSTART
3183; GFX900-NEXT:    ; def v[0:1]
3184; GFX900-NEXT:    ;;#ASMEND
3185; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3186; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3187; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
3188; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
3189; GFX900-NEXT:    s_waitcnt vmcnt(0)
3190; GFX900-NEXT:    s_setpc_b64 s[30:31]
3191;
3192; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_1_1_1:
3193; GFX90A:       ; %bb.0:
3194; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3195; GFX90A-NEXT:    ;;#ASMSTART
3196; GFX90A-NEXT:    ; def v[0:1]
3197; GFX90A-NEXT:    ;;#ASMEND
3198; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3199; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3200; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
3201; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
3202; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3203; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3204;
3205; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_1_1_1:
3206; GFX940:       ; %bb.0:
3207; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3208; GFX940-NEXT:    ;;#ASMSTART
3209; GFX940-NEXT:    ; def v[0:1]
3210; GFX940-NEXT:    ;;#ASMEND
3211; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3212; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3213; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
3214; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
3215; GFX940-NEXT:    s_waitcnt vmcnt(0)
3216; GFX940-NEXT:    s_setpc_b64 s[30:31]
; The inline asm "; def $0" emits only an assembly comment; its "=v" output
; gives the shuffle a source value the compiler cannot see through.
3217  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3218  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
3219  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3220  ret void
3221}
3222
; Shuffle mask <5, 1, 1, 1>: lane 0 takes element 1 of %vec1 (indices 4-7 name
; the second shuffle operand), lanes 1-3 take element 1 of %vec0. The shuffled
; vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3223define void @v_shuffle_v4i16_v4i16__5_1_1_1(ptr addrspace(1) inreg %ptr) {
3224; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_1_1_1:
3225; GFX900:       ; %bb.0:
3226; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3227; GFX900-NEXT:    ;;#ASMSTART
3228; GFX900-NEXT:    ; def v[0:1]
3229; GFX900-NEXT:    ;;#ASMEND
3230; GFX900-NEXT:    ;;#ASMSTART
3231; GFX900-NEXT:    ; def v[1:2]
3232; GFX900-NEXT:    ;;#ASMEND
3233; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3234; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3235; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
3236; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
3237; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3238; GFX900-NEXT:    s_waitcnt vmcnt(0)
3239; GFX900-NEXT:    s_setpc_b64 s[30:31]
3240;
3241; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_1_1_1:
3242; GFX90A:       ; %bb.0:
3243; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3244; GFX90A-NEXT:    ;;#ASMSTART
3245; GFX90A-NEXT:    ; def v[2:3]
3246; GFX90A-NEXT:    ;;#ASMEND
3247; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3248; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3249; GFX90A-NEXT:    ;;#ASMSTART
3250; GFX90A-NEXT:    ; def v[0:1]
3251; GFX90A-NEXT:    ;;#ASMEND
3252; GFX90A-NEXT:    v_perm_b32 v2, v0, v2, s4
3253; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
3254; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3255; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3256; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3257;
3258; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_1_1_1:
3259; GFX940:       ; %bb.0:
3260; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3261; GFX940-NEXT:    ;;#ASMSTART
3262; GFX940-NEXT:    ; def v[2:3]
3263; GFX940-NEXT:    ;;#ASMEND
3264; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3265; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3266; GFX940-NEXT:    ;;#ASMSTART
3267; GFX940-NEXT:    ; def v[0:1]
3268; GFX940-NEXT:    ;;#ASMEND
3269; GFX940-NEXT:    s_nop 0
3270; GFX940-NEXT:    v_perm_b32 v2, v0, v2, s2
3271; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
3272; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3273; GFX940-NEXT:    s_waitcnt vmcnt(0)
3274; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3275  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3276  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3277  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
3278  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3279  ret void
3280}
3281
; Shuffle mask <6, 1, 1, 1>: lane 0 takes element 2 of %vec1 (indices 4-7 name
; the second shuffle operand), lanes 1-3 take element 1 of %vec0. The shuffled
; vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3282define void @v_shuffle_v4i16_v4i16__6_1_1_1(ptr addrspace(1) inreg %ptr) {
3283; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_1_1_1:
3284; GFX900:       ; %bb.0:
3285; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3286; GFX900-NEXT:    ;;#ASMSTART
3287; GFX900-NEXT:    ; def v[0:1]
3288; GFX900-NEXT:    ;;#ASMEND
3289; GFX900-NEXT:    ;;#ASMSTART
3290; GFX900-NEXT:    ; def v[1:2]
3291; GFX900-NEXT:    ;;#ASMEND
3292; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3293; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v0
3294; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3295; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3296; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
3297; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3298; GFX900-NEXT:    s_waitcnt vmcnt(0)
3299; GFX900-NEXT:    s_setpc_b64 s[30:31]
3300;
3301; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_1_1_1:
3302; GFX90A:       ; %bb.0:
3303; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3304; GFX90A-NEXT:    ;;#ASMSTART
3305; GFX90A-NEXT:    ; def v[2:3]
3306; GFX90A-NEXT:    ;;#ASMEND
3307; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3308; GFX90A-NEXT:    ;;#ASMSTART
3309; GFX90A-NEXT:    ; def v[0:1]
3310; GFX90A-NEXT:    ;;#ASMEND
3311; GFX90A-NEXT:    v_bfi_b32 v2, s4, v3, v0
3312; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3313; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3314; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
3315; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3316; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3317; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3318;
3319; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_1_1_1:
3320; GFX940:       ; %bb.0:
3321; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3322; GFX940-NEXT:    ;;#ASMSTART
3323; GFX940-NEXT:    ; def v[2:3]
3324; GFX940-NEXT:    ;;#ASMEND
3325; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3326; GFX940-NEXT:    ;;#ASMSTART
3327; GFX940-NEXT:    ; def v[0:1]
3328; GFX940-NEXT:    ;;#ASMEND
3329; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3330; GFX940-NEXT:    v_bfi_b32 v2, s2, v3, v0
3331; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3332; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
3333; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3334; GFX940-NEXT:    s_waitcnt vmcnt(0)
3335; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3336  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3337  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3338  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
3339  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3340  ret void
3341}
3342
; Shuffle mask <7, 1, 1, 1>: lane 0 takes element 3 of %vec1 (indices 4-7 name
; the second shuffle operand), lanes 1-3 take element 1 of %vec0. The shuffled
; vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3343define void @v_shuffle_v4i16_v4i16__7_1_1_1(ptr addrspace(1) inreg %ptr) {
3344; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_1_1:
3345; GFX900:       ; %bb.0:
3346; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3347; GFX900-NEXT:    ;;#ASMSTART
3348; GFX900-NEXT:    ; def v[0:1]
3349; GFX900-NEXT:    ;;#ASMEND
3350; GFX900-NEXT:    ;;#ASMSTART
3351; GFX900-NEXT:    ; def v[1:2]
3352; GFX900-NEXT:    ;;#ASMEND
3353; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3354; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3355; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
3356; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
3357; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3358; GFX900-NEXT:    s_waitcnt vmcnt(0)
3359; GFX900-NEXT:    s_setpc_b64 s[30:31]
3360;
3361; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_1_1:
3362; GFX90A:       ; %bb.0:
3363; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3364; GFX90A-NEXT:    ;;#ASMSTART
3365; GFX90A-NEXT:    ; def v[2:3]
3366; GFX90A-NEXT:    ;;#ASMEND
3367; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3368; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3369; GFX90A-NEXT:    ;;#ASMSTART
3370; GFX90A-NEXT:    ; def v[0:1]
3371; GFX90A-NEXT:    ;;#ASMEND
3372; GFX90A-NEXT:    v_perm_b32 v2, v0, v3, s4
3373; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
3374; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3375; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3376; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3377;
3378; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_1_1:
3379; GFX940:       ; %bb.0:
3380; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3381; GFX940-NEXT:    ;;#ASMSTART
3382; GFX940-NEXT:    ; def v[2:3]
3383; GFX940-NEXT:    ;;#ASMEND
3384; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3385; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3386; GFX940-NEXT:    ;;#ASMSTART
3387; GFX940-NEXT:    ; def v[0:1]
3388; GFX940-NEXT:    ;;#ASMEND
3389; GFX940-NEXT:    s_nop 0
3390; GFX940-NEXT:    v_perm_b32 v2, v0, v3, s2
3391; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
3392; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3393; GFX940-NEXT:    s_waitcnt vmcnt(0)
3394; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3395  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3396  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3397  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
3398  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3399  ret void
3400}
3401
; Shuffle mask <7, poison, 1, 1>: lane 0 takes element 3 of %vec1, lane 1 is
; left undefined by the mask, lanes 2-3 take element 1 of %vec0. The shuffled
; vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3402define void @v_shuffle_v4i16_v4i16__7_u_1_1(ptr addrspace(1) inreg %ptr) {
3403; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_1_1:
3404; GFX900:       ; %bb.0:
3405; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3406; GFX900-NEXT:    ;;#ASMSTART
3407; GFX900-NEXT:    ; def v[0:1]
3408; GFX900-NEXT:    ;;#ASMEND
3409; GFX900-NEXT:    ;;#ASMSTART
3410; GFX900-NEXT:    ; def v[1:2]
3411; GFX900-NEXT:    ;;#ASMEND
3412; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3413; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3414; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
3415; GFX900-NEXT:    v_alignbit_b32 v0, s4, v2, 16
3416; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
3417; GFX900-NEXT:    s_waitcnt vmcnt(0)
3418; GFX900-NEXT:    s_setpc_b64 s[30:31]
3419;
3420; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_1_1:
3421; GFX90A:       ; %bb.0:
3422; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3423; GFX90A-NEXT:    ;;#ASMSTART
3424; GFX90A-NEXT:    ; def v[0:1]
3425; GFX90A-NEXT:    ;;#ASMEND
3426; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3427; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3428; GFX90A-NEXT:    ;;#ASMSTART
3429; GFX90A-NEXT:    ; def v[2:3]
3430; GFX90A-NEXT:    ;;#ASMEND
3431; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
3432; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
3433; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3434; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3435; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3436;
3437; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_1_1:
3438; GFX940:       ; %bb.0:
3439; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3440; GFX940-NEXT:    ;;#ASMSTART
3441; GFX940-NEXT:    ; def v[0:1]
3442; GFX940-NEXT:    ;;#ASMEND
3443; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3444; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3445; GFX940-NEXT:    ;;#ASMSTART
3446; GFX940-NEXT:    ; def v[2:3]
3447; GFX940-NEXT:    ;;#ASMEND
3448; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
3449; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
3450; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3451; GFX940-NEXT:    s_waitcnt vmcnt(0)
3452; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3453  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3454  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3455  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
3456  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3457  ret void
3458}
3459
; Shuffle mask <7, 0, 1, 1>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 0 of %vec0, lanes 2-3 take element 1 of %vec0. The shuffled vector
; is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3460define void @v_shuffle_v4i16_v4i16__7_0_1_1(ptr addrspace(1) inreg %ptr) {
3461; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_1_1:
3462; GFX900:       ; %bb.0:
3463; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3464; GFX900-NEXT:    ;;#ASMSTART
3465; GFX900-NEXT:    ; def v[0:1]
3466; GFX900-NEXT:    ;;#ASMEND
3467; GFX900-NEXT:    ;;#ASMSTART
3468; GFX900-NEXT:    ; def v[1:2]
3469; GFX900-NEXT:    ;;#ASMEND
3470; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3471; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3472; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
3473; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
3474; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
3475; GFX900-NEXT:    s_waitcnt vmcnt(0)
3476; GFX900-NEXT:    s_setpc_b64 s[30:31]
3477;
3478; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_1_1:
3479; GFX90A:       ; %bb.0:
3480; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3481; GFX90A-NEXT:    ;;#ASMSTART
3482; GFX90A-NEXT:    ; def v[0:1]
3483; GFX90A-NEXT:    ;;#ASMEND
3484; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3485; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3486; GFX90A-NEXT:    ;;#ASMSTART
3487; GFX90A-NEXT:    ; def v[2:3]
3488; GFX90A-NEXT:    ;;#ASMEND
3489; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
3490; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
3491; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3492; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3493; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3494;
3495; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_1_1:
3496; GFX940:       ; %bb.0:
3497; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3498; GFX940-NEXT:    ;;#ASMSTART
3499; GFX940-NEXT:    ; def v[0:1]
3500; GFX940-NEXT:    ;;#ASMEND
3501; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3502; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3503; GFX940-NEXT:    ;;#ASMSTART
3504; GFX940-NEXT:    ; def v[2:3]
3505; GFX940-NEXT:    ;;#ASMEND
3506; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
3507; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
3508; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3509; GFX940-NEXT:    s_waitcnt vmcnt(0)
3510; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3511  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3512  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3513  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
3514  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3515  ret void
3516}
3517
; Shuffle mask <7, 2, 1, 1>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 2 of %vec0, lanes 2-3 take element 1 of %vec0. The shuffled vector
; is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3518define void @v_shuffle_v4i16_v4i16__7_2_1_1(ptr addrspace(1) inreg %ptr) {
3519; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_1_1:
3520; GFX900:       ; %bb.0:
3521; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3522; GFX900-NEXT:    ;;#ASMSTART
3523; GFX900-NEXT:    ; def v[0:1]
3524; GFX900-NEXT:    ;;#ASMEND
3525; GFX900-NEXT:    ;;#ASMSTART
3526; GFX900-NEXT:    ; def v[2:3]
3527; GFX900-NEXT:    ;;#ASMEND
3528; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3529; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3530; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
3531; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
3532; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
3533; GFX900-NEXT:    s_waitcnt vmcnt(0)
3534; GFX900-NEXT:    s_setpc_b64 s[30:31]
3535;
3536; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_1_1:
3537; GFX90A:       ; %bb.0:
3538; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3539; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3540; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
3541; GFX90A-NEXT:    ;;#ASMSTART
3542; GFX90A-NEXT:    ; def v[0:1]
3543; GFX90A-NEXT:    ;;#ASMEND
3544; GFX90A-NEXT:    ;;#ASMSTART
3545; GFX90A-NEXT:    ; def v[2:3]
3546; GFX90A-NEXT:    ;;#ASMEND
3547; GFX90A-NEXT:    v_perm_b32 v5, v0, v0, s4
3548; GFX90A-NEXT:    v_alignbit_b32 v4, v1, v3, 16
3549; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17]
3550; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3551; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3552;
3553; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_1_1:
3554; GFX940:       ; %bb.0:
3555; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3556; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3557; GFX940-NEXT:    v_mov_b32_e32 v6, 0
3558; GFX940-NEXT:    ;;#ASMSTART
3559; GFX940-NEXT:    ; def v[0:1]
3560; GFX940-NEXT:    ;;#ASMEND
3561; GFX940-NEXT:    ;;#ASMSTART
3562; GFX940-NEXT:    ; def v[2:3]
3563; GFX940-NEXT:    ;;#ASMEND
3564; GFX940-NEXT:    s_nop 0
3565; GFX940-NEXT:    v_perm_b32 v5, v0, v0, s2
3566; GFX940-NEXT:    v_alignbit_b32 v4, v1, v3, 16
3567; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
3568; GFX940-NEXT:    s_waitcnt vmcnt(0)
3569; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3570  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3571  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3572  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
3573  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3574  ret void
3575}
3576
; Shuffle mask <7, 3, 1, 1>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 3 of %vec0, lanes 2-3 take element 1 of %vec0. The shuffled vector
; is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3577define void @v_shuffle_v4i16_v4i16__7_3_1_1(ptr addrspace(1) inreg %ptr) {
3578; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_1_1:
3579; GFX900:       ; %bb.0:
3580; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3581; GFX900-NEXT:    ;;#ASMSTART
3582; GFX900-NEXT:    ; def v[0:1]
3583; GFX900-NEXT:    ;;#ASMEND
3584; GFX900-NEXT:    ;;#ASMSTART
3585; GFX900-NEXT:    ; def v[2:3]
3586; GFX900-NEXT:    ;;#ASMEND
3587; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3588; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3589; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
3590; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
3591; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
3592; GFX900-NEXT:    s_waitcnt vmcnt(0)
3593; GFX900-NEXT:    s_setpc_b64 s[30:31]
3594;
3595; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_1_1:
3596; GFX90A:       ; %bb.0:
3597; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3598; GFX90A-NEXT:    ;;#ASMSTART
3599; GFX90A-NEXT:    ; def v[2:3]
3600; GFX90A-NEXT:    ;;#ASMEND
3601; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3602; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3603; GFX90A-NEXT:    ;;#ASMSTART
3604; GFX90A-NEXT:    ; def v[0:1]
3605; GFX90A-NEXT:    ;;#ASMEND
3606; GFX90A-NEXT:    v_perm_b32 v2, v1, v3, s4
3607; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
3608; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3609; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3610; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3611;
3612; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_1_1:
3613; GFX940:       ; %bb.0:
3614; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3615; GFX940-NEXT:    ;;#ASMSTART
3616; GFX940-NEXT:    ; def v[2:3]
3617; GFX940-NEXT:    ;;#ASMEND
3618; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3619; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3620; GFX940-NEXT:    ;;#ASMSTART
3621; GFX940-NEXT:    ; def v[0:1]
3622; GFX940-NEXT:    ;;#ASMEND
3623; GFX940-NEXT:    s_nop 0
3624; GFX940-NEXT:    v_perm_b32 v2, v1, v3, s2
3625; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
3626; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3627; GFX940-NEXT:    s_waitcnt vmcnt(0)
3628; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3629  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3630  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3631  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
3632  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3633  ret void
3634}
3635
; Shuffle mask <7, 4, 1, 1>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 0 of %vec1 (indices 4-7 name the second shuffle operand), lanes 2-3
; take element 1 of %vec0. The shuffled vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3636define void @v_shuffle_v4i16_v4i16__7_4_1_1(ptr addrspace(1) inreg %ptr) {
3637; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_1_1:
3638; GFX900:       ; %bb.0:
3639; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3640; GFX900-NEXT:    ;;#ASMSTART
3641; GFX900-NEXT:    ; def v[0:1]
3642; GFX900-NEXT:    ;;#ASMEND
3643; GFX900-NEXT:    ;;#ASMSTART
3644; GFX900-NEXT:    ; def v[1:2]
3645; GFX900-NEXT:    ;;#ASMEND
3646; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3647; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3648; GFX900-NEXT:    v_perm_b32 v3, v0, v0, s4
3649; GFX900-NEXT:    v_alignbit_b32 v2, v1, v2, 16
3650; GFX900-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3651; GFX900-NEXT:    s_waitcnt vmcnt(0)
3652; GFX900-NEXT:    s_setpc_b64 s[30:31]
3653;
3654; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_1_1:
3655; GFX90A:       ; %bb.0:
3656; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3657; GFX90A-NEXT:    ;;#ASMSTART
3658; GFX90A-NEXT:    ; def v[0:1]
3659; GFX90A-NEXT:    ;;#ASMEND
3660; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3661; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3662; GFX90A-NEXT:    ;;#ASMSTART
3663; GFX90A-NEXT:    ; def v[2:3]
3664; GFX90A-NEXT:    ;;#ASMEND
3665; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
3666; GFX90A-NEXT:    v_alignbit_b32 v0, v2, v3, 16
3667; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3668; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3669; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3670;
3671; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_1_1:
3672; GFX940:       ; %bb.0:
3673; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3674; GFX940-NEXT:    ;;#ASMSTART
3675; GFX940-NEXT:    ; def v[0:1]
3676; GFX940-NEXT:    ;;#ASMEND
3677; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3678; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3679; GFX940-NEXT:    ;;#ASMSTART
3680; GFX940-NEXT:    ; def v[2:3]
3681; GFX940-NEXT:    ;;#ASMEND
3682; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
3683; GFX940-NEXT:    v_alignbit_b32 v0, v2, v3, 16
3684; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3685; GFX940-NEXT:    s_waitcnt vmcnt(0)
3686; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3687  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3688  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3689  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
3690  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3691  ret void
3692}
3693
; Shuffle mask <7, 5, 1, 1>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 1 of %vec1 (indices 4-7 name the second shuffle operand), lanes 2-3
; take element 1 of %vec0. The shuffled vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3694define void @v_shuffle_v4i16_v4i16__7_5_1_1(ptr addrspace(1) inreg %ptr) {
3695; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_1_1:
3696; GFX900:       ; %bb.0:
3697; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3698; GFX900-NEXT:    ;;#ASMSTART
3699; GFX900-NEXT:    ; def v[0:1]
3700; GFX900-NEXT:    ;;#ASMEND
3701; GFX900-NEXT:    ;;#ASMSTART
3702; GFX900-NEXT:    ; def v[1:2]
3703; GFX900-NEXT:    ;;#ASMEND
3704; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3705; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3706; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
3707; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
3708; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3709; GFX900-NEXT:    s_waitcnt vmcnt(0)
3710; GFX900-NEXT:    s_setpc_b64 s[30:31]
3711;
3712; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_1_1:
3713; GFX90A:       ; %bb.0:
3714; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3715; GFX90A-NEXT:    ;;#ASMSTART
3716; GFX90A-NEXT:    ; def v[2:3]
3717; GFX90A-NEXT:    ;;#ASMEND
3718; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3719; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3720; GFX90A-NEXT:    ;;#ASMSTART
3721; GFX90A-NEXT:    ; def v[0:1]
3722; GFX90A-NEXT:    ;;#ASMEND
3723; GFX90A-NEXT:    v_perm_b32 v2, v2, v3, s4
3724; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
3725; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3726; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3727; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3728;
3729; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_1_1:
3730; GFX940:       ; %bb.0:
3731; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3732; GFX940-NEXT:    ;;#ASMSTART
3733; GFX940-NEXT:    ; def v[2:3]
3734; GFX940-NEXT:    ;;#ASMEND
3735; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3736; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3737; GFX940-NEXT:    ;;#ASMSTART
3738; GFX940-NEXT:    ; def v[0:1]
3739; GFX940-NEXT:    ;;#ASMEND
3740; GFX940-NEXT:    v_perm_b32 v2, v2, v3, s2
3741; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
3742; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3743; GFX940-NEXT:    s_waitcnt vmcnt(0)
3744; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3745  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3746  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3747  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
3748  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3749  ret void
3750}
3751
; Shuffle mask <7, 6, 1, 1>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 2 of %vec1 (indices 4-7 name the second shuffle operand), lanes 2-3
; take element 1 of %vec0. The shuffled vector is stored to %ptr.
; The expected-assembly lines below are autogenerated (see the note at the top
; of this file); regenerate them with the update script, do not hand-edit.
3752define void @v_shuffle_v4i16_v4i16__7_6_1_1(ptr addrspace(1) inreg %ptr) {
3753; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_1_1:
3754; GFX900:       ; %bb.0:
3755; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3756; GFX900-NEXT:    ;;#ASMSTART
3757; GFX900-NEXT:    ; def v[0:1]
3758; GFX900-NEXT:    ;;#ASMEND
3759; GFX900-NEXT:    ;;#ASMSTART
3760; GFX900-NEXT:    ; def v[1:2]
3761; GFX900-NEXT:    ;;#ASMEND
3762; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3763; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3764; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
3765; GFX900-NEXT:    v_alignbit_b32 v0, v2, v2, 16
3766; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
3767; GFX900-NEXT:    s_waitcnt vmcnt(0)
3768; GFX900-NEXT:    s_setpc_b64 s[30:31]
3769;
3770; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_1_1:
3771; GFX90A:       ; %bb.0:
3772; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3773; GFX90A-NEXT:    ;;#ASMSTART
3774; GFX90A-NEXT:    ; def v[0:1]
3775; GFX90A-NEXT:    ;;#ASMEND
3776; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3777; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3778; GFX90A-NEXT:    ;;#ASMSTART
3779; GFX90A-NEXT:    ; def v[2:3]
3780; GFX90A-NEXT:    ;;#ASMEND
3781; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
3782; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v3, 16
3783; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3784; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3785; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3786;
3787; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_1_1:
3788; GFX940:       ; %bb.0:
3789; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3790; GFX940-NEXT:    ;;#ASMSTART
3791; GFX940-NEXT:    ; def v[0:1]
3792; GFX940-NEXT:    ;;#ASMEND
3793; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3794; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3795; GFX940-NEXT:    ;;#ASMSTART
3796; GFX940-NEXT:    ; def v[2:3]
3797; GFX940-NEXT:    ;;#ASMEND
3798; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
3799; GFX940-NEXT:    v_alignbit_b32 v0, v3, v3, 16
3800; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3801; GFX940-NEXT:    s_waitcnt vmcnt(0)
3802; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Each inline asm "; def $0" emits only an assembly comment; the "=v" outputs
; give the shuffle two source values the compiler cannot see through.
3803  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3804  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3805  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
3806  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3807  ret void
3808}
3809
3810define void @v_shuffle_v4i16_v4i16__7_7_1_1(ptr addrspace(1) inreg %ptr) {
3811; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_1:
3812; GFX900:       ; %bb.0:
3813; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3814; GFX900-NEXT:    ;;#ASMSTART
3815; GFX900-NEXT:    ; def v[0:1]
3816; GFX900-NEXT:    ;;#ASMEND
3817; GFX900-NEXT:    ;;#ASMSTART
3818; GFX900-NEXT:    ; def v[1:2]
3819; GFX900-NEXT:    ;;#ASMEND
3820; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3821; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3822; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
3823; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
3824; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
3825; GFX900-NEXT:    s_waitcnt vmcnt(0)
3826; GFX900-NEXT:    s_setpc_b64 s[30:31]
3827;
3828; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_1:
3829; GFX90A:       ; %bb.0:
3830; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3831; GFX90A-NEXT:    ;;#ASMSTART
3832; GFX90A-NEXT:    ; def v[0:1]
3833; GFX90A-NEXT:    ;;#ASMEND
3834; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3835; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3836; GFX90A-NEXT:    ;;#ASMSTART
3837; GFX90A-NEXT:    ; def v[2:3]
3838; GFX90A-NEXT:    ;;#ASMEND
3839; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
3840; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
3841; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
3842; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3843; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3844;
3845; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_1:
3846; GFX940:       ; %bb.0:
3847; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3848; GFX940-NEXT:    ;;#ASMSTART
3849; GFX940-NEXT:    ; def v[0:1]
3850; GFX940-NEXT:    ;;#ASMEND
3851; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3852; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3853; GFX940-NEXT:    ;;#ASMSTART
3854; GFX940-NEXT:    ; def v[2:3]
3855; GFX940-NEXT:    ;;#ASMEND
3856; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
3857; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
3858; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
3859; GFX940-NEXT:    s_waitcnt vmcnt(0)
3860; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm that emits only a "def <reg>" marker.
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so <7, 7, 1, 1>
  ; yields { vec1[3], vec1[3], vec0[1], vec0[1] }, which is stored to %ptr.
3861  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3862  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3863  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
3864  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3865  ret void
3866}
3867
3868define void @v_shuffle_v4i16_v4i16__7_7_u_1(ptr addrspace(1) inreg %ptr) {
3869; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_1:
3870; GFX900:       ; %bb.0:
3871; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3872; GFX900-NEXT:    ;;#ASMSTART
3873; GFX900-NEXT:    ; def v[0:1]
3874; GFX900-NEXT:    ;;#ASMEND
3875; GFX900-NEXT:    ;;#ASMSTART
3876; GFX900-NEXT:    ; def v[1:2]
3877; GFX900-NEXT:    ;;#ASMEND
3878; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3879; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3880; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
3881; GFX900-NEXT:    v_mov_b32_e32 v2, v0
3882; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3883; GFX900-NEXT:    s_waitcnt vmcnt(0)
3884; GFX900-NEXT:    s_setpc_b64 s[30:31]
3885;
3886; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_1:
3887; GFX90A:       ; %bb.0:
3888; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3889; GFX90A-NEXT:    ;;#ASMSTART
3890; GFX90A-NEXT:    ; def v[2:3]
3891; GFX90A-NEXT:    ;;#ASMEND
3892; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3893; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3894; GFX90A-NEXT:    ;;#ASMSTART
3895; GFX90A-NEXT:    ; def v[0:1]
3896; GFX90A-NEXT:    ;;#ASMEND
3897; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
3898; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
3899; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3900; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3901; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3902;
3903; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_1:
3904; GFX940:       ; %bb.0:
3905; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3906; GFX940-NEXT:    ;;#ASMSTART
3907; GFX940-NEXT:    ; def v[2:3]
3908; GFX940-NEXT:    ;;#ASMEND
3909; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3910; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3911; GFX940-NEXT:    ;;#ASMSTART
3912; GFX940-NEXT:    ; def v[0:1]
3913; GFX940-NEXT:    ;;#ASMEND
3914; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
3915; GFX940-NEXT:    v_mov_b32_e32 v3, v0
3916; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3917; GFX940-NEXT:    s_waitcnt vmcnt(0)
3918; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm that emits only a "def <reg>" marker.
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
  ; <7, 7, poison, 1> yields { vec1[3], vec1[3], <unconstrained>, vec0[1] }.
  ; Lane 2 is poison, so any value may be stored there.
3919  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3920  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3921  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
3922  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3923  ret void
3924}
3925
3926define void @v_shuffle_v4i16_v4i16__7_7_0_1(ptr addrspace(1) inreg %ptr) {
3927; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_1:
3928; GFX900:       ; %bb.0:
3929; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3930; GFX900-NEXT:    ;;#ASMSTART
3931; GFX900-NEXT:    ; def v[0:1]
3932; GFX900-NEXT:    ;;#ASMEND
3933; GFX900-NEXT:    ;;#ASMSTART
3934; GFX900-NEXT:    ; def v[1:2]
3935; GFX900-NEXT:    ;;#ASMEND
3936; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3937; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3938; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
3939; GFX900-NEXT:    v_mov_b32_e32 v2, v0
3940; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
3941; GFX900-NEXT:    s_waitcnt vmcnt(0)
3942; GFX900-NEXT:    s_setpc_b64 s[30:31]
3943;
3944; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_1:
3945; GFX90A:       ; %bb.0:
3946; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3947; GFX90A-NEXT:    ;;#ASMSTART
3948; GFX90A-NEXT:    ; def v[2:3]
3949; GFX90A-NEXT:    ;;#ASMEND
3950; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3951; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3952; GFX90A-NEXT:    ;;#ASMSTART
3953; GFX90A-NEXT:    ; def v[0:1]
3954; GFX90A-NEXT:    ;;#ASMEND
3955; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
3956; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
3957; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
3958; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3959; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3960;
3961; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_1:
3962; GFX940:       ; %bb.0:
3963; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3964; GFX940-NEXT:    ;;#ASMSTART
3965; GFX940-NEXT:    ; def v[2:3]
3966; GFX940-NEXT:    ;;#ASMEND
3967; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3968; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3969; GFX940-NEXT:    ;;#ASMSTART
3970; GFX940-NEXT:    ; def v[0:1]
3971; GFX940-NEXT:    ;;#ASMEND
3972; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
3973; GFX940-NEXT:    v_mov_b32_e32 v3, v0
3974; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
3975; GFX940-NEXT:    s_waitcnt vmcnt(0)
3976; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm that emits only a "def <reg>" marker.
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so <7, 7, 0, 1>
  ; yields { vec1[3], vec1[3], vec0[0], vec0[1] }: lanes 2-3 are the first two
  ; elements of %vec0 in their original order.
3977  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3978  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3979  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
3980  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3981  ret void
3982}
3983
3984define void @v_shuffle_v4i16_v4i16__7_7_2_1(ptr addrspace(1) inreg %ptr) {
3985; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_1:
3986; GFX900:       ; %bb.0:
3987; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3988; GFX900-NEXT:    ;;#ASMSTART
3989; GFX900-NEXT:    ; def v[0:1]
3990; GFX900-NEXT:    ;;#ASMEND
3991; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3992; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
3993; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3994; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3995; GFX900-NEXT:    ;;#ASMSTART
3996; GFX900-NEXT:    ; def v[2:3]
3997; GFX900-NEXT:    ;;#ASMEND
3998; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
3999; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4000; GFX900-NEXT:    s_waitcnt vmcnt(0)
4001; GFX900-NEXT:    s_setpc_b64 s[30:31]
4002;
4003; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_1:
4004; GFX90A:       ; %bb.0:
4005; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4006; GFX90A-NEXT:    ;;#ASMSTART
4007; GFX90A-NEXT:    ; def v[0:1]
4008; GFX90A-NEXT:    ;;#ASMEND
4009; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
4010; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v0
4011; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4012; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4013; GFX90A-NEXT:    ;;#ASMSTART
4014; GFX90A-NEXT:    ; def v[2:3]
4015; GFX90A-NEXT:    ;;#ASMEND
4016; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4017; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4018; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4019; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4020;
4021; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_1:
4022; GFX940:       ; %bb.0:
4023; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4024; GFX940-NEXT:    ;;#ASMSTART
4025; GFX940-NEXT:    ; def v[0:1]
4026; GFX940-NEXT:    ;;#ASMEND
4027; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4028; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v0
4029; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4030; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4031; GFX940-NEXT:    ;;#ASMSTART
4032; GFX940-NEXT:    ; def v[2:3]
4033; GFX940-NEXT:    ;;#ASMEND
4034; GFX940-NEXT:    s_nop 0
4035; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4036; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4037; GFX940-NEXT:    s_waitcnt vmcnt(0)
4038; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm that emits only a "def <reg>" marker.
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so <7, 7, 2, 1>
  ; yields { vec1[3], vec1[3], vec0[2], vec0[1] }, which is stored to %ptr.
4039  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4040  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4041  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
4042  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4043  ret void
4044}
4045
4046define void @v_shuffle_v4i16_v4i16__7_7_3_1(ptr addrspace(1) inreg %ptr) {
4047; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_1:
4048; GFX900:       ; %bb.0:
4049; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4050; GFX900-NEXT:    ;;#ASMSTART
4051; GFX900-NEXT:    ; def v[0:1]
4052; GFX900-NEXT:    ;;#ASMEND
4053; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4054; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4055; GFX900-NEXT:    ;;#ASMSTART
4056; GFX900-NEXT:    ; def v[2:3]
4057; GFX900-NEXT:    ;;#ASMEND
4058; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
4059; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
4060; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4061; GFX900-NEXT:    s_waitcnt vmcnt(0)
4062; GFX900-NEXT:    s_setpc_b64 s[30:31]
4063;
4064; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_1:
4065; GFX90A:       ; %bb.0:
4066; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4067; GFX90A-NEXT:    ;;#ASMSTART
4068; GFX90A-NEXT:    ; def v[0:1]
4069; GFX90A-NEXT:    ;;#ASMEND
4070; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4071; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4072; GFX90A-NEXT:    ;;#ASMSTART
4073; GFX90A-NEXT:    ; def v[2:3]
4074; GFX90A-NEXT:    ;;#ASMEND
4075; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
4076; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4077; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4078; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4079; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4080;
4081; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_1:
4082; GFX940:       ; %bb.0:
4083; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4084; GFX940-NEXT:    ;;#ASMSTART
4085; GFX940-NEXT:    ; def v[0:1]
4086; GFX940-NEXT:    ;;#ASMEND
4087; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4088; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4089; GFX940-NEXT:    ;;#ASMSTART
4090; GFX940-NEXT:    ; def v[2:3]
4091; GFX940-NEXT:    ;;#ASMEND
4092; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
4093; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4094; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4095; GFX940-NEXT:    s_waitcnt vmcnt(0)
4096; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm that emits only a "def <reg>" marker.
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so <7, 7, 3, 1>
  ; yields { vec1[3], vec1[3], vec0[3], vec0[1] }, which is stored to %ptr.
4097  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4098  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4099  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
4100  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4101  ret void
4102}
4103
4104define void @v_shuffle_v4i16_v4i16__7_7_4_1(ptr addrspace(1) inreg %ptr) {
4105; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_1:
4106; GFX900:       ; %bb.0:
4107; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4108; GFX900-NEXT:    ;;#ASMSTART
4109; GFX900-NEXT:    ; def v[0:1]
4110; GFX900-NEXT:    ;;#ASMEND
4111; GFX900-NEXT:    ;;#ASMSTART
4112; GFX900-NEXT:    ; def v[1:2]
4113; GFX900-NEXT:    ;;#ASMEND
4114; GFX900-NEXT:    s_mov_b32 s4, 0xffff
4115; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
4116; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4117; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4118; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
4119; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
4120; GFX900-NEXT:    s_waitcnt vmcnt(0)
4121; GFX900-NEXT:    s_setpc_b64 s[30:31]
4122;
4123; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_1:
4124; GFX90A:       ; %bb.0:
4125; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4126; GFX90A-NEXT:    ;;#ASMSTART
4127; GFX90A-NEXT:    ; def v[0:1]
4128; GFX90A-NEXT:    ;;#ASMEND
4129; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
4130; GFX90A-NEXT:    ;;#ASMSTART
4131; GFX90A-NEXT:    ; def v[2:3]
4132; GFX90A-NEXT:    ;;#ASMEND
4133; GFX90A-NEXT:    v_bfi_b32 v1, s4, v2, v0
4134; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4135; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4136; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4137; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4138; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4139; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4140;
4141; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_1:
4142; GFX940:       ; %bb.0:
4143; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4144; GFX940-NEXT:    ;;#ASMSTART
4145; GFX940-NEXT:    ; def v[0:1]
4146; GFX940-NEXT:    ;;#ASMEND
4147; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4148; GFX940-NEXT:    ;;#ASMSTART
4149; GFX940-NEXT:    ; def v[2:3]
4150; GFX940-NEXT:    ;;#ASMEND
4151; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4152; GFX940-NEXT:    v_bfi_b32 v1, s2, v2, v0
4153; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4154; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4155; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4156; GFX940-NEXT:    s_waitcnt vmcnt(0)
4157; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm that emits only a "def <reg>" marker.
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so <7, 7, 4, 1>
  ; yields { vec1[3], vec1[3], vec1[0], vec0[1] }: the upper 32 bits mix one
  ; element from each input.
4158  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4159  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4160  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
4161  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4162  ret void
4163}
4164
4165define void @v_shuffle_v4i16_v4i16__7_7_5_1(ptr addrspace(1) inreg %ptr) {
4166; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_1:
4167; GFX900:       ; %bb.0:
4168; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4169; GFX900-NEXT:    ;;#ASMSTART
4170; GFX900-NEXT:    ; def v[0:1]
4171; GFX900-NEXT:    ;;#ASMEND
4172; GFX900-NEXT:    ;;#ASMSTART
4173; GFX900-NEXT:    ; def v[1:2]
4174; GFX900-NEXT:    ;;#ASMEND
4175; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4176; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4177; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
4178; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
4179; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
4180; GFX900-NEXT:    s_waitcnt vmcnt(0)
4181; GFX900-NEXT:    s_setpc_b64 s[30:31]
4182;
4183; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_1:
4184; GFX90A:       ; %bb.0:
4185; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4186; GFX90A-NEXT:    ;;#ASMSTART
4187; GFX90A-NEXT:    ; def v[0:1]
4188; GFX90A-NEXT:    ;;#ASMEND
4189; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4190; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4191; GFX90A-NEXT:    ;;#ASMSTART
4192; GFX90A-NEXT:    ; def v[2:3]
4193; GFX90A-NEXT:    ;;#ASMEND
4194; GFX90A-NEXT:    v_perm_b32 v1, v0, v2, s4
4195; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4196; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4197; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4198; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4199;
4200; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_1:
4201; GFX940:       ; %bb.0:
4202; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4203; GFX940-NEXT:    ;;#ASMSTART
4204; GFX940-NEXT:    ; def v[0:1]
4205; GFX940-NEXT:    ;;#ASMEND
4206; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4207; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4208; GFX940-NEXT:    ;;#ASMSTART
4209; GFX940-NEXT:    ; def v[2:3]
4210; GFX940-NEXT:    ;;#ASMEND
4211; GFX940-NEXT:    s_nop 0
4212; GFX940-NEXT:    v_perm_b32 v1, v0, v2, s2
4213; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4214; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4215; GFX940-NEXT:    s_waitcnt vmcnt(0)
4216; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm that emits only a "def <reg>" marker.
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so <7, 7, 5, 1>
  ; yields { vec1[3], vec1[3], vec1[1], vec0[1] }, which is stored to %ptr.
4217  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4218  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4219  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
4220  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4221  ret void
4222}
4223
4224define void @v_shuffle_v4i16_v4i16__7_7_6_1(ptr addrspace(1) inreg %ptr) {
4225; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_1:
4226; GFX900:       ; %bb.0:
4227; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4228; GFX900-NEXT:    ;;#ASMSTART
4229; GFX900-NEXT:    ; def v[0:1]
4230; GFX900-NEXT:    ;;#ASMEND
4231; GFX900-NEXT:    ;;#ASMSTART
4232; GFX900-NEXT:    ; def v[1:2]
4233; GFX900-NEXT:    ;;#ASMEND
4234; GFX900-NEXT:    s_mov_b32 s4, 0xffff
4235; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v0
4236; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4237; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4238; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
4239; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
4240; GFX900-NEXT:    s_waitcnt vmcnt(0)
4241; GFX900-NEXT:    s_setpc_b64 s[30:31]
4242;
4243; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_1:
4244; GFX90A:       ; %bb.0:
4245; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4246; GFX90A-NEXT:    ;;#ASMSTART
4247; GFX90A-NEXT:    ; def v[0:1]
4248; GFX90A-NEXT:    ;;#ASMEND
4249; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
4250; GFX90A-NEXT:    ;;#ASMSTART
4251; GFX90A-NEXT:    ; def v[2:3]
4252; GFX90A-NEXT:    ;;#ASMEND
4253; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v0
4254; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4255; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4256; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
4257; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4258; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4259; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4260;
4261; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_1:
4262; GFX940:       ; %bb.0:
4263; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4264; GFX940-NEXT:    ;;#ASMSTART
4265; GFX940-NEXT:    ; def v[0:1]
4266; GFX940-NEXT:    ;;#ASMEND
4267; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4268; GFX940-NEXT:    ;;#ASMSTART
4269; GFX940-NEXT:    ; def v[2:3]
4270; GFX940-NEXT:    ;;#ASMEND
4271; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4272; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v0
4273; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4274; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
4275; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4276; GFX940-NEXT:    s_waitcnt vmcnt(0)
4277; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm that emits only a "def <reg>" marker.
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so <7, 7, 6, 1>
  ; yields { vec1[3], vec1[3], vec1[2], vec0[1] }, which is stored to %ptr.
4278  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4279  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4280  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
4281  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4282  ret void
4283}
4284
4285define void @v_shuffle_v4i16_v4i16__u_2_2_2(ptr addrspace(1) inreg %ptr) {
4286; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_2_2_2:
4287; GFX900:       ; %bb.0:
4288; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4289; GFX900-NEXT:    ;;#ASMSTART
4290; GFX900-NEXT:    ; def v[0:1]
4291; GFX900-NEXT:    ;;#ASMEND
4292; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4293; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4294; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
4295; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4296; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
4297; GFX900-NEXT:    s_waitcnt vmcnt(0)
4298; GFX900-NEXT:    s_setpc_b64 s[30:31]
4299;
4300; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_2_2_2:
4301; GFX90A:       ; %bb.0:
4302; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4303; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4304; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4305; GFX90A-NEXT:    ;;#ASMSTART
4306; GFX90A-NEXT:    ; def v[0:1]
4307; GFX90A-NEXT:    ;;#ASMEND
4308; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
4309; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
4310; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4311; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4312; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4313;
4314; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_2_2_2:
4315; GFX940:       ; %bb.0:
4316; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4317; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4318; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4319; GFX940-NEXT:    ;;#ASMSTART
4320; GFX940-NEXT:    ; def v[0:1]
4321; GFX940-NEXT:    ;;#ASMEND
4322; GFX940-NEXT:    s_nop 0
4323; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
4324; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
4325; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4326; GFX940-NEXT:    s_waitcnt vmcnt(0)
4327; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-input shuffle: %vec0 comes from inline asm that emits only a
  ; "def <reg>" marker, and the second operand is poison. The mask
  ; <poison, 2, 2, 2> yields { <unconstrained>, vec0[2], vec0[2], vec0[2] }.
  ; Lane 0 is poison, so any value may be stored there.
4328  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4329  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
4330  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4331  ret void
4332}
4333
4334define void @v_shuffle_v4i16_v4i16__0_2_2_2(ptr addrspace(1) inreg %ptr) {
4335; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_2_2_2:
4336; GFX900:       ; %bb.0:
4337; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4338; GFX900-NEXT:    ;;#ASMSTART
4339; GFX900-NEXT:    ; def v[0:1]
4340; GFX900-NEXT:    ;;#ASMEND
4341; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4342; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4343; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
4344; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4345; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4346; GFX900-NEXT:    s_waitcnt vmcnt(0)
4347; GFX900-NEXT:    s_setpc_b64 s[30:31]
4348;
4349; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_2_2_2:
4350; GFX90A:       ; %bb.0:
4351; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4352; GFX90A-NEXT:    ;;#ASMSTART
4353; GFX90A-NEXT:    ; def v[0:1]
4354; GFX90A-NEXT:    ;;#ASMEND
4355; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4356; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4357; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
4358; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
4359; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4360; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4361; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4362;
4363; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_2_2_2:
4364; GFX940:       ; %bb.0:
4365; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4366; GFX940-NEXT:    ;;#ASMSTART
4367; GFX940-NEXT:    ; def v[0:1]
4368; GFX940-NEXT:    ;;#ASMEND
4369; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4370; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4371; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
4372; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
4373; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
4374; GFX940-NEXT:    s_waitcnt vmcnt(0)
4375; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-input shuffle: %vec0 comes from inline asm that emits only a
  ; "def <reg>" marker, and the second operand is poison. The mask
  ; <0, 2, 2, 2> yields { vec0[0], vec0[2], vec0[2], vec0[2] }.
4376  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4377  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
4378  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4379  ret void
4380}
4381
4382define void @v_shuffle_v4i16_v4i16__1_2_2_2(ptr addrspace(1) inreg %ptr) {
4383; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_2_2_2:
4384; GFX900:       ; %bb.0:
4385; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4386; GFX900-NEXT:    ;;#ASMSTART
4387; GFX900-NEXT:    ; def v[0:1]
4388; GFX900-NEXT:    ;;#ASMEND
4389; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4390; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4391; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
4392; GFX900-NEXT:    v_alignbit_b32 v1, v1, v0, 16
4393; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
4394; GFX900-NEXT:    s_waitcnt vmcnt(0)
4395; GFX900-NEXT:    s_setpc_b64 s[30:31]
4396;
4397; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_2_2_2:
4398; GFX90A:       ; %bb.0:
4399; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4400; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4401; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4402; GFX90A-NEXT:    ;;#ASMSTART
4403; GFX90A-NEXT:    ; def v[0:1]
4404; GFX90A-NEXT:    ;;#ASMEND
4405; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
4406; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v0, 16
4407; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4408; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4409; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4410;
4411; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_2_2_2:
4412; GFX940:       ; %bb.0:
4413; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4414; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4415; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4416; GFX940-NEXT:    ;;#ASMSTART
4417; GFX940-NEXT:    ; def v[0:1]
4418; GFX940-NEXT:    ;;#ASMEND
4419; GFX940-NEXT:    s_nop 0
4420; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
4421; GFX940-NEXT:    v_alignbit_b32 v2, v1, v0, 16
4422; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4423; GFX940-NEXT:    s_waitcnt vmcnt(0)
4424; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-input shuffle: %vec0 comes from inline asm that emits only a
  ; "def <reg>" marker, and the second operand is poison. The mask
  ; <1, 2, 2, 2> yields { vec0[1], vec0[2], vec0[2], vec0[2] }.
4425  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4426  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
4427  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4428  ret void
4429}
4430
4431define void @v_shuffle_v4i16_v4i16__2_2_2_2(ptr addrspace(1) inreg %ptr) {
4432; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_2_2_2:
4433; GFX900:       ; %bb.0:
4434; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4435; GFX900-NEXT:    ;;#ASMSTART
4436; GFX900-NEXT:    ; def v[0:1]
4437; GFX900-NEXT:    ;;#ASMEND
4438; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4439; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
4440; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4441; GFX900-NEXT:    v_mov_b32_e32 v1, v0
4442; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4443; GFX900-NEXT:    s_waitcnt vmcnt(0)
4444; GFX900-NEXT:    s_setpc_b64 s[30:31]
4445;
4446; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_2_2_2:
4447; GFX90A:       ; %bb.0:
4448; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4449; GFX90A-NEXT:    ;;#ASMSTART
4450; GFX90A-NEXT:    ; def v[0:1]
4451; GFX90A-NEXT:    ;;#ASMEND
4452; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4453; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
4454; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4455; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
4456; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
4457; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4458; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4459;
4460; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_2_2_2:
4461; GFX940:       ; %bb.0:
4462; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4463; GFX940-NEXT:    ;;#ASMSTART
4464; GFX940-NEXT:    ; def v[0:1]
4465; GFX940-NEXT:    ;;#ASMEND
4466; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4467; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
4468; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4469; GFX940-NEXT:    v_mov_b32_e32 v1, v0
4470; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
4471; GFX940-NEXT:    s_waitcnt vmcnt(0)
4472; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-input shuffle: %vec0 comes from inline asm that emits only a
  ; "def <reg>" marker, and the second operand is poison. The mask
  ; <2, 2, 2, 2> broadcasts vec0[2] into all four lanes.
4473  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4474  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
4475  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4476  ret void
4477}
4478
4479define void @v_shuffle_v4i16_v4i16__3_2_2_2(ptr addrspace(1) inreg %ptr) {
4480; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_2_2_2:
4481; GFX900:       ; %bb.0:
4482; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4483; GFX900-NEXT:    ;;#ASMSTART
4484; GFX900-NEXT:    ; def v[0:1]
4485; GFX900-NEXT:    ;;#ASMEND
4486; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4487; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4488; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
4489; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
4490; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
4491; GFX900-NEXT:    s_waitcnt vmcnt(0)
4492; GFX900-NEXT:    s_setpc_b64 s[30:31]
4493;
4494; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_2_2_2:
4495; GFX90A:       ; %bb.0:
4496; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4497; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4498; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4499; GFX90A-NEXT:    ;;#ASMSTART
4500; GFX90A-NEXT:    ; def v[0:1]
4501; GFX90A-NEXT:    ;;#ASMEND
4502; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
4503; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v1, 16
4504; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4505; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4506; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4507;
4508; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_2_2_2:
4509; GFX940:       ; %bb.0:
4510; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4511; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4512; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4513; GFX940-NEXT:    ;;#ASMSTART
4514; GFX940-NEXT:    ; def v[0:1]
4515; GFX940-NEXT:    ;;#ASMEND
4516; GFX940-NEXT:    s_nop 0
4517; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
4518; GFX940-NEXT:    v_alignbit_b32 v2, v1, v1, 16
4519; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4520; GFX940-NEXT:    s_waitcnt vmcnt(0)
4521; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-input shuffle: %vec0 comes from inline asm that emits only a
  ; "def <reg>" marker, and the second operand is poison. The mask
  ; <3, 2, 2, 2> yields { vec0[3], vec0[2], vec0[2], vec0[2] }.
4522  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4523  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
4524  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4525  ret void
4526}
4527
4528define void @v_shuffle_v4i16_v4i16__4_2_2_2(ptr addrspace(1) inreg %ptr) {
4529; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_2_2_2:
4530; GFX900:       ; %bb.0:
4531; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4532; GFX900-NEXT:    ;;#ASMSTART
4533; GFX900-NEXT:    ; def v[0:1]
4534; GFX900-NEXT:    ;;#ASMEND
4535; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4536; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4537; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
4538; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
4539; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
4540; GFX900-NEXT:    s_waitcnt vmcnt(0)
4541; GFX900-NEXT:    s_setpc_b64 s[30:31]
4542;
4543; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_2_2_2:
4544; GFX90A:       ; %bb.0:
4545; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4546; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4547; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4548; GFX90A-NEXT:    ;;#ASMSTART
4549; GFX90A-NEXT:    ; def v[0:1]
4550; GFX90A-NEXT:    ;;#ASMEND
4551; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
4552; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
4553; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4554; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4555; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4556;
4557; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_2_2_2:
4558; GFX940:       ; %bb.0:
4559; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4560; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4561; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4562; GFX940-NEXT:    ;;#ASMSTART
4563; GFX940-NEXT:    ; def v[0:1]
4564; GFX940-NEXT:    ;;#ASMEND
4565; GFX940-NEXT:    s_nop 0
4566; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
4567; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
4568; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4569; GFX940-NEXT:    s_waitcnt vmcnt(0)
4570; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single-input shuffle: %vec0 comes from inline asm that emits only a
  ; "def <reg>" marker, and the second operand is poison. Mask index 4
  ; addresses element 0 of that poison operand, so lane 0 is unconstrained and
  ; <4, 2, 2, 2> yields { <unconstrained>, vec0[2], vec0[2], vec0[2] }. The
  ; expected instructions are the same as for @v_shuffle_v4i16_v4i16__u_2_2_2.
4571  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4572  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
4573  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4574  ret void
4575}
4576
; Shuffle mask <5, 2, 2, 2>: lane 0 takes element 1 of %vec1 (index 5),
; lanes 1-3 all replicate element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
4577define void @v_shuffle_v4i16_v4i16__5_2_2_2(ptr addrspace(1) inreg %ptr) {
4578; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_2_2_2:
4579; GFX900:       ; %bb.0:
4580; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4581; GFX900-NEXT:    ;;#ASMSTART
4582; GFX900-NEXT:    ; def v[2:3]
4583; GFX900-NEXT:    ;;#ASMEND
4584; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4585; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4586; GFX900-NEXT:    ;;#ASMSTART
4587; GFX900-NEXT:    ; def v[0:1]
4588; GFX900-NEXT:    ;;#ASMEND
4589; GFX900-NEXT:    v_perm_b32 v3, v1, v1, s4
4590; GFX900-NEXT:    v_alignbit_b32 v2, v1, v2, 16
4591; GFX900-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4592; GFX900-NEXT:    s_waitcnt vmcnt(0)
4593; GFX900-NEXT:    s_setpc_b64 s[30:31]
4594;
4595; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_2_2_2:
4596; GFX90A:       ; %bb.0:
4597; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4598; GFX90A-NEXT:    ;;#ASMSTART
4599; GFX90A-NEXT:    ; def v[2:3]
4600; GFX90A-NEXT:    ;;#ASMEND
4601; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4602; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4603; GFX90A-NEXT:    ;;#ASMSTART
4604; GFX90A-NEXT:    ; def v[0:1]
4605; GFX90A-NEXT:    ;;#ASMEND
4606; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
4607; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v2, 16
4608; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
4609; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4610; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4611;
4612; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_2_2_2:
4613; GFX940:       ; %bb.0:
4614; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4615; GFX940-NEXT:    ;;#ASMSTART
4616; GFX940-NEXT:    ; def v[2:3]
4617; GFX940-NEXT:    ;;#ASMEND
4618; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4619; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4620; GFX940-NEXT:    ;;#ASMSTART
4621; GFX940-NEXT:    ; def v[0:1]
4622; GFX940-NEXT:    ;;#ASMEND
4623; GFX940-NEXT:    s_nop 0
4624; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
4625; GFX940-NEXT:    v_alignbit_b32 v2, v1, v2, 16
4626; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
4627; GFX940-NEXT:    s_waitcnt vmcnt(0)
4628; GFX940-NEXT:    s_setpc_b64 s[30:31]
4629  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4630  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4631  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
4632  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4633  ret void
4634}
4635
; Shuffle mask <6, 2, 2, 2>: lane 0 takes element 2 of %vec1 (index 6),
; lanes 1-3 all replicate element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
4636define void @v_shuffle_v4i16_v4i16__6_2_2_2(ptr addrspace(1) inreg %ptr) {
4637; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_2_2_2:
4638; GFX900:       ; %bb.0:
4639; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4640; GFX900-NEXT:    ;;#ASMSTART
4641; GFX900-NEXT:    ; def v[0:1]
4642; GFX900-NEXT:    ;;#ASMEND
4643; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4644; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4645; GFX900-NEXT:    ;;#ASMSTART
4646; GFX900-NEXT:    ; def v[2:3]
4647; GFX900-NEXT:    ;;#ASMEND
4648; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
4649; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4650; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4651; GFX900-NEXT:    s_waitcnt vmcnt(0)
4652; GFX900-NEXT:    s_setpc_b64 s[30:31]
4653;
4654; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_2_2_2:
4655; GFX90A:       ; %bb.0:
4656; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4657; GFX90A-NEXT:    ;;#ASMSTART
4658; GFX90A-NEXT:    ; def v[0:1]
4659; GFX90A-NEXT:    ;;#ASMEND
4660; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4661; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4662; GFX90A-NEXT:    ;;#ASMSTART
4663; GFX90A-NEXT:    ; def v[2:3]
4664; GFX90A-NEXT:    ;;#ASMEND
4665; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
4666; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
4667; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4668; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4669; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4670;
4671; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_2_2_2:
4672; GFX940:       ; %bb.0:
4673; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4674; GFX940-NEXT:    ;;#ASMSTART
4675; GFX940-NEXT:    ; def v[0:1]
4676; GFX940-NEXT:    ;;#ASMEND
4677; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4678; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4679; GFX940-NEXT:    ;;#ASMSTART
4680; GFX940-NEXT:    ; def v[2:3]
4681; GFX940-NEXT:    ;;#ASMEND
4682; GFX940-NEXT:    s_nop 0
4683; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
4684; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
4685; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4686; GFX940-NEXT:    s_waitcnt vmcnt(0)
4687; GFX940-NEXT:    s_setpc_b64 s[30:31]
4688  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4689  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4690  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
4691  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4692  ret void
4693}
4694
; Shuffle mask <7, 2, 2, 2>: lane 0 takes element 3 of %vec1 (index 7),
; lanes 1-3 all replicate element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
4695define void @v_shuffle_v4i16_v4i16__7_2_2_2(ptr addrspace(1) inreg %ptr) {
4696; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_2_2:
4697; GFX900:       ; %bb.0:
4698; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4699; GFX900-NEXT:    ;;#ASMSTART
4700; GFX900-NEXT:    ; def v[0:1]
4701; GFX900-NEXT:    ;;#ASMEND
4702; GFX900-NEXT:    ;;#ASMSTART
4703; GFX900-NEXT:    ; def v[2:3]
4704; GFX900-NEXT:    ;;#ASMEND
4705; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4706; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4707; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
4708; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
4709; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
4710; GFX900-NEXT:    s_waitcnt vmcnt(0)
4711; GFX900-NEXT:    s_setpc_b64 s[30:31]
4712;
4713; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_2_2:
4714; GFX90A:       ; %bb.0:
4715; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4716; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4717; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
4718; GFX90A-NEXT:    ;;#ASMSTART
4719; GFX90A-NEXT:    ; def v[0:1]
4720; GFX90A-NEXT:    ;;#ASMEND
4721; GFX90A-NEXT:    ;;#ASMSTART
4722; GFX90A-NEXT:    ; def v[2:3]
4723; GFX90A-NEXT:    ;;#ASMEND
4724; GFX90A-NEXT:    v_perm_b32 v5, v1, v1, s4
4725; GFX90A-NEXT:    v_alignbit_b32 v4, v1, v3, 16
4726; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17]
4727; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4728; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4729;
4730; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_2_2:
4731; GFX940:       ; %bb.0:
4732; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4733; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4734; GFX940-NEXT:    v_mov_b32_e32 v6, 0
4735; GFX940-NEXT:    ;;#ASMSTART
4736; GFX940-NEXT:    ; def v[0:1]
4737; GFX940-NEXT:    ;;#ASMEND
4738; GFX940-NEXT:    ;;#ASMSTART
4739; GFX940-NEXT:    ; def v[2:3]
4740; GFX940-NEXT:    ;;#ASMEND
4741; GFX940-NEXT:    s_nop 0
4742; GFX940-NEXT:    v_perm_b32 v5, v1, v1, s2
4743; GFX940-NEXT:    v_alignbit_b32 v4, v1, v3, 16
4744; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
4745; GFX940-NEXT:    s_waitcnt vmcnt(0)
4746; GFX940-NEXT:    s_setpc_b64 s[30:31]
4747  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4748  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4749  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
4750  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4751  ret void
4752}
4753
; Shuffle mask <7, poison, 2, 2>: lane 0 takes element 3 of %vec1 (index 7),
; lane 1 is a poison mask entry (any value is acceptable), and lanes 2-3
; replicate element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
4754define void @v_shuffle_v4i16_v4i16__7_u_2_2(ptr addrspace(1) inreg %ptr) {
4755; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_2_2:
4756; GFX900:       ; %bb.0:
4757; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4758; GFX900-NEXT:    ;;#ASMSTART
4759; GFX900-NEXT:    ; def v[0:1]
4760; GFX900-NEXT:    ;;#ASMEND
4761; GFX900-NEXT:    ;;#ASMSTART
4762; GFX900-NEXT:    ; def v[2:3]
4763; GFX900-NEXT:    ;;#ASMEND
4764; GFX900-NEXT:    v_alignbit_b32 v0, s4, v3, 16
4765; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4766; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4767; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4768; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4769; GFX900-NEXT:    s_waitcnt vmcnt(0)
4770; GFX900-NEXT:    s_setpc_b64 s[30:31]
4771;
4772; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_2_2:
4773; GFX90A:       ; %bb.0:
4774; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4775; GFX90A-NEXT:    ;;#ASMSTART
4776; GFX90A-NEXT:    ; def v[0:1]
4777; GFX90A-NEXT:    ;;#ASMEND
4778; GFX90A-NEXT:    ;;#ASMSTART
4779; GFX90A-NEXT:    ; def v[2:3]
4780; GFX90A-NEXT:    ;;#ASMEND
4781; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
4782; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4783; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4784; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
4785; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4786; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4787; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4788;
4789; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_2_2:
4790; GFX940:       ; %bb.0:
4791; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4792; GFX940-NEXT:    ;;#ASMSTART
4793; GFX940-NEXT:    ; def v[0:1]
4794; GFX940-NEXT:    ;;#ASMEND
4795; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4796; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4797; GFX940-NEXT:    ;;#ASMSTART
4798; GFX940-NEXT:    ; def v[2:3]
4799; GFX940-NEXT:    ;;#ASMEND
4800; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
4801; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
4802; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4803; GFX940-NEXT:    s_waitcnt vmcnt(0)
4804; GFX940-NEXT:    s_setpc_b64 s[30:31]
4805  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4806  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4807  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
4808  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4809  ret void
4810}
4811
; Shuffle mask <7, 0, 2, 2>: lane 0 takes element 3 of %vec1 (index 7),
; lane 1 takes element 0 of %vec0, and lanes 2-3 replicate element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
4812define void @v_shuffle_v4i16_v4i16__7_0_2_2(ptr addrspace(1) inreg %ptr) {
4813; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_2_2:
4814; GFX900:       ; %bb.0:
4815; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4816; GFX900-NEXT:    ;;#ASMSTART
4817; GFX900-NEXT:    ; def v[0:1]
4818; GFX900-NEXT:    ;;#ASMEND
4819; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4820; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4821; GFX900-NEXT:    ;;#ASMSTART
4822; GFX900-NEXT:    ; def v[2:3]
4823; GFX900-NEXT:    ;;#ASMEND
4824; GFX900-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4825; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4826; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4827; GFX900-NEXT:    s_waitcnt vmcnt(0)
4828; GFX900-NEXT:    s_setpc_b64 s[30:31]
4829;
4830; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_2_2:
4831; GFX90A:       ; %bb.0:
4832; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4833; GFX90A-NEXT:    ;;#ASMSTART
4834; GFX90A-NEXT:    ; def v[0:1]
4835; GFX90A-NEXT:    ;;#ASMEND
4836; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4837; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4838; GFX90A-NEXT:    ;;#ASMSTART
4839; GFX90A-NEXT:    ; def v[2:3]
4840; GFX90A-NEXT:    ;;#ASMEND
4841; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4842; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
4843; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4844; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4845; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4846;
4847; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_2_2:
4848; GFX940:       ; %bb.0:
4849; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4850; GFX940-NEXT:    ;;#ASMSTART
4851; GFX940-NEXT:    ; def v[0:1]
4852; GFX940-NEXT:    ;;#ASMEND
4853; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4854; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4855; GFX940-NEXT:    ;;#ASMSTART
4856; GFX940-NEXT:    ; def v[2:3]
4857; GFX940-NEXT:    ;;#ASMEND
4858; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
4859; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4860; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4861; GFX940-NEXT:    s_waitcnt vmcnt(0)
4862; GFX940-NEXT:    s_setpc_b64 s[30:31]
4863  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4864  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4865  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
4866  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4867  ret void
4868}
4869
; Shuffle mask <7, 1, 2, 2>: lane 0 takes element 3 of %vec1 (index 7),
; lane 1 takes element 1 of %vec0, and lanes 2-3 replicate element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
4870define void @v_shuffle_v4i16_v4i16__7_1_2_2(ptr addrspace(1) inreg %ptr) {
4871; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_2_2:
4872; GFX900:       ; %bb.0:
4873; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4874; GFX900-NEXT:    ;;#ASMSTART
4875; GFX900-NEXT:    ; def v[0:1]
4876; GFX900-NEXT:    ;;#ASMEND
4877; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4878; GFX900-NEXT:    ;;#ASMSTART
4879; GFX900-NEXT:    ; def v[2:3]
4880; GFX900-NEXT:    ;;#ASMEND
4881; GFX900-NEXT:    v_perm_b32 v0, v0, v3, s4
4882; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4883; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4884; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4885; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4886; GFX900-NEXT:    s_waitcnt vmcnt(0)
4887; GFX900-NEXT:    s_setpc_b64 s[30:31]
4888;
4889; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_2_2:
4890; GFX90A:       ; %bb.0:
4891; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4892; GFX90A-NEXT:    ;;#ASMSTART
4893; GFX90A-NEXT:    ; def v[0:1]
4894; GFX90A-NEXT:    ;;#ASMEND
4895; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4896; GFX90A-NEXT:    ;;#ASMSTART
4897; GFX90A-NEXT:    ; def v[2:3]
4898; GFX90A-NEXT:    ;;#ASMEND
4899; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
4900; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4901; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4902; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
4903; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4904; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4905; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4906;
4907; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_2_2:
4908; GFX940:       ; %bb.0:
4909; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4910; GFX940-NEXT:    ;;#ASMSTART
4911; GFX940-NEXT:    ; def v[0:1]
4912; GFX940-NEXT:    ;;#ASMEND
4913; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4914; GFX940-NEXT:    ;;#ASMSTART
4915; GFX940-NEXT:    ; def v[2:3]
4916; GFX940-NEXT:    ;;#ASMEND
4917; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4918; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
4919; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4920; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
4921; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4922; GFX940-NEXT:    s_waitcnt vmcnt(0)
4923; GFX940-NEXT:    s_setpc_b64 s[30:31]
4924  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4925  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4926  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
4927  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4928  ret void
4929}
4930
; Shuffle mask <7, 3, 2, 2>: lane 0 takes element 3 of %vec1 (index 7),
; lane 1 takes element 3 of %vec0, and lanes 2-3 replicate element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
4931define void @v_shuffle_v4i16_v4i16__7_3_2_2(ptr addrspace(1) inreg %ptr) {
4932; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_2_2:
4933; GFX900:       ; %bb.0:
4934; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4935; GFX900-NEXT:    ;;#ASMSTART
4936; GFX900-NEXT:    ; def v[0:1]
4937; GFX900-NEXT:    ;;#ASMEND
4938; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4939; GFX900-NEXT:    ;;#ASMSTART
4940; GFX900-NEXT:    ; def v[2:3]
4941; GFX900-NEXT:    ;;#ASMEND
4942; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
4943; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4944; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4945; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
4946; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4947; GFX900-NEXT:    s_waitcnt vmcnt(0)
4948; GFX900-NEXT:    s_setpc_b64 s[30:31]
4949;
4950; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_2_2:
4951; GFX90A:       ; %bb.0:
4952; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4953; GFX90A-NEXT:    ;;#ASMSTART
4954; GFX90A-NEXT:    ; def v[0:1]
4955; GFX90A-NEXT:    ;;#ASMEND
4956; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4957; GFX90A-NEXT:    ;;#ASMSTART
4958; GFX90A-NEXT:    ; def v[2:3]
4959; GFX90A-NEXT:    ;;#ASMEND
4960; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
4961; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4962; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4963; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
4964; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
4965; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4966; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4967;
4968; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_2_2:
4969; GFX940:       ; %bb.0:
4970; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4971; GFX940-NEXT:    ;;#ASMSTART
4972; GFX940-NEXT:    ; def v[0:1]
4973; GFX940-NEXT:    ;;#ASMEND
4974; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4975; GFX940-NEXT:    ;;#ASMSTART
4976; GFX940-NEXT:    ; def v[2:3]
4977; GFX940-NEXT:    ;;#ASMEND
4978; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4979; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
4980; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4981; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
4982; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
4983; GFX940-NEXT:    s_waitcnt vmcnt(0)
4984; GFX940-NEXT:    s_setpc_b64 s[30:31]
4985  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4986  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4987  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
4988  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4989  ret void
4990}
4991
; Shuffle mask <7, 4, 2, 2>: lane 0 takes element 3 of %vec1 (index 7),
; lane 1 takes element 0 of %vec1 (index 4), and lanes 2-3 replicate
; element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
4992define void @v_shuffle_v4i16_v4i16__7_4_2_2(ptr addrspace(1) inreg %ptr) {
4993; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_2_2:
4994; GFX900:       ; %bb.0:
4995; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4996; GFX900-NEXT:    ;;#ASMSTART
4997; GFX900-NEXT:    ; def v[0:1]
4998; GFX900-NEXT:    ;;#ASMEND
4999; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5000; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5001; GFX900-NEXT:    ;;#ASMSTART
5002; GFX900-NEXT:    ; def v[2:3]
5003; GFX900-NEXT:    ;;#ASMEND
5004; GFX900-NEXT:    v_alignbit_b32 v0, v2, v3, 16
5005; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5006; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5007; GFX900-NEXT:    s_waitcnt vmcnt(0)
5008; GFX900-NEXT:    s_setpc_b64 s[30:31]
5009;
5010; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_2_2:
5011; GFX90A:       ; %bb.0:
5012; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5013; GFX90A-NEXT:    ;;#ASMSTART
5014; GFX90A-NEXT:    ; def v[0:1]
5015; GFX90A-NEXT:    ;;#ASMEND
5016; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5017; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5018; GFX90A-NEXT:    ;;#ASMSTART
5019; GFX90A-NEXT:    ; def v[2:3]
5020; GFX90A-NEXT:    ;;#ASMEND
5021; GFX90A-NEXT:    v_alignbit_b32 v0, v2, v3, 16
5022; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
5023; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5024; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5025; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5026;
5027; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_2_2:
5028; GFX940:       ; %bb.0:
5029; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5030; GFX940-NEXT:    ;;#ASMSTART
5031; GFX940-NEXT:    ; def v[0:1]
5032; GFX940-NEXT:    ;;#ASMEND
5033; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5034; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5035; GFX940-NEXT:    ;;#ASMSTART
5036; GFX940-NEXT:    ; def v[2:3]
5037; GFX940-NEXT:    ;;#ASMEND
5038; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
5039; GFX940-NEXT:    v_alignbit_b32 v0, v2, v3, 16
5040; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5041; GFX940-NEXT:    s_waitcnt vmcnt(0)
5042; GFX940-NEXT:    s_setpc_b64 s[30:31]
5043  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5044  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5045  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
5046  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5047  ret void
5048}
5049
; Shuffle mask <7, 5, 2, 2>: lane 0 takes element 3 of %vec1 (index 7),
; lane 1 takes element 1 of %vec1 (index 5), and lanes 2-3 replicate
; element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
5050define void @v_shuffle_v4i16_v4i16__7_5_2_2(ptr addrspace(1) inreg %ptr) {
5051; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_2_2:
5052; GFX900:       ; %bb.0:
5053; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5054; GFX900-NEXT:    ;;#ASMSTART
5055; GFX900-NEXT:    ; def v[0:1]
5056; GFX900-NEXT:    ;;#ASMEND
5057; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5058; GFX900-NEXT:    ;;#ASMSTART
5059; GFX900-NEXT:    ; def v[2:3]
5060; GFX900-NEXT:    ;;#ASMEND
5061; GFX900-NEXT:    v_perm_b32 v0, v2, v3, s4
5062; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5063; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5064; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5065; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5066; GFX900-NEXT:    s_waitcnt vmcnt(0)
5067; GFX900-NEXT:    s_setpc_b64 s[30:31]
5068;
5069; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_2_2:
5070; GFX90A:       ; %bb.0:
5071; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5072; GFX90A-NEXT:    ;;#ASMSTART
5073; GFX90A-NEXT:    ; def v[0:1]
5074; GFX90A-NEXT:    ;;#ASMEND
5075; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5076; GFX90A-NEXT:    ;;#ASMSTART
5077; GFX90A-NEXT:    ; def v[2:3]
5078; GFX90A-NEXT:    ;;#ASMEND
5079; GFX90A-NEXT:    v_perm_b32 v0, v2, v3, s4
5080; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5081; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5082; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
5083; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5084; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5085; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5086;
5087; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_2_2:
5088; GFX940:       ; %bb.0:
5089; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5090; GFX940-NEXT:    ;;#ASMSTART
5091; GFX940-NEXT:    ; def v[0:1]
5092; GFX940-NEXT:    ;;#ASMEND
5093; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5094; GFX940-NEXT:    ;;#ASMSTART
5095; GFX940-NEXT:    ; def v[2:3]
5096; GFX940-NEXT:    ;;#ASMEND
5097; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5098; GFX940-NEXT:    v_perm_b32 v0, v2, v3, s2
5099; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5100; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
5101; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5102; GFX940-NEXT:    s_waitcnt vmcnt(0)
5103; GFX940-NEXT:    s_setpc_b64 s[30:31]
5104  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5105  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5106  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
5107  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5108  ret void
5109}
5110
; Shuffle mask <7, 6, 2, 2>: lane 0 takes element 3 of %vec1 (index 7),
; lane 1 takes element 2 of %vec1 (index 6), and lanes 2-3 replicate
; element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
5111define void @v_shuffle_v4i16_v4i16__7_6_2_2(ptr addrspace(1) inreg %ptr) {
5112; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_2_2:
5113; GFX900:       ; %bb.0:
5114; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5115; GFX900-NEXT:    ;;#ASMSTART
5116; GFX900-NEXT:    ; def v[0:1]
5117; GFX900-NEXT:    ;;#ASMEND
5118; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5119; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5120; GFX900-NEXT:    ;;#ASMSTART
5121; GFX900-NEXT:    ; def v[2:3]
5122; GFX900-NEXT:    ;;#ASMEND
5123; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5124; GFX900-NEXT:    v_alignbit_b32 v0, v3, v3, 16
5125; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5126; GFX900-NEXT:    s_waitcnt vmcnt(0)
5127; GFX900-NEXT:    s_setpc_b64 s[30:31]
5128;
5129; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_2_2:
5130; GFX90A:       ; %bb.0:
5131; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5132; GFX90A-NEXT:    ;;#ASMSTART
5133; GFX90A-NEXT:    ; def v[0:1]
5134; GFX90A-NEXT:    ;;#ASMEND
5135; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5136; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5137; GFX90A-NEXT:    ;;#ASMSTART
5138; GFX90A-NEXT:    ; def v[2:3]
5139; GFX90A-NEXT:    ;;#ASMEND
5140; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
5141; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v3, 16
5142; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5143; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5144; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5145;
5146; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_2_2:
5147; GFX940:       ; %bb.0:
5148; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5149; GFX940-NEXT:    ;;#ASMSTART
5150; GFX940-NEXT:    ; def v[0:1]
5151; GFX940-NEXT:    ;;#ASMEND
5152; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5153; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5154; GFX940-NEXT:    ;;#ASMSTART
5155; GFX940-NEXT:    ; def v[2:3]
5156; GFX940-NEXT:    ;;#ASMEND
5157; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
5158; GFX940-NEXT:    v_alignbit_b32 v0, v3, v3, 16
5159; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5160; GFX940-NEXT:    s_waitcnt vmcnt(0)
5161; GFX940-NEXT:    s_setpc_b64 s[30:31]
5162  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5163  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5164  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
5165  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5166  ret void
5167}
5168
; Shuffle mask <7, 7, 2, 2>: lanes 0-1 replicate element 3 of %vec1 (index 7),
; lanes 2-3 replicate element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
5169define void @v_shuffle_v4i16_v4i16__7_7_2_2(ptr addrspace(1) inreg %ptr) {
5170; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_2:
5171; GFX900:       ; %bb.0:
5172; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5173; GFX900-NEXT:    ;;#ASMSTART
5174; GFX900-NEXT:    ; def v[0:1]
5175; GFX900-NEXT:    ;;#ASMEND
5176; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5177; GFX900-NEXT:    ;;#ASMSTART
5178; GFX900-NEXT:    ; def v[2:3]
5179; GFX900-NEXT:    ;;#ASMEND
5180; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
5181; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5182; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5183; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5184; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5185; GFX900-NEXT:    s_waitcnt vmcnt(0)
5186; GFX900-NEXT:    s_setpc_b64 s[30:31]
5187;
5188; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_2:
5189; GFX90A:       ; %bb.0:
5190; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5191; GFX90A-NEXT:    ;;#ASMSTART
5192; GFX90A-NEXT:    ; def v[0:1]
5193; GFX90A-NEXT:    ;;#ASMEND
5194; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5195; GFX90A-NEXT:    ;;#ASMSTART
5196; GFX90A-NEXT:    ; def v[2:3]
5197; GFX90A-NEXT:    ;;#ASMEND
5198; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5199; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5200; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5201; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
5202; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5203; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5204; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5205;
5206; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_2:
5207; GFX940:       ; %bb.0:
5208; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5209; GFX940-NEXT:    ;;#ASMSTART
5210; GFX940-NEXT:    ; def v[0:1]
5211; GFX940-NEXT:    ;;#ASMEND
5212; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5213; GFX940-NEXT:    ;;#ASMSTART
5214; GFX940-NEXT:    ; def v[2:3]
5215; GFX940-NEXT:    ;;#ASMEND
5216; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5217; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5218; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5219; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
5220; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5221; GFX940-NEXT:    s_waitcnt vmcnt(0)
5222; GFX940-NEXT:    s_setpc_b64 s[30:31]
5223  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5224  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5225  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
5226  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5227  ret void
5228}
5229
; Shuffle mask <7, 7, poison, 2>: lanes 0-1 replicate element 3 of %vec1
; (index 7), lane 2 is a poison mask entry (any value is acceptable), and
; lane 3 takes element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
5230define void @v_shuffle_v4i16_v4i16__7_7_u_2(ptr addrspace(1) inreg %ptr) {
5231; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_2:
5232; GFX900:       ; %bb.0:
5233; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5234; GFX900-NEXT:    ;;#ASMSTART
5235; GFX900-NEXT:    ; def v[0:1]
5236; GFX900-NEXT:    ;;#ASMEND
5237; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5238; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5239; GFX900-NEXT:    ;;#ASMSTART
5240; GFX900-NEXT:    ; def v[2:3]
5241; GFX900-NEXT:    ;;#ASMEND
5242; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
5243; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
5244; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5245; GFX900-NEXT:    s_waitcnt vmcnt(0)
5246; GFX900-NEXT:    s_setpc_b64 s[30:31]
5247;
5248; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_2:
5249; GFX90A:       ; %bb.0:
5250; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5251; GFX90A-NEXT:    ;;#ASMSTART
5252; GFX90A-NEXT:    ; def v[0:1]
5253; GFX90A-NEXT:    ;;#ASMEND
5254; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5255; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5256; GFX90A-NEXT:    ;;#ASMSTART
5257; GFX90A-NEXT:    ; def v[2:3]
5258; GFX90A-NEXT:    ;;#ASMEND
5259; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5260; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
5261; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5262; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5263; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5264;
5265; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_2:
5266; GFX940:       ; %bb.0:
5267; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5268; GFX940-NEXT:    ;;#ASMSTART
5269; GFX940-NEXT:    ; def v[0:1]
5270; GFX940-NEXT:    ;;#ASMEND
5271; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5272; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5273; GFX940-NEXT:    ;;#ASMSTART
5274; GFX940-NEXT:    ; def v[2:3]
5275; GFX940-NEXT:    ;;#ASMEND
5276; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
5277; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5278; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5279; GFX940-NEXT:    s_waitcnt vmcnt(0)
5280; GFX940-NEXT:    s_setpc_b64 s[30:31]
5281  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5282  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5283  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
5284  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5285  ret void
5286}
5287
; Shuffle mask <7, 7, 0, 2>: lanes 0-1 replicate element 3 of %vec1 (index 7),
; lane 2 takes element 0 of %vec0, and lane 3 takes element 2 of %vec0.
; The shuffled <4 x i16> is stored to the addrspace(1) pointer %ptr.
5288define void @v_shuffle_v4i16_v4i16__7_7_0_2(ptr addrspace(1) inreg %ptr) {
5289; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_2:
5290; GFX900:       ; %bb.0:
5291; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5292; GFX900-NEXT:    ;;#ASMSTART
5293; GFX900-NEXT:    ; def v[0:1]
5294; GFX900-NEXT:    ;;#ASMEND
5295; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5296; GFX900-NEXT:    v_perm_b32 v1, v1, v0, s4
5297; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5298; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5299; GFX900-NEXT:    ;;#ASMSTART
5300; GFX900-NEXT:    ; def v[2:3]
5301; GFX900-NEXT:    ;;#ASMEND
5302; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
5303; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5304; GFX900-NEXT:    s_waitcnt vmcnt(0)
5305; GFX900-NEXT:    s_setpc_b64 s[30:31]
5306;
5307; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_2:
5308; GFX90A:       ; %bb.0:
5309; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5310; GFX90A-NEXT:    ;;#ASMSTART
5311; GFX90A-NEXT:    ; def v[0:1]
5312; GFX90A-NEXT:    ;;#ASMEND
5313; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5314; GFX90A-NEXT:    v_perm_b32 v1, v1, v0, s4
5315; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5316; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5317; GFX90A-NEXT:    ;;#ASMSTART
5318; GFX90A-NEXT:    ; def v[2:3]
5319; GFX90A-NEXT:    ;;#ASMEND
5320; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5321; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5322; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5323; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5324;
5325; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_2:
5326; GFX940:       ; %bb.0:
5327; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5328; GFX940-NEXT:    ;;#ASMSTART
5329; GFX940-NEXT:    ; def v[0:1]
5330; GFX940-NEXT:    ;;#ASMEND
5331; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5332; GFX940-NEXT:    v_perm_b32 v1, v1, v0, s2
5333; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5334; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5335; GFX940-NEXT:    ;;#ASMSTART
5336; GFX940-NEXT:    ; def v[2:3]
5337; GFX940-NEXT:    ;;#ASMEND
5338; GFX940-NEXT:    s_nop 0
5339; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5340; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5341; GFX940-NEXT:    s_waitcnt vmcnt(0)
5342; GFX940-NEXT:    s_setpc_b64 s[30:31]
5343  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5344  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5345  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
5346  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5347  ret void
5348}
5349
; Two-source <4 x i16> shuffle with mask <7, 7, 1, 2>, i.e. the stored vector
; is { %vec1[3], %vec1[3], %vec0[1], %vec0[2] } (mask indices 4..7 address the
; second operand). Both inputs come from inline asm with a "=v" output
; constraint and the result is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5350define void @v_shuffle_v4i16_v4i16__7_7_1_2(ptr addrspace(1) inreg %ptr) {
5351; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_2:
5352; GFX900:       ; %bb.0:
5353; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5354; GFX900-NEXT:    ;;#ASMSTART
5355; GFX900-NEXT:    ; def v[2:3]
5356; GFX900-NEXT:    ;;#ASMEND
5357; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5358; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5359; GFX900-NEXT:    ;;#ASMSTART
5360; GFX900-NEXT:    ; def v[0:1]
5361; GFX900-NEXT:    ;;#ASMEND
5362; GFX900-NEXT:    v_perm_b32 v2, v3, v3, s4
5363; GFX900-NEXT:    v_alignbit_b32 v3, v1, v0, 16
5364; GFX900-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5365; GFX900-NEXT:    s_waitcnt vmcnt(0)
5366; GFX900-NEXT:    s_setpc_b64 s[30:31]
5367;
5368; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_2:
5369; GFX90A:       ; %bb.0:
5370; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5371; GFX90A-NEXT:    ;;#ASMSTART
5372; GFX90A-NEXT:    ; def v[2:3]
5373; GFX90A-NEXT:    ;;#ASMEND
5374; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5375; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5376; GFX90A-NEXT:    ;;#ASMSTART
5377; GFX90A-NEXT:    ; def v[0:1]
5378; GFX90A-NEXT:    ;;#ASMEND
5379; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
5380; GFX90A-NEXT:    v_alignbit_b32 v3, v1, v0, 16
5381; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5382; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5383; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5384;
5385; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_2:
5386; GFX940:       ; %bb.0:
5387; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5388; GFX940-NEXT:    ;;#ASMSTART
5389; GFX940-NEXT:    ; def v[2:3]
5390; GFX940-NEXT:    ;;#ASMEND
5391; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5392; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5393; GFX940-NEXT:    ;;#ASMSTART
5394; GFX940-NEXT:    ; def v[0:1]
5395; GFX940-NEXT:    ;;#ASMEND
5396; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
5397; GFX940-NEXT:    v_alignbit_b32 v3, v1, v0, 16
5398; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
5399; GFX940-NEXT:    s_waitcnt vmcnt(0)
5400; GFX940-NEXT:    s_setpc_b64 s[30:31]
5401  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5402  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5403  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
5404  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5405  ret void
5406}
5407
; Two-source <4 x i16> shuffle with mask <7, 7, 3, 2>, i.e. the stored vector
; is { %vec1[3], %vec1[3], %vec0[3], %vec0[2] } (mask indices 4..7 address the
; second operand). Both inputs come from inline asm with a "=v" output
; constraint and the result is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5408define void @v_shuffle_v4i16_v4i16__7_7_3_2(ptr addrspace(1) inreg %ptr) {
5409; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_2:
5410; GFX900:       ; %bb.0:
5411; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5412; GFX900-NEXT:    ;;#ASMSTART
5413; GFX900-NEXT:    ; def v[0:1]
5414; GFX900-NEXT:    ;;#ASMEND
5415; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5416; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5417; GFX900-NEXT:    ;;#ASMSTART
5418; GFX900-NEXT:    ; def v[2:3]
5419; GFX900-NEXT:    ;;#ASMEND
5420; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
5421; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
5422; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5423; GFX900-NEXT:    s_waitcnt vmcnt(0)
5424; GFX900-NEXT:    s_setpc_b64 s[30:31]
5425;
5426; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_2:
5427; GFX90A:       ; %bb.0:
5428; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5429; GFX90A-NEXT:    ;;#ASMSTART
5430; GFX90A-NEXT:    ; def v[0:1]
5431; GFX90A-NEXT:    ;;#ASMEND
5432; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5433; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5434; GFX90A-NEXT:    ;;#ASMSTART
5435; GFX90A-NEXT:    ; def v[2:3]
5436; GFX90A-NEXT:    ;;#ASMEND
5437; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5438; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v1, 16
5439; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5440; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5441; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5442;
5443; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_2:
5444; GFX940:       ; %bb.0:
5445; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5446; GFX940-NEXT:    ;;#ASMSTART
5447; GFX940-NEXT:    ; def v[0:1]
5448; GFX940-NEXT:    ;;#ASMEND
5449; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5450; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5451; GFX940-NEXT:    ;;#ASMSTART
5452; GFX940-NEXT:    ; def v[2:3]
5453; GFX940-NEXT:    ;;#ASMEND
5454; GFX940-NEXT:    v_alignbit_b32 v1, v1, v1, 16
5455; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5456; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5457; GFX940-NEXT:    s_waitcnt vmcnt(0)
5458; GFX940-NEXT:    s_setpc_b64 s[30:31]
5459  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5460  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5461  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
5462  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5463  ret void
5464}
5465
; Two-source <4 x i16> shuffle with mask <7, 7, 4, 2>, i.e. the stored vector
; is { %vec1[3], %vec1[3], %vec1[0], %vec0[2] } (mask indices 4..7 address the
; second operand). Both inputs come from inline asm with a "=v" output
; constraint and the result is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5466define void @v_shuffle_v4i16_v4i16__7_7_4_2(ptr addrspace(1) inreg %ptr) {
5467; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_2:
5468; GFX900:       ; %bb.0:
5469; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5470; GFX900-NEXT:    ;;#ASMSTART
5471; GFX900-NEXT:    ; def v[0:1]
5472; GFX900-NEXT:    ;;#ASMEND
5473; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5474; GFX900-NEXT:    ;;#ASMSTART
5475; GFX900-NEXT:    ; def v[2:3]
5476; GFX900-NEXT:    ;;#ASMEND
5477; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
5478; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5479; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5480; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
5481; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5482; GFX900-NEXT:    s_waitcnt vmcnt(0)
5483; GFX900-NEXT:    s_setpc_b64 s[30:31]
5484;
5485; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_2:
5486; GFX90A:       ; %bb.0:
5487; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5488; GFX90A-NEXT:    ;;#ASMSTART
5489; GFX90A-NEXT:    ; def v[0:1]
5490; GFX90A-NEXT:    ;;#ASMEND
5491; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5492; GFX90A-NEXT:    ;;#ASMSTART
5493; GFX90A-NEXT:    ; def v[2:3]
5494; GFX90A-NEXT:    ;;#ASMEND
5495; GFX90A-NEXT:    v_perm_b32 v1, v1, v2, s4
5496; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5497; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5498; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5499; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5500; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5501; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5502;
5503; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_2:
5504; GFX940:       ; %bb.0:
5505; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5506; GFX940-NEXT:    ;;#ASMSTART
5507; GFX940-NEXT:    ; def v[0:1]
5508; GFX940-NEXT:    ;;#ASMEND
5509; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5510; GFX940-NEXT:    ;;#ASMSTART
5511; GFX940-NEXT:    ; def v[2:3]
5512; GFX940-NEXT:    ;;#ASMEND
5513; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5514; GFX940-NEXT:    v_perm_b32 v1, v1, v2, s2
5515; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5516; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5517; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5518; GFX940-NEXT:    s_waitcnt vmcnt(0)
5519; GFX940-NEXT:    s_setpc_b64 s[30:31]
5520  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5521  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5522  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
5523  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5524  ret void
5525}
5526
; Two-source <4 x i16> shuffle with mask <7, 7, 5, 2>, i.e. the stored vector
; is { %vec1[3], %vec1[3], %vec1[1], %vec0[2] } (mask indices 4..7 address the
; second operand). Both inputs come from inline asm with a "=v" output
; constraint and the result is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5527define void @v_shuffle_v4i16_v4i16__7_7_5_2(ptr addrspace(1) inreg %ptr) {
5528; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_2:
5529; GFX900:       ; %bb.0:
5530; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5531; GFX900-NEXT:    ;;#ASMSTART
5532; GFX900-NEXT:    ; def v[0:1]
5533; GFX900-NEXT:    ;;#ASMEND
5534; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5535; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5536; GFX900-NEXT:    ;;#ASMSTART
5537; GFX900-NEXT:    ; def v[2:3]
5538; GFX900-NEXT:    ;;#ASMEND
5539; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
5540; GFX900-NEXT:    v_alignbit_b32 v1, v1, v2, 16
5541; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5542; GFX900-NEXT:    s_waitcnt vmcnt(0)
5543; GFX900-NEXT:    s_setpc_b64 s[30:31]
5544;
5545; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_2:
5546; GFX90A:       ; %bb.0:
5547; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5548; GFX90A-NEXT:    ;;#ASMSTART
5549; GFX90A-NEXT:    ; def v[0:1]
5550; GFX90A-NEXT:    ;;#ASMEND
5551; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5552; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5553; GFX90A-NEXT:    ;;#ASMSTART
5554; GFX90A-NEXT:    ; def v[2:3]
5555; GFX90A-NEXT:    ;;#ASMEND
5556; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5557; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v2, 16
5558; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5559; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5560; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5561;
5562; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_2:
5563; GFX940:       ; %bb.0:
5564; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5565; GFX940-NEXT:    ;;#ASMSTART
5566; GFX940-NEXT:    ; def v[0:1]
5567; GFX940-NEXT:    ;;#ASMEND
5568; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5569; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5570; GFX940-NEXT:    ;;#ASMSTART
5571; GFX940-NEXT:    ; def v[2:3]
5572; GFX940-NEXT:    ;;#ASMEND
5573; GFX940-NEXT:    s_nop 0
5574; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5575; GFX940-NEXT:    v_alignbit_b32 v1, v1, v2, 16
5576; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5577; GFX940-NEXT:    s_waitcnt vmcnt(0)
5578; GFX940-NEXT:    s_setpc_b64 s[30:31]
5579  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5580  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5581  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
5582  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5583  ret void
5584}
5585
; Two-source <4 x i16> shuffle with mask <7, 7, 6, 2>, i.e. the stored vector
; is { %vec1[3], %vec1[3], %vec1[2], %vec0[2] } (mask indices 4..7 address the
; second operand). Both inputs come from inline asm with a "=v" output
; constraint and the result is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5586define void @v_shuffle_v4i16_v4i16__7_7_6_2(ptr addrspace(1) inreg %ptr) {
5587; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_2:
5588; GFX900:       ; %bb.0:
5589; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5590; GFX900-NEXT:    ;;#ASMSTART
5591; GFX900-NEXT:    ; def v[0:1]
5592; GFX900-NEXT:    ;;#ASMEND
5593; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
5594; GFX900-NEXT:    ;;#ASMSTART
5595; GFX900-NEXT:    ; def v[2:3]
5596; GFX900-NEXT:    ;;#ASMEND
5597; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
5598; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5599; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5600; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
5601; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5602; GFX900-NEXT:    s_waitcnt vmcnt(0)
5603; GFX900-NEXT:    s_setpc_b64 s[30:31]
5604;
5605; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_2:
5606; GFX90A:       ; %bb.0:
5607; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5608; GFX90A-NEXT:    ;;#ASMSTART
5609; GFX90A-NEXT:    ; def v[0:1]
5610; GFX90A-NEXT:    ;;#ASMEND
5611; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
5612; GFX90A-NEXT:    ;;#ASMSTART
5613; GFX90A-NEXT:    ; def v[2:3]
5614; GFX90A-NEXT:    ;;#ASMEND
5615; GFX90A-NEXT:    v_perm_b32 v1, v1, v3, s4
5616; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5617; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5618; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
5619; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5620; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5621; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5622;
5623; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_2:
5624; GFX940:       ; %bb.0:
5625; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5626; GFX940-NEXT:    ;;#ASMSTART
5627; GFX940-NEXT:    ; def v[0:1]
5628; GFX940-NEXT:    ;;#ASMEND
5629; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
5630; GFX940-NEXT:    ;;#ASMSTART
5631; GFX940-NEXT:    ; def v[2:3]
5632; GFX940-NEXT:    ;;#ASMEND
5633; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5634; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s2
5635; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5636; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
5637; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5638; GFX940-NEXT:    s_waitcnt vmcnt(0)
5639; GFX940-NEXT:    s_setpc_b64 s[30:31]
5640  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5641  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5642  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
5643  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5644  ret void
5645}
5646
; Single-source <4 x i16> shuffle (second operand is poison) with mask
; <poison, 3, 3, 3>: lane 0 is unconstrained and lanes 1..3 all take %vec0[3].
; The input comes from inline asm with a "=v" output constraint and the result
; is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5647define void @v_shuffle_v4i16_v4i16__u_3_3_3(ptr addrspace(1) inreg %ptr) {
5648; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_3_3_3:
5649; GFX900:       ; %bb.0:
5650; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5651; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5652; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5653; GFX900-NEXT:    ;;#ASMSTART
5654; GFX900-NEXT:    ; def v[0:1]
5655; GFX900-NEXT:    ;;#ASMEND
5656; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
5657; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
5658; GFX900-NEXT:    s_waitcnt vmcnt(0)
5659; GFX900-NEXT:    s_setpc_b64 s[30:31]
5660;
5661; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_3_3_3:
5662; GFX90A:       ; %bb.0:
5663; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5664; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5665; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5666; GFX90A-NEXT:    ;;#ASMSTART
5667; GFX90A-NEXT:    ; def v[0:1]
5668; GFX90A-NEXT:    ;;#ASMEND
5669; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
5670; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5671; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5672; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5673; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5674;
5675; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_3_3_3:
5676; GFX940:       ; %bb.0:
5677; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5678; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5679; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5680; GFX940-NEXT:    ;;#ASMSTART
5681; GFX940-NEXT:    ; def v[0:1]
5682; GFX940-NEXT:    ;;#ASMEND
5683; GFX940-NEXT:    s_nop 0
5684; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
5685; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5686; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
5687; GFX940-NEXT:    s_waitcnt vmcnt(0)
5688; GFX940-NEXT:    s_setpc_b64 s[30:31]
5689  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5690  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
5691  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5692  ret void
5693}
5694
; Single-source <4 x i16> shuffle (second operand is poison) with mask
; <0, 3, 3, 3>, i.e. the stored vector is
; { %vec0[0], %vec0[3], %vec0[3], %vec0[3] }. The input comes from inline asm
; with a "=v" output constraint and the result is stored to %ptr with 8-byte
; alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5695define void @v_shuffle_v4i16_v4i16__0_3_3_3(ptr addrspace(1) inreg %ptr) {
5696; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_3_3_3:
5697; GFX900:       ; %bb.0:
5698; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5699; GFX900-NEXT:    ;;#ASMSTART
5700; GFX900-NEXT:    ; def v[0:1]
5701; GFX900-NEXT:    ;;#ASMEND
5702; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5703; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
5704; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5705; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5706; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5707; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5708; GFX900-NEXT:    s_waitcnt vmcnt(0)
5709; GFX900-NEXT:    s_setpc_b64 s[30:31]
5710;
5711; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_3_3_3:
5712; GFX90A:       ; %bb.0:
5713; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5714; GFX90A-NEXT:    ;;#ASMSTART
5715; GFX90A-NEXT:    ; def v[0:1]
5716; GFX90A-NEXT:    ;;#ASMEND
5717; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5718; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
5719; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5720; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5721; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
5722; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5723; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5724; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5725;
5726; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_3_3_3:
5727; GFX940:       ; %bb.0:
5728; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5729; GFX940-NEXT:    ;;#ASMSTART
5730; GFX940-NEXT:    ; def v[0:1]
5731; GFX940-NEXT:    ;;#ASMEND
5732; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5733; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
5734; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5735; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5736; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
5737; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
5738; GFX940-NEXT:    s_waitcnt vmcnt(0)
5739; GFX940-NEXT:    s_setpc_b64 s[30:31]
5740  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5741  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
5742  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5743  ret void
5744}
5745
; Single-source <4 x i16> shuffle (second operand is poison) with mask
; <1, 3, 3, 3>, i.e. the stored vector is
; { %vec0[1], %vec0[3], %vec0[3], %vec0[3] }. The input comes from inline asm
; with a "=v" output constraint and the result is stored to %ptr with 8-byte
; alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5746define void @v_shuffle_v4i16_v4i16__1_3_3_3(ptr addrspace(1) inreg %ptr) {
5747; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_3_3_3:
5748; GFX900:       ; %bb.0:
5749; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5750; GFX900-NEXT:    ;;#ASMSTART
5751; GFX900-NEXT:    ; def v[0:1]
5752; GFX900-NEXT:    ;;#ASMEND
5753; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5754; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5755; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
5756; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5757; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5758; GFX900-NEXT:    s_waitcnt vmcnt(0)
5759; GFX900-NEXT:    s_setpc_b64 s[30:31]
5760;
5761; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_3_3_3:
5762; GFX90A:       ; %bb.0:
5763; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5764; GFX90A-NEXT:    ;;#ASMSTART
5765; GFX90A-NEXT:    ; def v[0:1]
5766; GFX90A-NEXT:    ;;#ASMEND
5767; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5768; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5769; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
5770; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
5771; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5772; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5773; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5774;
5775; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_3_3_3:
5776; GFX940:       ; %bb.0:
5777; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5778; GFX940-NEXT:    ;;#ASMSTART
5779; GFX940-NEXT:    ; def v[0:1]
5780; GFX940-NEXT:    ;;#ASMEND
5781; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5782; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5783; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
5784; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
5785; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
5786; GFX940-NEXT:    s_waitcnt vmcnt(0)
5787; GFX940-NEXT:    s_setpc_b64 s[30:31]
5788  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5789  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
5790  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5791  ret void
5792}
5793
; Single-source <4 x i16> shuffle (second operand is poison) with mask
; <2, 3, 3, 3>, i.e. the stored vector is
; { %vec0[2], %vec0[3], %vec0[3], %vec0[3] }. The input comes from inline asm
; with a "=v" output constraint and the result is stored to %ptr with 8-byte
; alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5794define void @v_shuffle_v4i16_v4i16__2_3_3_3(ptr addrspace(1) inreg %ptr) {
5795; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_3_3_3:
5796; GFX900:       ; %bb.0:
5797; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5798; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5799; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5800; GFX900-NEXT:    ;;#ASMSTART
5801; GFX900-NEXT:    ; def v[0:1]
5802; GFX900-NEXT:    ;;#ASMEND
5803; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
5804; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
5805; GFX900-NEXT:    s_waitcnt vmcnt(0)
5806; GFX900-NEXT:    s_setpc_b64 s[30:31]
5807;
5808; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_3_3_3:
5809; GFX90A:       ; %bb.0:
5810; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5811; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5812; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5813; GFX90A-NEXT:    ;;#ASMSTART
5814; GFX90A-NEXT:    ; def v[0:1]
5815; GFX90A-NEXT:    ;;#ASMEND
5816; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
5817; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5818; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5819; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5820; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5821;
5822; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_3_3_3:
5823; GFX940:       ; %bb.0:
5824; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5825; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5826; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5827; GFX940-NEXT:    ;;#ASMSTART
5828; GFX940-NEXT:    ; def v[0:1]
5829; GFX940-NEXT:    ;;#ASMEND
5830; GFX940-NEXT:    s_nop 0
5831; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
5832; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5833; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
5834; GFX940-NEXT:    s_waitcnt vmcnt(0)
5835; GFX940-NEXT:    s_setpc_b64 s[30:31]
5836  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5837  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
5838  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5839  ret void
5840}
5841
; Single-source <4 x i16> shuffle (second operand is poison) with mask
; <3, 3, 3, 3>: every lane of the stored vector is %vec0[3] (a splat of the
; last element). The input comes from inline asm with a "=v" output constraint
; and the result is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5842define void @v_shuffle_v4i16_v4i16__3_3_3_3(ptr addrspace(1) inreg %ptr) {
5843; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_3_3_3:
5844; GFX900:       ; %bb.0:
5845; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5846; GFX900-NEXT:    ;;#ASMSTART
5847; GFX900-NEXT:    ; def v[0:1]
5848; GFX900-NEXT:    ;;#ASMEND
5849; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5850; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
5851; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5852; GFX900-NEXT:    v_mov_b32_e32 v1, v0
5853; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5854; GFX900-NEXT:    s_waitcnt vmcnt(0)
5855; GFX900-NEXT:    s_setpc_b64 s[30:31]
5856;
5857; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_3_3_3:
5858; GFX90A:       ; %bb.0:
5859; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5860; GFX90A-NEXT:    ;;#ASMSTART
5861; GFX90A-NEXT:    ; def v[0:1]
5862; GFX90A-NEXT:    ;;#ASMEND
5863; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5864; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
5865; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5866; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
5867; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
5868; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5869; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5870;
5871; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_3_3_3:
5872; GFX940:       ; %bb.0:
5873; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5874; GFX940-NEXT:    ;;#ASMSTART
5875; GFX940-NEXT:    ; def v[0:1]
5876; GFX940-NEXT:    ;;#ASMEND
5877; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5878; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
5879; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5880; GFX940-NEXT:    v_mov_b32_e32 v1, v0
5881; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
5882; GFX940-NEXT:    s_waitcnt vmcnt(0)
5883; GFX940-NEXT:    s_setpc_b64 s[30:31]
5884  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5885  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
5886  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5887  ret void
5888}
5889
; <4 x i16> shuffle with mask <4, 3, 3, 3> where the second operand is poison:
; mask index 4 addresses element 0 of that poison operand, so lane 0 carries no
; defined value, while lanes 1..3 all take %vec0[3]. The input comes from
; inline asm with a "=v" output constraint and the result is stored to %ptr
; with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5890define void @v_shuffle_v4i16_v4i16__4_3_3_3(ptr addrspace(1) inreg %ptr) {
5891; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_3_3_3:
5892; GFX900:       ; %bb.0:
5893; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5894; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5895; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5896; GFX900-NEXT:    ;;#ASMSTART
5897; GFX900-NEXT:    ; def v[0:1]
5898; GFX900-NEXT:    ;;#ASMEND
5899; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
5900; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
5901; GFX900-NEXT:    s_waitcnt vmcnt(0)
5902; GFX900-NEXT:    s_setpc_b64 s[30:31]
5903;
5904; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_3_3_3:
5905; GFX90A:       ; %bb.0:
5906; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5907; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5908; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5909; GFX90A-NEXT:    ;;#ASMSTART
5910; GFX90A-NEXT:    ; def v[0:1]
5911; GFX90A-NEXT:    ;;#ASMEND
5912; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
5913; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
5914; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
5915; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5916; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5917;
5918; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_3_3_3:
5919; GFX940:       ; %bb.0:
5920; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5921; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5922; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5923; GFX940-NEXT:    ;;#ASMSTART
5924; GFX940-NEXT:    ; def v[0:1]
5925; GFX940-NEXT:    ;;#ASMEND
5926; GFX940-NEXT:    s_nop 0
5927; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
5928; GFX940-NEXT:    v_mov_b32_e32 v2, v1
5929; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
5930; GFX940-NEXT:    s_waitcnt vmcnt(0)
5931; GFX940-NEXT:    s_setpc_b64 s[30:31]
5932  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5933  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
5934  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5935  ret void
5936}
5937
; Two-source <4 x i16> shuffle with mask <5, 3, 3, 3>, i.e. the stored vector
; is { %vec1[1], %vec0[3], %vec0[3], %vec0[3] } (mask indices 4..7 address the
; second operand). Both inputs come from inline asm with a "=v" output
; constraint and the result is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5938define void @v_shuffle_v4i16_v4i16__5_3_3_3(ptr addrspace(1) inreg %ptr) {
5939; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_3_3_3:
5940; GFX900:       ; %bb.0:
5941; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5942; GFX900-NEXT:    ;;#ASMSTART
5943; GFX900-NEXT:    ; def v[0:1]
5944; GFX900-NEXT:    ;;#ASMEND
5945; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5946; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5947; GFX900-NEXT:    ;;#ASMSTART
5948; GFX900-NEXT:    ; def v[2:3]
5949; GFX900-NEXT:    ;;#ASMEND
5950; GFX900-NEXT:    v_perm_b32 v0, v1, v2, s4
5951; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
5952; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5953; GFX900-NEXT:    s_waitcnt vmcnt(0)
5954; GFX900-NEXT:    s_setpc_b64 s[30:31]
5955;
5956; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_3_3_3:
5957; GFX90A:       ; %bb.0:
5958; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5959; GFX90A-NEXT:    ;;#ASMSTART
5960; GFX90A-NEXT:    ; def v[0:1]
5961; GFX90A-NEXT:    ;;#ASMEND
5962; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5963; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5964; GFX90A-NEXT:    ;;#ASMSTART
5965; GFX90A-NEXT:    ; def v[2:3]
5966; GFX90A-NEXT:    ;;#ASMEND
5967; GFX90A-NEXT:    v_perm_b32 v0, v1, v2, s4
5968; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
5969; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
5970; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5971; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5972;
5973; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_3_3_3:
5974; GFX940:       ; %bb.0:
5975; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5976; GFX940-NEXT:    ;;#ASMSTART
5977; GFX940-NEXT:    ; def v[0:1]
5978; GFX940-NEXT:    ;;#ASMEND
5979; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5980; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5981; GFX940-NEXT:    ;;#ASMSTART
5982; GFX940-NEXT:    ; def v[2:3]
5983; GFX940-NEXT:    ;;#ASMEND
5984; GFX940-NEXT:    s_nop 0
5985; GFX940-NEXT:    v_perm_b32 v0, v1, v2, s2
5986; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
5987; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
5988; GFX940-NEXT:    s_waitcnt vmcnt(0)
5989; GFX940-NEXT:    s_setpc_b64 s[30:31]
5990  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5991  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5992  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
5993  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5994  ret void
5995}
5996
; Two-source <4 x i16> shuffle with mask <6, 3, 3, 3>, i.e. the stored vector
; is { %vec1[2], %vec0[3], %vec0[3], %vec0[3] } (mask indices 4..7 address the
; second operand). Both inputs come from inline asm with a "=v" output
; constraint and the result is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
5997define void @v_shuffle_v4i16_v4i16__6_3_3_3(ptr addrspace(1) inreg %ptr) {
5998; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_3_3_3:
5999; GFX900:       ; %bb.0:
6000; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6001; GFX900-NEXT:    ;;#ASMSTART
6002; GFX900-NEXT:    ; def v[0:1]
6003; GFX900-NEXT:    ;;#ASMEND
6004; GFX900-NEXT:    s_mov_b32 s4, 0xffff
6005; GFX900-NEXT:    ;;#ASMSTART
6006; GFX900-NEXT:    ; def v[2:3]
6007; GFX900-NEXT:    ;;#ASMEND
6008; GFX900-NEXT:    v_bfi_b32 v0, s4, v3, v1
6009; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6010; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6011; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6012; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6013; GFX900-NEXT:    s_waitcnt vmcnt(0)
6014; GFX900-NEXT:    s_setpc_b64 s[30:31]
6015;
6016; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_3_3_3:
6017; GFX90A:       ; %bb.0:
6018; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6019; GFX90A-NEXT:    ;;#ASMSTART
6020; GFX90A-NEXT:    ; def v[0:1]
6021; GFX90A-NEXT:    ;;#ASMEND
6022; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
6023; GFX90A-NEXT:    ;;#ASMSTART
6024; GFX90A-NEXT:    ; def v[2:3]
6025; GFX90A-NEXT:    ;;#ASMEND
6026; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v1
6027; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6028; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6029; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6030; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6031; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6032; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6033;
6034; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_3_3_3:
6035; GFX940:       ; %bb.0:
6036; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6037; GFX940-NEXT:    ;;#ASMSTART
6038; GFX940-NEXT:    ; def v[0:1]
6039; GFX940-NEXT:    ;;#ASMEND
6040; GFX940-NEXT:    s_mov_b32 s2, 0xffff
6041; GFX940-NEXT:    ;;#ASMSTART
6042; GFX940-NEXT:    ; def v[2:3]
6043; GFX940-NEXT:    ;;#ASMEND
6044; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6045; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v1
6046; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6047; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6048; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6049; GFX940-NEXT:    s_waitcnt vmcnt(0)
6050; GFX940-NEXT:    s_setpc_b64 s[30:31]
6051  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6052  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6053  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
6054  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6055  ret void
6056}
6057
; Two-source <4 x i16> shuffle with mask <7, 3, 3, 3>, i.e. the stored vector
; is { %vec1[3], %vec0[3], %vec0[3], %vec0[3] } (mask indices 4..7 address the
; second operand). Both inputs come from inline asm with a "=v" output
; constraint and the result is stored to %ptr with 8-byte alignment.
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
6058define void @v_shuffle_v4i16_v4i16__7_3_3_3(ptr addrspace(1) inreg %ptr) {
6059; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_3_3:
6060; GFX900:       ; %bb.0:
6061; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6062; GFX900-NEXT:    ;;#ASMSTART
6063; GFX900-NEXT:    ; def v[0:1]
6064; GFX900-NEXT:    ;;#ASMEND
6065; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6066; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6067; GFX900-NEXT:    ;;#ASMSTART
6068; GFX900-NEXT:    ; def v[2:3]
6069; GFX900-NEXT:    ;;#ASMEND
6070; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
6071; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6072; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6073; GFX900-NEXT:    s_waitcnt vmcnt(0)
6074; GFX900-NEXT:    s_setpc_b64 s[30:31]
6075;
6076; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_3_3:
6077; GFX90A:       ; %bb.0:
6078; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6079; GFX90A-NEXT:    ;;#ASMSTART
6080; GFX90A-NEXT:    ; def v[0:1]
6081; GFX90A-NEXT:    ;;#ASMEND
6082; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6083; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6084; GFX90A-NEXT:    ;;#ASMSTART
6085; GFX90A-NEXT:    ; def v[2:3]
6086; GFX90A-NEXT:    ;;#ASMEND
6087; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
6088; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6089; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6090; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6091; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6092;
6093; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_3_3:
6094; GFX940:       ; %bb.0:
6095; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6096; GFX940-NEXT:    ;;#ASMSTART
6097; GFX940-NEXT:    ; def v[0:1]
6098; GFX940-NEXT:    ;;#ASMEND
6099; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6100; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6101; GFX940-NEXT:    ;;#ASMSTART
6102; GFX940-NEXT:    ; def v[2:3]
6103; GFX940-NEXT:    ;;#ASMEND
6104; GFX940-NEXT:    s_nop 0
6105; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
6106; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6107; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6108; GFX940-NEXT:    s_waitcnt vmcnt(0)
6109; GFX940-NEXT:    s_setpc_b64 s[30:31]
6110  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6111  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6112  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
6113  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6114  ret void
6115}
6116
; Two-input <4 x i16> shuffle with mask <7, poison, 3, 3>. Mask values 0-3
; index %vec0 and 4-7 index %vec1, so lane 0 is %vec1[3], lane 1 is left
; unconstrained by the poison mask element, and lanes 2-3 are %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6117define void @v_shuffle_v4i16_v4i16__7_u_3_3(ptr addrspace(1) inreg %ptr) {
6118; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_3_3:
6119; GFX900:       ; %bb.0:
6120; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6121; GFX900-NEXT:    ;;#ASMSTART
6122; GFX900-NEXT:    ; def v[0:1]
6123; GFX900-NEXT:    ;;#ASMEND
6124; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6125; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6126; GFX900-NEXT:    ;;#ASMSTART
6127; GFX900-NEXT:    ; def v[2:3]
6128; GFX900-NEXT:    ;;#ASMEND
6129; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6130; GFX900-NEXT:    v_alignbit_b32 v0, s4, v3, 16
6131; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6132; GFX900-NEXT:    s_waitcnt vmcnt(0)
6133; GFX900-NEXT:    s_setpc_b64 s[30:31]
6134;
6135; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_3_3:
6136; GFX90A:       ; %bb.0:
6137; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6138; GFX90A-NEXT:    ;;#ASMSTART
6139; GFX90A-NEXT:    ; def v[0:1]
6140; GFX90A-NEXT:    ;;#ASMEND
6141; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6142; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6143; GFX90A-NEXT:    ;;#ASMSTART
6144; GFX90A-NEXT:    ; def v[2:3]
6145; GFX90A-NEXT:    ;;#ASMEND
6146; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6147; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
6148; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6149; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6150; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6151;
6152; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_3_3:
6153; GFX940:       ; %bb.0:
6154; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6155; GFX940-NEXT:    ;;#ASMSTART
6156; GFX940-NEXT:    ; def v[0:1]
6157; GFX940-NEXT:    ;;#ASMEND
6158; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6159; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6160; GFX940-NEXT:    ;;#ASMSTART
6161; GFX940-NEXT:    ; def v[2:3]
6162; GFX940-NEXT:    ;;#ASMEND
6163; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6164; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
6165; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6166; GFX940-NEXT:    s_waitcnt vmcnt(0)
6167; GFX940-NEXT:    s_setpc_b64 s[30:31]
6168  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6169  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6170  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
6171  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6172  ret void
6173}
6174
; Two-input <4 x i16> shuffle with mask <7, 0, 3, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lane 0 is %vec1[3], lane 1 is %vec0[0], and
; lanes 2-3 are %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6175define void @v_shuffle_v4i16_v4i16__7_0_3_3(ptr addrspace(1) inreg %ptr) {
6176; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_3_3:
6177; GFX900:       ; %bb.0:
6178; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6179; GFX900-NEXT:    ;;#ASMSTART
6180; GFX900-NEXT:    ; def v[0:1]
6181; GFX900-NEXT:    ;;#ASMEND
6182; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6183; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6184; GFX900-NEXT:    ;;#ASMSTART
6185; GFX900-NEXT:    ; def v[2:3]
6186; GFX900-NEXT:    ;;#ASMEND
6187; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6188; GFX900-NEXT:    v_alignbit_b32 v0, v0, v3, 16
6189; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6190; GFX900-NEXT:    s_waitcnt vmcnt(0)
6191; GFX900-NEXT:    s_setpc_b64 s[30:31]
6192;
6193; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_3_3:
6194; GFX90A:       ; %bb.0:
6195; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6196; GFX90A-NEXT:    ;;#ASMSTART
6197; GFX90A-NEXT:    ; def v[0:1]
6198; GFX90A-NEXT:    ;;#ASMEND
6199; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6200; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6201; GFX90A-NEXT:    ;;#ASMSTART
6202; GFX90A-NEXT:    ; def v[2:3]
6203; GFX90A-NEXT:    ;;#ASMEND
6204; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6205; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
6206; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6207; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6208; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6209;
6210; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_3_3:
6211; GFX940:       ; %bb.0:
6212; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6213; GFX940-NEXT:    ;;#ASMSTART
6214; GFX940-NEXT:    ; def v[0:1]
6215; GFX940-NEXT:    ;;#ASMEND
6216; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6217; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6218; GFX940-NEXT:    ;;#ASMSTART
6219; GFX940-NEXT:    ; def v[2:3]
6220; GFX940-NEXT:    ;;#ASMEND
6221; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6222; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
6223; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6224; GFX940-NEXT:    s_waitcnt vmcnt(0)
6225; GFX940-NEXT:    s_setpc_b64 s[30:31]
6226  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6227  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6228  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
6229  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6230  ret void
6231}
6232
; Two-input <4 x i16> shuffle with mask <7, 1, 3, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lane 0 is %vec1[3], lane 1 is %vec0[1], and
; lanes 2-3 are %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6233define void @v_shuffle_v4i16_v4i16__7_1_3_3(ptr addrspace(1) inreg %ptr) {
6234; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_3_3:
6235; GFX900:       ; %bb.0:
6236; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6237; GFX900-NEXT:    ;;#ASMSTART
6238; GFX900-NEXT:    ; def v[0:1]
6239; GFX900-NEXT:    ;;#ASMEND
6240; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6241; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6242; GFX900-NEXT:    ;;#ASMSTART
6243; GFX900-NEXT:    ; def v[2:3]
6244; GFX900-NEXT:    ;;#ASMEND
6245; GFX900-NEXT:    v_perm_b32 v0, v0, v3, s4
6246; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6247; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6248; GFX900-NEXT:    s_waitcnt vmcnt(0)
6249; GFX900-NEXT:    s_setpc_b64 s[30:31]
6250;
6251; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_3_3:
6252; GFX90A:       ; %bb.0:
6253; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6254; GFX90A-NEXT:    ;;#ASMSTART
6255; GFX90A-NEXT:    ; def v[0:1]
6256; GFX90A-NEXT:    ;;#ASMEND
6257; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6258; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6259; GFX90A-NEXT:    ;;#ASMSTART
6260; GFX90A-NEXT:    ; def v[2:3]
6261; GFX90A-NEXT:    ;;#ASMEND
6262; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
6263; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6264; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6265; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6266; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6267;
6268; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_3_3:
6269; GFX940:       ; %bb.0:
6270; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6271; GFX940-NEXT:    ;;#ASMSTART
6272; GFX940-NEXT:    ; def v[0:1]
6273; GFX940-NEXT:    ;;#ASMEND
6274; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6275; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6276; GFX940-NEXT:    ;;#ASMSTART
6277; GFX940-NEXT:    ; def v[2:3]
6278; GFX940-NEXT:    ;;#ASMEND
6279; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6280; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
6281; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6282; GFX940-NEXT:    s_waitcnt vmcnt(0)
6283; GFX940-NEXT:    s_setpc_b64 s[30:31]
6284  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6285  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6286  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
6287  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6288  ret void
6289}
6290
; Two-input <4 x i16> shuffle with mask <7, 2, 3, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lane 0 is %vec1[3], lane 1 is %vec0[2], and
; lanes 2-3 are %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6291define void @v_shuffle_v4i16_v4i16__7_2_3_3(ptr addrspace(1) inreg %ptr) {
6292; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_3_3:
6293; GFX900:       ; %bb.0:
6294; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6295; GFX900-NEXT:    ;;#ASMSTART
6296; GFX900-NEXT:    ; def v[0:1]
6297; GFX900-NEXT:    ;;#ASMEND
6298; GFX900-NEXT:    ;;#ASMSTART
6299; GFX900-NEXT:    ; def v[2:3]
6300; GFX900-NEXT:    ;;#ASMEND
6301; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6302; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6303; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
6304; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
6305; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
6306; GFX900-NEXT:    s_waitcnt vmcnt(0)
6307; GFX900-NEXT:    s_setpc_b64 s[30:31]
6308;
6309; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_3_3:
6310; GFX90A:       ; %bb.0:
6311; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6312; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6313; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
6314; GFX90A-NEXT:    ;;#ASMSTART
6315; GFX90A-NEXT:    ; def v[0:1]
6316; GFX90A-NEXT:    ;;#ASMEND
6317; GFX90A-NEXT:    ;;#ASMSTART
6318; GFX90A-NEXT:    ; def v[2:3]
6319; GFX90A-NEXT:    ;;#ASMEND
6320; GFX90A-NEXT:    v_perm_b32 v5, v1, v1, s4
6321; GFX90A-NEXT:    v_alignbit_b32 v4, v1, v3, 16
6322; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17]
6323; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6324; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6325;
6326; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_3_3:
6327; GFX940:       ; %bb.0:
6328; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6329; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6330; GFX940-NEXT:    v_mov_b32_e32 v6, 0
6331; GFX940-NEXT:    ;;#ASMSTART
6332; GFX940-NEXT:    ; def v[0:1]
6333; GFX940-NEXT:    ;;#ASMEND
6334; GFX940-NEXT:    ;;#ASMSTART
6335; GFX940-NEXT:    ; def v[2:3]
6336; GFX940-NEXT:    ;;#ASMEND
6337; GFX940-NEXT:    s_nop 0
6338; GFX940-NEXT:    v_perm_b32 v5, v1, v1, s2
6339; GFX940-NEXT:    v_alignbit_b32 v4, v1, v3, 16
6340; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
6341; GFX940-NEXT:    s_waitcnt vmcnt(0)
6342; GFX940-NEXT:    s_setpc_b64 s[30:31]
6343  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6344  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6345  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
6346  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6347  ret void
6348}
6349
; Two-input <4 x i16> shuffle with mask <7, 4, 3, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lane 0 is %vec1[3], lane 1 is %vec1[0], and
; lanes 2-3 are %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6350define void @v_shuffle_v4i16_v4i16__7_4_3_3(ptr addrspace(1) inreg %ptr) {
6351; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_3_3:
6352; GFX900:       ; %bb.0:
6353; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6354; GFX900-NEXT:    ;;#ASMSTART
6355; GFX900-NEXT:    ; def v[0:1]
6356; GFX900-NEXT:    ;;#ASMEND
6357; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6358; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6359; GFX900-NEXT:    ;;#ASMSTART
6360; GFX900-NEXT:    ; def v[2:3]
6361; GFX900-NEXT:    ;;#ASMEND
6362; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6363; GFX900-NEXT:    v_alignbit_b32 v0, v2, v3, 16
6364; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6365; GFX900-NEXT:    s_waitcnt vmcnt(0)
6366; GFX900-NEXT:    s_setpc_b64 s[30:31]
6367;
6368; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_3_3:
6369; GFX90A:       ; %bb.0:
6370; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6371; GFX90A-NEXT:    ;;#ASMSTART
6372; GFX90A-NEXT:    ; def v[0:1]
6373; GFX90A-NEXT:    ;;#ASMEND
6374; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6375; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6376; GFX90A-NEXT:    ;;#ASMSTART
6377; GFX90A-NEXT:    ; def v[2:3]
6378; GFX90A-NEXT:    ;;#ASMEND
6379; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6380; GFX90A-NEXT:    v_alignbit_b32 v0, v2, v3, 16
6381; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6382; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6383; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6384;
6385; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_3_3:
6386; GFX940:       ; %bb.0:
6387; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6388; GFX940-NEXT:    ;;#ASMSTART
6389; GFX940-NEXT:    ; def v[0:1]
6390; GFX940-NEXT:    ;;#ASMEND
6391; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6392; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6393; GFX940-NEXT:    ;;#ASMSTART
6394; GFX940-NEXT:    ; def v[2:3]
6395; GFX940-NEXT:    ;;#ASMEND
6396; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6397; GFX940-NEXT:    v_alignbit_b32 v0, v2, v3, 16
6398; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6399; GFX940-NEXT:    s_waitcnt vmcnt(0)
6400; GFX940-NEXT:    s_setpc_b64 s[30:31]
6401  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6402  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6403  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
6404  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6405  ret void
6406}
6407
; Two-input <4 x i16> shuffle with mask <7, 5, 3, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lane 0 is %vec1[3], lane 1 is %vec1[1], and
; lanes 2-3 are %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6408define void @v_shuffle_v4i16_v4i16__7_5_3_3(ptr addrspace(1) inreg %ptr) {
6409; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_3_3:
6410; GFX900:       ; %bb.0:
6411; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6412; GFX900-NEXT:    ;;#ASMSTART
6413; GFX900-NEXT:    ; def v[0:1]
6414; GFX900-NEXT:    ;;#ASMEND
6415; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6416; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6417; GFX900-NEXT:    ;;#ASMSTART
6418; GFX900-NEXT:    ; def v[2:3]
6419; GFX900-NEXT:    ;;#ASMEND
6420; GFX900-NEXT:    v_perm_b32 v0, v2, v3, s4
6421; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6422; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6423; GFX900-NEXT:    s_waitcnt vmcnt(0)
6424; GFX900-NEXT:    s_setpc_b64 s[30:31]
6425;
6426; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_3_3:
6427; GFX90A:       ; %bb.0:
6428; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6429; GFX90A-NEXT:    ;;#ASMSTART
6430; GFX90A-NEXT:    ; def v[0:1]
6431; GFX90A-NEXT:    ;;#ASMEND
6432; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6433; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6434; GFX90A-NEXT:    ;;#ASMSTART
6435; GFX90A-NEXT:    ; def v[2:3]
6436; GFX90A-NEXT:    ;;#ASMEND
6437; GFX90A-NEXT:    v_perm_b32 v0, v2, v3, s4
6438; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6439; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6440; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6441; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6442;
6443; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_3_3:
6444; GFX940:       ; %bb.0:
6445; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6446; GFX940-NEXT:    ;;#ASMSTART
6447; GFX940-NEXT:    ; def v[0:1]
6448; GFX940-NEXT:    ;;#ASMEND
6449; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6450; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6451; GFX940-NEXT:    ;;#ASMSTART
6452; GFX940-NEXT:    ; def v[2:3]
6453; GFX940-NEXT:    ;;#ASMEND
6454; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6455; GFX940-NEXT:    v_perm_b32 v0, v2, v3, s2
6456; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6457; GFX940-NEXT:    s_waitcnt vmcnt(0)
6458; GFX940-NEXT:    s_setpc_b64 s[30:31]
6459  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6460  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6461  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
6462  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6463  ret void
6464}
6465
; Two-input <4 x i16> shuffle with mask <7, 6, 3, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lane 0 is %vec1[3], lane 1 is %vec1[2], and
; lanes 2-3 are %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6466define void @v_shuffle_v4i16_v4i16__7_6_3_3(ptr addrspace(1) inreg %ptr) {
6467; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_3_3:
6468; GFX900:       ; %bb.0:
6469; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6470; GFX900-NEXT:    ;;#ASMSTART
6471; GFX900-NEXT:    ; def v[0:1]
6472; GFX900-NEXT:    ;;#ASMEND
6473; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6474; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6475; GFX900-NEXT:    ;;#ASMSTART
6476; GFX900-NEXT:    ; def v[2:3]
6477; GFX900-NEXT:    ;;#ASMEND
6478; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6479; GFX900-NEXT:    v_alignbit_b32 v0, v3, v3, 16
6480; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6481; GFX900-NEXT:    s_waitcnt vmcnt(0)
6482; GFX900-NEXT:    s_setpc_b64 s[30:31]
6483;
6484; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_3_3:
6485; GFX90A:       ; %bb.0:
6486; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6487; GFX90A-NEXT:    ;;#ASMSTART
6488; GFX90A-NEXT:    ; def v[0:1]
6489; GFX90A-NEXT:    ;;#ASMEND
6490; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6491; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6492; GFX90A-NEXT:    ;;#ASMSTART
6493; GFX90A-NEXT:    ; def v[2:3]
6494; GFX90A-NEXT:    ;;#ASMEND
6495; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6496; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v3, 16
6497; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6498; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6499; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6500;
6501; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_3_3:
6502; GFX940:       ; %bb.0:
6503; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6504; GFX940-NEXT:    ;;#ASMSTART
6505; GFX940-NEXT:    ; def v[0:1]
6506; GFX940-NEXT:    ;;#ASMEND
6507; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6508; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6509; GFX940-NEXT:    ;;#ASMSTART
6510; GFX940-NEXT:    ; def v[2:3]
6511; GFX940-NEXT:    ;;#ASMEND
6512; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6513; GFX940-NEXT:    v_alignbit_b32 v0, v3, v3, 16
6514; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6515; GFX940-NEXT:    s_waitcnt vmcnt(0)
6516; GFX940-NEXT:    s_setpc_b64 s[30:31]
6517  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6518  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6519  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
6520  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6521  ret void
6522}
6523
; Two-input <4 x i16> shuffle with mask <7, 7, 3, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lanes 0-1 are %vec1[3] and lanes 2-3 are
; %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6524define void @v_shuffle_v4i16_v4i16__7_7_3_3(ptr addrspace(1) inreg %ptr) {
6525; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_3:
6526; GFX900:       ; %bb.0:
6527; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6528; GFX900-NEXT:    ;;#ASMSTART
6529; GFX900-NEXT:    ; def v[0:1]
6530; GFX900-NEXT:    ;;#ASMEND
6531; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6532; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6533; GFX900-NEXT:    ;;#ASMSTART
6534; GFX900-NEXT:    ; def v[2:3]
6535; GFX900-NEXT:    ;;#ASMEND
6536; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
6537; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
6538; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6539; GFX900-NEXT:    s_waitcnt vmcnt(0)
6540; GFX900-NEXT:    s_setpc_b64 s[30:31]
6541;
6542; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_3:
6543; GFX90A:       ; %bb.0:
6544; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6545; GFX90A-NEXT:    ;;#ASMSTART
6546; GFX90A-NEXT:    ; def v[0:1]
6547; GFX90A-NEXT:    ;;#ASMEND
6548; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6549; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6550; GFX90A-NEXT:    ;;#ASMSTART
6551; GFX90A-NEXT:    ; def v[2:3]
6552; GFX90A-NEXT:    ;;#ASMEND
6553; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
6554; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6555; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6556; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6557; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6558;
6559; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_3:
6560; GFX940:       ; %bb.0:
6561; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6562; GFX940-NEXT:    ;;#ASMSTART
6563; GFX940-NEXT:    ; def v[0:1]
6564; GFX940-NEXT:    ;;#ASMEND
6565; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6566; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6567; GFX940-NEXT:    ;;#ASMSTART
6568; GFX940-NEXT:    ; def v[2:3]
6569; GFX940-NEXT:    ;;#ASMEND
6570; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
6571; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6572; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6573; GFX940-NEXT:    s_waitcnt vmcnt(0)
6574; GFX940-NEXT:    s_setpc_b64 s[30:31]
6575  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6576  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6577  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
6578  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6579  ret void
6580}
6581
; Two-input <4 x i16> shuffle with mask <7, 7, poison, 3>. Mask values 0-3
; index %vec0 and 4-7 index %vec1, so lanes 0-1 are %vec1[3], lane 2 is left
; unconstrained by the poison mask element, and lane 3 is %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6582define void @v_shuffle_v4i16_v4i16__7_7_u_3(ptr addrspace(1) inreg %ptr) {
6583; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_3:
6584; GFX900:       ; %bb.0:
6585; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6586; GFX900-NEXT:    ;;#ASMSTART
6587; GFX900-NEXT:    ; def v[0:1]
6588; GFX900-NEXT:    ;;#ASMEND
6589; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6590; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6591; GFX900-NEXT:    ;;#ASMSTART
6592; GFX900-NEXT:    ; def v[2:3]
6593; GFX900-NEXT:    ;;#ASMEND
6594; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
6595; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6596; GFX900-NEXT:    s_waitcnt vmcnt(0)
6597; GFX900-NEXT:    s_setpc_b64 s[30:31]
6598;
6599; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_3:
6600; GFX90A:       ; %bb.0:
6601; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6602; GFX90A-NEXT:    ;;#ASMSTART
6603; GFX90A-NEXT:    ; def v[0:1]
6604; GFX90A-NEXT:    ;;#ASMEND
6605; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6606; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6607; GFX90A-NEXT:    ;;#ASMSTART
6608; GFX90A-NEXT:    ; def v[2:3]
6609; GFX90A-NEXT:    ;;#ASMEND
6610; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6611; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6612; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6613; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6614;
6615; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_3:
6616; GFX940:       ; %bb.0:
6617; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6618; GFX940-NEXT:    ;;#ASMSTART
6619; GFX940-NEXT:    ; def v[0:1]
6620; GFX940-NEXT:    ;;#ASMEND
6621; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6622; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6623; GFX940-NEXT:    ;;#ASMSTART
6624; GFX940-NEXT:    ; def v[2:3]
6625; GFX940-NEXT:    ;;#ASMEND
6626; GFX940-NEXT:    s_nop 0
6627; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6628; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6629; GFX940-NEXT:    s_waitcnt vmcnt(0)
6630; GFX940-NEXT:    s_setpc_b64 s[30:31]
6631  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6632  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6633  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
6634  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6635  ret void
6636}
6637
; Two-input <4 x i16> shuffle with mask <7, 7, 0, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lanes 0-1 are %vec1[3], lane 2 is %vec0[0],
; and lane 3 is %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6638define void @v_shuffle_v4i16_v4i16__7_7_0_3(ptr addrspace(1) inreg %ptr) {
6639; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_3:
6640; GFX900:       ; %bb.0:
6641; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6642; GFX900-NEXT:    ;;#ASMSTART
6643; GFX900-NEXT:    ; def v[0:1]
6644; GFX900-NEXT:    ;;#ASMEND
6645; GFX900-NEXT:    s_mov_b32 s4, 0xffff
6646; GFX900-NEXT:    v_bfi_b32 v1, s4, v0, v1
6647; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6648; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6649; GFX900-NEXT:    ;;#ASMSTART
6650; GFX900-NEXT:    ; def v[2:3]
6651; GFX900-NEXT:    ;;#ASMEND
6652; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
6653; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6654; GFX900-NEXT:    s_waitcnt vmcnt(0)
6655; GFX900-NEXT:    s_setpc_b64 s[30:31]
6656;
6657; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_3:
6658; GFX90A:       ; %bb.0:
6659; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6660; GFX90A-NEXT:    ;;#ASMSTART
6661; GFX90A-NEXT:    ; def v[0:1]
6662; GFX90A-NEXT:    ;;#ASMEND
6663; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
6664; GFX90A-NEXT:    v_bfi_b32 v1, s4, v0, v1
6665; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6666; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6667; GFX90A-NEXT:    ;;#ASMSTART
6668; GFX90A-NEXT:    ; def v[2:3]
6669; GFX90A-NEXT:    ;;#ASMEND
6670; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6671; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6672; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6673; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6674;
6675; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_3:
6676; GFX940:       ; %bb.0:
6677; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6678; GFX940-NEXT:    ;;#ASMSTART
6679; GFX940-NEXT:    ; def v[0:1]
6680; GFX940-NEXT:    ;;#ASMEND
6681; GFX940-NEXT:    s_mov_b32 s2, 0xffff
6682; GFX940-NEXT:    v_bfi_b32 v1, s2, v0, v1
6683; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6684; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6685; GFX940-NEXT:    ;;#ASMSTART
6686; GFX940-NEXT:    ; def v[2:3]
6687; GFX940-NEXT:    ;;#ASMEND
6688; GFX940-NEXT:    s_nop 0
6689; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6690; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6691; GFX940-NEXT:    s_waitcnt vmcnt(0)
6692; GFX940-NEXT:    s_setpc_b64 s[30:31]
6693  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6694  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6695  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
6696  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6697  ret void
6698}
6699
; Two-input <4 x i16> shuffle with mask <7, 7, 1, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lanes 0-1 are %vec1[3], lane 2 is %vec0[1],
; and lane 3 is %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6700define void @v_shuffle_v4i16_v4i16__7_7_1_3(ptr addrspace(1) inreg %ptr) {
6701; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_3:
6702; GFX900:       ; %bb.0:
6703; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6704; GFX900-NEXT:    ;;#ASMSTART
6705; GFX900-NEXT:    ; def v[0:1]
6706; GFX900-NEXT:    ;;#ASMEND
6707; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6708; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6709; GFX900-NEXT:    ;;#ASMSTART
6710; GFX900-NEXT:    ; def v[2:3]
6711; GFX900-NEXT:    ;;#ASMEND
6712; GFX900-NEXT:    v_perm_b32 v1, v1, v0, s4
6713; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
6714; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6715; GFX900-NEXT:    s_waitcnt vmcnt(0)
6716; GFX900-NEXT:    s_setpc_b64 s[30:31]
6717;
6718; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_3:
6719; GFX90A:       ; %bb.0:
6720; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6721; GFX90A-NEXT:    ;;#ASMSTART
6722; GFX90A-NEXT:    ; def v[0:1]
6723; GFX90A-NEXT:    ;;#ASMEND
6724; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6725; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6726; GFX90A-NEXT:    ;;#ASMSTART
6727; GFX90A-NEXT:    ; def v[2:3]
6728; GFX90A-NEXT:    ;;#ASMEND
6729; GFX90A-NEXT:    v_perm_b32 v1, v1, v0, s4
6730; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6731; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6732; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6733; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6734;
6735; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_3:
6736; GFX940:       ; %bb.0:
6737; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6738; GFX940-NEXT:    ;;#ASMSTART
6739; GFX940-NEXT:    ; def v[0:1]
6740; GFX940-NEXT:    ;;#ASMEND
6741; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6742; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6743; GFX940-NEXT:    ;;#ASMSTART
6744; GFX940-NEXT:    ; def v[2:3]
6745; GFX940-NEXT:    ;;#ASMEND
6746; GFX940-NEXT:    v_perm_b32 v1, v1, v0, s2
6747; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6748; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6749; GFX940-NEXT:    s_waitcnt vmcnt(0)
6750; GFX940-NEXT:    s_setpc_b64 s[30:31]
6751  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6752  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6753  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
6754  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6755  ret void
6756}
6757
; Two-input <4 x i16> shuffle with mask <7, 7, 2, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lanes 0-1 are %vec1[3], while lanes 2-3 are
; %vec0[2] and %vec0[3], i.e. the upper two elements of %vec0 kept in place.
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6758define void @v_shuffle_v4i16_v4i16__7_7_2_3(ptr addrspace(1) inreg %ptr) {
6759; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_3:
6760; GFX900:       ; %bb.0:
6761; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6762; GFX900-NEXT:    ;;#ASMSTART
6763; GFX900-NEXT:    ; def v[0:1]
6764; GFX900-NEXT:    ;;#ASMEND
6765; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6766; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6767; GFX900-NEXT:    ;;#ASMSTART
6768; GFX900-NEXT:    ; def v[2:3]
6769; GFX900-NEXT:    ;;#ASMEND
6770; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
6771; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6772; GFX900-NEXT:    s_waitcnt vmcnt(0)
6773; GFX900-NEXT:    s_setpc_b64 s[30:31]
6774;
6775; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_3:
6776; GFX90A:       ; %bb.0:
6777; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6778; GFX90A-NEXT:    ;;#ASMSTART
6779; GFX90A-NEXT:    ; def v[0:1]
6780; GFX90A-NEXT:    ;;#ASMEND
6781; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6782; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6783; GFX90A-NEXT:    ;;#ASMSTART
6784; GFX90A-NEXT:    ; def v[2:3]
6785; GFX90A-NEXT:    ;;#ASMEND
6786; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6787; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6788; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6789; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6790;
6791; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_3:
6792; GFX940:       ; %bb.0:
6793; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6794; GFX940-NEXT:    ;;#ASMSTART
6795; GFX940-NEXT:    ; def v[0:1]
6796; GFX940-NEXT:    ;;#ASMEND
6797; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6798; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6799; GFX940-NEXT:    ;;#ASMSTART
6800; GFX940-NEXT:    ; def v[2:3]
6801; GFX940-NEXT:    ;;#ASMEND
6802; GFX940-NEXT:    s_nop 0
6803; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6804; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6805; GFX940-NEXT:    s_waitcnt vmcnt(0)
6806; GFX940-NEXT:    s_setpc_b64 s[30:31]
6807  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6808  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6809  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
6810  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6811  ret void
6812}
6813
; Two-input <4 x i16> shuffle with mask <7, 7, 4, 3>. Mask values 0-3 index
; %vec0 and 4-7 index %vec1, so lanes 0-1 are %vec1[3], lane 2 is %vec1[0],
; and lane 3 is %vec0[3].
; Both inputs are produced by inline asm whose text is only a comment, and the
; shuffled vector is stored through %ptr.
; The assertion lines inside this function are autogenerated (see the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; rather than editing them by hand.
6814define void @v_shuffle_v4i16_v4i16__7_7_4_3(ptr addrspace(1) inreg %ptr) {
6815; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_3:
6816; GFX900:       ; %bb.0:
6817; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6818; GFX900-NEXT:    ;;#ASMSTART
6819; GFX900-NEXT:    ; def v[0:1]
6820; GFX900-NEXT:    ;;#ASMEND
6821; GFX900-NEXT:    s_mov_b32 s4, 0xffff
6822; GFX900-NEXT:    ;;#ASMSTART
6823; GFX900-NEXT:    ; def v[2:3]
6824; GFX900-NEXT:    ;;#ASMEND
6825; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v1
6826; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6827; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6828; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
6829; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6830; GFX900-NEXT:    s_waitcnt vmcnt(0)
6831; GFX900-NEXT:    s_setpc_b64 s[30:31]
6832;
6833; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_3:
6834; GFX90A:       ; %bb.0:
6835; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6836; GFX90A-NEXT:    ;;#ASMSTART
6837; GFX90A-NEXT:    ; def v[0:1]
6838; GFX90A-NEXT:    ;;#ASMEND
6839; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
6840; GFX90A-NEXT:    ;;#ASMSTART
6841; GFX90A-NEXT:    ; def v[2:3]
6842; GFX90A-NEXT:    ;;#ASMEND
6843; GFX90A-NEXT:    v_bfi_b32 v1, s4, v2, v1
6844; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6845; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6846; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6847; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6848; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6849; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6850;
6851; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_3:
6852; GFX940:       ; %bb.0:
6853; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6854; GFX940-NEXT:    ;;#ASMSTART
6855; GFX940-NEXT:    ; def v[0:1]
6856; GFX940-NEXT:    ;;#ASMEND
6857; GFX940-NEXT:    s_mov_b32 s2, 0xffff
6858; GFX940-NEXT:    ;;#ASMSTART
6859; GFX940-NEXT:    ; def v[2:3]
6860; GFX940-NEXT:    ;;#ASMEND
6861; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6862; GFX940-NEXT:    v_bfi_b32 v1, s2, v2, v1
6863; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6864; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6865; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6866; GFX940-NEXT:    s_waitcnt vmcnt(0)
6867; GFX940-NEXT:    s_setpc_b64 s[30:31]
6868  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6869  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6870  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
6871  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6872  ret void
6873}
6874
; Two-source <4 x i16> shuffle with mask <7, 7, 5, 3> (mask indices 0-3 select
; from %vec0, 4-7 from %vec1): lanes 0-1 = %vec1[3], lane 2 = %vec1[1],
; lane 3 = %vec0[3]. Both operands come from opaque inline-asm defs and the
; shuffled vector is stored to %ptr. The assertions below are autogenerated.
6875define void @v_shuffle_v4i16_v4i16__7_7_5_3(ptr addrspace(1) inreg %ptr) {
6876; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_3:
6877; GFX900:       ; %bb.0:
6878; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6879; GFX900-NEXT:    ;;#ASMSTART
6880; GFX900-NEXT:    ; def v[0:1]
6881; GFX900-NEXT:    ;;#ASMEND
6882; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6883; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6884; GFX900-NEXT:    ;;#ASMSTART
6885; GFX900-NEXT:    ; def v[2:3]
6886; GFX900-NEXT:    ;;#ASMEND
6887; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
6888; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
6889; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6890; GFX900-NEXT:    s_waitcnt vmcnt(0)
6891; GFX900-NEXT:    s_setpc_b64 s[30:31]
6892;
6893; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_3:
6894; GFX90A:       ; %bb.0:
6895; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6896; GFX90A-NEXT:    ;;#ASMSTART
6897; GFX90A-NEXT:    ; def v[0:1]
6898; GFX90A-NEXT:    ;;#ASMEND
6899; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6900; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6901; GFX90A-NEXT:    ;;#ASMSTART
6902; GFX90A-NEXT:    ; def v[2:3]
6903; GFX90A-NEXT:    ;;#ASMEND
6904; GFX90A-NEXT:    v_perm_b32 v1, v1, v2, s4
6905; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6906; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6907; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6908; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6909;
6910; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_3:
6911; GFX940:       ; %bb.0:
6912; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6913; GFX940-NEXT:    ;;#ASMSTART
6914; GFX940-NEXT:    ; def v[0:1]
6915; GFX940-NEXT:    ;;#ASMEND
6916; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6917; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6918; GFX940-NEXT:    ;;#ASMSTART
6919; GFX940-NEXT:    ; def v[2:3]
6920; GFX940-NEXT:    ;;#ASMEND
6921; GFX940-NEXT:    s_nop 0
6922; GFX940-NEXT:    v_perm_b32 v1, v1, v2, s2
6923; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6924; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6925; GFX940-NEXT:    s_waitcnt vmcnt(0)
6926; GFX940-NEXT:    s_setpc_b64 s[30:31]
6927  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6928  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6929  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
6930  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6931  ret void
6932}
6933
; Two-source <4 x i16> shuffle with mask <7, 7, 6, 3> (mask indices 0-3 select
; from %vec0, 4-7 from %vec1): lanes 0-1 = %vec1[3], lane 2 = %vec1[2],
; lane 3 = %vec0[3]. Both operands come from opaque inline-asm defs and the
; shuffled vector is stored to %ptr. The assertions below are autogenerated.
6934define void @v_shuffle_v4i16_v4i16__7_7_6_3(ptr addrspace(1) inreg %ptr) {
6935; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_3:
6936; GFX900:       ; %bb.0:
6937; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6938; GFX900-NEXT:    ;;#ASMSTART
6939; GFX900-NEXT:    ; def v[0:1]
6940; GFX900-NEXT:    ;;#ASMEND
6941; GFX900-NEXT:    s_mov_b32 s4, 0xffff
6942; GFX900-NEXT:    ;;#ASMSTART
6943; GFX900-NEXT:    ; def v[2:3]
6944; GFX900-NEXT:    ;;#ASMEND
6945; GFX900-NEXT:    v_bfi_b32 v1, s4, v3, v1
6946; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6947; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6948; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
6949; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6950; GFX900-NEXT:    s_waitcnt vmcnt(0)
6951; GFX900-NEXT:    s_setpc_b64 s[30:31]
6952;
6953; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_3:
6954; GFX90A:       ; %bb.0:
6955; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6956; GFX90A-NEXT:    ;;#ASMSTART
6957; GFX90A-NEXT:    ; def v[0:1]
6958; GFX90A-NEXT:    ;;#ASMEND
6959; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
6960; GFX90A-NEXT:    ;;#ASMSTART
6961; GFX90A-NEXT:    ; def v[2:3]
6962; GFX90A-NEXT:    ;;#ASMEND
6963; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v1
6964; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6965; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6966; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
6967; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
6968; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6969; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6970;
6971; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_3:
6972; GFX940:       ; %bb.0:
6973; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6974; GFX940-NEXT:    ;;#ASMSTART
6975; GFX940-NEXT:    ; def v[0:1]
6976; GFX940-NEXT:    ;;#ASMEND
6977; GFX940-NEXT:    s_mov_b32 s2, 0xffff
6978; GFX940-NEXT:    ;;#ASMSTART
6979; GFX940-NEXT:    ; def v[2:3]
6980; GFX940-NEXT:    ;;#ASMEND
6981; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6982; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v1
6983; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6984; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
6985; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
6986; GFX940-NEXT:    s_waitcnt vmcnt(0)
6987; GFX940-NEXT:    s_setpc_b64 s[30:31]
6988  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6989  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6990  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
6991  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6992  ret void
6993}
6994
; Shuffle of %vec0 with a poison second operand, mask <poison, 4, 4, 4>.
; Lane 0 is unspecified and mask index 4 selects element 0 of the poison
; operand, so no lane reads %vec0. The expected output for all three targets
; is just the entry wait and the return (no asm def, no store).
6995define void @v_shuffle_v4i16_v4i16__u_4_4_4(ptr addrspace(1) inreg %ptr) {
6996; GFX9-LABEL: v_shuffle_v4i16_v4i16__u_4_4_4:
6997; GFX9:       ; %bb.0:
6998; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6999; GFX9-NEXT:    s_setpc_b64 s[30:31]
7000  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7001  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
7002  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7003  ret void
7004}
7005
; Shuffle of %vec0 with a poison second operand, mask <0, 4, 4, 4>.
; Lane 0 = %vec0[0]; lanes 1-3 use mask index 4, i.e. element 0 of the poison
; operand. The expected output stores the asm-defined register pair as-is,
; with no permute instruction in between.
7006define void @v_shuffle_v4i16_v4i16__0_4_4_4(ptr addrspace(1) inreg %ptr) {
7007; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_4_4_4:
7008; GFX900:       ; %bb.0:
7009; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7010; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7011; GFX900-NEXT:    ;;#ASMSTART
7012; GFX900-NEXT:    ; def v[0:1]
7013; GFX900-NEXT:    ;;#ASMEND
7014; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7015; GFX900-NEXT:    s_waitcnt vmcnt(0)
7016; GFX900-NEXT:    s_setpc_b64 s[30:31]
7017;
7018; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_4_4_4:
7019; GFX90A:       ; %bb.0:
7020; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7021; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7022; GFX90A-NEXT:    ;;#ASMSTART
7023; GFX90A-NEXT:    ; def v[0:1]
7024; GFX90A-NEXT:    ;;#ASMEND
7025; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7026; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7027; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7028;
7029; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_4_4_4:
7030; GFX940:       ; %bb.0:
7031; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7032; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7033; GFX940-NEXT:    ;;#ASMSTART
7034; GFX940-NEXT:    ; def v[0:1]
7035; GFX940-NEXT:    ;;#ASMEND
7036; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
7037; GFX940-NEXT:    s_waitcnt vmcnt(0)
7038; GFX940-NEXT:    s_setpc_b64 s[30:31]
7039  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7040  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
7041  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7042  ret void
7043}
7044
; Shuffle of %vec0 with a poison second operand, mask <1, 4, 4, 4>.
; Lane 0 = %vec0[1]; lanes 1-3 use mask index 4, i.e. element 0 of the poison
; operand. The expected output uses one v_alignbit_b32 with a shift of 16 on
; the low dword of the asm-defined pair before the store.
7045define void @v_shuffle_v4i16_v4i16__1_4_4_4(ptr addrspace(1) inreg %ptr) {
7046; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_4_4_4:
7047; GFX900:       ; %bb.0:
7048; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7049; GFX900-NEXT:    ;;#ASMSTART
7050; GFX900-NEXT:    ; def v[0:1]
7051; GFX900-NEXT:    ;;#ASMEND
7052; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7053; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
7054; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7055; GFX900-NEXT:    s_waitcnt vmcnt(0)
7056; GFX900-NEXT:    s_setpc_b64 s[30:31]
7057;
7058; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_4_4_4:
7059; GFX90A:       ; %bb.0:
7060; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7061; GFX90A-NEXT:    ;;#ASMSTART
7062; GFX90A-NEXT:    ; def v[0:1]
7063; GFX90A-NEXT:    ;;#ASMEND
7064; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7065; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
7066; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7067; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7068; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7069;
7070; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_4_4_4:
7071; GFX940:       ; %bb.0:
7072; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7073; GFX940-NEXT:    ;;#ASMSTART
7074; GFX940-NEXT:    ; def v[0:1]
7075; GFX940-NEXT:    ;;#ASMEND
7076; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7077; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
7078; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
7079; GFX940-NEXT:    s_waitcnt vmcnt(0)
7080; GFX940-NEXT:    s_setpc_b64 s[30:31]
7081  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7082  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
7083  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7084  ret void
7085}
7086
; Shuffle of %vec0 with a poison second operand, mask <2, 4, 4, 4>.
; Lane 0 = %vec0[2]; lanes 1-3 use mask index 4, i.e. element 0 of the poison
; operand. The expected output copies the high dword of the asm-defined pair
; into the low dword with one v_mov_b32 before the store.
7087define void @v_shuffle_v4i16_v4i16__2_4_4_4(ptr addrspace(1) inreg %ptr) {
7088; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4:
7089; GFX900:       ; %bb.0:
7090; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7091; GFX900-NEXT:    ;;#ASMSTART
7092; GFX900-NEXT:    ; def v[0:1]
7093; GFX900-NEXT:    ;;#ASMEND
7094; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7095; GFX900-NEXT:    v_mov_b32_e32 v0, v1
7096; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7097; GFX900-NEXT:    s_waitcnt vmcnt(0)
7098; GFX900-NEXT:    s_setpc_b64 s[30:31]
7099;
7100; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4:
7101; GFX90A:       ; %bb.0:
7102; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7103; GFX90A-NEXT:    ;;#ASMSTART
7104; GFX90A-NEXT:    ; def v[0:1]
7105; GFX90A-NEXT:    ;;#ASMEND
7106; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7107; GFX90A-NEXT:    v_mov_b32_e32 v0, v1
7108; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7109; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7110; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7111;
7112; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_4_4_4:
7113; GFX940:       ; %bb.0:
7114; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7115; GFX940-NEXT:    ;;#ASMSTART
7116; GFX940-NEXT:    ; def v[0:1]
7117; GFX940-NEXT:    ;;#ASMEND
7118; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7119; GFX940-NEXT:    v_mov_b32_e32 v0, v1
7120; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
7121; GFX940-NEXT:    s_waitcnt vmcnt(0)
7122; GFX940-NEXT:    s_setpc_b64 s[30:31]
7123  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7124  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
7125  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7126  ret void
7127}
7128
; Shuffle of %vec0 with a poison second operand, mask <3, 4, 4, 4>.
; Lane 0 = %vec0[3]; lanes 1-3 use mask index 4, i.e. element 0 of the poison
; operand. The expected output uses one v_alignbit_b32 with a shift of 16 on
; the high dword of the asm-defined pair before the store.
7129define void @v_shuffle_v4i16_v4i16__3_4_4_4(ptr addrspace(1) inreg %ptr) {
7130; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_4_4_4:
7131; GFX900:       ; %bb.0:
7132; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7133; GFX900-NEXT:    ;;#ASMSTART
7134; GFX900-NEXT:    ; def v[0:1]
7135; GFX900-NEXT:    ;;#ASMEND
7136; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7137; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
7138; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7139; GFX900-NEXT:    s_waitcnt vmcnt(0)
7140; GFX900-NEXT:    s_setpc_b64 s[30:31]
7141;
7142; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_4_4_4:
7143; GFX90A:       ; %bb.0:
7144; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7145; GFX90A-NEXT:    ;;#ASMSTART
7146; GFX90A-NEXT:    ; def v[0:1]
7147; GFX90A-NEXT:    ;;#ASMEND
7148; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7149; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
7150; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7151; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7152; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7153;
7154; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_4_4_4:
7155; GFX940:       ; %bb.0:
7156; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7157; GFX940-NEXT:    ;;#ASMSTART
7158; GFX940-NEXT:    ; def v[0:1]
7159; GFX940-NEXT:    ;;#ASMEND
7160; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7161; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
7162; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
7163; GFX940-NEXT:    s_waitcnt vmcnt(0)
7164; GFX940-NEXT:    s_setpc_b64 s[30:31]
7165  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7166  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
7167  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7168  ret void
7169}
7170
; Shuffle of %vec0 with a poison second operand, mask <4, 4, 4, 4>.
; Every lane uses mask index 4, i.e. element 0 of the poison operand, so no
; lane reads %vec0. The expected output for all three targets is just the
; entry wait and the return (no asm def, no store).
7171define void @v_shuffle_v4i16_v4i16__4_4_4_4(ptr addrspace(1) inreg %ptr) {
7172; GFX9-LABEL: v_shuffle_v4i16_v4i16__4_4_4_4:
7173; GFX9:       ; %bb.0:
7174; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7175; GFX9-NEXT:    s_setpc_b64 s[30:31]
7176  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7177  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
7178  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7179  ret void
7180}
7181
; Two-source <4 x i16> shuffle with mask <5, 4, 4, 4> (mask indices 0-3 select
; from %vec0, 4-7 from %vec1): lane 0 = %vec1[1], lanes 1-3 = %vec1[0].
; No lane reads %vec0, and the expected output contains only one asm def.
; The shuffled vector is stored to %ptr.
7182define void @v_shuffle_v4i16_v4i16__5_4_4_4(ptr addrspace(1) inreg %ptr) {
7183; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_4_4_4:
7184; GFX900:       ; %bb.0:
7185; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7186; GFX900-NEXT:    ;;#ASMSTART
7187; GFX900-NEXT:    ; def v[0:1]
7188; GFX900-NEXT:    ;;#ASMEND
7189; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7190; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7191; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
7192; GFX900-NEXT:    v_alignbit_b32 v0, v0, v0, 16
7193; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7194; GFX900-NEXT:    s_waitcnt vmcnt(0)
7195; GFX900-NEXT:    s_setpc_b64 s[30:31]
7196;
7197; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_4_4_4:
7198; GFX90A:       ; %bb.0:
7199; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7200; GFX90A-NEXT:    ;;#ASMSTART
7201; GFX90A-NEXT:    ; def v[0:1]
7202; GFX90A-NEXT:    ;;#ASMEND
7203; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7204; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7205; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
7206; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v0, 16
7207; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
7208; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7209; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7210;
7211; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_4_4_4:
7212; GFX940:       ; %bb.0:
7213; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7214; GFX940-NEXT:    ;;#ASMSTART
7215; GFX940-NEXT:    ; def v[0:1]
7216; GFX940-NEXT:    ;;#ASMEND
7217; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7218; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7219; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
7220; GFX940-NEXT:    v_alignbit_b32 v0, v0, v0, 16
7221; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
7222; GFX940-NEXT:    s_waitcnt vmcnt(0)
7223; GFX940-NEXT:    s_setpc_b64 s[30:31]
7224  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7225  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7226  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
7227  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7228  ret void
7229}
7230
; Two-source <4 x i16> shuffle with mask <6, 4, 4, 4> (mask indices 0-3 select
; from %vec0, 4-7 from %vec1): lane 0 = %vec1[2], lanes 1-3 = %vec1[0].
; No lane reads %vec0, and the expected output contains only one asm def.
; The shuffled vector is stored to %ptr.
7231define void @v_shuffle_v4i16_v4i16__6_4_4_4(ptr addrspace(1) inreg %ptr) {
7232; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_4_4_4:
7233; GFX900:       ; %bb.0:
7234; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7235; GFX900-NEXT:    ;;#ASMSTART
7236; GFX900-NEXT:    ; def v[0:1]
7237; GFX900-NEXT:    ;;#ASMEND
7238; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7239; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7240; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
7241; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
7242; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
7243; GFX900-NEXT:    s_waitcnt vmcnt(0)
7244; GFX900-NEXT:    s_setpc_b64 s[30:31]
7245;
7246; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_4_4_4:
7247; GFX90A:       ; %bb.0:
7248; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7249; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7250; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7251; GFX90A-NEXT:    ;;#ASMSTART
7252; GFX90A-NEXT:    ; def v[0:1]
7253; GFX90A-NEXT:    ;;#ASMEND
7254; GFX90A-NEXT:    v_perm_b32 v2, v0, v1, s4
7255; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
7256; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
7257; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7258; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7259;
7260; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_4_4_4:
7261; GFX940:       ; %bb.0:
7262; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7263; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7264; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7265; GFX940-NEXT:    ;;#ASMSTART
7266; GFX940-NEXT:    ; def v[0:1]
7267; GFX940-NEXT:    ;;#ASMEND
7268; GFX940-NEXT:    s_nop 0
7269; GFX940-NEXT:    v_perm_b32 v2, v0, v1, s2
7270; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
7271; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
7272; GFX940-NEXT:    s_waitcnt vmcnt(0)
7273; GFX940-NEXT:    s_setpc_b64 s[30:31]
7274  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7275  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7276  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
7277  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7278  ret void
7279}
7280
; Two-source <4 x i16> shuffle with mask <7, 4, 4, 4> (mask indices 0-3 select
; from %vec0, 4-7 from %vec1): lane 0 = %vec1[3], lanes 1-3 = %vec1[0].
; No lane reads %vec0, and the expected output contains only one asm def.
; The shuffled vector is stored to %ptr.
7281define void @v_shuffle_v4i16_v4i16__7_4_4_4(ptr addrspace(1) inreg %ptr) {
7282; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_4_4:
7283; GFX900:       ; %bb.0:
7284; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7285; GFX900-NEXT:    ;;#ASMSTART
7286; GFX900-NEXT:    ; def v[0:1]
7287; GFX900-NEXT:    ;;#ASMEND
7288; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7289; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7290; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
7291; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
7292; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
7293; GFX900-NEXT:    s_waitcnt vmcnt(0)
7294; GFX900-NEXT:    s_setpc_b64 s[30:31]
7295;
7296; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_4_4:
7297; GFX90A:       ; %bb.0:
7298; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7299; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7300; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7301; GFX90A-NEXT:    ;;#ASMSTART
7302; GFX90A-NEXT:    ; def v[0:1]
7303; GFX90A-NEXT:    ;;#ASMEND
7304; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
7305; GFX90A-NEXT:    v_alignbit_b32 v2, v0, v1, 16
7306; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
7307; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7308; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7309;
7310; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_4_4:
7311; GFX940:       ; %bb.0:
7312; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7313; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7314; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7315; GFX940-NEXT:    ;;#ASMSTART
7316; GFX940-NEXT:    ; def v[0:1]
7317; GFX940-NEXT:    ;;#ASMEND
7318; GFX940-NEXT:    s_nop 0
7319; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
7320; GFX940-NEXT:    v_alignbit_b32 v2, v0, v1, 16
7321; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
7322; GFX940-NEXT:    s_waitcnt vmcnt(0)
7323; GFX940-NEXT:    s_setpc_b64 s[30:31]
7324  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7325  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7326  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
7327  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7328  ret void
7329}
7330
; Two-source <4 x i16> shuffle with mask <7, poison, 4, 4> (mask indices 0-3
; select from %vec0, 4-7 from %vec1): lane 0 = %vec1[3], lane 1 is
; unspecified, lanes 2-3 = %vec1[0]. No lane reads %vec0, and the expected
; output contains only one asm def. The shuffled vector is stored to %ptr.
7331define void @v_shuffle_v4i16_v4i16__7_u_4_4(ptr addrspace(1) inreg %ptr) {
7332; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_4_4:
7333; GFX900:       ; %bb.0:
7334; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7335; GFX900-NEXT:    ;;#ASMSTART
7336; GFX900-NEXT:    ; def v[0:1]
7337; GFX900-NEXT:    ;;#ASMEND
7338; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7339; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7340; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
7341; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
7342; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
7343; GFX900-NEXT:    s_waitcnt vmcnt(0)
7344; GFX900-NEXT:    s_setpc_b64 s[30:31]
7345;
7346; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_4_4:
7347; GFX90A:       ; %bb.0:
7348; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7349; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7350; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7351; GFX90A-NEXT:    ;;#ASMSTART
7352; GFX90A-NEXT:    ; def v[0:1]
7353; GFX90A-NEXT:    ;;#ASMEND
7354; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
7355; GFX90A-NEXT:    v_alignbit_b32 v2, s4, v1, 16
7356; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
7357; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7358; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7359;
7360; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_4_4:
7361; GFX940:       ; %bb.0:
7362; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7363; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7364; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7365; GFX940-NEXT:    ;;#ASMSTART
7366; GFX940-NEXT:    ; def v[0:1]
7367; GFX940-NEXT:    ;;#ASMEND
7368; GFX940-NEXT:    s_nop 0
7369; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
7370; GFX940-NEXT:    v_alignbit_b32 v2, s0, v1, 16
7371; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
7372; GFX940-NEXT:    s_waitcnt vmcnt(0)
7373; GFX940-NEXT:    s_setpc_b64 s[30:31]
7374  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7375  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7376  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
7377  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7378  ret void
7379}
7380
; Two-source <4 x i16> shuffle with mask <7, 0, 4, 4> (mask indices 0-3 select
; from %vec0, 4-7 from %vec1): lane 0 = %vec1[3], lane 1 = %vec0[0],
; lanes 2-3 = %vec1[0]. Both operands are read, so the expected output has two
; asm defs. The shuffled vector is stored to %ptr.
7381define void @v_shuffle_v4i16_v4i16__7_0_4_4(ptr addrspace(1) inreg %ptr) {
7382; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_4_4:
7383; GFX900:       ; %bb.0:
7384; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7385; GFX900-NEXT:    ;;#ASMSTART
7386; GFX900-NEXT:    ; def v[0:1]
7387; GFX900-NEXT:    ;;#ASMEND
7388; GFX900-NEXT:    ;;#ASMSTART
7389; GFX900-NEXT:    ; def v[1:2]
7390; GFX900-NEXT:    ;;#ASMEND
7391; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7392; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7393; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
7394; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
7395; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
7396; GFX900-NEXT:    s_waitcnt vmcnt(0)
7397; GFX900-NEXT:    s_setpc_b64 s[30:31]
7398;
7399; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_4_4:
7400; GFX90A:       ; %bb.0:
7401; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7402; GFX90A-NEXT:    ;;#ASMSTART
7403; GFX90A-NEXT:    ; def v[0:1]
7404; GFX90A-NEXT:    ;;#ASMEND
7405; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7406; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7407; GFX90A-NEXT:    ;;#ASMSTART
7408; GFX90A-NEXT:    ; def v[2:3]
7409; GFX90A-NEXT:    ;;#ASMEND
7410; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
7411; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
7412; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7413; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7414; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7415;
7416; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_4_4:
7417; GFX940:       ; %bb.0:
7418; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7419; GFX940-NEXT:    ;;#ASMSTART
7420; GFX940-NEXT:    ; def v[0:1]
7421; GFX940-NEXT:    ;;#ASMEND
7422; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7423; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7424; GFX940-NEXT:    ;;#ASMSTART
7425; GFX940-NEXT:    ; def v[2:3]
7426; GFX940-NEXT:    ;;#ASMEND
7427; GFX940-NEXT:    s_nop 0
7428; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
7429; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
7430; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
7431; GFX940-NEXT:    s_waitcnt vmcnt(0)
7432; GFX940-NEXT:    s_setpc_b64 s[30:31]
7433  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7434  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7435  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
7436  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7437  ret void
7438}
7439
; Two-source <4 x i16> shuffle with mask <7, 1, 4, 4> (mask indices 0-3 select
; from %vec0, 4-7 from %vec1): lane 0 = %vec1[3], lane 1 = %vec0[1],
; lanes 2-3 = %vec1[0]. Both operands are read, so the expected output has two
; asm defs. The shuffled vector is stored to %ptr.
7440define void @v_shuffle_v4i16_v4i16__7_1_4_4(ptr addrspace(1) inreg %ptr) {
7441; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_4_4:
7442; GFX900:       ; %bb.0:
7443; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7444; GFX900-NEXT:    ;;#ASMSTART
7445; GFX900-NEXT:    ; def v[0:1]
7446; GFX900-NEXT:    ;;#ASMEND
7447; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7448; GFX900-NEXT:    ;;#ASMSTART
7449; GFX900-NEXT:    ; def v[1:2]
7450; GFX900-NEXT:    ;;#ASMEND
7451; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
7452; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7453; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7454; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
7455; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
7456; GFX900-NEXT:    s_waitcnt vmcnt(0)
7457; GFX900-NEXT:    s_setpc_b64 s[30:31]
7458;
7459; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_4_4:
7460; GFX90A:       ; %bb.0:
7461; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7462; GFX90A-NEXT:    ;;#ASMSTART
7463; GFX90A-NEXT:    ; def v[0:1]
7464; GFX90A-NEXT:    ;;#ASMEND
7465; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7466; GFX90A-NEXT:    ;;#ASMSTART
7467; GFX90A-NEXT:    ; def v[2:3]
7468; GFX90A-NEXT:    ;;#ASMEND
7469; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
7470; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7471; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7472; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
7473; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7474; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7475; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7476;
7477; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_4_4:
7478; GFX940:       ; %bb.0:
7479; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7480; GFX940-NEXT:    ;;#ASMSTART
7481; GFX940-NEXT:    ; def v[0:1]
7482; GFX940-NEXT:    ;;#ASMEND
7483; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7484; GFX940-NEXT:    ;;#ASMSTART
7485; GFX940-NEXT:    ; def v[2:3]
7486; GFX940-NEXT:    ;;#ASMEND
7487; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7488; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
7489; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7490; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
7491; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
7492; GFX940-NEXT:    s_waitcnt vmcnt(0)
7493; GFX940-NEXT:    s_setpc_b64 s[30:31]
7494  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7495  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7496  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
7497  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7498  ret void
7499}
7500
; Two-source <4 x i16> shuffle with mask <7, 2, 4, 4> (mask indices 0-3 select
; from %vec0, 4-7 from %vec1): lane 0 = %vec1[3], lane 1 = %vec0[2],
; lanes 2-3 = %vec1[0]. Both operands are read, so the expected output has two
; asm defs. The shuffled vector is stored to %ptr.
7501define void @v_shuffle_v4i16_v4i16__7_2_4_4(ptr addrspace(1) inreg %ptr) {
7502; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_4_4:
7503; GFX900:       ; %bb.0:
7504; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7505; GFX900-NEXT:    ;;#ASMSTART
7506; GFX900-NEXT:    ; def v[0:1]
7507; GFX900-NEXT:    ;;#ASMEND
7508; GFX900-NEXT:    ;;#ASMSTART
7509; GFX900-NEXT:    ; def v[2:3]
7510; GFX900-NEXT:    ;;#ASMEND
7511; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7512; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7513; GFX900-NEXT:    v_perm_b32 v2, v2, v2, s4
7514; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
7515; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
7516; GFX900-NEXT:    s_waitcnt vmcnt(0)
7517; GFX900-NEXT:    s_setpc_b64 s[30:31]
7518;
7519; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_4_4:
7520; GFX90A:       ; %bb.0:
7521; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7522; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7523; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
7524; GFX90A-NEXT:    ;;#ASMSTART
7525; GFX90A-NEXT:    ; def v[0:1]
7526; GFX90A-NEXT:    ;;#ASMEND
7527; GFX90A-NEXT:    ;;#ASMSTART
7528; GFX90A-NEXT:    ; def v[2:3]
7529; GFX90A-NEXT:    ;;#ASMEND
7530; GFX90A-NEXT:    v_perm_b32 v5, v2, v2, s4
7531; GFX90A-NEXT:    v_alignbit_b32 v4, v1, v3, 16
7532; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17]
7533; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7534; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7535;
7536; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_4_4:
7537; GFX940:       ; %bb.0:
7538; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7539; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7540; GFX940-NEXT:    v_mov_b32_e32 v6, 0
7541; GFX940-NEXT:    ;;#ASMSTART
7542; GFX940-NEXT:    ; def v[0:1]
7543; GFX940-NEXT:    ;;#ASMEND
7544; GFX940-NEXT:    ;;#ASMSTART
7545; GFX940-NEXT:    ; def v[2:3]
7546; GFX940-NEXT:    ;;#ASMEND
7547; GFX940-NEXT:    s_nop 0
7548; GFX940-NEXT:    v_perm_b32 v5, v2, v2, s2
7549; GFX940-NEXT:    v_alignbit_b32 v4, v1, v3, 16
7550; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
7551; GFX940-NEXT:    s_waitcnt vmcnt(0)
7552; GFX940-NEXT:    s_setpc_b64 s[30:31]
7553  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7554  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7555  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
7556  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7557  ret void
7558}
7559
; Two-source <4 x i16> shuffle with mask <7, 3, 4, 4> (mask indices 0-3 select
; from %vec0, 4-7 from %vec1): lane 0 = %vec1[3], lane 1 = %vec0[3],
; lanes 2-3 = %vec1[0]. Both operands are read, so the expected output has two
; asm defs. The shuffled vector is stored to %ptr.
7560define void @v_shuffle_v4i16_v4i16__7_3_4_4(ptr addrspace(1) inreg %ptr) {
7561; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_4_4:
7562; GFX900:       ; %bb.0:
7563; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7564; GFX900-NEXT:    ;;#ASMSTART
7565; GFX900-NEXT:    ; def v[0:1]
7566; GFX900-NEXT:    ;;#ASMEND
7567; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7568; GFX900-NEXT:    ;;#ASMSTART
7569; GFX900-NEXT:    ; def v[2:3]
7570; GFX900-NEXT:    ;;#ASMEND
7571; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
7572; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7573; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7574; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
7575; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7576; GFX900-NEXT:    s_waitcnt vmcnt(0)
7577; GFX900-NEXT:    s_setpc_b64 s[30:31]
7578;
7579; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_4_4:
7580; GFX90A:       ; %bb.0:
7581; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7582; GFX90A-NEXT:    ;;#ASMSTART
7583; GFX90A-NEXT:    ; def v[0:1]
7584; GFX90A-NEXT:    ;;#ASMEND
7585; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7586; GFX90A-NEXT:    ;;#ASMSTART
7587; GFX90A-NEXT:    ; def v[2:3]
7588; GFX90A-NEXT:    ;;#ASMEND
7589; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
7590; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7591; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7592; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
7593; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7594; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7595; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7596;
7597; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_4_4:
7598; GFX940:       ; %bb.0:
7599; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7600; GFX940-NEXT:    ;;#ASMSTART
7601; GFX940-NEXT:    ; def v[0:1]
7602; GFX940-NEXT:    ;;#ASMEND
7603; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7604; GFX940-NEXT:    ;;#ASMSTART
7605; GFX940-NEXT:    ; def v[2:3]
7606; GFX940-NEXT:    ;;#ASMEND
7607; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7608; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
7609; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7610; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
7611; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
7612; GFX940-NEXT:    s_waitcnt vmcnt(0)
7613; GFX940-NEXT:    s_setpc_b64 s[30:31]
7614  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7615  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7616  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
7617  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7618  ret void
7619}
7620
; Shuffle mask <7, 5, 4, 4>: every index is >= 4, so all four result lanes are
; read from %vec1 (its elements 3, 1, 0, 0) and %vec0 is not referenced by the
; mask. The expected code below contains a single inline-asm "def" block.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
7621define void @v_shuffle_v4i16_v4i16__7_5_4_4(ptr addrspace(1) inreg %ptr) {
7622; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_4_4:
7623; GFX900:       ; %bb.0:
7624; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7625; GFX900-NEXT:    ;;#ASMSTART
7626; GFX900-NEXT:    ; def v[0:1]
7627; GFX900-NEXT:    ;;#ASMEND
7628; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7629; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
7630; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7631; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7632; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
7633; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
7634; GFX900-NEXT:    s_waitcnt vmcnt(0)
7635; GFX900-NEXT:    s_setpc_b64 s[30:31]
7636;
7637; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_4_4:
7638; GFX90A:       ; %bb.0:
7639; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7640; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7641; GFX90A-NEXT:    ;;#ASMSTART
7642; GFX90A-NEXT:    ; def v[0:1]
7643; GFX90A-NEXT:    ;;#ASMEND
7644; GFX90A-NEXT:    v_perm_b32 v2, v0, v1, s4
7645; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7646; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7647; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
7648; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
7649; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7650; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7651;
7652; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_4_4:
7653; GFX940:       ; %bb.0:
7654; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7655; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7656; GFX940-NEXT:    ;;#ASMSTART
7657; GFX940-NEXT:    ; def v[0:1]
7658; GFX940-NEXT:    ;;#ASMEND
7659; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7660; GFX940-NEXT:    v_perm_b32 v2, v0, v1, s2
7661; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7662; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
7663; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
7664; GFX940-NEXT:    s_waitcnt vmcnt(0)
7665; GFX940-NEXT:    s_setpc_b64 s[30:31]
7666  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7667  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
7668  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
7669  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7670  ret void
7671}
7672
; Shuffle mask <7, 6, 4, 4>: every index is >= 4, so all four result lanes are
; read from %vec1 (its elements 3, 2, 0, 0) and %vec0 is not referenced by the
; mask. The expected code below contains a single inline-asm "def" block.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
7673define void @v_shuffle_v4i16_v4i16__7_6_4_4(ptr addrspace(1) inreg %ptr) {
7674; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_4_4:
7675; GFX900:       ; %bb.0:
7676; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7677; GFX900-NEXT:    ;;#ASMSTART
7678; GFX900-NEXT:    ; def v[0:1]
7679; GFX900-NEXT:    ;;#ASMEND
7680; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7681; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7682; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
7683; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
7684; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
7685; GFX900-NEXT:    s_waitcnt vmcnt(0)
7686; GFX900-NEXT:    s_setpc_b64 s[30:31]
7687;
7688; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_4_4:
7689; GFX90A:       ; %bb.0:
7690; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7691; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7692; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7693; GFX90A-NEXT:    ;;#ASMSTART
7694; GFX90A-NEXT:    ; def v[0:1]
7695; GFX90A-NEXT:    ;;#ASMEND
7696; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
7697; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v1, 16
7698; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
7699; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7700; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7701;
7702; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_4_4:
7703; GFX940:       ; %bb.0:
7704; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7705; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7706; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7707; GFX940-NEXT:    ;;#ASMSTART
7708; GFX940-NEXT:    ; def v[0:1]
7709; GFX940-NEXT:    ;;#ASMEND
7710; GFX940-NEXT:    s_nop 0
7711; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
7712; GFX940-NEXT:    v_alignbit_b32 v2, v1, v1, 16
7713; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
7714; GFX940-NEXT:    s_waitcnt vmcnt(0)
7715; GFX940-NEXT:    s_setpc_b64 s[30:31]
7716  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7717  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
7718  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
7719  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7720  ret void
7721}
7722
; Shuffle mask <7, 7, 4, 4>: every index is >= 4, so all four result lanes are
; read from %vec1 (its elements 3, 3, 0, 0) and %vec0 is not referenced by the
; mask. The expected code below contains a single inline-asm "def" block.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
7723define void @v_shuffle_v4i16_v4i16__7_7_4_4(ptr addrspace(1) inreg %ptr) {
7724; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_4:
7725; GFX900:       ; %bb.0:
7726; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7727; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7728; GFX900-NEXT:    ;;#ASMSTART
7729; GFX900-NEXT:    ; def v[0:1]
7730; GFX900-NEXT:    ;;#ASMEND
7731; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
7732; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7733; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7734; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
7735; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
7736; GFX900-NEXT:    s_waitcnt vmcnt(0)
7737; GFX900-NEXT:    s_setpc_b64 s[30:31]
7738;
7739; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_4:
7740; GFX90A:       ; %bb.0:
7741; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7742; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7743; GFX90A-NEXT:    ;;#ASMSTART
7744; GFX90A-NEXT:    ; def v[0:1]
7745; GFX90A-NEXT:    ;;#ASMEND
7746; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
7747; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7748; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7749; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
7750; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
7751; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7752; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7753;
7754; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_4:
7755; GFX940:       ; %bb.0:
7756; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7757; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7758; GFX940-NEXT:    ;;#ASMSTART
7759; GFX940-NEXT:    ; def v[0:1]
7760; GFX940-NEXT:    ;;#ASMEND
7761; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7762; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
7763; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7764; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
7765; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
7766; GFX940-NEXT:    s_waitcnt vmcnt(0)
7767; GFX940-NEXT:    s_setpc_b64 s[30:31]
7768  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7769  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
7770  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
7771  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7772  ret void
7773}
7774
; Shuffle mask <7, 7, poison, 4>: lanes 0 and 1 read element 3 of %vec1, lane 2
; is left unspecified (poison index), and lane 3 reads element 0 of %vec1.
; %vec0 is not referenced by the mask; the expected code below contains a
; single inline-asm "def" block.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
7775define void @v_shuffle_v4i16_v4i16__7_7_u_4(ptr addrspace(1) inreg %ptr) {
7776; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_4:
7777; GFX900:       ; %bb.0:
7778; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7779; GFX900-NEXT:    ;;#ASMSTART
7780; GFX900-NEXT:    ; def v[0:1]
7781; GFX900-NEXT:    ;;#ASMEND
7782; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7783; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7784; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
7785; GFX900-NEXT:    v_lshlrev_b32_e32 v2, 16, v0
7786; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
7787; GFX900-NEXT:    s_waitcnt vmcnt(0)
7788; GFX900-NEXT:    s_setpc_b64 s[30:31]
7789;
7790; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_4:
7791; GFX90A:       ; %bb.0:
7792; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7793; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7794; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7795; GFX90A-NEXT:    ;;#ASMSTART
7796; GFX90A-NEXT:    ; def v[0:1]
7797; GFX90A-NEXT:    ;;#ASMEND
7798; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
7799; GFX90A-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
7800; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
7801; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7802; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7803;
7804; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_4:
7805; GFX940:       ; %bb.0:
7806; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7807; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7808; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7809; GFX940-NEXT:    ;;#ASMSTART
7810; GFX940-NEXT:    ; def v[0:1]
7811; GFX940-NEXT:    ;;#ASMEND
7812; GFX940-NEXT:    s_nop 0
7813; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
7814; GFX940-NEXT:    v_lshlrev_b32_e32 v3, 16, v0
7815; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
7816; GFX940-NEXT:    s_waitcnt vmcnt(0)
7817; GFX940-NEXT:    s_setpc_b64 s[30:31]
7818  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7819  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
7820  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
7821  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7822  ret void
7823}
7824
; Shuffle mask <7, 7, 0, 4>: lanes 0 and 1 read element 3 of %vec1, lane 2
; reads element 0 of %vec0, and lane 3 reads element 0 of %vec1. Both operands
; are used, so the expected code below contains two inline-asm "def" blocks.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
7825define void @v_shuffle_v4i16_v4i16__7_7_0_4(ptr addrspace(1) inreg %ptr) {
7826; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_4:
7827; GFX900:       ; %bb.0:
7828; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7829; GFX900-NEXT:    ;;#ASMSTART
7830; GFX900-NEXT:    ; def v[0:1]
7831; GFX900-NEXT:    ;;#ASMEND
7832; GFX900-NEXT:    ;;#ASMSTART
7833; GFX900-NEXT:    ; def v[1:2]
7834; GFX900-NEXT:    ;;#ASMEND
7835; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7836; GFX900-NEXT:    v_perm_b32 v1, v1, v0, s4
7837; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7838; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7839; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
7840; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
7841; GFX900-NEXT:    s_waitcnt vmcnt(0)
7842; GFX900-NEXT:    s_setpc_b64 s[30:31]
7843;
7844; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_4:
7845; GFX90A:       ; %bb.0:
7846; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7847; GFX90A-NEXT:    ;;#ASMSTART
7848; GFX90A-NEXT:    ; def v[0:1]
7849; GFX90A-NEXT:    ;;#ASMEND
7850; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7851; GFX90A-NEXT:    ;;#ASMSTART
7852; GFX90A-NEXT:    ; def v[2:3]
7853; GFX90A-NEXT:    ;;#ASMEND
7854; GFX90A-NEXT:    v_perm_b32 v1, v2, v0, s4
7855; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7856; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7857; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
7858; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7859; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7860; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7861;
7862; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_4:
7863; GFX940:       ; %bb.0:
7864; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7865; GFX940-NEXT:    ;;#ASMSTART
7866; GFX940-NEXT:    ; def v[0:1]
7867; GFX940-NEXT:    ;;#ASMEND
7868; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7869; GFX940-NEXT:    ;;#ASMSTART
7870; GFX940-NEXT:    ; def v[2:3]
7871; GFX940-NEXT:    ;;#ASMEND
7872; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7873; GFX940-NEXT:    v_perm_b32 v1, v2, v0, s2
7874; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7875; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
7876; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
7877; GFX940-NEXT:    s_waitcnt vmcnt(0)
7878; GFX940-NEXT:    s_setpc_b64 s[30:31]
7879  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7880  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
7881  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
7882  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7883  ret void
7884}
7885
; Shuffle mask <7, 7, 1, 4>: lanes 0 and 1 read element 3 of %vec1, lane 2
; reads element 1 of %vec0, and lane 3 reads element 0 of %vec1. Both operands
; are used, so the expected code below contains two inline-asm "def" blocks.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
7886define void @v_shuffle_v4i16_v4i16__7_7_1_4(ptr addrspace(1) inreg %ptr) {
7887; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_4:
7888; GFX900:       ; %bb.0:
7889; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7890; GFX900-NEXT:    ;;#ASMSTART
7891; GFX900-NEXT:    ; def v[0:1]
7892; GFX900-NEXT:    ;;#ASMEND
7893; GFX900-NEXT:    ;;#ASMSTART
7894; GFX900-NEXT:    ; def v[1:2]
7895; GFX900-NEXT:    ;;#ASMEND
7896; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7897; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7898; GFX900-NEXT:    v_alignbit_b32 v1, v1, v0, 16
7899; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
7900; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
7901; GFX900-NEXT:    s_waitcnt vmcnt(0)
7902; GFX900-NEXT:    s_setpc_b64 s[30:31]
7903;
7904; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_4:
7905; GFX90A:       ; %bb.0:
7906; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7907; GFX90A-NEXT:    ;;#ASMSTART
7908; GFX90A-NEXT:    ; def v[0:1]
7909; GFX90A-NEXT:    ;;#ASMEND
7910; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7911; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7912; GFX90A-NEXT:    ;;#ASMSTART
7913; GFX90A-NEXT:    ; def v[2:3]
7914; GFX90A-NEXT:    ;;#ASMEND
7915; GFX90A-NEXT:    v_alignbit_b32 v1, v2, v0, 16
7916; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
7917; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7918; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7919; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7920;
7921; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_4:
7922; GFX940:       ; %bb.0:
7923; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7924; GFX940-NEXT:    ;;#ASMSTART
7925; GFX940-NEXT:    ; def v[0:1]
7926; GFX940-NEXT:    ;;#ASMEND
7927; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7928; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7929; GFX940-NEXT:    ;;#ASMSTART
7930; GFX940-NEXT:    ; def v[2:3]
7931; GFX940-NEXT:    ;;#ASMEND
7932; GFX940-NEXT:    s_nop 0
7933; GFX940-NEXT:    v_alignbit_b32 v1, v2, v0, 16
7934; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
7935; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
7936; GFX940-NEXT:    s_waitcnt vmcnt(0)
7937; GFX940-NEXT:    s_setpc_b64 s[30:31]
7938  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7939  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
7940  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
7941  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7942  ret void
7943}
7944
; Shuffle mask <7, 7, 2, 4>: lanes 0 and 1 read element 3 of %vec1, lane 2
; reads element 2 of %vec0, and lane 3 reads element 0 of %vec1. Both operands
; are used, so the expected code below contains two inline-asm "def" blocks.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
7945define void @v_shuffle_v4i16_v4i16__7_7_2_4(ptr addrspace(1) inreg %ptr) {
7946; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_4:
7947; GFX900:       ; %bb.0:
7948; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7949; GFX900-NEXT:    ;;#ASMSTART
7950; GFX900-NEXT:    ; def v[0:1]
7951; GFX900-NEXT:    ;;#ASMEND
7952; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
7953; GFX900-NEXT:    ;;#ASMSTART
7954; GFX900-NEXT:    ; def v[2:3]
7955; GFX900-NEXT:    ;;#ASMEND
7956; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
7957; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7958; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7959; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
7960; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7961; GFX900-NEXT:    s_waitcnt vmcnt(0)
7962; GFX900-NEXT:    s_setpc_b64 s[30:31]
7963;
7964; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_4:
7965; GFX90A:       ; %bb.0:
7966; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7967; GFX90A-NEXT:    ;;#ASMSTART
7968; GFX90A-NEXT:    ; def v[0:1]
7969; GFX90A-NEXT:    ;;#ASMEND
7970; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
7971; GFX90A-NEXT:    ;;#ASMSTART
7972; GFX90A-NEXT:    ; def v[2:3]
7973; GFX90A-NEXT:    ;;#ASMEND
7974; GFX90A-NEXT:    v_perm_b32 v1, v2, v1, s4
7975; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7976; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7977; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
7978; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
7979; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7980; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7981;
7982; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_4:
7983; GFX940:       ; %bb.0:
7984; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7985; GFX940-NEXT:    ;;#ASMSTART
7986; GFX940-NEXT:    ; def v[0:1]
7987; GFX940-NEXT:    ;;#ASMEND
7988; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
7989; GFX940-NEXT:    ;;#ASMSTART
7990; GFX940-NEXT:    ; def v[2:3]
7991; GFX940-NEXT:    ;;#ASMEND
7992; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7993; GFX940-NEXT:    v_perm_b32 v1, v2, v1, s2
7994; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7995; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
7996; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
7997; GFX940-NEXT:    s_waitcnt vmcnt(0)
7998; GFX940-NEXT:    s_setpc_b64 s[30:31]
7999  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8000  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
8001  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
8002  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8003  ret void
8004}
8005
; Shuffle mask <7, 7, 3, 4>: lanes 0 and 1 read element 3 of %vec1, lane 2
; reads element 3 of %vec0, and lane 3 reads element 0 of %vec1. Both operands
; are used, so the expected code below contains two inline-asm "def" blocks.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
8006define void @v_shuffle_v4i16_v4i16__7_7_3_4(ptr addrspace(1) inreg %ptr) {
8007; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_4:
8008; GFX900:       ; %bb.0:
8009; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8010; GFX900-NEXT:    ;;#ASMSTART
8011; GFX900-NEXT:    ; def v[0:1]
8012; GFX900-NEXT:    ;;#ASMEND
8013; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8014; GFX900-NEXT:    v_mov_b32_e32 v4, 0
8015; GFX900-NEXT:    ;;#ASMSTART
8016; GFX900-NEXT:    ; def v[2:3]
8017; GFX900-NEXT:    ;;#ASMEND
8018; GFX900-NEXT:    v_alignbit_b32 v1, v2, v1, 16
8019; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
8020; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8021; GFX900-NEXT:    s_waitcnt vmcnt(0)
8022; GFX900-NEXT:    s_setpc_b64 s[30:31]
8023;
8024; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_4:
8025; GFX90A:       ; %bb.0:
8026; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8027; GFX90A-NEXT:    ;;#ASMSTART
8028; GFX90A-NEXT:    ; def v[0:1]
8029; GFX90A-NEXT:    ;;#ASMEND
8030; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8031; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8032; GFX90A-NEXT:    ;;#ASMSTART
8033; GFX90A-NEXT:    ; def v[2:3]
8034; GFX90A-NEXT:    ;;#ASMEND
8035; GFX90A-NEXT:    v_alignbit_b32 v1, v2, v1, 16
8036; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
8037; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8038; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8039; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8040;
8041; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_4:
8042; GFX940:       ; %bb.0:
8043; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8044; GFX940-NEXT:    ;;#ASMSTART
8045; GFX940-NEXT:    ; def v[0:1]
8046; GFX940-NEXT:    ;;#ASMEND
8047; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8048; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8049; GFX940-NEXT:    ;;#ASMSTART
8050; GFX940-NEXT:    ; def v[2:3]
8051; GFX940-NEXT:    ;;#ASMEND
8052; GFX940-NEXT:    s_nop 0
8053; GFX940-NEXT:    v_alignbit_b32 v1, v2, v1, 16
8054; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
8055; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
8056; GFX940-NEXT:    s_waitcnt vmcnt(0)
8057; GFX940-NEXT:    s_setpc_b64 s[30:31]
8058  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8059  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
8060  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
8061  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8062  ret void
8063}
8064
; Shuffle mask <7, 7, 5, 4>: every index is >= 4, so all four result lanes are
; read from %vec1 (its elements 3, 3, 1, 0) and %vec0 is not referenced by the
; mask. The expected code below contains a single inline-asm "def" block.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
8065define void @v_shuffle_v4i16_v4i16__7_7_5_4(ptr addrspace(1) inreg %ptr) {
8066; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_4:
8067; GFX900:       ; %bb.0:
8068; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8069; GFX900-NEXT:    ;;#ASMSTART
8070; GFX900-NEXT:    ; def v[0:1]
8071; GFX900-NEXT:    ;;#ASMEND
8072; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8073; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8074; GFX900-NEXT:    v_alignbit_b32 v2, v0, v0, 16
8075; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
8076; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
8077; GFX900-NEXT:    s_waitcnt vmcnt(0)
8078; GFX900-NEXT:    s_setpc_b64 s[30:31]
8079;
8080; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_4:
8081; GFX90A:       ; %bb.0:
8082; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8083; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8084; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8085; GFX90A-NEXT:    ;;#ASMSTART
8086; GFX90A-NEXT:    ; def v[0:1]
8087; GFX90A-NEXT:    ;;#ASMEND
8088; GFX90A-NEXT:    v_alignbit_b32 v3, v0, v0, 16
8089; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
8090; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
8091; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8092; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8093;
8094; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_4:
8095; GFX940:       ; %bb.0:
8096; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8097; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8098; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8099; GFX940-NEXT:    ;;#ASMSTART
8100; GFX940-NEXT:    ; def v[0:1]
8101; GFX940-NEXT:    ;;#ASMEND
8102; GFX940-NEXT:    s_nop 0
8103; GFX940-NEXT:    v_alignbit_b32 v3, v0, v0, 16
8104; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
8105; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
8106; GFX940-NEXT:    s_waitcnt vmcnt(0)
8107; GFX940-NEXT:    s_setpc_b64 s[30:31]
8108  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8109  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
8110  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
8111  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8112  ret void
8113}
8114
; Shuffle mask <7, 7, 6, 4>: every index is >= 4, so all four result lanes are
; read from %vec1 (its elements 3, 3, 2, 0) and %vec0 is not referenced by the
; mask. The expected code below contains a single inline-asm "def" block.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
8115define void @v_shuffle_v4i16_v4i16__7_7_6_4(ptr addrspace(1) inreg %ptr) {
8116; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_4:
8117; GFX900:       ; %bb.0:
8118; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8119; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
8120; GFX900-NEXT:    ;;#ASMSTART
8121; GFX900-NEXT:    ; def v[0:1]
8122; GFX900-NEXT:    ;;#ASMEND
8123; GFX900-NEXT:    v_perm_b32 v2, v0, v1, s4
8124; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8125; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8126; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
8127; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
8128; GFX900-NEXT:    s_waitcnt vmcnt(0)
8129; GFX900-NEXT:    s_setpc_b64 s[30:31]
8130;
8131; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_4:
8132; GFX90A:       ; %bb.0:
8133; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8134; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
8135; GFX90A-NEXT:    ;;#ASMSTART
8136; GFX90A-NEXT:    ; def v[0:1]
8137; GFX90A-NEXT:    ;;#ASMEND
8138; GFX90A-NEXT:    v_perm_b32 v3, v0, v1, s4
8139; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8140; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8141; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
8142; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
8143; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8144; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8145;
8146; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_4:
8147; GFX940:       ; %bb.0:
8148; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8149; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
8150; GFX940-NEXT:    ;;#ASMSTART
8151; GFX940-NEXT:    ; def v[0:1]
8152; GFX940-NEXT:    ;;#ASMEND
8153; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8154; GFX940-NEXT:    v_perm_b32 v3, v0, v1, s2
8155; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8156; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
8157; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
8158; GFX940-NEXT:    s_waitcnt vmcnt(0)
8159; GFX940-NEXT:    s_setpc_b64 s[30:31]
8160  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8161  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
8162  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
8163  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8164  ret void
8165}
8166
; Shuffle mask <poison, 5, 5, 5>: lane 0 is left unspecified (poison index) and
; lanes 1-3 all read element 1 of %vec1. %vec0 is not referenced by the mask;
; the expected code below contains a single inline-asm "def" block.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
8167define void @v_shuffle_v4i16_v4i16__u_5_5_5(ptr addrspace(1) inreg %ptr) {
8168; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_5_5_5:
8169; GFX900:       ; %bb.0:
8170; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8171; GFX900-NEXT:    ;;#ASMSTART
8172; GFX900-NEXT:    ; def v[0:1]
8173; GFX900-NEXT:    ;;#ASMEND
8174; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8175; GFX900-NEXT:    v_mov_b32_e32 v2, 0
8176; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
8177; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
8178; GFX900-NEXT:    s_waitcnt vmcnt(0)
8179; GFX900-NEXT:    s_setpc_b64 s[30:31]
8180;
8181; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_5_5_5:
8182; GFX90A:       ; %bb.0:
8183; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8184; GFX90A-NEXT:    ;;#ASMSTART
8185; GFX90A-NEXT:    ; def v[0:1]
8186; GFX90A-NEXT:    ;;#ASMEND
8187; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8188; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
8189; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
8190; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
8191; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8192; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8193;
8194; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_5_5_5:
8195; GFX940:       ; %bb.0:
8196; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8197; GFX940-NEXT:    ;;#ASMSTART
8198; GFX940-NEXT:    ; def v[0:1]
8199; GFX940-NEXT:    ;;#ASMEND
8200; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8201; GFX940-NEXT:    v_mov_b32_e32 v2, 0
8202; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
8203; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
8204; GFX940-NEXT:    s_waitcnt vmcnt(0)
8205; GFX940-NEXT:    s_setpc_b64 s[30:31]
8206  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8207  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
8208  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
8209  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8210  ret void
8211}
8212
; Shuffle mask <0, 5, 5, 5>: lane 0 reads element 0 of %vec0 and lanes 1-3 all
; read element 1 of %vec1. Both operands are used, so the expected code below
; contains two inline-asm "def" blocks.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
8213define void @v_shuffle_v4i16_v4i16__0_5_5_5(ptr addrspace(1) inreg %ptr) {
8214; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_5_5_5:
8215; GFX900:       ; %bb.0:
8216; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8217; GFX900-NEXT:    ;;#ASMSTART
8218; GFX900-NEXT:    ; def v[0:1]
8219; GFX900-NEXT:    ;;#ASMEND
8220; GFX900-NEXT:    s_mov_b32 s4, 0xffff
8221; GFX900-NEXT:    ;;#ASMSTART
8222; GFX900-NEXT:    ; def v[1:2]
8223; GFX900-NEXT:    ;;#ASMEND
8224; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
8225; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8226; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8227; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
8228; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
8229; GFX900-NEXT:    s_waitcnt vmcnt(0)
8230; GFX900-NEXT:    s_setpc_b64 s[30:31]
8231;
8232; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_5_5_5:
8233; GFX90A:       ; %bb.0:
8234; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8235; GFX90A-NEXT:    ;;#ASMSTART
8236; GFX90A-NEXT:    ; def v[0:1]
8237; GFX90A-NEXT:    ;;#ASMEND
8238; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
8239; GFX90A-NEXT:    ;;#ASMSTART
8240; GFX90A-NEXT:    ; def v[2:3]
8241; GFX90A-NEXT:    ;;#ASMEND
8242; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v2
8243; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8244; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8245; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
8246; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8247; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8248; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8249;
8250; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_5_5_5:
8251; GFX940:       ; %bb.0:
8252; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8253; GFX940-NEXT:    ;;#ASMSTART
8254; GFX940-NEXT:    ; def v[0:1]
8255; GFX940-NEXT:    ;;#ASMEND
8256; GFX940-NEXT:    s_mov_b32 s2, 0xffff
8257; GFX940-NEXT:    ;;#ASMSTART
8258; GFX940-NEXT:    ; def v[2:3]
8259; GFX940-NEXT:    ;;#ASMEND
8260; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8261; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v2
8262; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8263; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
8264; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
8265; GFX940-NEXT:    s_waitcnt vmcnt(0)
8266; GFX940-NEXT:    s_setpc_b64 s[30:31]
8267  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8268  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
8269  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
8270  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8271  ret void
8272}
8273
; Shuffle mask <1, 5, 5, 5>: lane 0 reads element 1 of %vec0 and lanes 1-3 all
; read element 1 of %vec1. Both operands are used, so the expected code below
; contains two inline-asm "def" blocks.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
8274define void @v_shuffle_v4i16_v4i16__1_5_5_5(ptr addrspace(1) inreg %ptr) {
8275; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_5_5_5:
8276; GFX900:       ; %bb.0:
8277; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8278; GFX900-NEXT:    ;;#ASMSTART
8279; GFX900-NEXT:    ; def v[0:1]
8280; GFX900-NEXT:    ;;#ASMEND
8281; GFX900-NEXT:    ;;#ASMSTART
8282; GFX900-NEXT:    ; def v[1:2]
8283; GFX900-NEXT:    ;;#ASMEND
8284; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8285; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8286; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
8287; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
8288; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
8289; GFX900-NEXT:    s_waitcnt vmcnt(0)
8290; GFX900-NEXT:    s_setpc_b64 s[30:31]
8291;
8292; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_5_5_5:
8293; GFX90A:       ; %bb.0:
8294; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8295; GFX90A-NEXT:    ;;#ASMSTART
8296; GFX90A-NEXT:    ; def v[0:1]
8297; GFX90A-NEXT:    ;;#ASMEND
8298; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8299; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8300; GFX90A-NEXT:    ;;#ASMSTART
8301; GFX90A-NEXT:    ; def v[2:3]
8302; GFX90A-NEXT:    ;;#ASMEND
8303; GFX90A-NEXT:    v_perm_b32 v0, v2, v0, s4
8304; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
8305; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8306; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8307; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8308;
8309; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_5_5_5:
8310; GFX940:       ; %bb.0:
8311; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8312; GFX940-NEXT:    ;;#ASMSTART
8313; GFX940-NEXT:    ; def v[0:1]
8314; GFX940-NEXT:    ;;#ASMEND
8315; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8316; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8317; GFX940-NEXT:    ;;#ASMSTART
8318; GFX940-NEXT:    ; def v[2:3]
8319; GFX940-NEXT:    ;;#ASMEND
8320; GFX940-NEXT:    s_nop 0
8321; GFX940-NEXT:    v_perm_b32 v0, v2, v0, s2
8322; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
8323; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
8324; GFX940-NEXT:    s_waitcnt vmcnt(0)
8325; GFX940-NEXT:    s_setpc_b64 s[30:31]
8326  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8327  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
8328  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
8329  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8330  ret void
8331}
8332
; Shuffle mask <2, 5, 5, 5>: lane 0 reads element 2 of %vec0 and lanes 1-3 all
; read element 1 of %vec1. Both operands are used, so the expected code below
; contains two inline-asm "def" blocks.
; The assertions are generated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
8333define void @v_shuffle_v4i16_v4i16__2_5_5_5(ptr addrspace(1) inreg %ptr) {
8334; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_5_5_5:
8335; GFX900:       ; %bb.0:
8336; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8337; GFX900-NEXT:    ;;#ASMSTART
8338; GFX900-NEXT:    ; def v[0:1]
8339; GFX900-NEXT:    ;;#ASMEND
8340; GFX900-NEXT:    s_mov_b32 s4, 0xffff
8341; GFX900-NEXT:    ;;#ASMSTART
8342; GFX900-NEXT:    ; def v[2:3]
8343; GFX900-NEXT:    ;;#ASMEND
8344; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v2
8345; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8346; GFX900-NEXT:    v_mov_b32_e32 v4, 0
8347; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
8348; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8349; GFX900-NEXT:    s_waitcnt vmcnt(0)
8350; GFX900-NEXT:    s_setpc_b64 s[30:31]
8351;
8352; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_5_5_5:
8353; GFX90A:       ; %bb.0:
8354; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8355; GFX90A-NEXT:    ;;#ASMSTART
8356; GFX90A-NEXT:    ; def v[0:1]
8357; GFX90A-NEXT:    ;;#ASMEND
8358; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
8359; GFX90A-NEXT:    ;;#ASMSTART
8360; GFX90A-NEXT:    ; def v[2:3]
8361; GFX90A-NEXT:    ;;#ASMEND
8362; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v2
8363; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8364; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8365; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
8366; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8367; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8368; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8369;
8370; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_5_5_5:
8371; GFX940:       ; %bb.0:
8372; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8373; GFX940-NEXT:    ;;#ASMSTART
8374; GFX940-NEXT:    ; def v[0:1]
8375; GFX940-NEXT:    ;;#ASMEND
8376; GFX940-NEXT:    s_mov_b32 s2, 0xffff
8377; GFX940-NEXT:    ;;#ASMSTART
8378; GFX940-NEXT:    ; def v[2:3]
8379; GFX940-NEXT:    ;;#ASMEND
8380; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8381; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v2
8382; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8383; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
8384; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
8385; GFX940-NEXT:    s_waitcnt vmcnt(0)
8386; GFX940-NEXT:    s_setpc_b64 s[30:31]
8387  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8388  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Mask indices 0-3 select from %vec0 and 4-7 select from %vec1.
8389  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
8390  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8391  ret void
8392}
8393
; Shuffle mask <3, 5, 5, 5>: result = { %vec0[3], %vec1[1], %vec1[1], %vec1[1] }
; (mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1).
; Both inputs come from inline asm with a "=v" output; the result is stored to %ptr.
8394define void @v_shuffle_v4i16_v4i16__3_5_5_5(ptr addrspace(1) inreg %ptr) {
8395; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_5_5_5:
8396; GFX900:       ; %bb.0:
8397; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8398; GFX900-NEXT:    ;;#ASMSTART
8399; GFX900-NEXT:    ; def v[0:1]
8400; GFX900-NEXT:    ;;#ASMEND
8401; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8402; GFX900-NEXT:    v_mov_b32_e32 v4, 0
8403; GFX900-NEXT:    ;;#ASMSTART
8404; GFX900-NEXT:    ; def v[2:3]
8405; GFX900-NEXT:    ;;#ASMEND
8406; GFX900-NEXT:    v_perm_b32 v0, v2, v1, s4
8407; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
8408; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8409; GFX900-NEXT:    s_waitcnt vmcnt(0)
8410; GFX900-NEXT:    s_setpc_b64 s[30:31]
8411;
8412; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_5_5_5:
8413; GFX90A:       ; %bb.0:
8414; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8415; GFX90A-NEXT:    ;;#ASMSTART
8416; GFX90A-NEXT:    ; def v[0:1]
8417; GFX90A-NEXT:    ;;#ASMEND
8418; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8419; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8420; GFX90A-NEXT:    ;;#ASMSTART
8421; GFX90A-NEXT:    ; def v[2:3]
8422; GFX90A-NEXT:    ;;#ASMEND
8423; GFX90A-NEXT:    v_perm_b32 v0, v2, v1, s4
8424; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
8425; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8426; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8427; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8428;
8429; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_5_5_5:
8430; GFX940:       ; %bb.0:
8431; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8432; GFX940-NEXT:    ;;#ASMSTART
8433; GFX940-NEXT:    ; def v[0:1]
8434; GFX940-NEXT:    ;;#ASMEND
8435; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8436; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8437; GFX940-NEXT:    ;;#ASMSTART
8438; GFX940-NEXT:    ; def v[2:3]
8439; GFX940-NEXT:    ;;#ASMEND
8440; GFX940-NEXT:    s_nop 0
8441; GFX940-NEXT:    v_perm_b32 v0, v2, v1, s2
8442; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
8443; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
8444; GFX940-NEXT:    s_waitcnt vmcnt(0)
8445; GFX940-NEXT:    s_setpc_b64 s[30:31]
8446  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8447  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8448  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
8449  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8450  ret void
8451}
8452
; Shuffle mask <4, 5, 5, 5>: result = { %vec1[0], %vec1[1], %vec1[1], %vec1[1] }
; (mask indices 4-7 select lanes of %vec1). No lane of %vec0 is selected, and the
; expected output below contains only a single asm def.
8453define void @v_shuffle_v4i16_v4i16__4_5_5_5(ptr addrspace(1) inreg %ptr) {
8454; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_5_5_5:
8455; GFX900:       ; %bb.0:
8456; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8457; GFX900-NEXT:    ;;#ASMSTART
8458; GFX900-NEXT:    ; def v[0:1]
8459; GFX900-NEXT:    ;;#ASMEND
8460; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8461; GFX900-NEXT:    v_mov_b32_e32 v2, 0
8462; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
8463; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
8464; GFX900-NEXT:    s_waitcnt vmcnt(0)
8465; GFX900-NEXT:    s_setpc_b64 s[30:31]
8466;
8467; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_5_5_5:
8468; GFX90A:       ; %bb.0:
8469; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8470; GFX90A-NEXT:    ;;#ASMSTART
8471; GFX90A-NEXT:    ; def v[0:1]
8472; GFX90A-NEXT:    ;;#ASMEND
8473; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8474; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
8475; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
8476; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
8477; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8478; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8479;
8480; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_5_5_5:
8481; GFX940:       ; %bb.0:
8482; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8483; GFX940-NEXT:    ;;#ASMSTART
8484; GFX940-NEXT:    ; def v[0:1]
8485; GFX940-NEXT:    ;;#ASMEND
8486; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8487; GFX940-NEXT:    v_mov_b32_e32 v2, 0
8488; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
8489; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
8490; GFX940-NEXT:    s_waitcnt vmcnt(0)
8491; GFX940-NEXT:    s_setpc_b64 s[30:31]
8492  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8493  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8494  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
8495  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8496  ret void
8497}
8498
; Shuffle mask <5, 5, 5, 5>: every result lane is %vec1[1] (a splat of one lane;
; mask indices 4-7 select lanes of %vec1). No lane of %vec0 is selected, and the
; expected output below contains only a single asm def.
8499define void @v_shuffle_v4i16_v4i16__5_5_5_5(ptr addrspace(1) inreg %ptr) {
8500; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_5_5_5:
8501; GFX900:       ; %bb.0:
8502; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8503; GFX900-NEXT:    ;;#ASMSTART
8504; GFX900-NEXT:    ; def v[0:1]
8505; GFX900-NEXT:    ;;#ASMEND
8506; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8507; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
8508; GFX900-NEXT:    v_mov_b32_e32 v2, 0
8509; GFX900-NEXT:    v_mov_b32_e32 v1, v0
8510; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
8511; GFX900-NEXT:    s_waitcnt vmcnt(0)
8512; GFX900-NEXT:    s_setpc_b64 s[30:31]
8513;
8514; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_5_5_5:
8515; GFX90A:       ; %bb.0:
8516; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8517; GFX90A-NEXT:    ;;#ASMSTART
8518; GFX90A-NEXT:    ; def v[0:1]
8519; GFX90A-NEXT:    ;;#ASMEND
8520; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8521; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
8522; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
8523; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
8524; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
8525; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8526; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8527;
8528; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_5_5_5:
8529; GFX940:       ; %bb.0:
8530; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8531; GFX940-NEXT:    ;;#ASMSTART
8532; GFX940-NEXT:    ; def v[0:1]
8533; GFX940-NEXT:    ;;#ASMEND
8534; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8535; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
8536; GFX940-NEXT:    v_mov_b32_e32 v2, 0
8537; GFX940-NEXT:    v_mov_b32_e32 v1, v0
8538; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
8539; GFX940-NEXT:    s_waitcnt vmcnt(0)
8540; GFX940-NEXT:    s_setpc_b64 s[30:31]
8541  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8542  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8543  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
8544  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8545  ret void
8546}
8547
; Shuffle mask <6, 5, 5, 5>: result = { %vec1[2], %vec1[1], %vec1[1], %vec1[1] }
; (mask indices 4-7 select lanes of %vec1). No lane of %vec0 is selected, and the
; expected output below contains only a single asm def.
8548define void @v_shuffle_v4i16_v4i16__6_5_5_5(ptr addrspace(1) inreg %ptr) {
8549; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_5_5_5:
8550; GFX900:       ; %bb.0:
8551; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8552; GFX900-NEXT:    ;;#ASMSTART
8553; GFX900-NEXT:    ; def v[0:1]
8554; GFX900-NEXT:    ;;#ASMEND
8555; GFX900-NEXT:    s_mov_b32 s4, 0xffff
8556; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
8557; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8558; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8559; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
8560; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
8561; GFX900-NEXT:    s_waitcnt vmcnt(0)
8562; GFX900-NEXT:    s_setpc_b64 s[30:31]
8563;
8564; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_5_5_5:
8565; GFX90A:       ; %bb.0:
8566; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8567; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
8568; GFX90A-NEXT:    ;;#ASMSTART
8569; GFX90A-NEXT:    ; def v[0:1]
8570; GFX90A-NEXT:    ;;#ASMEND
8571; GFX90A-NEXT:    v_bfi_b32 v2, s4, v1, v0
8572; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8573; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8574; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
8575; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
8576; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8577; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8578;
8579; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_5_5_5:
8580; GFX940:       ; %bb.0:
8581; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8582; GFX940-NEXT:    s_mov_b32 s2, 0xffff
8583; GFX940-NEXT:    ;;#ASMSTART
8584; GFX940-NEXT:    ; def v[0:1]
8585; GFX940-NEXT:    ;;#ASMEND
8586; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8587; GFX940-NEXT:    v_bfi_b32 v2, s2, v1, v0
8588; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8589; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
8590; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
8591; GFX940-NEXT:    s_waitcnt vmcnt(0)
8592; GFX940-NEXT:    s_setpc_b64 s[30:31]
8593  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8594  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8595  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
8596  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8597  ret void
8598}
8599
; Shuffle mask <7, 5, 5, 5>: result = { %vec1[3], %vec1[1], %vec1[1], %vec1[1] }
; (mask indices 4-7 select lanes of %vec1). No lane of %vec0 is selected, and the
; expected output below contains only a single asm def.
8600define void @v_shuffle_v4i16_v4i16__7_5_5_5(ptr addrspace(1) inreg %ptr) {
8601; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_5_5:
8602; GFX900:       ; %bb.0:
8603; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8604; GFX900-NEXT:    ;;#ASMSTART
8605; GFX900-NEXT:    ; def v[0:1]
8606; GFX900-NEXT:    ;;#ASMEND
8607; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8608; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8609; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
8610; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
8611; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
8612; GFX900-NEXT:    s_waitcnt vmcnt(0)
8613; GFX900-NEXT:    s_setpc_b64 s[30:31]
8614;
8615; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_5_5:
8616; GFX90A:       ; %bb.0:
8617; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8618; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8619; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8620; GFX90A-NEXT:    ;;#ASMSTART
8621; GFX90A-NEXT:    ; def v[0:1]
8622; GFX90A-NEXT:    ;;#ASMEND
8623; GFX90A-NEXT:    v_perm_b32 v2, v0, v1, s4
8624; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
8625; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
8626; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8627; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8628;
8629; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_5_5:
8630; GFX940:       ; %bb.0:
8631; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8632; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8633; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8634; GFX940-NEXT:    ;;#ASMSTART
8635; GFX940-NEXT:    ; def v[0:1]
8636; GFX940-NEXT:    ;;#ASMEND
8637; GFX940-NEXT:    s_nop 0
8638; GFX940-NEXT:    v_perm_b32 v2, v0, v1, s2
8639; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
8640; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
8641; GFX940-NEXT:    s_waitcnt vmcnt(0)
8642; GFX940-NEXT:    s_setpc_b64 s[30:31]
8643  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8644  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8645  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
8646  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8647  ret void
8648}
8649
; Shuffle mask <7, poison, 5, 5>: result = { %vec1[3], poison, %vec1[1], %vec1[1] }
; (mask indices 4-7 select lanes of %vec1; the "u" in the name marks the poison lane).
; No lane of %vec0 is selected, and the expected output below contains only a
; single asm def.
8650define void @v_shuffle_v4i16_v4i16__7_u_5_5(ptr addrspace(1) inreg %ptr) {
8651; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_5_5:
8652; GFX900:       ; %bb.0:
8653; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8654; GFX900-NEXT:    ;;#ASMSTART
8655; GFX900-NEXT:    ; def v[0:1]
8656; GFX900-NEXT:    ;;#ASMEND
8657; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8658; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8659; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
8660; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
8661; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
8662; GFX900-NEXT:    s_waitcnt vmcnt(0)
8663; GFX900-NEXT:    s_setpc_b64 s[30:31]
8664;
8665; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_5_5:
8666; GFX90A:       ; %bb.0:
8667; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8668; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8669; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8670; GFX90A-NEXT:    ;;#ASMSTART
8671; GFX90A-NEXT:    ; def v[0:1]
8672; GFX90A-NEXT:    ;;#ASMEND
8673; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
8674; GFX90A-NEXT:    v_alignbit_b32 v2, s4, v1, 16
8675; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
8676; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8677; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8678;
8679; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_5_5:
8680; GFX940:       ; %bb.0:
8681; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8682; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8683; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8684; GFX940-NEXT:    ;;#ASMSTART
8685; GFX940-NEXT:    ; def v[0:1]
8686; GFX940-NEXT:    ;;#ASMEND
8687; GFX940-NEXT:    s_nop 0
8688; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
8689; GFX940-NEXT:    v_alignbit_b32 v2, s0, v1, 16
8690; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
8691; GFX940-NEXT:    s_waitcnt vmcnt(0)
8692; GFX940-NEXT:    s_setpc_b64 s[30:31]
8693  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8694  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8695  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
8696  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8697  ret void
8698}
8699
; Shuffle mask <7, 0, 5, 5>: result = { %vec1[3], %vec0[0], %vec1[1], %vec1[1] }
; (mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1).
; Both inputs come from inline asm with a "=v" output; the result is stored to %ptr.
8700define void @v_shuffle_v4i16_v4i16__7_0_5_5(ptr addrspace(1) inreg %ptr) {
8701; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_5_5:
8702; GFX900:       ; %bb.0:
8703; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8704; GFX900-NEXT:    ;;#ASMSTART
8705; GFX900-NEXT:    ; def v[0:1]
8706; GFX900-NEXT:    ;;#ASMEND
8707; GFX900-NEXT:    ;;#ASMSTART
8708; GFX900-NEXT:    ; def v[1:2]
8709; GFX900-NEXT:    ;;#ASMEND
8710; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8711; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8712; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
8713; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
8714; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
8715; GFX900-NEXT:    s_waitcnt vmcnt(0)
8716; GFX900-NEXT:    s_setpc_b64 s[30:31]
8717;
8718; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_5_5:
8719; GFX90A:       ; %bb.0:
8720; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8721; GFX90A-NEXT:    ;;#ASMSTART
8722; GFX90A-NEXT:    ; def v[0:1]
8723; GFX90A-NEXT:    ;;#ASMEND
8724; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8725; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8726; GFX90A-NEXT:    ;;#ASMSTART
8727; GFX90A-NEXT:    ; def v[2:3]
8728; GFX90A-NEXT:    ;;#ASMEND
8729; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
8730; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
8731; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8732; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8733; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8734;
8735; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_5_5:
8736; GFX940:       ; %bb.0:
8737; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8738; GFX940-NEXT:    ;;#ASMSTART
8739; GFX940-NEXT:    ; def v[0:1]
8740; GFX940-NEXT:    ;;#ASMEND
8741; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8742; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8743; GFX940-NEXT:    ;;#ASMSTART
8744; GFX940-NEXT:    ; def v[2:3]
8745; GFX940-NEXT:    ;;#ASMEND
8746; GFX940-NEXT:    s_nop 0
8747; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
8748; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
8749; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
8750; GFX940-NEXT:    s_waitcnt vmcnt(0)
8751; GFX940-NEXT:    s_setpc_b64 s[30:31]
8752  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8753  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8754  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
8755  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8756  ret void
8757}
8758
; Shuffle mask <7, 1, 5, 5>: result = { %vec1[3], %vec0[1], %vec1[1], %vec1[1] }
; (mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1).
; Both inputs come from inline asm with a "=v" output; the result is stored to %ptr.
8759define void @v_shuffle_v4i16_v4i16__7_1_5_5(ptr addrspace(1) inreg %ptr) {
8760; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_5_5:
8761; GFX900:       ; %bb.0:
8762; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8763; GFX900-NEXT:    ;;#ASMSTART
8764; GFX900-NEXT:    ; def v[0:1]
8765; GFX900-NEXT:    ;;#ASMEND
8766; GFX900-NEXT:    ;;#ASMSTART
8767; GFX900-NEXT:    ; def v[1:2]
8768; GFX900-NEXT:    ;;#ASMEND
8769; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8770; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8771; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
8772; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
8773; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
8774; GFX900-NEXT:    s_waitcnt vmcnt(0)
8775; GFX900-NEXT:    s_setpc_b64 s[30:31]
8776;
8777; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_5_5:
8778; GFX90A:       ; %bb.0:
8779; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8780; GFX90A-NEXT:    ;;#ASMSTART
8781; GFX90A-NEXT:    ; def v[0:1]
8782; GFX90A-NEXT:    ;;#ASMEND
8783; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8784; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8785; GFX90A-NEXT:    ;;#ASMSTART
8786; GFX90A-NEXT:    ; def v[2:3]
8787; GFX90A-NEXT:    ;;#ASMEND
8788; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
8789; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
8790; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8791; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8792; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8793;
8794; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_5_5:
8795; GFX940:       ; %bb.0:
8796; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8797; GFX940-NEXT:    ;;#ASMSTART
8798; GFX940-NEXT:    ; def v[0:1]
8799; GFX940-NEXT:    ;;#ASMEND
8800; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8801; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8802; GFX940-NEXT:    ;;#ASMSTART
8803; GFX940-NEXT:    ; def v[2:3]
8804; GFX940-NEXT:    ;;#ASMEND
8805; GFX940-NEXT:    s_nop 0
8806; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
8807; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
8808; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
8809; GFX940-NEXT:    s_waitcnt vmcnt(0)
8810; GFX940-NEXT:    s_setpc_b64 s[30:31]
8811  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8812  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8813  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
8814  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8815  ret void
8816}
8817
; Shuffle mask <7, 2, 5, 5>: result = { %vec1[3], %vec0[2], %vec1[1], %vec1[1] }
; (mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1).
; Both inputs come from inline asm with a "=v" output; the result is stored to %ptr.
8818define void @v_shuffle_v4i16_v4i16__7_2_5_5(ptr addrspace(1) inreg %ptr) {
8819; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_5_5:
8820; GFX900:       ; %bb.0:
8821; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8822; GFX900-NEXT:    ;;#ASMSTART
8823; GFX900-NEXT:    ; def v[0:1]
8824; GFX900-NEXT:    ;;#ASMEND
8825; GFX900-NEXT:    ;;#ASMSTART
8826; GFX900-NEXT:    ; def v[2:3]
8827; GFX900-NEXT:    ;;#ASMEND
8828; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8829; GFX900-NEXT:    v_mov_b32_e32 v4, 0
8830; GFX900-NEXT:    v_perm_b32 v2, v2, v2, s4
8831; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
8832; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
8833; GFX900-NEXT:    s_waitcnt vmcnt(0)
8834; GFX900-NEXT:    s_setpc_b64 s[30:31]
8835;
8836; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_5_5:
8837; GFX90A:       ; %bb.0:
8838; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8839; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8840; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
8841; GFX90A-NEXT:    ;;#ASMSTART
8842; GFX90A-NEXT:    ; def v[0:1]
8843; GFX90A-NEXT:    ;;#ASMEND
8844; GFX90A-NEXT:    ;;#ASMSTART
8845; GFX90A-NEXT:    ; def v[2:3]
8846; GFX90A-NEXT:    ;;#ASMEND
8847; GFX90A-NEXT:    v_perm_b32 v5, v2, v2, s4
8848; GFX90A-NEXT:    v_alignbit_b32 v4, v1, v3, 16
8849; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17]
8850; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8851; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8852;
8853; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_5_5:
8854; GFX940:       ; %bb.0:
8855; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8856; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8857; GFX940-NEXT:    v_mov_b32_e32 v6, 0
8858; GFX940-NEXT:    ;;#ASMSTART
8859; GFX940-NEXT:    ; def v[0:1]
8860; GFX940-NEXT:    ;;#ASMEND
8861; GFX940-NEXT:    ;;#ASMSTART
8862; GFX940-NEXT:    ; def v[2:3]
8863; GFX940-NEXT:    ;;#ASMEND
8864; GFX940-NEXT:    s_nop 0
8865; GFX940-NEXT:    v_perm_b32 v5, v2, v2, s2
8866; GFX940-NEXT:    v_alignbit_b32 v4, v1, v3, 16
8867; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
8868; GFX940-NEXT:    s_waitcnt vmcnt(0)
8869; GFX940-NEXT:    s_setpc_b64 s[30:31]
8870  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8871  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8872  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
8873  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8874  ret void
8875}
8876
; Shuffle mask <7, 3, 5, 5>: result = { %vec1[3], %vec0[3], %vec1[1], %vec1[1] }
; (mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1).
; Both inputs come from inline asm with a "=v" output; the result is stored to %ptr.
8877define void @v_shuffle_v4i16_v4i16__7_3_5_5(ptr addrspace(1) inreg %ptr) {
8878; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_5_5:
8879; GFX900:       ; %bb.0:
8880; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8881; GFX900-NEXT:    ;;#ASMSTART
8882; GFX900-NEXT:    ; def v[0:1]
8883; GFX900-NEXT:    ;;#ASMEND
8884; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8885; GFX900-NEXT:    v_mov_b32_e32 v4, 0
8886; GFX900-NEXT:    ;;#ASMSTART
8887; GFX900-NEXT:    ; def v[2:3]
8888; GFX900-NEXT:    ;;#ASMEND
8889; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
8890; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
8891; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8892; GFX900-NEXT:    s_waitcnt vmcnt(0)
8893; GFX900-NEXT:    s_setpc_b64 s[30:31]
8894;
8895; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_5_5:
8896; GFX90A:       ; %bb.0:
8897; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8898; GFX90A-NEXT:    ;;#ASMSTART
8899; GFX90A-NEXT:    ; def v[0:1]
8900; GFX90A-NEXT:    ;;#ASMEND
8901; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8902; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8903; GFX90A-NEXT:    ;;#ASMSTART
8904; GFX90A-NEXT:    ; def v[2:3]
8905; GFX90A-NEXT:    ;;#ASMEND
8906; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
8907; GFX90A-NEXT:    v_perm_b32 v1, v2, v2, s4
8908; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
8909; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8910; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8911;
8912; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_5_5:
8913; GFX940:       ; %bb.0:
8914; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8915; GFX940-NEXT:    ;;#ASMSTART
8916; GFX940-NEXT:    ; def v[0:1]
8917; GFX940-NEXT:    ;;#ASMEND
8918; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8919; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8920; GFX940-NEXT:    ;;#ASMSTART
8921; GFX940-NEXT:    ; def v[2:3]
8922; GFX940-NEXT:    ;;#ASMEND
8923; GFX940-NEXT:    s_nop 0
8924; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
8925; GFX940-NEXT:    v_perm_b32 v1, v2, v2, s2
8926; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
8927; GFX940-NEXT:    s_waitcnt vmcnt(0)
8928; GFX940-NEXT:    s_setpc_b64 s[30:31]
8929  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8930  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8931  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
8932  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8933  ret void
8934}
8935
; Shuffle mask <7, 4, 5, 5>: result = { %vec1[3], %vec1[0], %vec1[1], %vec1[1] }
; (mask indices 4-7 select lanes of %vec1). No lane of %vec0 is selected, and the
; expected output below contains only a single asm def.
8936define void @v_shuffle_v4i16_v4i16__7_4_5_5(ptr addrspace(1) inreg %ptr) {
8937; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_5_5:
8938; GFX900:       ; %bb.0:
8939; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8940; GFX900-NEXT:    ;;#ASMSTART
8941; GFX900-NEXT:    ; def v[0:1]
8942; GFX900-NEXT:    ;;#ASMEND
8943; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8944; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8945; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
8946; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
8947; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
8948; GFX900-NEXT:    s_waitcnt vmcnt(0)
8949; GFX900-NEXT:    s_setpc_b64 s[30:31]
8950;
8951; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_5_5:
8952; GFX90A:       ; %bb.0:
8953; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8954; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
8955; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
8956; GFX90A-NEXT:    ;;#ASMSTART
8957; GFX90A-NEXT:    ; def v[0:1]
8958; GFX90A-NEXT:    ;;#ASMEND
8959; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
8960; GFX90A-NEXT:    v_alignbit_b32 v2, v0, v1, 16
8961; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
8962; GFX90A-NEXT:    s_waitcnt vmcnt(0)
8963; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8964;
8965; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_5_5:
8966; GFX940:       ; %bb.0:
8967; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8968; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
8969; GFX940-NEXT:    v_mov_b32_e32 v4, 0
8970; GFX940-NEXT:    ;;#ASMSTART
8971; GFX940-NEXT:    ; def v[0:1]
8972; GFX940-NEXT:    ;;#ASMEND
8973; GFX940-NEXT:    s_nop 0
8974; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
8975; GFX940-NEXT:    v_alignbit_b32 v2, v0, v1, 16
8976; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
8977; GFX940-NEXT:    s_waitcnt vmcnt(0)
8978; GFX940-NEXT:    s_setpc_b64 s[30:31]
8979  %vec0 = call <4 x i16> asm "; def $0", "=v"()
8980  %vec1 = call <4 x i16> asm "; def $0", "=v"()
8981  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
8982  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
8983  ret void
8984}
8985
; Shuffle mask <7, 6, 5, 5>: result = { %vec1[3], %vec1[2], %vec1[1], %vec1[1] }
; (mask indices 4-7 select lanes of %vec1). No lane of %vec0 is selected, and the
; expected output below contains only a single asm def.
8986define void @v_shuffle_v4i16_v4i16__7_6_5_5(ptr addrspace(1) inreg %ptr) {
8987; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_5_5:
8988; GFX900:       ; %bb.0:
8989; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8990; GFX900-NEXT:    ;;#ASMSTART
8991; GFX900-NEXT:    ; def v[0:1]
8992; GFX900-NEXT:    ;;#ASMEND
8993; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
8994; GFX900-NEXT:    v_mov_b32_e32 v3, 0
8995; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
8996; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
8997; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
8998; GFX900-NEXT:    s_waitcnt vmcnt(0)
8999; GFX900-NEXT:    s_setpc_b64 s[30:31]
9000;
9001; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_5_5:
9002; GFX90A:       ; %bb.0:
9003; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9004; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
9005; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9006; GFX90A-NEXT:    ;;#ASMSTART
9007; GFX90A-NEXT:    ; def v[0:1]
9008; GFX90A-NEXT:    ;;#ASMEND
9009; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
9010; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v1, 16
9011; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
9012; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9013; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9014;
9015; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_5_5:
9016; GFX940:       ; %bb.0:
9017; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9018; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
9019; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9020; GFX940-NEXT:    ;;#ASMSTART
9021; GFX940-NEXT:    ; def v[0:1]
9022; GFX940-NEXT:    ;;#ASMEND
9023; GFX940-NEXT:    s_nop 0
9024; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
9025; GFX940-NEXT:    v_alignbit_b32 v2, v1, v1, 16
9026; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
9027; GFX940-NEXT:    s_waitcnt vmcnt(0)
9028; GFX940-NEXT:    s_setpc_b64 s[30:31]
9029  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9030  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9031  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
9032  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9033  ret void
9034}
9035
; Shuffle mask <7, 7, 5, 5>: result = { %vec1[3], %vec1[3], %vec1[1], %vec1[1] }
; (mask indices 4-7 select lanes of %vec1). No lane of %vec0 is selected, and the
; expected output below contains only a single asm def.
9036define void @v_shuffle_v4i16_v4i16__7_7_5_5(ptr addrspace(1) inreg %ptr) {
9037; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_5:
9038; GFX900:       ; %bb.0:
9039; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9040; GFX900-NEXT:    ;;#ASMSTART
9041; GFX900-NEXT:    ; def v[0:1]
9042; GFX900-NEXT:    ;;#ASMEND
9043; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
9044; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9045; GFX900-NEXT:    v_perm_b32 v2, v0, v0, s4
9046; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
9047; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
9048; GFX900-NEXT:    s_waitcnt vmcnt(0)
9049; GFX900-NEXT:    s_setpc_b64 s[30:31]
9050;
9051; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_5:
9052; GFX90A:       ; %bb.0:
9053; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9054; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
9055; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9056; GFX90A-NEXT:    ;;#ASMSTART
9057; GFX90A-NEXT:    ; def v[0:1]
9058; GFX90A-NEXT:    ;;#ASMEND
9059; GFX90A-NEXT:    v_perm_b32 v3, v0, v0, s4
9060; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
9061; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
9062; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9063; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9064;
9065; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_5:
9066; GFX940:       ; %bb.0:
9067; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9068; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
9069; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9070; GFX940-NEXT:    ;;#ASMSTART
9071; GFX940-NEXT:    ; def v[0:1]
9072; GFX940-NEXT:    ;;#ASMEND
9073; GFX940-NEXT:    s_nop 0
9074; GFX940-NEXT:    v_perm_b32 v3, v0, v0, s2
9075; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
9076; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
9077; GFX940-NEXT:    s_waitcnt vmcnt(0)
9078; GFX940-NEXT:    s_setpc_b64 s[30:31]
9079  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9080  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9081  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
9082  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9083  ret void
9084}
9085
; Shuffle mask <7, 7, poison, 5>: result = { %vec1[3], %vec1[3], poison, %vec1[1] }
; (mask indices 4-7 select lanes of %vec1; the "u" in the name marks the poison lane).
; No lane of %vec0 is selected, and the expected output below contains only a
; single asm def.
9086define void @v_shuffle_v4i16_v4i16__7_7_u_5(ptr addrspace(1) inreg %ptr) {
9087; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_5:
9088; GFX900:       ; %bb.0:
9089; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9090; GFX900-NEXT:    ;;#ASMSTART
9091; GFX900-NEXT:    ; def v[0:1]
9092; GFX900-NEXT:    ;;#ASMEND
9093; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
9094; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9095; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
9096; GFX900-NEXT:    v_mov_b32_e32 v2, v0
9097; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
9098; GFX900-NEXT:    s_waitcnt vmcnt(0)
9099; GFX900-NEXT:    s_setpc_b64 s[30:31]
9100;
9101; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_5:
9102; GFX90A:       ; %bb.0:
9103; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9104; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
9105; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9106; GFX90A-NEXT:    ;;#ASMSTART
9107; GFX90A-NEXT:    ; def v[0:1]
9108; GFX90A-NEXT:    ;;#ASMEND
9109; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
9110; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
9111; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
9112; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9113; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9114;
9115; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_5:
9116; GFX940:       ; %bb.0:
9117; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9118; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
9119; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9120; GFX940-NEXT:    ;;#ASMSTART
9121; GFX940-NEXT:    ; def v[0:1]
9122; GFX940-NEXT:    ;;#ASMEND
9123; GFX940-NEXT:    s_nop 0
9124; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
9125; GFX940-NEXT:    v_mov_b32_e32 v3, v0
9126; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
9127; GFX940-NEXT:    s_waitcnt vmcnt(0)
9128; GFX940-NEXT:    s_setpc_b64 s[30:31]
9129  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9130  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9131  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
9132  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9133  ret void
9134}
9135
; Shuffles two inline-asm-defined <4 x i16> values with mask <7, 7, 0, 5> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec1[3], vec1[3], vec0[0], vec1[1]>.
; Both inputs are live; the expected output combines them with one v_bfi_b32
; (mask 0xffff) and one v_perm_b32.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9136define void @v_shuffle_v4i16_v4i16__7_7_0_5(ptr addrspace(1) inreg %ptr) {
9137; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_5:
9138; GFX900:       ; %bb.0:
9139; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9140; GFX900-NEXT:    ;;#ASMSTART
9141; GFX900-NEXT:    ; def v[0:1]
9142; GFX900-NEXT:    ;;#ASMEND
9143; GFX900-NEXT:    ;;#ASMSTART
9144; GFX900-NEXT:    ; def v[1:2]
9145; GFX900-NEXT:    ;;#ASMEND
9146; GFX900-NEXT:    s_mov_b32 s4, 0xffff
9147; GFX900-NEXT:    v_bfi_b32 v1, s4, v0, v1
9148; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
9149; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9150; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
9151; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
9152; GFX900-NEXT:    s_waitcnt vmcnt(0)
9153; GFX900-NEXT:    s_setpc_b64 s[30:31]
9154;
9155; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_5:
9156; GFX90A:       ; %bb.0:
9157; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9158; GFX90A-NEXT:    ;;#ASMSTART
9159; GFX90A-NEXT:    ; def v[0:1]
9160; GFX90A-NEXT:    ;;#ASMEND
9161; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
9162; GFX90A-NEXT:    ;;#ASMSTART
9163; GFX90A-NEXT:    ; def v[2:3]
9164; GFX90A-NEXT:    ;;#ASMEND
9165; GFX90A-NEXT:    v_bfi_b32 v1, s4, v0, v2
9166; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
9167; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9168; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
9169; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9170; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9171; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9172;
9173; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_5:
9174; GFX940:       ; %bb.0:
9175; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9176; GFX940-NEXT:    ;;#ASMSTART
9177; GFX940-NEXT:    ; def v[0:1]
9178; GFX940-NEXT:    ;;#ASMEND
9179; GFX940-NEXT:    s_mov_b32 s2, 0xffff
9180; GFX940-NEXT:    ;;#ASMSTART
9181; GFX940-NEXT:    ; def v[2:3]
9182; GFX940-NEXT:    ;;#ASMEND
9183; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9184; GFX940-NEXT:    v_bfi_b32 v1, s2, v0, v2
9185; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
9186; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
9187; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
9188; GFX940-NEXT:    s_waitcnt vmcnt(0)
9189; GFX940-NEXT:    s_setpc_b64 s[30:31]
9190  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9191  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9192  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
9193  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9194  ret void
9195}
9196
; Shuffles two inline-asm-defined <4 x i16> values with mask <7, 7, 1, 5> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec1[3], vec1[3], vec0[1], vec1[1]>.
; Both inputs are live; the expected output builds each result dword with a
; v_perm_b32 sharing the selector constant 0x7060302.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9197define void @v_shuffle_v4i16_v4i16__7_7_1_5(ptr addrspace(1) inreg %ptr) {
9198; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_5:
9199; GFX900:       ; %bb.0:
9200; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9201; GFX900-NEXT:    ;;#ASMSTART
9202; GFX900-NEXT:    ; def v[0:1]
9203; GFX900-NEXT:    ;;#ASMEND
9204; GFX900-NEXT:    ;;#ASMSTART
9205; GFX900-NEXT:    ; def v[1:2]
9206; GFX900-NEXT:    ;;#ASMEND
9207; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
9208; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9209; GFX900-NEXT:    v_perm_b32 v1, v1, v0, s4
9210; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
9211; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
9212; GFX900-NEXT:    s_waitcnt vmcnt(0)
9213; GFX900-NEXT:    s_setpc_b64 s[30:31]
9214;
9215; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_5:
9216; GFX90A:       ; %bb.0:
9217; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9218; GFX90A-NEXT:    ;;#ASMSTART
9219; GFX90A-NEXT:    ; def v[0:1]
9220; GFX90A-NEXT:    ;;#ASMEND
9221; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
9222; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9223; GFX90A-NEXT:    ;;#ASMSTART
9224; GFX90A-NEXT:    ; def v[2:3]
9225; GFX90A-NEXT:    ;;#ASMEND
9226; GFX90A-NEXT:    v_perm_b32 v1, v2, v0, s4
9227; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
9228; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9229; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9230; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9231;
9232; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_5:
9233; GFX940:       ; %bb.0:
9234; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9235; GFX940-NEXT:    ;;#ASMSTART
9236; GFX940-NEXT:    ; def v[0:1]
9237; GFX940-NEXT:    ;;#ASMEND
9238; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
9239; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9240; GFX940-NEXT:    ;;#ASMSTART
9241; GFX940-NEXT:    ; def v[2:3]
9242; GFX940-NEXT:    ;;#ASMEND
9243; GFX940-NEXT:    s_nop 0
9244; GFX940-NEXT:    v_perm_b32 v1, v2, v0, s2
9245; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
9246; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
9247; GFX940-NEXT:    s_waitcnt vmcnt(0)
9248; GFX940-NEXT:    s_setpc_b64 s[30:31]
9249  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9250  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9251  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
9252  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9253  ret void
9254}
9255
; Shuffles two inline-asm-defined <4 x i16> values with mask <7, 7, 2, 5> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec1[3], vec1[3], vec0[2], vec1[1]>.
; Both inputs are live; the expected output combines them with one v_bfi_b32
; (mask 0xffff) and one v_perm_b32.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9256define void @v_shuffle_v4i16_v4i16__7_7_2_5(ptr addrspace(1) inreg %ptr) {
9257; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_5:
9258; GFX900:       ; %bb.0:
9259; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9260; GFX900-NEXT:    ;;#ASMSTART
9261; GFX900-NEXT:    ; def v[0:1]
9262; GFX900-NEXT:    ;;#ASMEND
9263; GFX900-NEXT:    s_mov_b32 s4, 0xffff
9264; GFX900-NEXT:    ;;#ASMSTART
9265; GFX900-NEXT:    ; def v[2:3]
9266; GFX900-NEXT:    ;;#ASMEND
9267; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v2
9268; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
9269; GFX900-NEXT:    v_mov_b32_e32 v4, 0
9270; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
9271; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9272; GFX900-NEXT:    s_waitcnt vmcnt(0)
9273; GFX900-NEXT:    s_setpc_b64 s[30:31]
9274;
9275; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_5:
9276; GFX90A:       ; %bb.0:
9277; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9278; GFX90A-NEXT:    ;;#ASMSTART
9279; GFX90A-NEXT:    ; def v[0:1]
9280; GFX90A-NEXT:    ;;#ASMEND
9281; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
9282; GFX90A-NEXT:    ;;#ASMSTART
9283; GFX90A-NEXT:    ; def v[2:3]
9284; GFX90A-NEXT:    ;;#ASMEND
9285; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v2
9286; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
9287; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9288; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
9289; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9290; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9291; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9292;
9293; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_5:
9294; GFX940:       ; %bb.0:
9295; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9296; GFX940-NEXT:    ;;#ASMSTART
9297; GFX940-NEXT:    ; def v[0:1]
9298; GFX940-NEXT:    ;;#ASMEND
9299; GFX940-NEXT:    s_mov_b32 s2, 0xffff
9300; GFX940-NEXT:    ;;#ASMSTART
9301; GFX940-NEXT:    ; def v[2:3]
9302; GFX940-NEXT:    ;;#ASMEND
9303; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9304; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v2
9305; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
9306; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
9307; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
9308; GFX940-NEXT:    s_waitcnt vmcnt(0)
9309; GFX940-NEXT:    s_setpc_b64 s[30:31]
9310  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9311  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9312  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
9313  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9314  ret void
9315}
9316
; Shuffles two inline-asm-defined <4 x i16> values with mask <7, 7, 3, 5> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec1[3], vec1[3], vec0[3], vec1[1]>.
; Both inputs are live; the expected output builds each result dword with a
; v_perm_b32 sharing the selector constant 0x7060302.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9317define void @v_shuffle_v4i16_v4i16__7_7_3_5(ptr addrspace(1) inreg %ptr) {
9318; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_5:
9319; GFX900:       ; %bb.0:
9320; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9321; GFX900-NEXT:    ;;#ASMSTART
9322; GFX900-NEXT:    ; def v[0:1]
9323; GFX900-NEXT:    ;;#ASMEND
9324; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
9325; GFX900-NEXT:    v_mov_b32_e32 v4, 0
9326; GFX900-NEXT:    ;;#ASMSTART
9327; GFX900-NEXT:    ; def v[2:3]
9328; GFX900-NEXT:    ;;#ASMEND
9329; GFX900-NEXT:    v_perm_b32 v1, v2, v1, s4
9330; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
9331; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9332; GFX900-NEXT:    s_waitcnt vmcnt(0)
9333; GFX900-NEXT:    s_setpc_b64 s[30:31]
9334;
9335; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_5:
9336; GFX90A:       ; %bb.0:
9337; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9338; GFX90A-NEXT:    ;;#ASMSTART
9339; GFX90A-NEXT:    ; def v[0:1]
9340; GFX90A-NEXT:    ;;#ASMEND
9341; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
9342; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9343; GFX90A-NEXT:    ;;#ASMSTART
9344; GFX90A-NEXT:    ; def v[2:3]
9345; GFX90A-NEXT:    ;;#ASMEND
9346; GFX90A-NEXT:    v_perm_b32 v1, v2, v1, s4
9347; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
9348; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9349; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9350; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9351;
9352; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_5:
9353; GFX940:       ; %bb.0:
9354; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9355; GFX940-NEXT:    ;;#ASMSTART
9356; GFX940-NEXT:    ; def v[0:1]
9357; GFX940-NEXT:    ;;#ASMEND
9358; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
9359; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9360; GFX940-NEXT:    ;;#ASMSTART
9361; GFX940-NEXT:    ; def v[2:3]
9362; GFX940-NEXT:    ;;#ASMEND
9363; GFX940-NEXT:    s_nop 0
9364; GFX940-NEXT:    v_perm_b32 v1, v2, v1, s2
9365; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
9366; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
9367; GFX940-NEXT:    s_waitcnt vmcnt(0)
9368; GFX940-NEXT:    s_setpc_b64 s[30:31]
9369  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9370  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9371  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
9372  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9373  ret void
9374}
9375
; Shuffles two inline-asm-defined <4 x i16> values with mask <7, 7, 4, 5> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec1[3], vec1[3], vec1[0], vec1[1]>.
; No lane of %vec0 is selected; the expected output contains a single
; inline-asm def, one v_perm_b32 for the low dword and a plain v_mov_b32 copy
; for the high dword.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9376define void @v_shuffle_v4i16_v4i16__7_7_4_5(ptr addrspace(1) inreg %ptr) {
9377; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_5:
9378; GFX900:       ; %bb.0:
9379; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9380; GFX900-NEXT:    ;;#ASMSTART
9381; GFX900-NEXT:    ; def v[0:1]
9382; GFX900-NEXT:    ;;#ASMEND
9383; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
9384; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9385; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
9386; GFX900-NEXT:    v_mov_b32_e32 v2, v0
9387; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
9388; GFX900-NEXT:    s_waitcnt vmcnt(0)
9389; GFX900-NEXT:    s_setpc_b64 s[30:31]
9390;
9391; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_5:
9392; GFX90A:       ; %bb.0:
9393; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9394; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
9395; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9396; GFX90A-NEXT:    ;;#ASMSTART
9397; GFX90A-NEXT:    ; def v[0:1]
9398; GFX90A-NEXT:    ;;#ASMEND
9399; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
9400; GFX90A-NEXT:    v_mov_b32_e32 v3, v0
9401; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
9402; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9403; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9404;
9405; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_5:
9406; GFX940:       ; %bb.0:
9407; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9408; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
9409; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9410; GFX940-NEXT:    ;;#ASMSTART
9411; GFX940-NEXT:    ; def v[0:1]
9412; GFX940-NEXT:    ;;#ASMEND
9413; GFX940-NEXT:    s_nop 0
9414; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
9415; GFX940-NEXT:    v_mov_b32_e32 v3, v0
9416; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
9417; GFX940-NEXT:    s_waitcnt vmcnt(0)
9418; GFX940-NEXT:    s_setpc_b64 s[30:31]
9419  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9420  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9421  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
9422  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9423  ret void
9424}
9425
; Shuffles two inline-asm-defined <4 x i16> values with mask <7, 7, 6, 5> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec1[3], vec1[3], vec1[2], vec1[1]>.
; No lane of %vec0 is selected; the expected output contains a single
; inline-asm def, one v_bfi_b32 (mask 0xffff) and one v_perm_b32.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9426define void @v_shuffle_v4i16_v4i16__7_7_6_5(ptr addrspace(1) inreg %ptr) {
9427; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_5:
9428; GFX900:       ; %bb.0:
9429; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9430; GFX900-NEXT:    s_mov_b32 s4, 0xffff
9431; GFX900-NEXT:    ;;#ASMSTART
9432; GFX900-NEXT:    ; def v[0:1]
9433; GFX900-NEXT:    ;;#ASMEND
9434; GFX900-NEXT:    v_bfi_b32 v2, s4, v1, v0
9435; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
9436; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9437; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
9438; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
9439; GFX900-NEXT:    s_waitcnt vmcnt(0)
9440; GFX900-NEXT:    s_setpc_b64 s[30:31]
9441;
9442; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_5:
9443; GFX90A:       ; %bb.0:
9444; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9445; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
9446; GFX90A-NEXT:    ;;#ASMSTART
9447; GFX90A-NEXT:    ; def v[0:1]
9448; GFX90A-NEXT:    ;;#ASMEND
9449; GFX90A-NEXT:    v_bfi_b32 v3, s4, v1, v0
9450; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
9451; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9452; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
9453; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
9454; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9455; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9456;
9457; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_5:
9458; GFX940:       ; %bb.0:
9459; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9460; GFX940-NEXT:    s_mov_b32 s2, 0xffff
9461; GFX940-NEXT:    ;;#ASMSTART
9462; GFX940-NEXT:    ; def v[0:1]
9463; GFX940-NEXT:    ;;#ASMEND
9464; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9465; GFX940-NEXT:    v_bfi_b32 v3, s2, v1, v0
9466; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
9467; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
9468; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
9469; GFX940-NEXT:    s_waitcnt vmcnt(0)
9470; GFX940-NEXT:    s_setpc_b64 s[30:31]
9471  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9472  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9473  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
9474  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9475  ret void
9476}
9477
; Shuffles two inline-asm-defined <4 x i16> values with mask
; <poison, 6, 6, 6> and stores the result to %ptr. Mask indices 0-3 select
; lanes of %vec0 and 4-7 select lanes of %vec1, so the result is
; <poison, vec1[2], vec1[2], vec1[2]>.
; No lane of %vec0 is selected; the expected output contains a single
; inline-asm def, one v_perm_b32 (selector 0x5040100) and a 16-bit left shift.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9478define void @v_shuffle_v4i16_v4i16__u_6_6_6(ptr addrspace(1) inreg %ptr) {
9479; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_6_6_6:
9480; GFX900:       ; %bb.0:
9481; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9482; GFX900-NEXT:    ;;#ASMSTART
9483; GFX900-NEXT:    ; def v[0:1]
9484; GFX900-NEXT:    ;;#ASMEND
9485; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9486; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9487; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
9488; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
9489; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
9490; GFX900-NEXT:    s_waitcnt vmcnt(0)
9491; GFX900-NEXT:    s_setpc_b64 s[30:31]
9492;
9493; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_6_6_6:
9494; GFX90A:       ; %bb.0:
9495; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9496; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9497; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9498; GFX90A-NEXT:    ;;#ASMSTART
9499; GFX90A-NEXT:    ; def v[0:1]
9500; GFX90A-NEXT:    ;;#ASMEND
9501; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
9502; GFX90A-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
9503; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
9504; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9505; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9506;
9507; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_6_6_6:
9508; GFX940:       ; %bb.0:
9509; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9510; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9511; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9512; GFX940-NEXT:    ;;#ASMSTART
9513; GFX940-NEXT:    ; def v[0:1]
9514; GFX940-NEXT:    ;;#ASMEND
9515; GFX940-NEXT:    s_nop 0
9516; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
9517; GFX940-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
9518; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
9519; GFX940-NEXT:    s_waitcnt vmcnt(0)
9520; GFX940-NEXT:    s_setpc_b64 s[30:31]
9521  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9522  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9523  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
9524  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9525  ret void
9526}
9527
; Shuffles two inline-asm-defined <4 x i16> values with mask <0, 6, 6, 6> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec0[0], vec1[2], vec1[2], vec1[2]>.
; Both inputs are live; the expected output builds each result dword with a
; v_perm_b32 sharing the selector constant 0x5040100.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9528define void @v_shuffle_v4i16_v4i16__0_6_6_6(ptr addrspace(1) inreg %ptr) {
9529; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_6_6_6:
9530; GFX900:       ; %bb.0:
9531; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9532; GFX900-NEXT:    ;;#ASMSTART
9533; GFX900-NEXT:    ; def v[0:1]
9534; GFX900-NEXT:    ;;#ASMEND
9535; GFX900-NEXT:    ;;#ASMSTART
9536; GFX900-NEXT:    ; def v[1:2]
9537; GFX900-NEXT:    ;;#ASMEND
9538; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9539; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9540; GFX900-NEXT:    v_perm_b32 v0, v2, v0, s4
9541; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
9542; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
9543; GFX900-NEXT:    s_waitcnt vmcnt(0)
9544; GFX900-NEXT:    s_setpc_b64 s[30:31]
9545;
9546; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_6_6_6:
9547; GFX90A:       ; %bb.0:
9548; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9549; GFX90A-NEXT:    ;;#ASMSTART
9550; GFX90A-NEXT:    ; def v[0:1]
9551; GFX90A-NEXT:    ;;#ASMEND
9552; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9553; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9554; GFX90A-NEXT:    ;;#ASMSTART
9555; GFX90A-NEXT:    ; def v[2:3]
9556; GFX90A-NEXT:    ;;#ASMEND
9557; GFX90A-NEXT:    v_perm_b32 v0, v3, v0, s4
9558; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
9559; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9560; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9561; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9562;
9563; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_6_6_6:
9564; GFX940:       ; %bb.0:
9565; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9566; GFX940-NEXT:    ;;#ASMSTART
9567; GFX940-NEXT:    ; def v[0:1]
9568; GFX940-NEXT:    ;;#ASMEND
9569; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9570; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9571; GFX940-NEXT:    ;;#ASMSTART
9572; GFX940-NEXT:    ; def v[2:3]
9573; GFX940-NEXT:    ;;#ASMEND
9574; GFX940-NEXT:    s_nop 0
9575; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s2
9576; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
9577; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
9578; GFX940-NEXT:    s_waitcnt vmcnt(0)
9579; GFX940-NEXT:    s_setpc_b64 s[30:31]
9580  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9581  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9582  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
9583  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9584  ret void
9585}
9586
; Shuffles two inline-asm-defined <4 x i16> values with mask <1, 6, 6, 6> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec0[1], vec1[2], vec1[2], vec1[2]>.
; Both inputs are live; the expected output uses one v_perm_b32 (selector
; 0x5040100) and one v_alignbit_b32 with a shift of 16.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9587define void @v_shuffle_v4i16_v4i16__1_6_6_6(ptr addrspace(1) inreg %ptr) {
9588; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_6_6_6:
9589; GFX900:       ; %bb.0:
9590; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9591; GFX900-NEXT:    ;;#ASMSTART
9592; GFX900-NEXT:    ; def v[0:1]
9593; GFX900-NEXT:    ;;#ASMEND
9594; GFX900-NEXT:    ;;#ASMSTART
9595; GFX900-NEXT:    ; def v[1:2]
9596; GFX900-NEXT:    ;;#ASMEND
9597; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9598; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9599; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
9600; GFX900-NEXT:    v_alignbit_b32 v0, v2, v0, 16
9601; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
9602; GFX900-NEXT:    s_waitcnt vmcnt(0)
9603; GFX900-NEXT:    s_setpc_b64 s[30:31]
9604;
9605; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_6_6_6:
9606; GFX90A:       ; %bb.0:
9607; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9608; GFX90A-NEXT:    ;;#ASMSTART
9609; GFX90A-NEXT:    ; def v[0:1]
9610; GFX90A-NEXT:    ;;#ASMEND
9611; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9612; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9613; GFX90A-NEXT:    ;;#ASMSTART
9614; GFX90A-NEXT:    ; def v[2:3]
9615; GFX90A-NEXT:    ;;#ASMEND
9616; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
9617; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v0, 16
9618; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9619; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9620; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9621;
9622; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_6_6_6:
9623; GFX940:       ; %bb.0:
9624; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9625; GFX940-NEXT:    ;;#ASMSTART
9626; GFX940-NEXT:    ; def v[0:1]
9627; GFX940-NEXT:    ;;#ASMEND
9628; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9629; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9630; GFX940-NEXT:    ;;#ASMSTART
9631; GFX940-NEXT:    ; def v[2:3]
9632; GFX940-NEXT:    ;;#ASMEND
9633; GFX940-NEXT:    s_nop 0
9634; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
9635; GFX940-NEXT:    v_alignbit_b32 v0, v3, v0, 16
9636; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
9637; GFX940-NEXT:    s_waitcnt vmcnt(0)
9638; GFX940-NEXT:    s_setpc_b64 s[30:31]
9639  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9640  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9641  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
9642  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9643  ret void
9644}
9645
; Shuffles two inline-asm-defined <4 x i16> values with mask <2, 6, 6, 6> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec0[2], vec1[2], vec1[2], vec1[2]>.
; Both inputs are live; the expected output builds each result dword with a
; v_perm_b32 sharing the selector constant 0x5040100.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9646define void @v_shuffle_v4i16_v4i16__2_6_6_6(ptr addrspace(1) inreg %ptr) {
9647; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_6_6_6:
9648; GFX900:       ; %bb.0:
9649; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9650; GFX900-NEXT:    ;;#ASMSTART
9651; GFX900-NEXT:    ; def v[0:1]
9652; GFX900-NEXT:    ;;#ASMEND
9653; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9654; GFX900-NEXT:    v_mov_b32_e32 v4, 0
9655; GFX900-NEXT:    ;;#ASMSTART
9656; GFX900-NEXT:    ; def v[2:3]
9657; GFX900-NEXT:    ;;#ASMEND
9658; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
9659; GFX900-NEXT:    v_perm_b32 v1, v3, v3, s4
9660; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9661; GFX900-NEXT:    s_waitcnt vmcnt(0)
9662; GFX900-NEXT:    s_setpc_b64 s[30:31]
9663;
9664; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_6_6_6:
9665; GFX90A:       ; %bb.0:
9666; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9667; GFX90A-NEXT:    ;;#ASMSTART
9668; GFX90A-NEXT:    ; def v[0:1]
9669; GFX90A-NEXT:    ;;#ASMEND
9670; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9671; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9672; GFX90A-NEXT:    ;;#ASMSTART
9673; GFX90A-NEXT:    ; def v[2:3]
9674; GFX90A-NEXT:    ;;#ASMEND
9675; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
9676; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
9677; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
9678; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9679; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9680;
9681; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_6_6_6:
9682; GFX940:       ; %bb.0:
9683; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9684; GFX940-NEXT:    ;;#ASMSTART
9685; GFX940-NEXT:    ; def v[0:1]
9686; GFX940-NEXT:    ;;#ASMEND
9687; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9688; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9689; GFX940-NEXT:    ;;#ASMSTART
9690; GFX940-NEXT:    ; def v[2:3]
9691; GFX940-NEXT:    ;;#ASMEND
9692; GFX940-NEXT:    s_nop 0
9693; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
9694; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
9695; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
9696; GFX940-NEXT:    s_waitcnt vmcnt(0)
9697; GFX940-NEXT:    s_setpc_b64 s[30:31]
9698  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9699  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9700  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
9701  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9702  ret void
9703}
9704
; Shuffles two inline-asm-defined <4 x i16> values with mask <3, 6, 6, 6> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec0[3], vec1[2], vec1[2], vec1[2]>.
; Both inputs are live; the expected output uses one v_perm_b32 (selector
; 0x5040100) and one v_alignbit_b32 with a shift of 16.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9705define void @v_shuffle_v4i16_v4i16__3_6_6_6(ptr addrspace(1) inreg %ptr) {
9706; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_6_6_6:
9707; GFX900:       ; %bb.0:
9708; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9709; GFX900-NEXT:    ;;#ASMSTART
9710; GFX900-NEXT:    ; def v[0:1]
9711; GFX900-NEXT:    ;;#ASMEND
9712; GFX900-NEXT:    ;;#ASMSTART
9713; GFX900-NEXT:    ; def v[2:3]
9714; GFX900-NEXT:    ;;#ASMEND
9715; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9716; GFX900-NEXT:    v_mov_b32_e32 v4, 0
9717; GFX900-NEXT:    v_perm_b32 v2, v3, v3, s4
9718; GFX900-NEXT:    v_alignbit_b32 v1, v3, v1, 16
9719; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
9720; GFX900-NEXT:    s_waitcnt vmcnt(0)
9721; GFX900-NEXT:    s_setpc_b64 s[30:31]
9722;
9723; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_6_6_6:
9724; GFX90A:       ; %bb.0:
9725; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9726; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9727; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
9728; GFX90A-NEXT:    ;;#ASMSTART
9729; GFX90A-NEXT:    ; def v[0:1]
9730; GFX90A-NEXT:    ;;#ASMEND
9731; GFX90A-NEXT:    ;;#ASMSTART
9732; GFX90A-NEXT:    ; def v[2:3]
9733; GFX90A-NEXT:    ;;#ASMEND
9734; GFX90A-NEXT:    v_perm_b32 v5, v3, v3, s4
9735; GFX90A-NEXT:    v_alignbit_b32 v4, v3, v1, 16
9736; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17]
9737; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9738; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9739;
9740; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_6_6_6:
9741; GFX940:       ; %bb.0:
9742; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9743; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9744; GFX940-NEXT:    v_mov_b32_e32 v6, 0
9745; GFX940-NEXT:    ;;#ASMSTART
9746; GFX940-NEXT:    ; def v[0:1]
9747; GFX940-NEXT:    ;;#ASMEND
9748; GFX940-NEXT:    ;;#ASMSTART
9749; GFX940-NEXT:    ; def v[2:3]
9750; GFX940-NEXT:    ;;#ASMEND
9751; GFX940-NEXT:    s_nop 0
9752; GFX940-NEXT:    v_perm_b32 v5, v3, v3, s2
9753; GFX940-NEXT:    v_alignbit_b32 v4, v3, v1, 16
9754; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
9755; GFX940-NEXT:    s_waitcnt vmcnt(0)
9756; GFX940-NEXT:    s_setpc_b64 s[30:31]
9757  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9758  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9759  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
9760  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9761  ret void
9762}
9763
; Shuffles two inline-asm-defined <4 x i16> values with mask <4, 6, 6, 6> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec1[0], vec1[2], vec1[2], vec1[2]>.
; No lane of %vec0 is selected; the expected output contains a single
; inline-asm def and two v_perm_b32 sharing the selector constant 0x5040100.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9764define void @v_shuffle_v4i16_v4i16__4_6_6_6(ptr addrspace(1) inreg %ptr) {
9765; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_6_6_6:
9766; GFX900:       ; %bb.0:
9767; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9768; GFX900-NEXT:    ;;#ASMSTART
9769; GFX900-NEXT:    ; def v[0:1]
9770; GFX900-NEXT:    ;;#ASMEND
9771; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9772; GFX900-NEXT:    v_mov_b32_e32 v2, 0
9773; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
9774; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
9775; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
9776; GFX900-NEXT:    s_waitcnt vmcnt(0)
9777; GFX900-NEXT:    s_setpc_b64 s[30:31]
9778;
9779; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_6_6_6:
9780; GFX90A:       ; %bb.0:
9781; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9782; GFX90A-NEXT:    ;;#ASMSTART
9783; GFX90A-NEXT:    ; def v[0:1]
9784; GFX90A-NEXT:    ;;#ASMEND
9785; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9786; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
9787; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
9788; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
9789; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
9790; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9791; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9792;
9793; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_6_6_6:
9794; GFX940:       ; %bb.0:
9795; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9796; GFX940-NEXT:    ;;#ASMSTART
9797; GFX940-NEXT:    ; def v[0:1]
9798; GFX940-NEXT:    ;;#ASMEND
9799; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9800; GFX940-NEXT:    v_mov_b32_e32 v2, 0
9801; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
9802; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
9803; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
9804; GFX940-NEXT:    s_waitcnt vmcnt(0)
9805; GFX940-NEXT:    s_setpc_b64 s[30:31]
9806  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9807  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9808  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
9809  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9810  ret void
9811}
9812
; Shuffles two inline-asm-defined <4 x i16> values with mask <5, 6, 6, 6> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so the result is <vec1[1], vec1[2], vec1[2], vec1[2]>.
; No lane of %vec0 is selected; the expected output contains a single
; inline-asm def, one v_perm_b32 (selector 0x5040100) and one v_alignbit_b32
; with a shift of 16.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9813define void @v_shuffle_v4i16_v4i16__5_6_6_6(ptr addrspace(1) inreg %ptr) {
9814; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_6_6_6:
9815; GFX900:       ; %bb.0:
9816; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9817; GFX900-NEXT:    ;;#ASMSTART
9818; GFX900-NEXT:    ; def v[0:1]
9819; GFX900-NEXT:    ;;#ASMEND
9820; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9821; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9822; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
9823; GFX900-NEXT:    v_alignbit_b32 v1, v1, v0, 16
9824; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
9825; GFX900-NEXT:    s_waitcnt vmcnt(0)
9826; GFX900-NEXT:    s_setpc_b64 s[30:31]
9827;
9828; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_6_6_6:
9829; GFX90A:       ; %bb.0:
9830; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9831; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9832; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9833; GFX90A-NEXT:    ;;#ASMSTART
9834; GFX90A-NEXT:    ; def v[0:1]
9835; GFX90A-NEXT:    ;;#ASMEND
9836; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
9837; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v0, 16
9838; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
9839; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9840; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9841;
9842; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_6_6_6:
9843; GFX940:       ; %bb.0:
9844; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9845; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9846; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9847; GFX940-NEXT:    ;;#ASMSTART
9848; GFX940-NEXT:    ; def v[0:1]
9849; GFX940-NEXT:    ;;#ASMEND
9850; GFX940-NEXT:    s_nop 0
9851; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
9852; GFX940-NEXT:    v_alignbit_b32 v2, v1, v0, 16
9853; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
9854; GFX940-NEXT:    s_waitcnt vmcnt(0)
9855; GFX940-NEXT:    s_setpc_b64 s[30:31]
9856  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9857  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9858  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
9859  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9860  ret void
9861}
9862
; Shuffles two inline-asm-defined <4 x i16> values with mask <6, 6, 6, 6> and
; stores the result to %ptr. Mask indices 0-3 select lanes of %vec0 and 4-7
; select lanes of %vec1, so every result lane is vec1[2] (a splat).
; No lane of %vec0 is selected; the expected output contains a single
; inline-asm def, one v_perm_b32 (selector 0x5040100) for the first dword and a
; v_mov_b32 that copies it into the second dword.
; The check lines are autogenerated - rerun the update script, do not hand-edit.
9863define void @v_shuffle_v4i16_v4i16__6_6_6_6(ptr addrspace(1) inreg %ptr) {
9864; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_6_6_6:
9865; GFX900:       ; %bb.0:
9866; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9867; GFX900-NEXT:    ;;#ASMSTART
9868; GFX900-NEXT:    ; def v[0:1]
9869; GFX900-NEXT:    ;;#ASMEND
9870; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9871; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
9872; GFX900-NEXT:    v_mov_b32_e32 v2, 0
9873; GFX900-NEXT:    v_mov_b32_e32 v1, v0
9874; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
9875; GFX900-NEXT:    s_waitcnt vmcnt(0)
9876; GFX900-NEXT:    s_setpc_b64 s[30:31]
9877;
9878; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_6_6_6:
9879; GFX90A:       ; %bb.0:
9880; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9881; GFX90A-NEXT:    ;;#ASMSTART
9882; GFX90A-NEXT:    ; def v[0:1]
9883; GFX90A-NEXT:    ;;#ASMEND
9884; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9885; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
9886; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
9887; GFX90A-NEXT:    v_mov_b32_e32 v1, v0
9888; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
9889; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9890; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9891;
9892; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_6_6_6:
9893; GFX940:       ; %bb.0:
9894; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9895; GFX940-NEXT:    ;;#ASMSTART
9896; GFX940-NEXT:    ; def v[0:1]
9897; GFX940-NEXT:    ;;#ASMEND
9898; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9899; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
9900; GFX940-NEXT:    v_mov_b32_e32 v2, 0
9901; GFX940-NEXT:    v_mov_b32_e32 v1, v0
9902; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
9903; GFX940-NEXT:    s_waitcnt vmcnt(0)
9904; GFX940-NEXT:    s_setpc_b64 s[30:31]
9905  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9906  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9907  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
9908  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9909  ret void
9910}
9911
; Shuffle mask <7, 6, 6, 6>: lane 0 takes element 3 of %vec1 and lanes 1-3
; take element 2 of %vec1 (mask indices 4-7 select from the second operand);
; %vec0 is not referenced by the mask. Check lines are autogenerated.
9912define void @v_shuffle_v4i16_v4i16__7_6_6_6(ptr addrspace(1) inreg %ptr) {
9913; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_6_6:
9914; GFX900:       ; %bb.0:
9915; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9916; GFX900-NEXT:    ;;#ASMSTART
9917; GFX900-NEXT:    ; def v[0:1]
9918; GFX900-NEXT:    ;;#ASMEND
9919; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9920; GFX900-NEXT:    v_mov_b32_e32 v3, 0
9921; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
9922; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
9923; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
9924; GFX900-NEXT:    s_waitcnt vmcnt(0)
9925; GFX900-NEXT:    s_setpc_b64 s[30:31]
9926;
9927; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_6_6:
9928; GFX90A:       ; %bb.0:
9929; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9930; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9931; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
9932; GFX90A-NEXT:    ;;#ASMSTART
9933; GFX90A-NEXT:    ; def v[0:1]
9934; GFX90A-NEXT:    ;;#ASMEND
9935; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
9936; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v1, 16
9937; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
9938; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9939; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9940;
9941; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_6_6:
9942; GFX940:       ; %bb.0:
9943; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9944; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9945; GFX940-NEXT:    v_mov_b32_e32 v4, 0
9946; GFX940-NEXT:    ;;#ASMSTART
9947; GFX940-NEXT:    ; def v[0:1]
9948; GFX940-NEXT:    ;;#ASMEND
9949; GFX940-NEXT:    s_nop 0
9950; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
9951; GFX940-NEXT:    v_alignbit_b32 v2, v1, v1, 16
9952; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
9953; GFX940-NEXT:    s_waitcnt vmcnt(0)
9954; GFX940-NEXT:    s_setpc_b64 s[30:31]
9955  %vec0 = call <4 x i16> asm "; def $0", "=v"()
9956  %vec1 = call <4 x i16> asm "; def $0", "=v"()
9957  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 6, i32 6>
9958  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
9959  ret void
9960}
9961
; Shuffle mask <7, poison, 6, 6>: lane 0 takes element 3 of %vec1, lane 1 is
; poison (any value is acceptable), and lanes 2-3 take element 2 of %vec1;
; %vec0 is not referenced by the mask. Check lines are autogenerated.
9962define void @v_shuffle_v4i16_v4i16__7_u_6_6(ptr addrspace(1) inreg %ptr) {
9963; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_6_6:
9964; GFX900:       ; %bb.0:
9965; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9966; GFX900-NEXT:    ;;#ASMSTART
9967; GFX900-NEXT:    ; def v[0:1]
9968; GFX900-NEXT:    ;;#ASMEND
9969; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
9970; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
9971; GFX900-NEXT:    v_mov_b32_e32 v2, 0
9972; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
9973; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
9974; GFX900-NEXT:    s_waitcnt vmcnt(0)
9975; GFX900-NEXT:    s_setpc_b64 s[30:31]
9976;
9977; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_6_6:
9978; GFX90A:       ; %bb.0:
9979; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9980; GFX90A-NEXT:    ;;#ASMSTART
9981; GFX90A-NEXT:    ; def v[0:1]
9982; GFX90A-NEXT:    ;;#ASMEND
9983; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
9984; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
9985; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
9986; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
9987; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
9988; GFX90A-NEXT:    s_waitcnt vmcnt(0)
9989; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9990;
9991; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_6_6:
9992; GFX940:       ; %bb.0:
9993; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9994; GFX940-NEXT:    ;;#ASMSTART
9995; GFX940-NEXT:    ; def v[0:1]
9996; GFX940-NEXT:    ;;#ASMEND
9997; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
9998; GFX940-NEXT:    v_mov_b32_e32 v2, 0
9999; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
10000; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
10001; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
10002; GFX940-NEXT:    s_waitcnt vmcnt(0)
10003; GFX940-NEXT:    s_setpc_b64 s[30:31]
10004  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10005  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10006  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 6, i32 6>
10007  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10008  ret void
10009}
10010
; Shuffle mask <7, 0, 6, 6>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 0 of %vec0, and lanes 2-3 take element 2 of %vec1 (mask indices 0-3
; select from %vec0, 4-7 from %vec1). Check lines are autogenerated.
10011define void @v_shuffle_v4i16_v4i16__7_0_6_6(ptr addrspace(1) inreg %ptr) {
10012; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_6_6:
10013; GFX900:       ; %bb.0:
10014; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10015; GFX900-NEXT:    ;;#ASMSTART
10016; GFX900-NEXT:    ; def v[0:1]
10017; GFX900-NEXT:    ;;#ASMEND
10018; GFX900-NEXT:    ;;#ASMSTART
10019; GFX900-NEXT:    ; def v[1:2]
10020; GFX900-NEXT:    ;;#ASMEND
10021; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10022; GFX900-NEXT:    v_mov_b32_e32 v3, 0
10023; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
10024; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
10025; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
10026; GFX900-NEXT:    s_waitcnt vmcnt(0)
10027; GFX900-NEXT:    s_setpc_b64 s[30:31]
10028;
10029; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_6_6:
10030; GFX90A:       ; %bb.0:
10031; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10032; GFX90A-NEXT:    ;;#ASMSTART
10033; GFX90A-NEXT:    ; def v[0:1]
10034; GFX90A-NEXT:    ;;#ASMEND
10035; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10036; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10037; GFX90A-NEXT:    ;;#ASMSTART
10038; GFX90A-NEXT:    ; def v[2:3]
10039; GFX90A-NEXT:    ;;#ASMEND
10040; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
10041; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
10042; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10043; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10044; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10045;
10046; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_6_6:
10047; GFX940:       ; %bb.0:
10048; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10049; GFX940-NEXT:    ;;#ASMSTART
10050; GFX940-NEXT:    ; def v[0:1]
10051; GFX940-NEXT:    ;;#ASMEND
10052; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10053; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10054; GFX940-NEXT:    ;;#ASMSTART
10055; GFX940-NEXT:    ; def v[2:3]
10056; GFX940-NEXT:    ;;#ASMEND
10057; GFX940-NEXT:    s_nop 0
10058; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
10059; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
10060; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
10061; GFX940-NEXT:    s_waitcnt vmcnt(0)
10062; GFX940-NEXT:    s_setpc_b64 s[30:31]
10063  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10064  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10065  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 6, i32 6>
10066  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10067  ret void
10068}
10069
; Shuffle mask <7, 1, 6, 6>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 1 of %vec0, and lanes 2-3 take element 2 of %vec1 (mask indices 0-3
; select from %vec0, 4-7 from %vec1). Check lines are autogenerated.
10070define void @v_shuffle_v4i16_v4i16__7_1_6_6(ptr addrspace(1) inreg %ptr) {
10071; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_6_6:
10072; GFX900:       ; %bb.0:
10073; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10074; GFX900-NEXT:    ;;#ASMSTART
10075; GFX900-NEXT:    ; def v[0:1]
10076; GFX900-NEXT:    ;;#ASMEND
10077; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10078; GFX900-NEXT:    ;;#ASMSTART
10079; GFX900-NEXT:    ; def v[1:2]
10080; GFX900-NEXT:    ;;#ASMEND
10081; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
10082; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10083; GFX900-NEXT:    v_mov_b32_e32 v3, 0
10084; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
10085; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
10086; GFX900-NEXT:    s_waitcnt vmcnt(0)
10087; GFX900-NEXT:    s_setpc_b64 s[30:31]
10088;
10089; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_6_6:
10090; GFX90A:       ; %bb.0:
10091; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10092; GFX90A-NEXT:    ;;#ASMSTART
10093; GFX90A-NEXT:    ; def v[0:1]
10094; GFX90A-NEXT:    ;;#ASMEND
10095; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10096; GFX90A-NEXT:    ;;#ASMSTART
10097; GFX90A-NEXT:    ; def v[2:3]
10098; GFX90A-NEXT:    ;;#ASMEND
10099; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
10100; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10101; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10102; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
10103; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10104; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10105; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10106;
10107; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_6_6:
10108; GFX940:       ; %bb.0:
10109; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10110; GFX940-NEXT:    ;;#ASMSTART
10111; GFX940-NEXT:    ; def v[0:1]
10112; GFX940-NEXT:    ;;#ASMEND
10113; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10114; GFX940-NEXT:    ;;#ASMSTART
10115; GFX940-NEXT:    ; def v[2:3]
10116; GFX940-NEXT:    ;;#ASMEND
10117; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10118; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
10119; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10120; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
10121; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
10122; GFX940-NEXT:    s_waitcnt vmcnt(0)
10123; GFX940-NEXT:    s_setpc_b64 s[30:31]
10124  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10125  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10126  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 6, i32 6>
10127  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10128  ret void
10129}
10130
; Shuffle mask <7, 2, 6, 6>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 2 of %vec0, and lanes 2-3 take element 2 of %vec1 (mask indices 0-3
; select from %vec0, 4-7 from %vec1). Check lines are autogenerated.
10131define void @v_shuffle_v4i16_v4i16__7_2_6_6(ptr addrspace(1) inreg %ptr) {
10132; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_6_6:
10133; GFX900:       ; %bb.0:
10134; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10135; GFX900-NEXT:    ;;#ASMSTART
10136; GFX900-NEXT:    ; def v[0:1]
10137; GFX900-NEXT:    ;;#ASMEND
10138; GFX900-NEXT:    ;;#ASMSTART
10139; GFX900-NEXT:    ; def v[2:3]
10140; GFX900-NEXT:    ;;#ASMEND
10141; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10142; GFX900-NEXT:    v_mov_b32_e32 v4, 0
10143; GFX900-NEXT:    v_perm_b32 v2, v3, v3, s4
10144; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
10145; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
10146; GFX900-NEXT:    s_waitcnt vmcnt(0)
10147; GFX900-NEXT:    s_setpc_b64 s[30:31]
10148;
10149; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_6_6:
10150; GFX90A:       ; %bb.0:
10151; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10152; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10153; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
10154; GFX90A-NEXT:    ;;#ASMSTART
10155; GFX90A-NEXT:    ; def v[0:1]
10156; GFX90A-NEXT:    ;;#ASMEND
10157; GFX90A-NEXT:    ;;#ASMSTART
10158; GFX90A-NEXT:    ; def v[2:3]
10159; GFX90A-NEXT:    ;;#ASMEND
10160; GFX90A-NEXT:    v_perm_b32 v5, v3, v3, s4
10161; GFX90A-NEXT:    v_alignbit_b32 v4, v1, v3, 16
10162; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17]
10163; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10164; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10165;
10166; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_6_6:
10167; GFX940:       ; %bb.0:
10168; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10169; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10170; GFX940-NEXT:    v_mov_b32_e32 v6, 0
10171; GFX940-NEXT:    ;;#ASMSTART
10172; GFX940-NEXT:    ; def v[0:1]
10173; GFX940-NEXT:    ;;#ASMEND
10174; GFX940-NEXT:    ;;#ASMSTART
10175; GFX940-NEXT:    ; def v[2:3]
10176; GFX940-NEXT:    ;;#ASMEND
10177; GFX940-NEXT:    s_nop 0
10178; GFX940-NEXT:    v_perm_b32 v5, v3, v3, s2
10179; GFX940-NEXT:    v_alignbit_b32 v4, v1, v3, 16
10180; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
10181; GFX940-NEXT:    s_waitcnt vmcnt(0)
10182; GFX940-NEXT:    s_setpc_b64 s[30:31]
10183  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10184  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10185  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 6, i32 6>
10186  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10187  ret void
10188}
10189
; Shuffle mask <7, 3, 6, 6>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 3 of %vec0, and lanes 2-3 take element 2 of %vec1 (mask indices 0-3
; select from %vec0, 4-7 from %vec1). Check lines are autogenerated.
10190define void @v_shuffle_v4i16_v4i16__7_3_6_6(ptr addrspace(1) inreg %ptr) {
10191; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_6_6:
10192; GFX900:       ; %bb.0:
10193; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10194; GFX900-NEXT:    ;;#ASMSTART
10195; GFX900-NEXT:    ; def v[0:1]
10196; GFX900-NEXT:    ;;#ASMEND
10197; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10198; GFX900-NEXT:    ;;#ASMSTART
10199; GFX900-NEXT:    ; def v[2:3]
10200; GFX900-NEXT:    ;;#ASMEND
10201; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
10202; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10203; GFX900-NEXT:    v_mov_b32_e32 v4, 0
10204; GFX900-NEXT:    v_perm_b32 v1, v3, v3, s4
10205; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10206; GFX900-NEXT:    s_waitcnt vmcnt(0)
10207; GFX900-NEXT:    s_setpc_b64 s[30:31]
10208;
10209; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_6_6:
10210; GFX90A:       ; %bb.0:
10211; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10212; GFX90A-NEXT:    ;;#ASMSTART
10213; GFX90A-NEXT:    ; def v[0:1]
10214; GFX90A-NEXT:    ;;#ASMEND
10215; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10216; GFX90A-NEXT:    ;;#ASMSTART
10217; GFX90A-NEXT:    ; def v[2:3]
10218; GFX90A-NEXT:    ;;#ASMEND
10219; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
10220; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10221; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10222; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
10223; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10224; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10225; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10226;
10227; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_6_6:
10228; GFX940:       ; %bb.0:
10229; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10230; GFX940-NEXT:    ;;#ASMSTART
10231; GFX940-NEXT:    ; def v[0:1]
10232; GFX940-NEXT:    ;;#ASMEND
10233; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10234; GFX940-NEXT:    ;;#ASMSTART
10235; GFX940-NEXT:    ; def v[2:3]
10236; GFX940-NEXT:    ;;#ASMEND
10237; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10238; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
10239; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10240; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
10241; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
10242; GFX940-NEXT:    s_waitcnt vmcnt(0)
10243; GFX940-NEXT:    s_setpc_b64 s[30:31]
10244  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10245  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10246  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 6, i32 6>
10247  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10248  ret void
10249}
10250
; Shuffle mask <7, 4, 6, 6>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 0 of %vec1, and lanes 2-3 take element 2 of %vec1 (mask indices 4-7
; select from the second operand); %vec0 is not referenced by the mask.
; Check lines are autogenerated.
10251define void @v_shuffle_v4i16_v4i16__7_4_6_6(ptr addrspace(1) inreg %ptr) {
10252; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_6_6:
10253; GFX900:       ; %bb.0:
10254; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10255; GFX900-NEXT:    ;;#ASMSTART
10256; GFX900-NEXT:    ; def v[0:1]
10257; GFX900-NEXT:    ;;#ASMEND
10258; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10259; GFX900-NEXT:    v_mov_b32_e32 v2, 0
10260; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
10261; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
10262; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
10263; GFX900-NEXT:    s_waitcnt vmcnt(0)
10264; GFX900-NEXT:    s_setpc_b64 s[30:31]
10265;
10266; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_6_6:
10267; GFX90A:       ; %bb.0:
10268; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10269; GFX90A-NEXT:    ;;#ASMSTART
10270; GFX90A-NEXT:    ; def v[0:1]
10271; GFX90A-NEXT:    ;;#ASMEND
10272; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10273; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
10274; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v1, 16
10275; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
10276; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
10277; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10278; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10279;
10280; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_6_6:
10281; GFX940:       ; %bb.0:
10282; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10283; GFX940-NEXT:    ;;#ASMSTART
10284; GFX940-NEXT:    ; def v[0:1]
10285; GFX940-NEXT:    ;;#ASMEND
10286; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10287; GFX940-NEXT:    v_mov_b32_e32 v2, 0
10288; GFX940-NEXT:    v_alignbit_b32 v0, v0, v1, 16
10289; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
10290; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
10291; GFX940-NEXT:    s_waitcnt vmcnt(0)
10292; GFX940-NEXT:    s_setpc_b64 s[30:31]
10293  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10294  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10295  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 6, i32 6>
10296  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10297  ret void
10298}
10299
; Shuffle mask <7, 5, 6, 6>: lane 0 takes element 3 of %vec1, lane 1 takes
; element 1 of %vec1, and lanes 2-3 take element 2 of %vec1 (mask indices 4-7
; select from the second operand); %vec0 is not referenced by the mask.
; Check lines are autogenerated.
10300define void @v_shuffle_v4i16_v4i16__7_5_6_6(ptr addrspace(1) inreg %ptr) {
10301; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_6_6:
10302; GFX900:       ; %bb.0:
10303; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10304; GFX900-NEXT:    ;;#ASMSTART
10305; GFX900-NEXT:    ; def v[0:1]
10306; GFX900-NEXT:    ;;#ASMEND
10307; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10308; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
10309; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10310; GFX900-NEXT:    v_mov_b32_e32 v2, 0
10311; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
10312; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
10313; GFX900-NEXT:    s_waitcnt vmcnt(0)
10314; GFX900-NEXT:    s_setpc_b64 s[30:31]
10315;
10316; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_6_6:
10317; GFX90A:       ; %bb.0:
10318; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10319; GFX90A-NEXT:    ;;#ASMSTART
10320; GFX90A-NEXT:    ; def v[0:1]
10321; GFX90A-NEXT:    ;;#ASMEND
10322; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10323; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
10324; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10325; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
10326; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
10327; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
10328; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10329; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10330;
10331; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_6_6:
10332; GFX940:       ; %bb.0:
10333; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10334; GFX940-NEXT:    ;;#ASMSTART
10335; GFX940-NEXT:    ; def v[0:1]
10336; GFX940-NEXT:    ;;#ASMEND
10337; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10338; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
10339; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10340; GFX940-NEXT:    v_mov_b32_e32 v2, 0
10341; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
10342; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
10343; GFX940-NEXT:    s_waitcnt vmcnt(0)
10344; GFX940-NEXT:    s_setpc_b64 s[30:31]
10345  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10346  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10347  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 6, i32 6>
10348  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10349  ret void
10350}
10351
; Shuffle mask <7, 7, 6, 6>: lanes 0-1 take element 3 of %vec1 and lanes 2-3
; take element 2 of %vec1 (mask indices 4-7 select from the second operand);
; %vec0 is not referenced by the mask. Check lines are autogenerated.
10352define void @v_shuffle_v4i16_v4i16__7_7_6_6(ptr addrspace(1) inreg %ptr) {
10353; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_6:
10354; GFX900:       ; %bb.0:
10355; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10356; GFX900-NEXT:    ;;#ASMSTART
10357; GFX900-NEXT:    ; def v[0:1]
10358; GFX900-NEXT:    ;;#ASMEND
10359; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10360; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
10361; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10362; GFX900-NEXT:    v_mov_b32_e32 v2, 0
10363; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
10364; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
10365; GFX900-NEXT:    s_waitcnt vmcnt(0)
10366; GFX900-NEXT:    s_setpc_b64 s[30:31]
10367;
10368; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_6:
10369; GFX90A:       ; %bb.0:
10370; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10371; GFX90A-NEXT:    ;;#ASMSTART
10372; GFX90A-NEXT:    ; def v[0:1]
10373; GFX90A-NEXT:    ;;#ASMEND
10374; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10375; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
10376; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10377; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
10378; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
10379; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
10380; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10381; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10382;
10383; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_6:
10384; GFX940:       ; %bb.0:
10385; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10386; GFX940-NEXT:    ;;#ASMSTART
10387; GFX940-NEXT:    ; def v[0:1]
10388; GFX940-NEXT:    ;;#ASMEND
10389; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10390; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
10391; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10392; GFX940-NEXT:    v_mov_b32_e32 v2, 0
10393; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
10394; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
10395; GFX940-NEXT:    s_waitcnt vmcnt(0)
10396; GFX940-NEXT:    s_setpc_b64 s[30:31]
10397  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10398  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10399  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 6>
10400  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10401  ret void
10402}
10403
; Shuffle mask <7, 7, poison, 6>: lanes 0-1 take element 3 of %vec1, lane 2 is
; poison (any value is acceptable), and lane 3 takes element 2 of %vec1;
; %vec0 is not referenced by the mask. Check lines are autogenerated.
10404define void @v_shuffle_v4i16_v4i16__7_7_u_6(ptr addrspace(1) inreg %ptr) {
10405; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_6:
10406; GFX900:       ; %bb.0:
10407; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10408; GFX900-NEXT:    ;;#ASMSTART
10409; GFX900-NEXT:    ; def v[0:1]
10410; GFX900-NEXT:    ;;#ASMEND
10411; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10412; GFX900-NEXT:    v_mov_b32_e32 v2, 0
10413; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
10414; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
10415; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
10416; GFX900-NEXT:    s_waitcnt vmcnt(0)
10417; GFX900-NEXT:    s_setpc_b64 s[30:31]
10418;
10419; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_6:
10420; GFX90A:       ; %bb.0:
10421; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10422; GFX90A-NEXT:    ;;#ASMSTART
10423; GFX90A-NEXT:    ; def v[0:1]
10424; GFX90A-NEXT:    ;;#ASMEND
10425; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10426; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
10427; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
10428; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
10429; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
10430; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10431; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10432;
10433; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_6:
10434; GFX940:       ; %bb.0:
10435; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10436; GFX940-NEXT:    ;;#ASMSTART
10437; GFX940-NEXT:    ; def v[0:1]
10438; GFX940-NEXT:    ;;#ASMEND
10439; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10440; GFX940-NEXT:    v_mov_b32_e32 v2, 0
10441; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
10442; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
10443; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
10444; GFX940-NEXT:    s_waitcnt vmcnt(0)
10445; GFX940-NEXT:    s_setpc_b64 s[30:31]
10446  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10447  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10448  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 6>
10449  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10450  ret void
10451}
10452
; Shuffle mask <7, 7, 0, 6>: lanes 0-1 take element 3 of %vec1, lane 2 takes
; element 0 of %vec0, and lane 3 takes element 2 of %vec1 (mask indices 0-3
; select from %vec0, 4-7 from %vec1). Check lines are autogenerated.
10453define void @v_shuffle_v4i16_v4i16__7_7_0_6(ptr addrspace(1) inreg %ptr) {
10454; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_6:
10455; GFX900:       ; %bb.0:
10456; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10457; GFX900-NEXT:    ;;#ASMSTART
10458; GFX900-NEXT:    ; def v[0:1]
10459; GFX900-NEXT:    ;;#ASMEND
10460; GFX900-NEXT:    ;;#ASMSTART
10461; GFX900-NEXT:    ; def v[1:2]
10462; GFX900-NEXT:    ;;#ASMEND
10463; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10464; GFX900-NEXT:    v_perm_b32 v1, v2, v0, s4
10465; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10466; GFX900-NEXT:    v_mov_b32_e32 v3, 0
10467; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
10468; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
10469; GFX900-NEXT:    s_waitcnt vmcnt(0)
10470; GFX900-NEXT:    s_setpc_b64 s[30:31]
10471;
10472; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_6:
10473; GFX90A:       ; %bb.0:
10474; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10475; GFX90A-NEXT:    ;;#ASMSTART
10476; GFX90A-NEXT:    ; def v[0:1]
10477; GFX90A-NEXT:    ;;#ASMEND
10478; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10479; GFX90A-NEXT:    ;;#ASMSTART
10480; GFX90A-NEXT:    ; def v[2:3]
10481; GFX90A-NEXT:    ;;#ASMEND
10482; GFX90A-NEXT:    v_perm_b32 v1, v3, v0, s4
10483; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10484; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10485; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
10486; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10487; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10488; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10489;
10490; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_6:
10491; GFX940:       ; %bb.0:
10492; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10493; GFX940-NEXT:    ;;#ASMSTART
10494; GFX940-NEXT:    ; def v[0:1]
10495; GFX940-NEXT:    ;;#ASMEND
10496; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10497; GFX940-NEXT:    ;;#ASMSTART
10498; GFX940-NEXT:    ; def v[2:3]
10499; GFX940-NEXT:    ;;#ASMEND
10500; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10501; GFX940-NEXT:    v_perm_b32 v1, v3, v0, s2
10502; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10503; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
10504; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
10505; GFX940-NEXT:    s_waitcnt vmcnt(0)
10506; GFX940-NEXT:    s_setpc_b64 s[30:31]
10507  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10508  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10509  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 6>
10510  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10511  ret void
10512}
10513
; Shuffle mask <7, 7, 1, 6>: lanes 0-1 take element 3 of %vec1, lane 2 takes
; element 1 of %vec0, and lane 3 takes element 2 of %vec1 (mask indices 0-3
; select from %vec0, 4-7 from %vec1). Check lines are autogenerated.
10514define void @v_shuffle_v4i16_v4i16__7_7_1_6(ptr addrspace(1) inreg %ptr) {
10515; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_6:
10516; GFX900:       ; %bb.0:
10517; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10518; GFX900-NEXT:    ;;#ASMSTART
10519; GFX900-NEXT:    ; def v[0:1]
10520; GFX900-NEXT:    ;;#ASMEND
10521; GFX900-NEXT:    ;;#ASMSTART
10522; GFX900-NEXT:    ; def v[1:2]
10523; GFX900-NEXT:    ;;#ASMEND
10524; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10525; GFX900-NEXT:    v_mov_b32_e32 v3, 0
10526; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
10527; GFX900-NEXT:    v_alignbit_b32 v2, v2, v0, 16
10528; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
10529; GFX900-NEXT:    s_waitcnt vmcnt(0)
10530; GFX900-NEXT:    s_setpc_b64 s[30:31]
10531;
10532; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_6:
10533; GFX90A:       ; %bb.0:
10534; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10535; GFX90A-NEXT:    ;;#ASMSTART
10536; GFX90A-NEXT:    ; def v[2:3]
10537; GFX90A-NEXT:    ;;#ASMEND
10538; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10539; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10540; GFX90A-NEXT:    ;;#ASMSTART
10541; GFX90A-NEXT:    ; def v[0:1]
10542; GFX90A-NEXT:    ;;#ASMEND
10543; GFX90A-NEXT:    v_perm_b32 v2, v3, v3, s4
10544; GFX90A-NEXT:    v_alignbit_b32 v3, v3, v0, 16
10545; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
10546; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10547; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10548;
10549; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_6:
10550; GFX940:       ; %bb.0:
10551; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10552; GFX940-NEXT:    ;;#ASMSTART
10553; GFX940-NEXT:    ; def v[2:3]
10554; GFX940-NEXT:    ;;#ASMEND
10555; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10556; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10557; GFX940-NEXT:    ;;#ASMSTART
10558; GFX940-NEXT:    ; def v[0:1]
10559; GFX940-NEXT:    ;;#ASMEND
10560; GFX940-NEXT:    v_perm_b32 v2, v3, v3, s2
10561; GFX940-NEXT:    v_alignbit_b32 v3, v3, v0, 16
10562; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
10563; GFX940-NEXT:    s_waitcnt vmcnt(0)
10564; GFX940-NEXT:    s_setpc_b64 s[30:31]
10565  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10566  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10567  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 6>
10568  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10569  ret void
10570}
10571
; Shuffle mask <7, 7, 2, 6>: lanes 0-1 take element 3 of %vec1, lane 2 takes
; element 2 of %vec0, and lane 3 takes element 2 of %vec1 (mask indices 0-3
; select from %vec0, 4-7 from %vec1). Check lines are autogenerated.
10572define void @v_shuffle_v4i16_v4i16__7_7_2_6(ptr addrspace(1) inreg %ptr) {
10573; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_6:
10574; GFX900:       ; %bb.0:
10575; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10576; GFX900-NEXT:    ;;#ASMSTART
10577; GFX900-NEXT:    ; def v[0:1]
10578; GFX900-NEXT:    ;;#ASMEND
10579; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10580; GFX900-NEXT:    ;;#ASMSTART
10581; GFX900-NEXT:    ; def v[2:3]
10582; GFX900-NEXT:    ;;#ASMEND
10583; GFX900-NEXT:    v_perm_b32 v1, v3, v1, s4
10584; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10585; GFX900-NEXT:    v_mov_b32_e32 v4, 0
10586; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
10587; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10588; GFX900-NEXT:    s_waitcnt vmcnt(0)
10589; GFX900-NEXT:    s_setpc_b64 s[30:31]
10590;
10591; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_6:
10592; GFX90A:       ; %bb.0:
10593; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10594; GFX90A-NEXT:    ;;#ASMSTART
10595; GFX90A-NEXT:    ; def v[0:1]
10596; GFX90A-NEXT:    ;;#ASMEND
10597; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10598; GFX90A-NEXT:    ;;#ASMSTART
10599; GFX90A-NEXT:    ; def v[2:3]
10600; GFX90A-NEXT:    ;;#ASMEND
10601; GFX90A-NEXT:    v_perm_b32 v1, v3, v1, s4
10602; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10603; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10604; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
10605; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10606; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10607; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10608;
10609; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_6:
10610; GFX940:       ; %bb.0:
10611; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10612; GFX940-NEXT:    ;;#ASMSTART
10613; GFX940-NEXT:    ; def v[0:1]
10614; GFX940-NEXT:    ;;#ASMEND
10615; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10616; GFX940-NEXT:    ;;#ASMSTART
10617; GFX940-NEXT:    ; def v[2:3]
10618; GFX940-NEXT:    ;;#ASMEND
10619; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10620; GFX940-NEXT:    v_perm_b32 v1, v3, v1, s2
10621; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10622; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
10623; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
10624; GFX940-NEXT:    s_waitcnt vmcnt(0)
10625; GFX940-NEXT:    s_setpc_b64 s[30:31]
10626  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10627  %vec1 = call <4 x i16> asm "; def $0", "=v"()
10628  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 6>
10629  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10630  ret void
10631}
10632
; Two-source shuffle with mask <7, 7, 3, 6> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec1[3], %vec1[3], %vec0[3], %vec1[2]>.
10633define void @v_shuffle_v4i16_v4i16__7_7_3_6(ptr addrspace(1) inreg %ptr) {
10634; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_6:
10635; GFX900:       ; %bb.0:
10636; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10637; GFX900-NEXT:    ;;#ASMSTART
10638; GFX900-NEXT:    ; def v[0:1]
10639; GFX900-NEXT:    ;;#ASMEND
10640; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10641; GFX900-NEXT:    v_mov_b32_e32 v4, 0
10642; GFX900-NEXT:    ;;#ASMSTART
10643; GFX900-NEXT:    ; def v[2:3]
10644; GFX900-NEXT:    ;;#ASMEND
10645; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
10646; GFX900-NEXT:    v_alignbit_b32 v1, v3, v1, 16
10647; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10648; GFX900-NEXT:    s_waitcnt vmcnt(0)
10649; GFX900-NEXT:    s_setpc_b64 s[30:31]
10650;
10651; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_6:
10652; GFX90A:       ; %bb.0:
10653; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10654; GFX90A-NEXT:    ;;#ASMSTART
10655; GFX90A-NEXT:    ; def v[0:1]
10656; GFX90A-NEXT:    ;;#ASMEND
10657; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10658; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10659; GFX90A-NEXT:    ;;#ASMSTART
10660; GFX90A-NEXT:    ; def v[2:3]
10661; GFX90A-NEXT:    ;;#ASMEND
10662; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
10663; GFX90A-NEXT:    v_alignbit_b32 v1, v3, v1, 16
10664; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10665; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10666; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10667;
10668; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_6:
10669; GFX940:       ; %bb.0:
10670; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10671; GFX940-NEXT:    ;;#ASMSTART
10672; GFX940-NEXT:    ; def v[0:1]
10673; GFX940-NEXT:    ;;#ASMEND
10674; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10675; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10676; GFX940-NEXT:    ;;#ASMSTART
10677; GFX940-NEXT:    ; def v[2:3]
10678; GFX940-NEXT:    ;;#ASMEND
10679; GFX940-NEXT:    s_nop 0
10680; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
10681; GFX940-NEXT:    v_alignbit_b32 v1, v3, v1, 16
10682; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
10683; GFX940-NEXT:    s_waitcnt vmcnt(0)
10684; GFX940-NEXT:    s_setpc_b64 s[30:31]
10685  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10686  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Lanes 0, 1 and 3 read %vec1; lane 2 reads %vec0. The result is stored through %ptr.
10687  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 6>
10688  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10689  ret void
10690}
10691
; Two-source shuffle with mask <7, 7, 4, 6> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec1[3], %vec1[3], %vec1[0], %vec1[2]>.
; No lane reads %vec0, and the expected output for each target contains only
; one inline-asm def block.
10692define void @v_shuffle_v4i16_v4i16__7_7_4_6(ptr addrspace(1) inreg %ptr) {
10693; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_6:
10694; GFX900:       ; %bb.0:
10695; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10696; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
10697; GFX900-NEXT:    ;;#ASMSTART
10698; GFX900-NEXT:    ; def v[0:1]
10699; GFX900-NEXT:    ;;#ASMEND
10700; GFX900-NEXT:    v_perm_b32 v2, v1, v0, s4
10701; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10702; GFX900-NEXT:    v_mov_b32_e32 v3, 0
10703; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
10704; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
10705; GFX900-NEXT:    s_waitcnt vmcnt(0)
10706; GFX900-NEXT:    s_setpc_b64 s[30:31]
10707;
10708; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_6:
10709; GFX90A:       ; %bb.0:
10710; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10711; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
10712; GFX90A-NEXT:    ;;#ASMSTART
10713; GFX90A-NEXT:    ; def v[0:1]
10714; GFX90A-NEXT:    ;;#ASMEND
10715; GFX90A-NEXT:    v_perm_b32 v3, v1, v0, s4
10716; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10717; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10718; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
10719; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
10720; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10721; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10722;
10723; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_6:
10724; GFX940:       ; %bb.0:
10725; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10726; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
10727; GFX940-NEXT:    ;;#ASMSTART
10728; GFX940-NEXT:    ; def v[0:1]
10729; GFX940-NEXT:    ;;#ASMEND
10730; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10731; GFX940-NEXT:    v_perm_b32 v3, v1, v0, s2
10732; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10733; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
10734; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
10735; GFX940-NEXT:    s_waitcnt vmcnt(0)
10736; GFX940-NEXT:    s_setpc_b64 s[30:31]
10737  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10738  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Every lane reads %vec1; %vec0 is not referenced by the mask. The result is stored through %ptr.
10739  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 6>
10740  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10741  ret void
10742}
10743
; Two-source shuffle with mask <7, 7, 5, 6> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec1[3], %vec1[3], %vec1[1], %vec1[2]>.
; No lane reads %vec0, and the expected output for each target contains only
; one inline-asm def block.
10744define void @v_shuffle_v4i16_v4i16__7_7_5_6(ptr addrspace(1) inreg %ptr) {
10745; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_6:
10746; GFX900:       ; %bb.0:
10747; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10748; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10749; GFX900-NEXT:    v_mov_b32_e32 v4, 0
10750; GFX900-NEXT:    ;;#ASMSTART
10751; GFX900-NEXT:    ; def v[0:1]
10752; GFX900-NEXT:    ;;#ASMEND
10753; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
10754; GFX900-NEXT:    v_alignbit_b32 v3, v1, v0, 16
10755; GFX900-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
10756; GFX900-NEXT:    s_waitcnt vmcnt(0)
10757; GFX900-NEXT:    s_setpc_b64 s[30:31]
10758;
10759; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_6:
10760; GFX90A:       ; %bb.0:
10761; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10762; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10763; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10764; GFX90A-NEXT:    ;;#ASMSTART
10765; GFX90A-NEXT:    ; def v[0:1]
10766; GFX90A-NEXT:    ;;#ASMEND
10767; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
10768; GFX90A-NEXT:    v_alignbit_b32 v3, v1, v0, 16
10769; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
10770; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10771; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10772;
10773; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_6:
10774; GFX940:       ; %bb.0:
10775; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10776; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10777; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10778; GFX940-NEXT:    ;;#ASMSTART
10779; GFX940-NEXT:    ; def v[0:1]
10780; GFX940-NEXT:    ;;#ASMEND
10781; GFX940-NEXT:    s_nop 0
10782; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
10783; GFX940-NEXT:    v_alignbit_b32 v3, v1, v0, 16
10784; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
10785; GFX940-NEXT:    s_waitcnt vmcnt(0)
10786; GFX940-NEXT:    s_setpc_b64 s[30:31]
10787  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10788  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Every lane reads %vec1; %vec0 is not referenced by the mask. The result is stored through %ptr.
10789  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 6>
10790  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10791  ret void
10792}
10793
; Two-source shuffle with mask <poison, 7, 7, 7> ("u" in the function name
; marks the poison lane). Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so lanes 1-3 are all %vec1[3] and lane 0 is left unspecified.
; No lane reads %vec0, and the expected output for each target contains only
; one inline-asm def block.
10794define void @v_shuffle_v4i16_v4i16__u_7_7_7(ptr addrspace(1) inreg %ptr) {
10795; GFX900-LABEL: v_shuffle_v4i16_v4i16__u_7_7_7:
10796; GFX900:       ; %bb.0:
10797; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10798; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10799; GFX900-NEXT:    v_mov_b32_e32 v3, 0
10800; GFX900-NEXT:    ;;#ASMSTART
10801; GFX900-NEXT:    ; def v[0:1]
10802; GFX900-NEXT:    ;;#ASMEND
10803; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
10804; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
10805; GFX900-NEXT:    s_waitcnt vmcnt(0)
10806; GFX900-NEXT:    s_setpc_b64 s[30:31]
10807;
10808; GFX90A-LABEL: v_shuffle_v4i16_v4i16__u_7_7_7:
10809; GFX90A:       ; %bb.0:
10810; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10811; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10812; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10813; GFX90A-NEXT:    ;;#ASMSTART
10814; GFX90A-NEXT:    ; def v[0:1]
10815; GFX90A-NEXT:    ;;#ASMEND
10816; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
10817; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
10818; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
10819; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10820; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10821;
10822; GFX940-LABEL: v_shuffle_v4i16_v4i16__u_7_7_7:
10823; GFX940:       ; %bb.0:
10824; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10825; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10826; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10827; GFX940-NEXT:    ;;#ASMSTART
10828; GFX940-NEXT:    ; def v[0:1]
10829; GFX940-NEXT:    ;;#ASMEND
10830; GFX940-NEXT:    s_nop 0
10831; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
10832; GFX940-NEXT:    v_mov_b32_e32 v2, v1
10833; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
10834; GFX940-NEXT:    s_waitcnt vmcnt(0)
10835; GFX940-NEXT:    s_setpc_b64 s[30:31]
10836  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10837  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Lane 0 is poison; lanes 1-3 read %vec1. The result is stored through %ptr.
10838  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 7, i32 7, i32 7>
10839  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10840  ret void
10841}
10842
; Two-source shuffle with mask <0, 7, 7, 7> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec0[0], %vec1[3], %vec1[3], %vec1[3]>.
10843define void @v_shuffle_v4i16_v4i16__0_7_7_7(ptr addrspace(1) inreg %ptr) {
10844; GFX900-LABEL: v_shuffle_v4i16_v4i16__0_7_7_7:
10845; GFX900:       ; %bb.0:
10846; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10847; GFX900-NEXT:    ;;#ASMSTART
10848; GFX900-NEXT:    ; def v[0:1]
10849; GFX900-NEXT:    ;;#ASMEND
10850; GFX900-NEXT:    s_mov_b32 s4, 0xffff
10851; GFX900-NEXT:    ;;#ASMSTART
10852; GFX900-NEXT:    ; def v[1:2]
10853; GFX900-NEXT:    ;;#ASMEND
10854; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v2
10855; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10856; GFX900-NEXT:    v_mov_b32_e32 v3, 0
10857; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
10858; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
10859; GFX900-NEXT:    s_waitcnt vmcnt(0)
10860; GFX900-NEXT:    s_setpc_b64 s[30:31]
10861;
10862; GFX90A-LABEL: v_shuffle_v4i16_v4i16__0_7_7_7:
10863; GFX90A:       ; %bb.0:
10864; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10865; GFX90A-NEXT:    ;;#ASMSTART
10866; GFX90A-NEXT:    ; def v[0:1]
10867; GFX90A-NEXT:    ;;#ASMEND
10868; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
10869; GFX90A-NEXT:    ;;#ASMSTART
10870; GFX90A-NEXT:    ; def v[2:3]
10871; GFX90A-NEXT:    ;;#ASMEND
10872; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v3
10873; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10874; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10875; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
10876; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10877; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10878; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10879;
10880; GFX940-LABEL: v_shuffle_v4i16_v4i16__0_7_7_7:
10881; GFX940:       ; %bb.0:
10882; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10883; GFX940-NEXT:    ;;#ASMSTART
10884; GFX940-NEXT:    ; def v[0:1]
10885; GFX940-NEXT:    ;;#ASMEND
10886; GFX940-NEXT:    s_mov_b32 s2, 0xffff
10887; GFX940-NEXT:    ;;#ASMSTART
10888; GFX940-NEXT:    ; def v[2:3]
10889; GFX940-NEXT:    ;;#ASMEND
10890; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10891; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v3
10892; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10893; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
10894; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
10895; GFX940-NEXT:    s_waitcnt vmcnt(0)
10896; GFX940-NEXT:    s_setpc_b64 s[30:31]
10897  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10898  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Lane 0 reads %vec0; lanes 1-3 read %vec1. The result is stored through %ptr.
10899  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
10900  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10901  ret void
10902}
10903
; Two-source shuffle with mask <1, 7, 7, 7> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec0[1], %vec1[3], %vec1[3], %vec1[3]>.
10904define void @v_shuffle_v4i16_v4i16__1_7_7_7(ptr addrspace(1) inreg %ptr) {
10905; GFX900-LABEL: v_shuffle_v4i16_v4i16__1_7_7_7:
10906; GFX900:       ; %bb.0:
10907; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10908; GFX900-NEXT:    ;;#ASMSTART
10909; GFX900-NEXT:    ; def v[0:1]
10910; GFX900-NEXT:    ;;#ASMEND
10911; GFX900-NEXT:    ;;#ASMSTART
10912; GFX900-NEXT:    ; def v[1:2]
10913; GFX900-NEXT:    ;;#ASMEND
10914; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10915; GFX900-NEXT:    v_mov_b32_e32 v3, 0
10916; GFX900-NEXT:    v_perm_b32 v0, v2, v0, s4
10917; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
10918; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
10919; GFX900-NEXT:    s_waitcnt vmcnt(0)
10920; GFX900-NEXT:    s_setpc_b64 s[30:31]
10921;
10922; GFX90A-LABEL: v_shuffle_v4i16_v4i16__1_7_7_7:
10923; GFX90A:       ; %bb.0:
10924; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10925; GFX90A-NEXT:    ;;#ASMSTART
10926; GFX90A-NEXT:    ; def v[0:1]
10927; GFX90A-NEXT:    ;;#ASMEND
10928; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10929; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10930; GFX90A-NEXT:    ;;#ASMSTART
10931; GFX90A-NEXT:    ; def v[2:3]
10932; GFX90A-NEXT:    ;;#ASMEND
10933; GFX90A-NEXT:    v_perm_b32 v0, v3, v0, s4
10934; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
10935; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10936; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10937; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10938;
10939; GFX940-LABEL: v_shuffle_v4i16_v4i16__1_7_7_7:
10940; GFX940:       ; %bb.0:
10941; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10942; GFX940-NEXT:    ;;#ASMSTART
10943; GFX940-NEXT:    ; def v[0:1]
10944; GFX940-NEXT:    ;;#ASMEND
10945; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
10946; GFX940-NEXT:    v_mov_b32_e32 v4, 0
10947; GFX940-NEXT:    ;;#ASMSTART
10948; GFX940-NEXT:    ; def v[2:3]
10949; GFX940-NEXT:    ;;#ASMEND
10950; GFX940-NEXT:    s_nop 0
10951; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s2
10952; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
10953; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
10954; GFX940-NEXT:    s_waitcnt vmcnt(0)
10955; GFX940-NEXT:    s_setpc_b64 s[30:31]
10956  %vec0 = call <4 x i16> asm "; def $0", "=v"()
10957  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Lane 0 reads %vec0; lanes 1-3 read %vec1. The result is stored through %ptr.
10958  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
10959  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
10960  ret void
10961}
10962
; Two-source shuffle with mask <2, 7, 7, 7> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec0[2], %vec1[3], %vec1[3], %vec1[3]>.
10963define void @v_shuffle_v4i16_v4i16__2_7_7_7(ptr addrspace(1) inreg %ptr) {
10964; GFX900-LABEL: v_shuffle_v4i16_v4i16__2_7_7_7:
10965; GFX900:       ; %bb.0:
10966; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10967; GFX900-NEXT:    ;;#ASMSTART
10968; GFX900-NEXT:    ; def v[0:1]
10969; GFX900-NEXT:    ;;#ASMEND
10970; GFX900-NEXT:    s_mov_b32 s4, 0xffff
10971; GFX900-NEXT:    ;;#ASMSTART
10972; GFX900-NEXT:    ; def v[2:3]
10973; GFX900-NEXT:    ;;#ASMEND
10974; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v3
10975; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
10976; GFX900-NEXT:    v_mov_b32_e32 v4, 0
10977; GFX900-NEXT:    v_perm_b32 v1, v3, v3, s4
10978; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10979; GFX900-NEXT:    s_waitcnt vmcnt(0)
10980; GFX900-NEXT:    s_setpc_b64 s[30:31]
10981;
10982; GFX90A-LABEL: v_shuffle_v4i16_v4i16__2_7_7_7:
10983; GFX90A:       ; %bb.0:
10984; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10985; GFX90A-NEXT:    ;;#ASMSTART
10986; GFX90A-NEXT:    ; def v[0:1]
10987; GFX90A-NEXT:    ;;#ASMEND
10988; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
10989; GFX90A-NEXT:    ;;#ASMSTART
10990; GFX90A-NEXT:    ; def v[2:3]
10991; GFX90A-NEXT:    ;;#ASMEND
10992; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v3
10993; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
10994; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
10995; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
10996; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
10997; GFX90A-NEXT:    s_waitcnt vmcnt(0)
10998; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10999;
11000; GFX940-LABEL: v_shuffle_v4i16_v4i16__2_7_7_7:
11001; GFX940:       ; %bb.0:
11002; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11003; GFX940-NEXT:    ;;#ASMSTART
11004; GFX940-NEXT:    ; def v[0:1]
11005; GFX940-NEXT:    ;;#ASMEND
11006; GFX940-NEXT:    s_mov_b32 s2, 0xffff
11007; GFX940-NEXT:    ;;#ASMSTART
11008; GFX940-NEXT:    ; def v[2:3]
11009; GFX940-NEXT:    ;;#ASMEND
11010; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11011; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v3
11012; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11013; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
11014; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
11015; GFX940-NEXT:    s_waitcnt vmcnt(0)
11016; GFX940-NEXT:    s_setpc_b64 s[30:31]
11017  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11018  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Lane 0 reads %vec0; lanes 1-3 read %vec1. The result is stored through %ptr.
11019  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
11020  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11021  ret void
11022}
11023
; Two-source shuffle with mask <3, 7, 7, 7> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec0[3], %vec1[3], %vec1[3], %vec1[3]>.
11024define void @v_shuffle_v4i16_v4i16__3_7_7_7(ptr addrspace(1) inreg %ptr) {
11025; GFX900-LABEL: v_shuffle_v4i16_v4i16__3_7_7_7:
11026; GFX900:       ; %bb.0:
11027; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11028; GFX900-NEXT:    ;;#ASMSTART
11029; GFX900-NEXT:    ; def v[0:1]
11030; GFX900-NEXT:    ;;#ASMEND
11031; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11032; GFX900-NEXT:    v_mov_b32_e32 v4, 0
11033; GFX900-NEXT:    ;;#ASMSTART
11034; GFX900-NEXT:    ; def v[2:3]
11035; GFX900-NEXT:    ;;#ASMEND
11036; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
11037; GFX900-NEXT:    v_perm_b32 v1, v3, v3, s4
11038; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11039; GFX900-NEXT:    s_waitcnt vmcnt(0)
11040; GFX900-NEXT:    s_setpc_b64 s[30:31]
11041;
11042; GFX90A-LABEL: v_shuffle_v4i16_v4i16__3_7_7_7:
11043; GFX90A:       ; %bb.0:
11044; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11045; GFX90A-NEXT:    ;;#ASMSTART
11046; GFX90A-NEXT:    ; def v[0:1]
11047; GFX90A-NEXT:    ;;#ASMEND
11048; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11049; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11050; GFX90A-NEXT:    ;;#ASMSTART
11051; GFX90A-NEXT:    ; def v[2:3]
11052; GFX90A-NEXT:    ;;#ASMEND
11053; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
11054; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
11055; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11056; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11057; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11058;
11059; GFX940-LABEL: v_shuffle_v4i16_v4i16__3_7_7_7:
11060; GFX940:       ; %bb.0:
11061; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11062; GFX940-NEXT:    ;;#ASMSTART
11063; GFX940-NEXT:    ; def v[0:1]
11064; GFX940-NEXT:    ;;#ASMEND
11065; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11066; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11067; GFX940-NEXT:    ;;#ASMSTART
11068; GFX940-NEXT:    ; def v[2:3]
11069; GFX940-NEXT:    ;;#ASMEND
11070; GFX940-NEXT:    s_nop 0
11071; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
11072; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
11073; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
11074; GFX940-NEXT:    s_waitcnt vmcnt(0)
11075; GFX940-NEXT:    s_setpc_b64 s[30:31]
11076  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11077  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Lane 0 reads %vec0; lanes 1-3 read %vec1. The result is stored through %ptr.
11078  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
11079  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11080  ret void
11081}
11082
; Two-source shuffle with mask <4, 7, 7, 7> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec1[0], %vec1[3], %vec1[3], %vec1[3]>.
; No lane reads %vec0, and the expected output for each target contains only
; one inline-asm def block.
11083define void @v_shuffle_v4i16_v4i16__4_7_7_7(ptr addrspace(1) inreg %ptr) {
11084; GFX900-LABEL: v_shuffle_v4i16_v4i16__4_7_7_7:
11085; GFX900:       ; %bb.0:
11086; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11087; GFX900-NEXT:    ;;#ASMSTART
11088; GFX900-NEXT:    ; def v[0:1]
11089; GFX900-NEXT:    ;;#ASMEND
11090; GFX900-NEXT:    s_mov_b32 s4, 0xffff
11091; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
11092; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11093; GFX900-NEXT:    v_mov_b32_e32 v2, 0
11094; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
11095; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
11096; GFX900-NEXT:    s_waitcnt vmcnt(0)
11097; GFX900-NEXT:    s_setpc_b64 s[30:31]
11098;
11099; GFX90A-LABEL: v_shuffle_v4i16_v4i16__4_7_7_7:
11100; GFX90A:       ; %bb.0:
11101; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11102; GFX90A-NEXT:    ;;#ASMSTART
11103; GFX90A-NEXT:    ; def v[0:1]
11104; GFX90A-NEXT:    ;;#ASMEND
11105; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
11106; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
11107; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11108; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
11109; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
11110; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
11111; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11112; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11113;
11114; GFX940-LABEL: v_shuffle_v4i16_v4i16__4_7_7_7:
11115; GFX940:       ; %bb.0:
11116; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11117; GFX940-NEXT:    ;;#ASMSTART
11118; GFX940-NEXT:    ; def v[0:1]
11119; GFX940-NEXT:    ;;#ASMEND
11120; GFX940-NEXT:    s_mov_b32 s2, 0xffff
11121; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
11122; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11123; GFX940-NEXT:    v_mov_b32_e32 v2, 0
11124; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
11125; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
11126; GFX940-NEXT:    s_waitcnt vmcnt(0)
11127; GFX940-NEXT:    s_setpc_b64 s[30:31]
11128  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11129  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Every lane reads %vec1; %vec0 is not referenced by the mask. The result is stored through %ptr.
11130  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
11131  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11132  ret void
11133}
11134
; Two-source shuffle with mask <5, 7, 7, 7> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec1[1], %vec1[3], %vec1[3], %vec1[3]>.
; No lane reads %vec0, and the expected output for each target contains only
; one inline-asm def block.
11135define void @v_shuffle_v4i16_v4i16__5_7_7_7(ptr addrspace(1) inreg %ptr) {
11136; GFX900-LABEL: v_shuffle_v4i16_v4i16__5_7_7_7:
11137; GFX900:       ; %bb.0:
11138; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11139; GFX900-NEXT:    ;;#ASMSTART
11140; GFX900-NEXT:    ; def v[0:1]
11141; GFX900-NEXT:    ;;#ASMEND
11142; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11143; GFX900-NEXT:    v_mov_b32_e32 v2, 0
11144; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
11145; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
11146; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
11147; GFX900-NEXT:    s_waitcnt vmcnt(0)
11148; GFX900-NEXT:    s_setpc_b64 s[30:31]
11149;
11150; GFX90A-LABEL: v_shuffle_v4i16_v4i16__5_7_7_7:
11151; GFX90A:       ; %bb.0:
11152; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11153; GFX90A-NEXT:    ;;#ASMSTART
11154; GFX90A-NEXT:    ; def v[0:1]
11155; GFX90A-NEXT:    ;;#ASMEND
11156; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11157; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
11158; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
11159; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
11160; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
11161; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11162; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11163;
11164; GFX940-LABEL: v_shuffle_v4i16_v4i16__5_7_7_7:
11165; GFX940:       ; %bb.0:
11166; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11167; GFX940-NEXT:    ;;#ASMSTART
11168; GFX940-NEXT:    ; def v[0:1]
11169; GFX940-NEXT:    ;;#ASMEND
11170; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11171; GFX940-NEXT:    v_mov_b32_e32 v2, 0
11172; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
11173; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
11174; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
11175; GFX940-NEXT:    s_waitcnt vmcnt(0)
11176; GFX940-NEXT:    s_setpc_b64 s[30:31]
11177  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11178  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Every lane reads %vec1; %vec0 is not referenced by the mask. The result is stored through %ptr.
11179  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
11180  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11181  ret void
11182}
11183
; Two-source shuffle with mask <6, 7, 7, 7> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec1[2], %vec1[3], %vec1[3], %vec1[3]>.
; No lane reads %vec0, and the expected output for each target contains only
; one inline-asm def block.
11184define void @v_shuffle_v4i16_v4i16__6_7_7_7(ptr addrspace(1) inreg %ptr) {
11185; GFX900-LABEL: v_shuffle_v4i16_v4i16__6_7_7_7:
11186; GFX900:       ; %bb.0:
11187; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11188; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11189; GFX900-NEXT:    v_mov_b32_e32 v3, 0
11190; GFX900-NEXT:    ;;#ASMSTART
11191; GFX900-NEXT:    ; def v[0:1]
11192; GFX900-NEXT:    ;;#ASMEND
11193; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
11194; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
11195; GFX900-NEXT:    s_waitcnt vmcnt(0)
11196; GFX900-NEXT:    s_setpc_b64 s[30:31]
11197;
11198; GFX90A-LABEL: v_shuffle_v4i16_v4i16__6_7_7_7:
11199; GFX90A:       ; %bb.0:
11200; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11201; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11202; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11203; GFX90A-NEXT:    ;;#ASMSTART
11204; GFX90A-NEXT:    ; def v[0:1]
11205; GFX90A-NEXT:    ;;#ASMEND
11206; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
11207; GFX90A-NEXT:    v_mov_b32_e32 v2, v1
11208; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
11209; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11210; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11211;
11212; GFX940-LABEL: v_shuffle_v4i16_v4i16__6_7_7_7:
11213; GFX940:       ; %bb.0:
11214; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11215; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11216; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11217; GFX940-NEXT:    ;;#ASMSTART
11218; GFX940-NEXT:    ; def v[0:1]
11219; GFX940-NEXT:    ;;#ASMEND
11220; GFX940-NEXT:    s_nop 0
11221; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
11222; GFX940-NEXT:    v_mov_b32_e32 v2, v1
11223; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
11224; GFX940-NEXT:    s_waitcnt vmcnt(0)
11225; GFX940-NEXT:    s_setpc_b64 s[30:31]
11226  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11227  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Every lane reads %vec1; %vec0 is not referenced by the mask. The result is stored through %ptr.
11228  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
11229  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11230  ret void
11231}
11232
; Two-source shuffle with mask <7, poison, 7, 7> ("u" in the function name
; marks the poison lane). Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so lanes 0, 2 and 3 are all %vec1[3] and lane 1 is left unspecified.
; No lane reads %vec0, and the expected output for each target contains only
; one inline-asm def block.
11233define void @v_shuffle_v4i16_v4i16__7_u_7_7(ptr addrspace(1) inreg %ptr) {
11234; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_u_7_7:
11235; GFX900:       ; %bb.0:
11236; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11237; GFX900-NEXT:    ;;#ASMSTART
11238; GFX900-NEXT:    ; def v[0:1]
11239; GFX900-NEXT:    ;;#ASMEND
11240; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11241; GFX900-NEXT:    v_mov_b32_e32 v3, 0
11242; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
11243; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
11244; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
11245; GFX900-NEXT:    s_waitcnt vmcnt(0)
11246; GFX900-NEXT:    s_setpc_b64 s[30:31]
11247;
11248; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_u_7_7:
11249; GFX90A:       ; %bb.0:
11250; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11251; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11252; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11253; GFX90A-NEXT:    ;;#ASMSTART
11254; GFX90A-NEXT:    ; def v[0:1]
11255; GFX90A-NEXT:    ;;#ASMEND
11256; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
11257; GFX90A-NEXT:    v_alignbit_b32 v2, s4, v1, 16
11258; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
11259; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11260; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11261;
11262; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_u_7_7:
11263; GFX940:       ; %bb.0:
11264; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11265; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11266; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11267; GFX940-NEXT:    ;;#ASMSTART
11268; GFX940-NEXT:    ; def v[0:1]
11269; GFX940-NEXT:    ;;#ASMEND
11270; GFX940-NEXT:    s_nop 0
11271; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
11272; GFX940-NEXT:    v_alignbit_b32 v2, s0, v1, 16
11273; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
11274; GFX940-NEXT:    s_waitcnt vmcnt(0)
11275; GFX940-NEXT:    s_setpc_b64 s[30:31]
11276  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11277  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Lane 1 is poison; lanes 0, 2 and 3 read %vec1. The result is stored through %ptr.
11278  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 7, i32 7>
11279  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11280  ret void
11281}
11282
; Two-source shuffle with mask <7, 0, 7, 7> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec1[3], %vec0[0], %vec1[3], %vec1[3]>.
11283define void @v_shuffle_v4i16_v4i16__7_0_7_7(ptr addrspace(1) inreg %ptr) {
11284; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_0_7_7:
11285; GFX900:       ; %bb.0:
11286; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11287; GFX900-NEXT:    ;;#ASMSTART
11288; GFX900-NEXT:    ; def v[0:1]
11289; GFX900-NEXT:    ;;#ASMEND
11290; GFX900-NEXT:    ;;#ASMSTART
11291; GFX900-NEXT:    ; def v[1:2]
11292; GFX900-NEXT:    ;;#ASMEND
11293; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11294; GFX900-NEXT:    v_mov_b32_e32 v3, 0
11295; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
11296; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
11297; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
11298; GFX900-NEXT:    s_waitcnt vmcnt(0)
11299; GFX900-NEXT:    s_setpc_b64 s[30:31]
11300;
11301; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_0_7_7:
11302; GFX90A:       ; %bb.0:
11303; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11304; GFX90A-NEXT:    ;;#ASMSTART
11305; GFX90A-NEXT:    ; def v[0:1]
11306; GFX90A-NEXT:    ;;#ASMEND
11307; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11308; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11309; GFX90A-NEXT:    ;;#ASMSTART
11310; GFX90A-NEXT:    ; def v[2:3]
11311; GFX90A-NEXT:    ;;#ASMEND
11312; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
11313; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
11314; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11315; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11316; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11317;
11318; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_0_7_7:
11319; GFX940:       ; %bb.0:
11320; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11321; GFX940-NEXT:    ;;#ASMSTART
11322; GFX940-NEXT:    ; def v[0:1]
11323; GFX940-NEXT:    ;;#ASMEND
11324; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11325; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11326; GFX940-NEXT:    ;;#ASMSTART
11327; GFX940-NEXT:    ; def v[2:3]
11328; GFX940-NEXT:    ;;#ASMEND
11329; GFX940-NEXT:    s_nop 0
11330; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
11331; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
11332; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
11333; GFX940-NEXT:    s_waitcnt vmcnt(0)
11334; GFX940-NEXT:    s_setpc_b64 s[30:31]
11335  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11336  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Lane 1 reads %vec0; lanes 0, 2 and 3 read %vec1. The result is stored through %ptr.
11337  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 7, i32 7>
11338  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11339  ret void
11340}
11341
; Two-source shuffle with mask <7, 1, 7, 7> (the mask is encoded in the
; function name). Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so
; the stored vector is <%vec1[3], %vec0[1], %vec1[3], %vec1[3]>.
11342define void @v_shuffle_v4i16_v4i16__7_1_7_7(ptr addrspace(1) inreg %ptr) {
11343; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_1_7_7:
11344; GFX900:       ; %bb.0:
11345; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11346; GFX900-NEXT:    ;;#ASMSTART
11347; GFX900-NEXT:    ; def v[0:1]
11348; GFX900-NEXT:    ;;#ASMEND
11349; GFX900-NEXT:    ;;#ASMSTART
11350; GFX900-NEXT:    ; def v[1:2]
11351; GFX900-NEXT:    ;;#ASMEND
11352; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11353; GFX900-NEXT:    v_mov_b32_e32 v3, 0
11354; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
11355; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
11356; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
11357; GFX900-NEXT:    s_waitcnt vmcnt(0)
11358; GFX900-NEXT:    s_setpc_b64 s[30:31]
11359;
11360; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_1_7_7:
11361; GFX90A:       ; %bb.0:
11362; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11363; GFX90A-NEXT:    ;;#ASMSTART
11364; GFX90A-NEXT:    ; def v[0:1]
11365; GFX90A-NEXT:    ;;#ASMEND
11366; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11367; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11368; GFX90A-NEXT:    ;;#ASMSTART
11369; GFX90A-NEXT:    ; def v[2:3]
11370; GFX90A-NEXT:    ;;#ASMEND
11371; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
11372; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
11373; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11374; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11375; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11376;
11377; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_1_7_7:
11378; GFX940:       ; %bb.0:
11379; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11380; GFX940-NEXT:    ;;#ASMSTART
11381; GFX940-NEXT:    ; def v[0:1]
11382; GFX940-NEXT:    ;;#ASMEND
11383; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11384; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11385; GFX940-NEXT:    ;;#ASMSTART
11386; GFX940-NEXT:    ; def v[2:3]
11387; GFX940-NEXT:    ;;#ASMEND
11388; GFX940-NEXT:    s_nop 0
11389; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
11390; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
11391; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
11392; GFX940-NEXT:    s_waitcnt vmcnt(0)
11393; GFX940-NEXT:    s_setpc_b64 s[30:31]
11394  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11395  %vec1 = call <4 x i16> asm "; def $0", "=v"()
  ; Lane 1 reads %vec0; lanes 0, 2 and 3 read %vec1. The result is stored through %ptr.
11396  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 7, i32 7>
11397  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11398  ret void
11399}
11400
; Two-source <4 x i16> shuffle with mask <7, 2, 7, 7>: lanes 0, 2 and 3 take
; element 3 of %vec1 (index 7) and lane 1 takes element 2 of %vec0.  Both
; inputs are produced by "=v" inline asm and the result is stored to %ptr.
11401define void @v_shuffle_v4i16_v4i16__7_2_7_7(ptr addrspace(1) inreg %ptr) {
11402; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_2_7_7:
11403; GFX900:       ; %bb.0:
11404; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11405; GFX900-NEXT:    ;;#ASMSTART
11406; GFX900-NEXT:    ; def v[0:1]
11407; GFX900-NEXT:    ;;#ASMEND
11408; GFX900-NEXT:    ;;#ASMSTART
11409; GFX900-NEXT:    ; def v[2:3]
11410; GFX900-NEXT:    ;;#ASMEND
11411; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11412; GFX900-NEXT:    v_mov_b32_e32 v4, 0
11413; GFX900-NEXT:    v_perm_b32 v2, v3, v3, s4
11414; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
11415; GFX900-NEXT:    global_store_dwordx2 v4, v[1:2], s[16:17]
11416; GFX900-NEXT:    s_waitcnt vmcnt(0)
11417; GFX900-NEXT:    s_setpc_b64 s[30:31]
11418;
11419; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_2_7_7:
11420; GFX90A:       ; %bb.0:
11421; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11422; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11423; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
11424; GFX90A-NEXT:    ;;#ASMSTART
11425; GFX90A-NEXT:    ; def v[0:1]
11426; GFX90A-NEXT:    ;;#ASMEND
11427; GFX90A-NEXT:    ;;#ASMSTART
11428; GFX90A-NEXT:    ; def v[2:3]
11429; GFX90A-NEXT:    ;;#ASMEND
11430; GFX90A-NEXT:    v_perm_b32 v5, v3, v3, s4
11431; GFX90A-NEXT:    v_alignbit_b32 v4, v1, v3, 16
11432; GFX90A-NEXT:    global_store_dwordx2 v6, v[4:5], s[16:17]
11433; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11434; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11435;
11436; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_2_7_7:
11437; GFX940:       ; %bb.0:
11438; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11439; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11440; GFX940-NEXT:    v_mov_b32_e32 v6, 0
11441; GFX940-NEXT:    ;;#ASMSTART
11442; GFX940-NEXT:    ; def v[0:1]
11443; GFX940-NEXT:    ;;#ASMEND
11444; GFX940-NEXT:    ;;#ASMSTART
11445; GFX940-NEXT:    ; def v[2:3]
11446; GFX940-NEXT:    ;;#ASMEND
11447; GFX940-NEXT:    s_nop 0
11448; GFX940-NEXT:    v_perm_b32 v5, v3, v3, s2
11449; GFX940-NEXT:    v_alignbit_b32 v4, v1, v3, 16
11450; GFX940-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] sc0 sc1
11451; GFX940-NEXT:    s_waitcnt vmcnt(0)
11452; GFX940-NEXT:    s_setpc_b64 s[30:31]
11453  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11454  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11455  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 7, i32 7>
11456  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11457  ret void
11458}
11459
; Two-source <4 x i16> shuffle with mask <7, 3, 7, 7>: lanes 0, 2 and 3 take
; element 3 of %vec1 (index 7) and lane 1 takes element 3 of %vec0.  Both
; inputs are produced by "=v" inline asm and the result is stored to %ptr.
11460define void @v_shuffle_v4i16_v4i16__7_3_7_7(ptr addrspace(1) inreg %ptr) {
11461; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_3_7_7:
11462; GFX900:       ; %bb.0:
11463; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11464; GFX900-NEXT:    ;;#ASMSTART
11465; GFX900-NEXT:    ; def v[0:1]
11466; GFX900-NEXT:    ;;#ASMEND
11467; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11468; GFX900-NEXT:    v_mov_b32_e32 v4, 0
11469; GFX900-NEXT:    ;;#ASMSTART
11470; GFX900-NEXT:    ; def v[2:3]
11471; GFX900-NEXT:    ;;#ASMEND
11472; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
11473; GFX900-NEXT:    v_perm_b32 v1, v3, v3, s4
11474; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11475; GFX900-NEXT:    s_waitcnt vmcnt(0)
11476; GFX900-NEXT:    s_setpc_b64 s[30:31]
11477;
11478; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_3_7_7:
11479; GFX90A:       ; %bb.0:
11480; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11481; GFX90A-NEXT:    ;;#ASMSTART
11482; GFX90A-NEXT:    ; def v[0:1]
11483; GFX90A-NEXT:    ;;#ASMEND
11484; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11485; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11486; GFX90A-NEXT:    ;;#ASMSTART
11487; GFX90A-NEXT:    ; def v[2:3]
11488; GFX90A-NEXT:    ;;#ASMEND
11489; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
11490; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
11491; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11492; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11493; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11494;
11495; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_3_7_7:
11496; GFX940:       ; %bb.0:
11497; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11498; GFX940-NEXT:    ;;#ASMSTART
11499; GFX940-NEXT:    ; def v[0:1]
11500; GFX940-NEXT:    ;;#ASMEND
11501; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11502; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11503; GFX940-NEXT:    ;;#ASMSTART
11504; GFX940-NEXT:    ; def v[2:3]
11505; GFX940-NEXT:    ;;#ASMEND
11506; GFX940-NEXT:    s_nop 0
11507; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
11508; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
11509; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
11510; GFX940-NEXT:    s_waitcnt vmcnt(0)
11511; GFX940-NEXT:    s_setpc_b64 s[30:31]
11512  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11513  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11514  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 7, i32 7>
11515  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11516  ret void
11517}
11518
; Two-source <4 x i16> shuffle with mask <7, 4, 7, 7>: every index is in the
; 4-7 range, so all lanes read %vec1 - lanes 0, 2 and 3 take its element 3 and
; lane 1 takes its element 0.  %vec0 does not feed the result, and the
; expected output below contains a single "def" asm block per target.
11519define void @v_shuffle_v4i16_v4i16__7_4_7_7(ptr addrspace(1) inreg %ptr) {
11520; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_4_7_7:
11521; GFX900:       ; %bb.0:
11522; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11523; GFX900-NEXT:    ;;#ASMSTART
11524; GFX900-NEXT:    ; def v[0:1]
11525; GFX900-NEXT:    ;;#ASMEND
11526; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11527; GFX900-NEXT:    v_mov_b32_e32 v3, 0
11528; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
11529; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
11530; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
11531; GFX900-NEXT:    s_waitcnt vmcnt(0)
11532; GFX900-NEXT:    s_setpc_b64 s[30:31]
11533;
11534; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_4_7_7:
11535; GFX90A:       ; %bb.0:
11536; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11537; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11538; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11539; GFX90A-NEXT:    ;;#ASMSTART
11540; GFX90A-NEXT:    ; def v[0:1]
11541; GFX90A-NEXT:    ;;#ASMEND
11542; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
11543; GFX90A-NEXT:    v_alignbit_b32 v2, v0, v1, 16
11544; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
11545; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11546; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11547;
11548; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_4_7_7:
11549; GFX940:       ; %bb.0:
11550; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11551; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11552; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11553; GFX940-NEXT:    ;;#ASMSTART
11554; GFX940-NEXT:    ; def v[0:1]
11555; GFX940-NEXT:    ;;#ASMEND
11556; GFX940-NEXT:    s_nop 0
11557; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
11558; GFX940-NEXT:    v_alignbit_b32 v2, v0, v1, 16
11559; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
11560; GFX940-NEXT:    s_waitcnt vmcnt(0)
11561; GFX940-NEXT:    s_setpc_b64 s[30:31]
11562  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11563  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11564  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 7, i32 7>
11565  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11566  ret void
11567}
11568
; Two-source <4 x i16> shuffle with mask <7, 5, 7, 7>: every index is in the
; 4-7 range, so all lanes read %vec1 - lanes 0, 2 and 3 take its element 3 and
; lane 1 takes its element 1.  %vec0 does not feed the result, and the
; expected output below contains a single "def" asm block per target.
11569define void @v_shuffle_v4i16_v4i16__7_5_7_7(ptr addrspace(1) inreg %ptr) {
11570; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_5_7_7:
11571; GFX900:       ; %bb.0:
11572; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11573; GFX900-NEXT:    ;;#ASMSTART
11574; GFX900-NEXT:    ; def v[0:1]
11575; GFX900-NEXT:    ;;#ASMEND
11576; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11577; GFX900-NEXT:    v_mov_b32_e32 v2, 0
11578; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
11579; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
11580; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
11581; GFX900-NEXT:    s_waitcnt vmcnt(0)
11582; GFX900-NEXT:    s_setpc_b64 s[30:31]
11583;
11584; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_5_7_7:
11585; GFX90A:       ; %bb.0:
11586; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11587; GFX90A-NEXT:    ;;#ASMSTART
11588; GFX90A-NEXT:    ; def v[0:1]
11589; GFX90A-NEXT:    ;;#ASMEND
11590; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11591; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
11592; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
11593; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
11594; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
11595; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11596; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11597;
11598; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_5_7_7:
11599; GFX940:       ; %bb.0:
11600; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11601; GFX940-NEXT:    ;;#ASMSTART
11602; GFX940-NEXT:    ; def v[0:1]
11603; GFX940-NEXT:    ;;#ASMEND
11604; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11605; GFX940-NEXT:    v_mov_b32_e32 v2, 0
11606; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
11607; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
11608; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
11609; GFX940-NEXT:    s_waitcnt vmcnt(0)
11610; GFX940-NEXT:    s_setpc_b64 s[30:31]
11611  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11612  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11613  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 7, i32 7>
11614  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11615  ret void
11616}
11617
; Two-source <4 x i16> shuffle with mask <7, 6, 7, 7>: every index is in the
; 4-7 range, so all lanes read %vec1 - lanes 0, 2 and 3 take its element 3 and
; lane 1 takes its element 2.  %vec0 does not feed the result, and the
; expected output below contains a single "def" asm block per target.
11618define void @v_shuffle_v4i16_v4i16__7_6_7_7(ptr addrspace(1) inreg %ptr) {
11619; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_6_7_7:
11620; GFX900:       ; %bb.0:
11621; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11622; GFX900-NEXT:    ;;#ASMSTART
11623; GFX900-NEXT:    ; def v[0:1]
11624; GFX900-NEXT:    ;;#ASMEND
11625; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11626; GFX900-NEXT:    v_mov_b32_e32 v3, 0
11627; GFX900-NEXT:    v_perm_b32 v2, v1, v1, s4
11628; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
11629; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
11630; GFX900-NEXT:    s_waitcnt vmcnt(0)
11631; GFX900-NEXT:    s_setpc_b64 s[30:31]
11632;
11633; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_6_7_7:
11634; GFX90A:       ; %bb.0:
11635; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11636; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11637; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11638; GFX90A-NEXT:    ;;#ASMSTART
11639; GFX90A-NEXT:    ; def v[0:1]
11640; GFX90A-NEXT:    ;;#ASMEND
11641; GFX90A-NEXT:    v_perm_b32 v3, v1, v1, s4
11642; GFX90A-NEXT:    v_alignbit_b32 v2, v1, v1, 16
11643; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
11644; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11645; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11646;
11647; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_6_7_7:
11648; GFX940:       ; %bb.0:
11649; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11650; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11651; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11652; GFX940-NEXT:    ;;#ASMSTART
11653; GFX940-NEXT:    ; def v[0:1]
11654; GFX940-NEXT:    ;;#ASMEND
11655; GFX940-NEXT:    s_nop 0
11656; GFX940-NEXT:    v_perm_b32 v3, v1, v1, s2
11657; GFX940-NEXT:    v_alignbit_b32 v2, v1, v1, 16
11658; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
11659; GFX940-NEXT:    s_waitcnt vmcnt(0)
11660; GFX940-NEXT:    s_setpc_b64 s[30:31]
11661  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11662  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11663  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 7, i32 7>
11664  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11665  ret void
11666}
11667
; Two-source <4 x i16> shuffle with mask <7, 7, poison, 7>: lanes 0, 1 and 3
; take element 3 of %vec1 (index 7) and lane 2 is left as poison.  %vec0 does
; not feed the result, and the expected output below contains a single "def"
; asm block and a single v_perm_b32 per target.
11668define void @v_shuffle_v4i16_v4i16__7_7_u_7(ptr addrspace(1) inreg %ptr) {
11669; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_u_7:
11670; GFX900:       ; %bb.0:
11671; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11672; GFX900-NEXT:    ;;#ASMSTART
11673; GFX900-NEXT:    ; def v[0:1]
11674; GFX900-NEXT:    ;;#ASMEND
11675; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11676; GFX900-NEXT:    v_mov_b32_e32 v2, 0
11677; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
11678; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
11679; GFX900-NEXT:    s_waitcnt vmcnt(0)
11680; GFX900-NEXT:    s_setpc_b64 s[30:31]
11681;
11682; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_u_7:
11683; GFX90A:       ; %bb.0:
11684; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11685; GFX90A-NEXT:    ;;#ASMSTART
11686; GFX90A-NEXT:    ; def v[0:1]
11687; GFX90A-NEXT:    ;;#ASMEND
11688; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11689; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
11690; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
11691; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
11692; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11693; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11694;
11695; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_u_7:
11696; GFX940:       ; %bb.0:
11697; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11698; GFX940-NEXT:    ;;#ASMSTART
11699; GFX940-NEXT:    ; def v[0:1]
11700; GFX940-NEXT:    ;;#ASMEND
11701; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11702; GFX940-NEXT:    v_mov_b32_e32 v2, 0
11703; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
11704; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
11705; GFX940-NEXT:    s_waitcnt vmcnt(0)
11706; GFX940-NEXT:    s_setpc_b64 s[30:31]
11707  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11708  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11709  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 7>
11710  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11711  ret void
11712}
11713
; Two-source <4 x i16> shuffle with mask <7, 7, 0, 7>: lanes 0, 1 and 3 take
; element 3 of %vec1 (index 7) and lane 2 takes element 0 of %vec0.  Both
; inputs are produced by "=v" inline asm and the result is stored to %ptr.
11714define void @v_shuffle_v4i16_v4i16__7_7_0_7(ptr addrspace(1) inreg %ptr) {
11715; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_0_7:
11716; GFX900:       ; %bb.0:
11717; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11718; GFX900-NEXT:    ;;#ASMSTART
11719; GFX900-NEXT:    ; def v[0:1]
11720; GFX900-NEXT:    ;;#ASMEND
11721; GFX900-NEXT:    ;;#ASMSTART
11722; GFX900-NEXT:    ; def v[1:2]
11723; GFX900-NEXT:    ;;#ASMEND
11724; GFX900-NEXT:    s_mov_b32 s4, 0xffff
11725; GFX900-NEXT:    v_bfi_b32 v1, s4, v0, v2
11726; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11727; GFX900-NEXT:    v_mov_b32_e32 v3, 0
11728; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
11729; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
11730; GFX900-NEXT:    s_waitcnt vmcnt(0)
11731; GFX900-NEXT:    s_setpc_b64 s[30:31]
11732;
11733; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_0_7:
11734; GFX90A:       ; %bb.0:
11735; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11736; GFX90A-NEXT:    ;;#ASMSTART
11737; GFX90A-NEXT:    ; def v[0:1]
11738; GFX90A-NEXT:    ;;#ASMEND
11739; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
11740; GFX90A-NEXT:    ;;#ASMSTART
11741; GFX90A-NEXT:    ; def v[2:3]
11742; GFX90A-NEXT:    ;;#ASMEND
11743; GFX90A-NEXT:    v_bfi_b32 v1, s4, v0, v3
11744; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11745; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11746; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
11747; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11748; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11749; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11750;
11751; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_0_7:
11752; GFX940:       ; %bb.0:
11753; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11754; GFX940-NEXT:    ;;#ASMSTART
11755; GFX940-NEXT:    ; def v[0:1]
11756; GFX940-NEXT:    ;;#ASMEND
11757; GFX940-NEXT:    s_mov_b32 s2, 0xffff
11758; GFX940-NEXT:    ;;#ASMSTART
11759; GFX940-NEXT:    ; def v[2:3]
11760; GFX940-NEXT:    ;;#ASMEND
11761; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11762; GFX940-NEXT:    v_bfi_b32 v1, s2, v0, v3
11763; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11764; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
11765; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
11766; GFX940-NEXT:    s_waitcnt vmcnt(0)
11767; GFX940-NEXT:    s_setpc_b64 s[30:31]
11768  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11769  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11770  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 7>
11771  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11772  ret void
11773}
11774
; Two-source <4 x i16> shuffle with mask <7, 7, 1, 7>: lanes 0, 1 and 3 take
; element 3 of %vec1 (index 7) and lane 2 takes element 1 of %vec0.  Both
; inputs are produced by "=v" inline asm and the result is stored to %ptr.
11775define void @v_shuffle_v4i16_v4i16__7_7_1_7(ptr addrspace(1) inreg %ptr) {
11776; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_1_7:
11777; GFX900:       ; %bb.0:
11778; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11779; GFX900-NEXT:    ;;#ASMSTART
11780; GFX900-NEXT:    ; def v[0:1]
11781; GFX900-NEXT:    ;;#ASMEND
11782; GFX900-NEXT:    ;;#ASMSTART
11783; GFX900-NEXT:    ; def v[1:2]
11784; GFX900-NEXT:    ;;#ASMEND
11785; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11786; GFX900-NEXT:    v_mov_b32_e32 v3, 0
11787; GFX900-NEXT:    v_perm_b32 v1, v2, v0, s4
11788; GFX900-NEXT:    v_perm_b32 v0, v2, v2, s4
11789; GFX900-NEXT:    global_store_dwordx2 v3, v[0:1], s[16:17]
11790; GFX900-NEXT:    s_waitcnt vmcnt(0)
11791; GFX900-NEXT:    s_setpc_b64 s[30:31]
11792;
11793; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_1_7:
11794; GFX90A:       ; %bb.0:
11795; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11796; GFX90A-NEXT:    ;;#ASMSTART
11797; GFX90A-NEXT:    ; def v[0:1]
11798; GFX90A-NEXT:    ;;#ASMEND
11799; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11800; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11801; GFX90A-NEXT:    ;;#ASMSTART
11802; GFX90A-NEXT:    ; def v[2:3]
11803; GFX90A-NEXT:    ;;#ASMEND
11804; GFX90A-NEXT:    v_perm_b32 v1, v3, v0, s4
11805; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
11806; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11807; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11808; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11809;
11810; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_1_7:
11811; GFX940:       ; %bb.0:
11812; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11813; GFX940-NEXT:    ;;#ASMSTART
11814; GFX940-NEXT:    ; def v[0:1]
11815; GFX940-NEXT:    ;;#ASMEND
11816; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11817; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11818; GFX940-NEXT:    ;;#ASMSTART
11819; GFX940-NEXT:    ; def v[2:3]
11820; GFX940-NEXT:    ;;#ASMEND
11821; GFX940-NEXT:    s_nop 0
11822; GFX940-NEXT:    v_perm_b32 v1, v3, v0, s2
11823; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
11824; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
11825; GFX940-NEXT:    s_waitcnt vmcnt(0)
11826; GFX940-NEXT:    s_setpc_b64 s[30:31]
11827  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11828  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11829  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 7>
11830  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11831  ret void
11832}
11833
; Two-source <4 x i16> shuffle with mask <7, 7, 2, 7>: lanes 0, 1 and 3 take
; element 3 of %vec1 (index 7) and lane 2 takes element 2 of %vec0.  Both
; inputs are produced by "=v" inline asm and the result is stored to %ptr.
11834define void @v_shuffle_v4i16_v4i16__7_7_2_7(ptr addrspace(1) inreg %ptr) {
11835; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_2_7:
11836; GFX900:       ; %bb.0:
11837; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11838; GFX900-NEXT:    ;;#ASMSTART
11839; GFX900-NEXT:    ; def v[0:1]
11840; GFX900-NEXT:    ;;#ASMEND
11841; GFX900-NEXT:    s_mov_b32 s4, 0xffff
11842; GFX900-NEXT:    ;;#ASMSTART
11843; GFX900-NEXT:    ; def v[2:3]
11844; GFX900-NEXT:    ;;#ASMEND
11845; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v3
11846; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11847; GFX900-NEXT:    v_mov_b32_e32 v4, 0
11848; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
11849; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11850; GFX900-NEXT:    s_waitcnt vmcnt(0)
11851; GFX900-NEXT:    s_setpc_b64 s[30:31]
11852;
11853; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_2_7:
11854; GFX90A:       ; %bb.0:
11855; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11856; GFX90A-NEXT:    ;;#ASMSTART
11857; GFX90A-NEXT:    ; def v[0:1]
11858; GFX90A-NEXT:    ;;#ASMEND
11859; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
11860; GFX90A-NEXT:    ;;#ASMSTART
11861; GFX90A-NEXT:    ; def v[2:3]
11862; GFX90A-NEXT:    ;;#ASMEND
11863; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v3
11864; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11865; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11866; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
11867; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11868; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11869; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11870;
11871; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_2_7:
11872; GFX940:       ; %bb.0:
11873; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11874; GFX940-NEXT:    ;;#ASMSTART
11875; GFX940-NEXT:    ; def v[0:1]
11876; GFX940-NEXT:    ;;#ASMEND
11877; GFX940-NEXT:    s_mov_b32 s2, 0xffff
11878; GFX940-NEXT:    ;;#ASMSTART
11879; GFX940-NEXT:    ; def v[2:3]
11880; GFX940-NEXT:    ;;#ASMEND
11881; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11882; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v3
11883; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11884; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
11885; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
11886; GFX940-NEXT:    s_waitcnt vmcnt(0)
11887; GFX940-NEXT:    s_setpc_b64 s[30:31]
11888  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11889  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11890  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 7>
11891  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11892  ret void
11893}
11894
; Two-source <4 x i16> shuffle with mask <7, 7, 3, 7>: lanes 0, 1 and 3 take
; element 3 of %vec1 (index 7) and lane 2 takes element 3 of %vec0.  Both
; inputs are produced by "=v" inline asm and the result is stored to %ptr.
11895define void @v_shuffle_v4i16_v4i16__7_7_3_7(ptr addrspace(1) inreg %ptr) {
11896; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_3_7:
11897; GFX900:       ; %bb.0:
11898; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11899; GFX900-NEXT:    ;;#ASMSTART
11900; GFX900-NEXT:    ; def v[0:1]
11901; GFX900-NEXT:    ;;#ASMEND
11902; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11903; GFX900-NEXT:    v_mov_b32_e32 v4, 0
11904; GFX900-NEXT:    ;;#ASMSTART
11905; GFX900-NEXT:    ; def v[2:3]
11906; GFX900-NEXT:    ;;#ASMEND
11907; GFX900-NEXT:    v_perm_b32 v1, v3, v1, s4
11908; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
11909; GFX900-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11910; GFX900-NEXT:    s_waitcnt vmcnt(0)
11911; GFX900-NEXT:    s_setpc_b64 s[30:31]
11912;
11913; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_3_7:
11914; GFX90A:       ; %bb.0:
11915; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11916; GFX90A-NEXT:    ;;#ASMSTART
11917; GFX90A-NEXT:    ; def v[0:1]
11918; GFX90A-NEXT:    ;;#ASMEND
11919; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11920; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11921; GFX90A-NEXT:    ;;#ASMSTART
11922; GFX90A-NEXT:    ; def v[2:3]
11923; GFX90A-NEXT:    ;;#ASMEND
11924; GFX90A-NEXT:    v_perm_b32 v1, v3, v1, s4
11925; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
11926; GFX90A-NEXT:    global_store_dwordx2 v4, v[0:1], s[16:17]
11927; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11928; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11929;
11930; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_3_7:
11931; GFX940:       ; %bb.0:
11932; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11933; GFX940-NEXT:    ;;#ASMSTART
11934; GFX940-NEXT:    ; def v[0:1]
11935; GFX940-NEXT:    ;;#ASMEND
11936; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11937; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11938; GFX940-NEXT:    ;;#ASMSTART
11939; GFX940-NEXT:    ; def v[2:3]
11940; GFX940-NEXT:    ;;#ASMEND
11941; GFX940-NEXT:    s_nop 0
11942; GFX940-NEXT:    v_perm_b32 v1, v3, v1, s2
11943; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
11944; GFX940-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1] sc0 sc1
11945; GFX940-NEXT:    s_waitcnt vmcnt(0)
11946; GFX940-NEXT:    s_setpc_b64 s[30:31]
11947  %vec0 = call <4 x i16> asm "; def $0", "=v"()
11948  %vec1 = call <4 x i16> asm "; def $0", "=v"()
11949  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 7>
11950  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
11951  ret void
11952}
11953
; Two-source <4 x i16> shuffle with mask <7, 7, 4, 7>: every index is in the
; 4-7 range, so all lanes read %vec1 - lanes 0, 1 and 3 take its element 3 and
; lane 2 takes its element 0.  %vec0 does not feed the result, and the
; expected output below contains a single "def" asm block per target.
11954define void @v_shuffle_v4i16_v4i16__7_7_4_7(ptr addrspace(1) inreg %ptr) {
11955; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_4_7:
11956; GFX900:       ; %bb.0:
11957; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11958; GFX900-NEXT:    s_mov_b32 s4, 0xffff
11959; GFX900-NEXT:    ;;#ASMSTART
11960; GFX900-NEXT:    ; def v[0:1]
11961; GFX900-NEXT:    ;;#ASMEND
11962; GFX900-NEXT:    v_bfi_b32 v2, s4, v0, v1
11963; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
11964; GFX900-NEXT:    v_mov_b32_e32 v3, 0
11965; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
11966; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
11967; GFX900-NEXT:    s_waitcnt vmcnt(0)
11968; GFX900-NEXT:    s_setpc_b64 s[30:31]
11969;
11970; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_4_7:
11971; GFX90A:       ; %bb.0:
11972; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11973; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
11974; GFX90A-NEXT:    ;;#ASMSTART
11975; GFX90A-NEXT:    ; def v[0:1]
11976; GFX90A-NEXT:    ;;#ASMEND
11977; GFX90A-NEXT:    v_bfi_b32 v3, s4, v0, v1
11978; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
11979; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
11980; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
11981; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
11982; GFX90A-NEXT:    s_waitcnt vmcnt(0)
11983; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11984;
11985; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_4_7:
11986; GFX940:       ; %bb.0:
11987; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11988; GFX940-NEXT:    s_mov_b32 s2, 0xffff
11989; GFX940-NEXT:    ;;#ASMSTART
11990; GFX940-NEXT:    ; def v[0:1]
11991; GFX940-NEXT:    ;;#ASMEND
11992; GFX940-NEXT:    v_mov_b32_e32 v4, 0
11993; GFX940-NEXT:    v_bfi_b32 v3, s2, v0, v1
11994; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
11995; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
11996; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
11997; GFX940-NEXT:    s_waitcnt vmcnt(0)
11998; GFX940-NEXT:    s_setpc_b64 s[30:31]
11999  %vec0 = call <4 x i16> asm "; def $0", "=v"()
12000  %vec1 = call <4 x i16> asm "; def $0", "=v"()
12001  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 7>
12002  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
12003  ret void
12004}
12005
; Two-source <4 x i16> shuffle with mask <7, 7, 5, 7>: every index is in the
; 4-7 range, so all lanes read %vec1 - lanes 0, 1 and 3 take its element 3 and
; lane 2 takes its element 1.  %vec0 does not feed the result, and the
; expected output below contains a single "def" asm block per target.
12006define void @v_shuffle_v4i16_v4i16__7_7_5_7(ptr addrspace(1) inreg %ptr) {
12007; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_5_7:
12008; GFX900:       ; %bb.0:
12009; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12010; GFX900-NEXT:    ;;#ASMSTART
12011; GFX900-NEXT:    ; def v[0:1]
12012; GFX900-NEXT:    ;;#ASMEND
12013; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
12014; GFX900-NEXT:    v_mov_b32_e32 v3, 0
12015; GFX900-NEXT:    v_perm_b32 v2, v1, v0, s4
12016; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
12017; GFX900-NEXT:    global_store_dwordx2 v3, v[1:2], s[16:17]
12018; GFX900-NEXT:    s_waitcnt vmcnt(0)
12019; GFX900-NEXT:    s_setpc_b64 s[30:31]
12020;
12021; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_5_7:
12022; GFX90A:       ; %bb.0:
12023; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12024; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
12025; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
12026; GFX90A-NEXT:    ;;#ASMSTART
12027; GFX90A-NEXT:    ; def v[0:1]
12028; GFX90A-NEXT:    ;;#ASMEND
12029; GFX90A-NEXT:    v_perm_b32 v3, v1, v0, s4
12030; GFX90A-NEXT:    v_perm_b32 v2, v1, v1, s4
12031; GFX90A-NEXT:    global_store_dwordx2 v4, v[2:3], s[16:17]
12032; GFX90A-NEXT:    s_waitcnt vmcnt(0)
12033; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12034;
12035; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_5_7:
12036; GFX940:       ; %bb.0:
12037; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12038; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
12039; GFX940-NEXT:    v_mov_b32_e32 v4, 0
12040; GFX940-NEXT:    ;;#ASMSTART
12041; GFX940-NEXT:    ; def v[0:1]
12042; GFX940-NEXT:    ;;#ASMEND
12043; GFX940-NEXT:    s_nop 0
12044; GFX940-NEXT:    v_perm_b32 v3, v1, v0, s2
12045; GFX940-NEXT:    v_perm_b32 v2, v1, v1, s2
12046; GFX940-NEXT:    global_store_dwordx2 v4, v[2:3], s[0:1] sc0 sc1
12047; GFX940-NEXT:    s_waitcnt vmcnt(0)
12048; GFX940-NEXT:    s_setpc_b64 s[30:31]
12049  %vec0 = call <4 x i16> asm "; def $0", "=v"()
12050  %vec1 = call <4 x i16> asm "; def $0", "=v"()
12051  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 7>
12052  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
12053  ret void
12054}
12055
; Two-source <4 x i16> shuffle with mask <7, 7, 6, 7>: every index is in the
; 4-7 range, so all lanes read %vec1 - lanes 0, 1 and 3 take its element 3 and
; lane 2 takes its element 2.  %vec0 does not feed the result, and the
; expected output below contains a single "def" asm block and a single
; v_perm_b32 per target.
12056define void @v_shuffle_v4i16_v4i16__7_7_6_7(ptr addrspace(1) inreg %ptr) {
12057; GFX900-LABEL: v_shuffle_v4i16_v4i16__7_7_6_7:
12058; GFX900:       ; %bb.0:
12059; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12060; GFX900-NEXT:    ;;#ASMSTART
12061; GFX900-NEXT:    ; def v[0:1]
12062; GFX900-NEXT:    ;;#ASMEND
12063; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
12064; GFX900-NEXT:    v_mov_b32_e32 v2, 0
12065; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
12066; GFX900-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
12067; GFX900-NEXT:    s_waitcnt vmcnt(0)
12068; GFX900-NEXT:    s_setpc_b64 s[30:31]
12069;
12070; GFX90A-LABEL: v_shuffle_v4i16_v4i16__7_7_6_7:
12071; GFX90A:       ; %bb.0:
12072; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12073; GFX90A-NEXT:    ;;#ASMSTART
12074; GFX90A-NEXT:    ; def v[0:1]
12075; GFX90A-NEXT:    ;;#ASMEND
12076; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
12077; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
12078; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
12079; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[16:17]
12080; GFX90A-NEXT:    s_waitcnt vmcnt(0)
12081; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12082;
12083; GFX940-LABEL: v_shuffle_v4i16_v4i16__7_7_6_7:
12084; GFX940:       ; %bb.0:
12085; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12086; GFX940-NEXT:    ;;#ASMSTART
12087; GFX940-NEXT:    ; def v[0:1]
12088; GFX940-NEXT:    ;;#ASMEND
12089; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
12090; GFX940-NEXT:    v_mov_b32_e32 v2, 0
12091; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
12092; GFX940-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
12093; GFX940-NEXT:    s_waitcnt vmcnt(0)
12094; GFX940-NEXT:    s_setpc_b64 s[30:31]
12095  %vec0 = call <4 x i16> asm "; def $0", "=v"()
12096  %vec1 = call <4 x i16> asm "; def $0", "=v"()
12097  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 7>
12098  store <4 x i16> %shuf, ptr addrspace(1) %ptr, align 8
12099  ret void
12100}
12101
; "=s"-constrained variant of the shuffle test: the input comes from "=s"
; inline asm and the result is consumed by a side-effecting asm pinned to
; s[8:9] instead of being stored.  The mask is entirely poison, so no lane of
; %vec0 is read; the expected output (shared by all three RUN lines) contains
; only the "use" asm block.
12102define void @s_shuffle_v4i16_v4i16__u_u_u_u() {
12103; GFX9-LABEL: s_shuffle_v4i16_v4i16__u_u_u_u:
12104; GFX9:       ; %bb.0:
12105; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12106; GFX9-NEXT:    ;;#ASMSTART
12107; GFX9-NEXT:    ; use s[8:9]
12108; GFX9-NEXT:    ;;#ASMEND
12109; GFX9-NEXT:    s_setpc_b64 s[30:31]
12110  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12111  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> poison
12112  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12113  ret void
12114}
12115
; Scalar variant, mask <0, u, u, u>: only lane 0 is defined and it is taken
; from element 0 of %vec0. The expected code defines the source directly in
; s[8:9] and uses it unchanged, i.e. no shuffle instruction is emitted.
; The gfx940 expectation additionally has an "s_nop 0" between the two asm
; blocks; the other two targets do not.
12116define void @s_shuffle_v4i16_v4i16__0_u_u_u() {
12117; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_u_u_u:
12118; GFX900:       ; %bb.0:
12119; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12120; GFX900-NEXT:    ;;#ASMSTART
12121; GFX900-NEXT:    ; def s[8:9]
12122; GFX900-NEXT:    ;;#ASMEND
12123; GFX900-NEXT:    ;;#ASMSTART
12124; GFX900-NEXT:    ; use s[8:9]
12125; GFX900-NEXT:    ;;#ASMEND
12126; GFX900-NEXT:    s_setpc_b64 s[30:31]
12127;
12128; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_u_u_u:
12129; GFX90A:       ; %bb.0:
12130; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12131; GFX90A-NEXT:    ;;#ASMSTART
12132; GFX90A-NEXT:    ; def s[8:9]
12133; GFX90A-NEXT:    ;;#ASMEND
12134; GFX90A-NEXT:    ;;#ASMSTART
12135; GFX90A-NEXT:    ; use s[8:9]
12136; GFX90A-NEXT:    ;;#ASMEND
12137; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12138;
12139; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_u_u_u:
12140; GFX940:       ; %bb.0:
12141; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12142; GFX940-NEXT:    ;;#ASMSTART
12143; GFX940-NEXT:    ; def s[8:9]
12144; GFX940-NEXT:    ;;#ASMEND
12145; GFX940-NEXT:    s_nop 0
12146; GFX940-NEXT:    ;;#ASMSTART
12147; GFX940-NEXT:    ; use s[8:9]
12148; GFX940-NEXT:    ;;#ASMEND
12149; GFX940-NEXT:    s_setpc_b64 s[30:31]
12150  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12151  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
12152  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12153  ret void
12154}
12155
; Scalar variant, mask <1, u, u, u>: lane 0 comes from element 1 of %vec0,
; the remaining lanes are undefined. The expected code shifts the first
; 32-bit register of the source pair right by 16 into s8 (s4 on
; gfx900/gfx90a, s0 on gfx940); s9 is left unwritten.
12156define void @s_shuffle_v4i16_v4i16__1_u_u_u() {
12157; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_u_u_u:
12158; GFX900:       ; %bb.0:
12159; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12160; GFX900-NEXT:    ;;#ASMSTART
12161; GFX900-NEXT:    ; def s[4:5]
12162; GFX900-NEXT:    ;;#ASMEND
12163; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
12164; GFX900-NEXT:    ;;#ASMSTART
12165; GFX900-NEXT:    ; use s[8:9]
12166; GFX900-NEXT:    ;;#ASMEND
12167; GFX900-NEXT:    s_setpc_b64 s[30:31]
12168;
12169; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_u_u_u:
12170; GFX90A:       ; %bb.0:
12171; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12172; GFX90A-NEXT:    ;;#ASMSTART
12173; GFX90A-NEXT:    ; def s[4:5]
12174; GFX90A-NEXT:    ;;#ASMEND
12175; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
12176; GFX90A-NEXT:    ;;#ASMSTART
12177; GFX90A-NEXT:    ; use s[8:9]
12178; GFX90A-NEXT:    ;;#ASMEND
12179; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12180;
12181; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_u_u_u:
12182; GFX940:       ; %bb.0:
12183; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12184; GFX940-NEXT:    ;;#ASMSTART
12185; GFX940-NEXT:    ; def s[0:1]
12186; GFX940-NEXT:    ;;#ASMEND
12187; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
12188; GFX940-NEXT:    ;;#ASMSTART
12189; GFX940-NEXT:    ; use s[8:9]
12190; GFX940-NEXT:    ;;#ASMEND
12191; GFX940-NEXT:    s_setpc_b64 s[30:31]
12192  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12193  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
12194  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12195  ret void
12196}
12197
; Scalar variant, mask <2, u, u, u>: lane 0 comes from element 2 of %vec0,
; the remaining lanes are undefined. The expected code is a plain copy of
; the second 32-bit register of the source pair into s8 (s5 on
; gfx900/gfx90a, s1 on gfx940); no shift or pack is needed.
12198define void @s_shuffle_v4i16_v4i16__2_u_u_u() {
12199; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u:
12200; GFX900:       ; %bb.0:
12201; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12202; GFX900-NEXT:    ;;#ASMSTART
12203; GFX900-NEXT:    ; def s[4:5]
12204; GFX900-NEXT:    ;;#ASMEND
12205; GFX900-NEXT:    s_mov_b32 s8, s5
12206; GFX900-NEXT:    ;;#ASMSTART
12207; GFX900-NEXT:    ; use s[8:9]
12208; GFX900-NEXT:    ;;#ASMEND
12209; GFX900-NEXT:    s_setpc_b64 s[30:31]
12210;
12211; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u:
12212; GFX90A:       ; %bb.0:
12213; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12214; GFX90A-NEXT:    ;;#ASMSTART
12215; GFX90A-NEXT:    ; def s[4:5]
12216; GFX90A-NEXT:    ;;#ASMEND
12217; GFX90A-NEXT:    s_mov_b32 s8, s5
12218; GFX90A-NEXT:    ;;#ASMSTART
12219; GFX90A-NEXT:    ; use s[8:9]
12220; GFX90A-NEXT:    ;;#ASMEND
12221; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12222;
12223; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_u_u_u:
12224; GFX940:       ; %bb.0:
12225; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12226; GFX940-NEXT:    ;;#ASMSTART
12227; GFX940-NEXT:    ; def s[0:1]
12228; GFX940-NEXT:    ;;#ASMEND
12229; GFX940-NEXT:    s_mov_b32 s8, s1
12230; GFX940-NEXT:    ;;#ASMSTART
12231; GFX940-NEXT:    ; use s[8:9]
12232; GFX940-NEXT:    ;;#ASMEND
12233; GFX940-NEXT:    s_setpc_b64 s[30:31]
12234  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12235  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
12236  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12237  ret void
12238}
12239
; Scalar variant, mask <3, u, u, u>: lane 0 comes from element 3 of %vec0,
; the remaining lanes are undefined. The expected code shifts the second
; 32-bit register of the source pair right by 16 into s8 (s5 on
; gfx900/gfx90a, s1 on gfx940).
12240define void @s_shuffle_v4i16_v4i16__3_u_u_u() {
12241; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_u_u_u:
12242; GFX900:       ; %bb.0:
12243; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12244; GFX900-NEXT:    ;;#ASMSTART
12245; GFX900-NEXT:    ; def s[4:5]
12246; GFX900-NEXT:    ;;#ASMEND
12247; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
12248; GFX900-NEXT:    ;;#ASMSTART
12249; GFX900-NEXT:    ; use s[8:9]
12250; GFX900-NEXT:    ;;#ASMEND
12251; GFX900-NEXT:    s_setpc_b64 s[30:31]
12252;
12253; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_u_u_u:
12254; GFX90A:       ; %bb.0:
12255; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12256; GFX90A-NEXT:    ;;#ASMSTART
12257; GFX90A-NEXT:    ; def s[4:5]
12258; GFX90A-NEXT:    ;;#ASMEND
12259; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
12260; GFX90A-NEXT:    ;;#ASMSTART
12261; GFX90A-NEXT:    ; use s[8:9]
12262; GFX90A-NEXT:    ;;#ASMEND
12263; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12264;
12265; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_u_u_u:
12266; GFX940:       ; %bb.0:
12267; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12268; GFX940-NEXT:    ;;#ASMSTART
12269; GFX940-NEXT:    ; def s[0:1]
12270; GFX940-NEXT:    ;;#ASMEND
12271; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
12272; GFX940-NEXT:    ;;#ASMSTART
12273; GFX940-NEXT:    ; use s[8:9]
12274; GFX940-NEXT:    ;;#ASMEND
12275; GFX940-NEXT:    s_setpc_b64 s[30:31]
12276  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12277  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
12278  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12279  ret void
12280}
12281
; Scalar variant, mask <4, u, u, u>. Index 4 selects element 0 of the second
; shuffle operand, which here is poison, so every lane of the result is
; undefined. The expected code is therefore the same as for the all-undef
; mask: no "def" asm, only the "use" of s[8:9].
; NOTE(review): the sibling tests for indices 5, 6 and 7 pass a second asm
; def (%vec1) as the second operand; this one does not, so it does not
; actually exercise a read of the second vector. Possibly a quirk of the
; test generator -- confirm before changing, since the CHECK lines would
; need to be regenerated.
12282define void @s_shuffle_v4i16_v4i16__4_u_u_u() {
12283; GFX9-LABEL: s_shuffle_v4i16_v4i16__4_u_u_u:
12284; GFX9:       ; %bb.0:
12285; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12286; GFX9-NEXT:    ;;#ASMSTART
12287; GFX9-NEXT:    ; use s[8:9]
12288; GFX9-NEXT:    ;;#ASMEND
12289; GFX9-NEXT:    s_setpc_b64 s[30:31]
12290  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12291  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
12292  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12293  ret void
12294}
12295
; Scalar variant, mask <5, u, u, u>: lane 0 comes from element 1 of %vec1
; (index 5 counts past the four elements of %vec0). Although the IR has two
; asm defs, the expected code contains only one "def" block -- presumably
; the unused, non-sideeffect def of %vec0 is dropped. The selected element
; is obtained by shifting the first register of that pair right by 16.
12296define void @s_shuffle_v4i16_v4i16__5_u_u_u() {
12297; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_u_u_u:
12298; GFX900:       ; %bb.0:
12299; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12300; GFX900-NEXT:    ;;#ASMSTART
12301; GFX900-NEXT:    ; def s[4:5]
12302; GFX900-NEXT:    ;;#ASMEND
12303; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
12304; GFX900-NEXT:    ;;#ASMSTART
12305; GFX900-NEXT:    ; use s[8:9]
12306; GFX900-NEXT:    ;;#ASMEND
12307; GFX900-NEXT:    s_setpc_b64 s[30:31]
12308;
12309; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_u_u_u:
12310; GFX90A:       ; %bb.0:
12311; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12312; GFX90A-NEXT:    ;;#ASMSTART
12313; GFX90A-NEXT:    ; def s[4:5]
12314; GFX90A-NEXT:    ;;#ASMEND
12315; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
12316; GFX90A-NEXT:    ;;#ASMSTART
12317; GFX90A-NEXT:    ; use s[8:9]
12318; GFX90A-NEXT:    ;;#ASMEND
12319; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12320;
12321; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_u_u_u:
12322; GFX940:       ; %bb.0:
12323; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12324; GFX940-NEXT:    ;;#ASMSTART
12325; GFX940-NEXT:    ; def s[0:1]
12326; GFX940-NEXT:    ;;#ASMEND
12327; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
12328; GFX940-NEXT:    ;;#ASMSTART
12329; GFX940-NEXT:    ; use s[8:9]
12330; GFX940-NEXT:    ;;#ASMEND
12331; GFX940-NEXT:    s_setpc_b64 s[30:31]
12332  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12333  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12334  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
12335  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12336  ret void
12337}
12338
; Scalar variant, mask <6, u, u, u>: lane 0 comes from element 2 of %vec1.
; Only one "def" block is expected even though the IR has two asm defs
; (presumably the unused def of %vec0 is dropped). The expected code is a
; plain copy of the second register of that pair into s8.
12339define void @s_shuffle_v4i16_v4i16__6_u_u_u() {
12340; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u:
12341; GFX900:       ; %bb.0:
12342; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12343; GFX900-NEXT:    ;;#ASMSTART
12344; GFX900-NEXT:    ; def s[4:5]
12345; GFX900-NEXT:    ;;#ASMEND
12346; GFX900-NEXT:    s_mov_b32 s8, s5
12347; GFX900-NEXT:    ;;#ASMSTART
12348; GFX900-NEXT:    ; use s[8:9]
12349; GFX900-NEXT:    ;;#ASMEND
12350; GFX900-NEXT:    s_setpc_b64 s[30:31]
12351;
12352; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u:
12353; GFX90A:       ; %bb.0:
12354; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12355; GFX90A-NEXT:    ;;#ASMSTART
12356; GFX90A-NEXT:    ; def s[4:5]
12357; GFX90A-NEXT:    ;;#ASMEND
12358; GFX90A-NEXT:    s_mov_b32 s8, s5
12359; GFX90A-NEXT:    ;;#ASMSTART
12360; GFX90A-NEXT:    ; use s[8:9]
12361; GFX90A-NEXT:    ;;#ASMEND
12362; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12363;
12364; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_u_u_u:
12365; GFX940:       ; %bb.0:
12366; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12367; GFX940-NEXT:    ;;#ASMSTART
12368; GFX940-NEXT:    ; def s[0:1]
12369; GFX940-NEXT:    ;;#ASMEND
12370; GFX940-NEXT:    s_mov_b32 s8, s1
12371; GFX940-NEXT:    ;;#ASMSTART
12372; GFX940-NEXT:    ; use s[8:9]
12373; GFX940-NEXT:    ;;#ASMEND
12374; GFX940-NEXT:    s_setpc_b64 s[30:31]
12375  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12376  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12377  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 poison, i32 poison, i32 poison>
12378  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12379  ret void
12380}
12381
; Scalar variant, mask <7, u, u, u>: lane 0 comes from element 3 of %vec1.
; Only one "def" block is expected even though the IR has two asm defs
; (presumably the unused def of %vec0 is dropped). The selected element is
; obtained by shifting the second register of that pair right by 16.
12382define void @s_shuffle_v4i16_v4i16__7_u_u_u() {
12383; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_u_u:
12384; GFX900:       ; %bb.0:
12385; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12386; GFX900-NEXT:    ;;#ASMSTART
12387; GFX900-NEXT:    ; def s[4:5]
12388; GFX900-NEXT:    ;;#ASMEND
12389; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
12390; GFX900-NEXT:    ;;#ASMSTART
12391; GFX900-NEXT:    ; use s[8:9]
12392; GFX900-NEXT:    ;;#ASMEND
12393; GFX900-NEXT:    s_setpc_b64 s[30:31]
12394;
12395; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_u_u:
12396; GFX90A:       ; %bb.0:
12397; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12398; GFX90A-NEXT:    ;;#ASMSTART
12399; GFX90A-NEXT:    ; def s[4:5]
12400; GFX90A-NEXT:    ;;#ASMEND
12401; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
12402; GFX90A-NEXT:    ;;#ASMSTART
12403; GFX90A-NEXT:    ; use s[8:9]
12404; GFX90A-NEXT:    ;;#ASMEND
12405; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12406;
12407; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_u_u:
12408; GFX940:       ; %bb.0:
12409; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12410; GFX940-NEXT:    ;;#ASMSTART
12411; GFX940-NEXT:    ; def s[0:1]
12412; GFX940-NEXT:    ;;#ASMEND
12413; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
12414; GFX940-NEXT:    ;;#ASMSTART
12415; GFX940-NEXT:    ; use s[8:9]
12416; GFX940-NEXT:    ;;#ASMEND
12417; GFX940-NEXT:    s_setpc_b64 s[30:31]
12418  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12419  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12420  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 poison, i32 poison>
12421  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12422  ret void
12423}
12424
; Scalar variant, mask <7, 0, u, u>: lane 0 is element 3 of %vec1, lane 1 is
; element 0 of %vec0, lanes 2-3 are undefined. Both asm defs are live. The
; expected code shifts the second register of the %vec1 pair right by 16
; and then combines it with the first register of the %vec0 pair using
; s_pack_ll_b32_b16 into s8; s9 is left unwritten.
12425define void @s_shuffle_v4i16_v4i16__7_0_u_u() {
12426; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_u_u:
12427; GFX900:       ; %bb.0:
12428; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12429; GFX900-NEXT:    ;;#ASMSTART
12430; GFX900-NEXT:    ; def s[4:5]
12431; GFX900-NEXT:    ;;#ASMEND
12432; GFX900-NEXT:    ;;#ASMSTART
12433; GFX900-NEXT:    ; def s[6:7]
12434; GFX900-NEXT:    ;;#ASMEND
12435; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
12436; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12437; GFX900-NEXT:    ;;#ASMSTART
12438; GFX900-NEXT:    ; use s[8:9]
12439; GFX900-NEXT:    ;;#ASMEND
12440; GFX900-NEXT:    s_setpc_b64 s[30:31]
12441;
12442; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_u_u:
12443; GFX90A:       ; %bb.0:
12444; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12445; GFX90A-NEXT:    ;;#ASMSTART
12446; GFX90A-NEXT:    ; def s[4:5]
12447; GFX90A-NEXT:    ;;#ASMEND
12448; GFX90A-NEXT:    ;;#ASMSTART
12449; GFX90A-NEXT:    ; def s[6:7]
12450; GFX90A-NEXT:    ;;#ASMEND
12451; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
12452; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12453; GFX90A-NEXT:    ;;#ASMSTART
12454; GFX90A-NEXT:    ; use s[8:9]
12455; GFX90A-NEXT:    ;;#ASMEND
12456; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12457;
12458; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_u_u:
12459; GFX940:       ; %bb.0:
12460; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12461; GFX940-NEXT:    ;;#ASMSTART
12462; GFX940-NEXT:    ; def s[0:1]
12463; GFX940-NEXT:    ;;#ASMEND
12464; GFX940-NEXT:    ;;#ASMSTART
12465; GFX940-NEXT:    ; def s[2:3]
12466; GFX940-NEXT:    ;;#ASMEND
12467; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
12468; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12469; GFX940-NEXT:    ;;#ASMSTART
12470; GFX940-NEXT:    ; use s[8:9]
12471; GFX940-NEXT:    ;;#ASMEND
12472; GFX940-NEXT:    s_setpc_b64 s[30:31]
12473  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12474  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12475  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 poison, i32 poison>
12476  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12477  ret void
12478}
12479
; Scalar variant, mask <7, 1, u, u>: lane 0 is element 3 of %vec1, lane 1 is
; element 1 of %vec0, lanes 2-3 are undefined. Both selected elements are
; odd-numbered, and the expected code needs no separate shift: a single
; s_pack_hh_b32_b16 of the second %vec1 register and the first %vec0
; register produces s8.
12480define void @s_shuffle_v4i16_v4i16__7_1_u_u() {
12481; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_u_u:
12482; GFX900:       ; %bb.0:
12483; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12484; GFX900-NEXT:    ;;#ASMSTART
12485; GFX900-NEXT:    ; def s[4:5]
12486; GFX900-NEXT:    ;;#ASMEND
12487; GFX900-NEXT:    ;;#ASMSTART
12488; GFX900-NEXT:    ; def s[6:7]
12489; GFX900-NEXT:    ;;#ASMEND
12490; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
12491; GFX900-NEXT:    ;;#ASMSTART
12492; GFX900-NEXT:    ; use s[8:9]
12493; GFX900-NEXT:    ;;#ASMEND
12494; GFX900-NEXT:    s_setpc_b64 s[30:31]
12495;
12496; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_u_u:
12497; GFX90A:       ; %bb.0:
12498; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12499; GFX90A-NEXT:    ;;#ASMSTART
12500; GFX90A-NEXT:    ; def s[4:5]
12501; GFX90A-NEXT:    ;;#ASMEND
12502; GFX90A-NEXT:    ;;#ASMSTART
12503; GFX90A-NEXT:    ; def s[6:7]
12504; GFX90A-NEXT:    ;;#ASMEND
12505; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
12506; GFX90A-NEXT:    ;;#ASMSTART
12507; GFX90A-NEXT:    ; use s[8:9]
12508; GFX90A-NEXT:    ;;#ASMEND
12509; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12510;
12511; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_u_u:
12512; GFX940:       ; %bb.0:
12513; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12514; GFX940-NEXT:    ;;#ASMSTART
12515; GFX940-NEXT:    ; def s[0:1]
12516; GFX940-NEXT:    ;;#ASMEND
12517; GFX940-NEXT:    ;;#ASMSTART
12518; GFX940-NEXT:    ; def s[2:3]
12519; GFX940-NEXT:    ;;#ASMEND
12520; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
12521; GFX940-NEXT:    ;;#ASMSTART
12522; GFX940-NEXT:    ; use s[8:9]
12523; GFX940-NEXT:    ;;#ASMEND
12524; GFX940-NEXT:    s_setpc_b64 s[30:31]
12525  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12526  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12527  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 poison, i32 poison>
12528  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12529  ret void
12530}
12531
; Scalar variant, mask <7, 2, u, u>: lane 0 is element 3 of %vec1, lane 1 is
; element 2 of %vec0, lanes 2-3 are undefined. The expected code shifts the
; second register of the %vec1 pair right by 16 (reusing the first %vec0
; register as a temporary) and combines it with the second register of the
; %vec0 pair using s_pack_ll_b32_b16 into s8.
12532define void @s_shuffle_v4i16_v4i16__7_2_u_u() {
12533; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_u_u:
12534; GFX900:       ; %bb.0:
12535; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12536; GFX900-NEXT:    ;;#ASMSTART
12537; GFX900-NEXT:    ; def s[4:5]
12538; GFX900-NEXT:    ;;#ASMEND
12539; GFX900-NEXT:    ;;#ASMSTART
12540; GFX900-NEXT:    ; def s[6:7]
12541; GFX900-NEXT:    ;;#ASMEND
12542; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
12543; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12544; GFX900-NEXT:    ;;#ASMSTART
12545; GFX900-NEXT:    ; use s[8:9]
12546; GFX900-NEXT:    ;;#ASMEND
12547; GFX900-NEXT:    s_setpc_b64 s[30:31]
12548;
12549; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_u_u:
12550; GFX90A:       ; %bb.0:
12551; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12552; GFX90A-NEXT:    ;;#ASMSTART
12553; GFX90A-NEXT:    ; def s[4:5]
12554; GFX90A-NEXT:    ;;#ASMEND
12555; GFX90A-NEXT:    ;;#ASMSTART
12556; GFX90A-NEXT:    ; def s[6:7]
12557; GFX90A-NEXT:    ;;#ASMEND
12558; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
12559; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12560; GFX90A-NEXT:    ;;#ASMSTART
12561; GFX90A-NEXT:    ; use s[8:9]
12562; GFX90A-NEXT:    ;;#ASMEND
12563; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12564;
12565; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_u_u:
12566; GFX940:       ; %bb.0:
12567; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12568; GFX940-NEXT:    ;;#ASMSTART
12569; GFX940-NEXT:    ; def s[0:1]
12570; GFX940-NEXT:    ;;#ASMEND
12571; GFX940-NEXT:    ;;#ASMSTART
12572; GFX940-NEXT:    ; def s[2:3]
12573; GFX940-NEXT:    ;;#ASMEND
12574; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
12575; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
12576; GFX940-NEXT:    ;;#ASMSTART
12577; GFX940-NEXT:    ; use s[8:9]
12578; GFX940-NEXT:    ;;#ASMEND
12579; GFX940-NEXT:    s_setpc_b64 s[30:31]
12580  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12581  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12582  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 poison, i32 poison>
12583  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12584  ret void
12585}
12586
; Scalar variant, mask <7, 3, u, u>: lane 0 is element 3 of %vec1, lane 1 is
; element 3 of %vec0, lanes 2-3 are undefined. The expected code is a
; single s_pack_hh_b32_b16 of the second register of each source pair
; into s8, with no separate shift.
12587define void @s_shuffle_v4i16_v4i16__7_3_u_u() {
12588; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_u_u:
12589; GFX900:       ; %bb.0:
12590; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12591; GFX900-NEXT:    ;;#ASMSTART
12592; GFX900-NEXT:    ; def s[4:5]
12593; GFX900-NEXT:    ;;#ASMEND
12594; GFX900-NEXT:    ;;#ASMSTART
12595; GFX900-NEXT:    ; def s[6:7]
12596; GFX900-NEXT:    ;;#ASMEND
12597; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
12598; GFX900-NEXT:    ;;#ASMSTART
12599; GFX900-NEXT:    ; use s[8:9]
12600; GFX900-NEXT:    ;;#ASMEND
12601; GFX900-NEXT:    s_setpc_b64 s[30:31]
12602;
12603; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_u_u:
12604; GFX90A:       ; %bb.0:
12605; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12606; GFX90A-NEXT:    ;;#ASMSTART
12607; GFX90A-NEXT:    ; def s[4:5]
12608; GFX90A-NEXT:    ;;#ASMEND
12609; GFX90A-NEXT:    ;;#ASMSTART
12610; GFX90A-NEXT:    ; def s[6:7]
12611; GFX90A-NEXT:    ;;#ASMEND
12612; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
12613; GFX90A-NEXT:    ;;#ASMSTART
12614; GFX90A-NEXT:    ; use s[8:9]
12615; GFX90A-NEXT:    ;;#ASMEND
12616; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12617;
12618; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_u_u:
12619; GFX940:       ; %bb.0:
12620; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12621; GFX940-NEXT:    ;;#ASMSTART
12622; GFX940-NEXT:    ; def s[0:1]
12623; GFX940-NEXT:    ;;#ASMEND
12624; GFX940-NEXT:    ;;#ASMSTART
12625; GFX940-NEXT:    ; def s[2:3]
12626; GFX940-NEXT:    ;;#ASMEND
12627; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
12628; GFX940-NEXT:    ;;#ASMSTART
12629; GFX940-NEXT:    ; use s[8:9]
12630; GFX940-NEXT:    ;;#ASMEND
12631; GFX940-NEXT:    s_setpc_b64 s[30:31]
12632  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12633  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12634  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 poison, i32 poison>
12635  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12636  ret void
12637}
12638
; Scalar variant, mask <7, 4, u, u>: both defined lanes come from %vec1
; (elements 3 and 0). Only one "def" block is expected (presumably the
; unused def of %vec0 is dropped). The expected code shifts the second
; register of the pair right by 16 in place and combines it with the first
; register using s_pack_ll_b32_b16 into s8.
12639define void @s_shuffle_v4i16_v4i16__7_4_u_u() {
12640; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_u_u:
12641; GFX900:       ; %bb.0:
12642; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12643; GFX900-NEXT:    ;;#ASMSTART
12644; GFX900-NEXT:    ; def s[4:5]
12645; GFX900-NEXT:    ;;#ASMEND
12646; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
12647; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12648; GFX900-NEXT:    ;;#ASMSTART
12649; GFX900-NEXT:    ; use s[8:9]
12650; GFX900-NEXT:    ;;#ASMEND
12651; GFX900-NEXT:    s_setpc_b64 s[30:31]
12652;
12653; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_u_u:
12654; GFX90A:       ; %bb.0:
12655; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12656; GFX90A-NEXT:    ;;#ASMSTART
12657; GFX90A-NEXT:    ; def s[4:5]
12658; GFX90A-NEXT:    ;;#ASMEND
12659; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
12660; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12661; GFX90A-NEXT:    ;;#ASMSTART
12662; GFX90A-NEXT:    ; use s[8:9]
12663; GFX90A-NEXT:    ;;#ASMEND
12664; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12665;
12666; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_u_u:
12667; GFX940:       ; %bb.0:
12668; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12669; GFX940-NEXT:    ;;#ASMSTART
12670; GFX940-NEXT:    ; def s[0:1]
12671; GFX940-NEXT:    ;;#ASMEND
12672; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
12673; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12674; GFX940-NEXT:    ;;#ASMSTART
12675; GFX940-NEXT:    ; use s[8:9]
12676; GFX940-NEXT:    ;;#ASMEND
12677; GFX940-NEXT:    s_setpc_b64 s[30:31]
12678  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12679  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12680  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 poison, i32 poison>
12681  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12682  ret void
12683}
12684
; Scalar variant, mask <7, 5, u, u>: both defined lanes come from %vec1
; (elements 3 and 1). Only one "def" block is expected (presumably the
; unused def of %vec0 is dropped). The expected code is a single
; s_pack_hh_b32_b16 of the second and first registers of that pair into s8.
12685define void @s_shuffle_v4i16_v4i16__7_5_u_u() {
12686; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_u_u:
12687; GFX900:       ; %bb.0:
12688; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12689; GFX900-NEXT:    ;;#ASMSTART
12690; GFX900-NEXT:    ; def s[4:5]
12691; GFX900-NEXT:    ;;#ASMEND
12692; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
12693; GFX900-NEXT:    ;;#ASMSTART
12694; GFX900-NEXT:    ; use s[8:9]
12695; GFX900-NEXT:    ;;#ASMEND
12696; GFX900-NEXT:    s_setpc_b64 s[30:31]
12697;
12698; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_u_u:
12699; GFX90A:       ; %bb.0:
12700; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12701; GFX90A-NEXT:    ;;#ASMSTART
12702; GFX90A-NEXT:    ; def s[4:5]
12703; GFX90A-NEXT:    ;;#ASMEND
12704; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
12705; GFX90A-NEXT:    ;;#ASMSTART
12706; GFX90A-NEXT:    ; use s[8:9]
12707; GFX90A-NEXT:    ;;#ASMEND
12708; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12709;
12710; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_u_u:
12711; GFX940:       ; %bb.0:
12712; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12713; GFX940-NEXT:    ;;#ASMSTART
12714; GFX940-NEXT:    ; def s[0:1]
12715; GFX940-NEXT:    ;;#ASMEND
12716; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
12717; GFX940-NEXT:    ;;#ASMSTART
12718; GFX940-NEXT:    ; use s[8:9]
12719; GFX940-NEXT:    ;;#ASMEND
12720; GFX940-NEXT:    s_setpc_b64 s[30:31]
12721  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12722  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12723  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 poison, i32 poison>
12724  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12725  ret void
12726}
12727
; Scalar variant, mask <7, 6, u, u>: both defined lanes come from the second
; register of %vec1 (elements 3 and 2), in swapped order. Only one "def"
; block is expected (presumably the unused def of %vec0 is dropped). The
; expected code shifts that register right by 16 into a temporary and
; combines the temporary with the unshifted register using
; s_pack_ll_b32_b16 into s8.
12728define void @s_shuffle_v4i16_v4i16__7_6_u_u() {
12729; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_u_u:
12730; GFX900:       ; %bb.0:
12731; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12732; GFX900-NEXT:    ;;#ASMSTART
12733; GFX900-NEXT:    ; def s[4:5]
12734; GFX900-NEXT:    ;;#ASMEND
12735; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
12736; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12737; GFX900-NEXT:    ;;#ASMSTART
12738; GFX900-NEXT:    ; use s[8:9]
12739; GFX900-NEXT:    ;;#ASMEND
12740; GFX900-NEXT:    s_setpc_b64 s[30:31]
12741;
12742; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_u_u:
12743; GFX90A:       ; %bb.0:
12744; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12745; GFX90A-NEXT:    ;;#ASMSTART
12746; GFX90A-NEXT:    ; def s[4:5]
12747; GFX90A-NEXT:    ;;#ASMEND
12748; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
12749; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12750; GFX90A-NEXT:    ;;#ASMSTART
12751; GFX90A-NEXT:    ; use s[8:9]
12752; GFX90A-NEXT:    ;;#ASMEND
12753; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12754;
12755; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_u_u:
12756; GFX940:       ; %bb.0:
12757; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12758; GFX940-NEXT:    ;;#ASMSTART
12759; GFX940-NEXT:    ; def s[0:1]
12760; GFX940-NEXT:    ;;#ASMEND
12761; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
12762; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
12763; GFX940-NEXT:    ;;#ASMSTART
12764; GFX940-NEXT:    ; use s[8:9]
12765; GFX940-NEXT:    ;;#ASMEND
12766; GFX940-NEXT:    s_setpc_b64 s[30:31]
12767  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12768  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12769  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 poison, i32 poison>
12770  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12771  ret void
12772}
12773
; Scalar variant, mask <7, 7, u, u>: element 3 of %vec1 is replicated into
; lanes 0 and 1; lanes 2-3 are undefined. Only one "def" block is expected
; (presumably the unused def of %vec0 is dropped). The expected code is a
; single s_pack_hh_b32_b16 whose two sources are both the second register
; of that pair.
12774define void @s_shuffle_v4i16_v4i16__7_7_u_u() {
12775; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_u:
12776; GFX900:       ; %bb.0:
12777; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12778; GFX900-NEXT:    ;;#ASMSTART
12779; GFX900-NEXT:    ; def s[4:5]
12780; GFX900-NEXT:    ;;#ASMEND
12781; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
12782; GFX900-NEXT:    ;;#ASMSTART
12783; GFX900-NEXT:    ; use s[8:9]
12784; GFX900-NEXT:    ;;#ASMEND
12785; GFX900-NEXT:    s_setpc_b64 s[30:31]
12786;
12787; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_u:
12788; GFX90A:       ; %bb.0:
12789; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12790; GFX90A-NEXT:    ;;#ASMSTART
12791; GFX90A-NEXT:    ; def s[4:5]
12792; GFX90A-NEXT:    ;;#ASMEND
12793; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
12794; GFX90A-NEXT:    ;;#ASMSTART
12795; GFX90A-NEXT:    ; use s[8:9]
12796; GFX90A-NEXT:    ;;#ASMEND
12797; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12798;
12799; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_u:
12800; GFX940:       ; %bb.0:
12801; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12802; GFX940-NEXT:    ;;#ASMSTART
12803; GFX940-NEXT:    ; def s[0:1]
12804; GFX940-NEXT:    ;;#ASMEND
12805; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
12806; GFX940-NEXT:    ;;#ASMSTART
12807; GFX940-NEXT:    ; use s[8:9]
12808; GFX940-NEXT:    ;;#ASMEND
12809; GFX940-NEXT:    s_setpc_b64 s[30:31]
12810  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12811  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12812  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 poison>
12813  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12814  ret void
12815}
12816
; Scalar variant, mask <7, 7, 0, u>: lanes 0-1 replicate element 3 of %vec1,
; lane 2 is element 0 of %vec0, lane 3 is undefined. Both asm defs are
; live. The expected code builds s8 with s_pack_hh_b32_b16 from the second
; %vec1 register (used for both sources) and fills s9 with a plain copy of
; the first %vec0 register.
12817define void @s_shuffle_v4i16_v4i16__7_7_0_u() {
12818; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_u:
12819; GFX900:       ; %bb.0:
12820; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12821; GFX900-NEXT:    ;;#ASMSTART
12822; GFX900-NEXT:    ; def s[4:5]
12823; GFX900-NEXT:    ;;#ASMEND
12824; GFX900-NEXT:    ;;#ASMSTART
12825; GFX900-NEXT:    ; def s[6:7]
12826; GFX900-NEXT:    ;;#ASMEND
12827; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
12828; GFX900-NEXT:    s_mov_b32 s9, s4
12829; GFX900-NEXT:    ;;#ASMSTART
12830; GFX900-NEXT:    ; use s[8:9]
12831; GFX900-NEXT:    ;;#ASMEND
12832; GFX900-NEXT:    s_setpc_b64 s[30:31]
12833;
12834; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_u:
12835; GFX90A:       ; %bb.0:
12836; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12837; GFX90A-NEXT:    ;;#ASMSTART
12838; GFX90A-NEXT:    ; def s[4:5]
12839; GFX90A-NEXT:    ;;#ASMEND
12840; GFX90A-NEXT:    ;;#ASMSTART
12841; GFX90A-NEXT:    ; def s[6:7]
12842; GFX90A-NEXT:    ;;#ASMEND
12843; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
12844; GFX90A-NEXT:    s_mov_b32 s9, s4
12845; GFX90A-NEXT:    ;;#ASMSTART
12846; GFX90A-NEXT:    ; use s[8:9]
12847; GFX90A-NEXT:    ;;#ASMEND
12848; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12849;
12850; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_u:
12851; GFX940:       ; %bb.0:
12852; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12853; GFX940-NEXT:    ;;#ASMSTART
12854; GFX940-NEXT:    ; def s[0:1]
12855; GFX940-NEXT:    ;;#ASMEND
12856; GFX940-NEXT:    ;;#ASMSTART
12857; GFX940-NEXT:    ; def s[2:3]
12858; GFX940-NEXT:    ;;#ASMEND
12859; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
12860; GFX940-NEXT:    s_mov_b32 s9, s0
12861; GFX940-NEXT:    ;;#ASMSTART
12862; GFX940-NEXT:    ; use s[8:9]
12863; GFX940-NEXT:    ;;#ASMEND
12864; GFX940-NEXT:    s_setpc_b64 s[30:31]
12865  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12866  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12867  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 poison>
12868  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12869  ret void
12870}
12871
; Scalar variant, mask <7, 7, 1, u>: lanes 0-1 replicate element 3 of %vec1,
; lane 2 is element 1 of %vec0, lane 3 is undefined. Both asm defs are
; live. The expected code first shifts the first %vec0 register right by
; 16 into s9, then builds s8 with s_pack_hh_b32_b16 from the second %vec1
; register (used for both sources).
12872define void @s_shuffle_v4i16_v4i16__7_7_1_u() {
12873; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_u:
12874; GFX900:       ; %bb.0:
12875; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12876; GFX900-NEXT:    ;;#ASMSTART
12877; GFX900-NEXT:    ; def s[4:5]
12878; GFX900-NEXT:    ;;#ASMEND
12879; GFX900-NEXT:    ;;#ASMSTART
12880; GFX900-NEXT:    ; def s[6:7]
12881; GFX900-NEXT:    ;;#ASMEND
12882; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
12883; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
12884; GFX900-NEXT:    ;;#ASMSTART
12885; GFX900-NEXT:    ; use s[8:9]
12886; GFX900-NEXT:    ;;#ASMEND
12887; GFX900-NEXT:    s_setpc_b64 s[30:31]
12888;
12889; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_u:
12890; GFX90A:       ; %bb.0:
12891; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12892; GFX90A-NEXT:    ;;#ASMSTART
12893; GFX90A-NEXT:    ; def s[4:5]
12894; GFX90A-NEXT:    ;;#ASMEND
12895; GFX90A-NEXT:    ;;#ASMSTART
12896; GFX90A-NEXT:    ; def s[6:7]
12897; GFX90A-NEXT:    ;;#ASMEND
12898; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
12899; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
12900; GFX90A-NEXT:    ;;#ASMSTART
12901; GFX90A-NEXT:    ; use s[8:9]
12902; GFX90A-NEXT:    ;;#ASMEND
12903; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12904;
12905; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_u:
12906; GFX940:       ; %bb.0:
12907; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12908; GFX940-NEXT:    ;;#ASMSTART
12909; GFX940-NEXT:    ; def s[0:1]
12910; GFX940-NEXT:    ;;#ASMEND
12911; GFX940-NEXT:    ;;#ASMSTART
12912; GFX940-NEXT:    ; def s[2:3]
12913; GFX940-NEXT:    ;;#ASMEND
12914; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
12915; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
12916; GFX940-NEXT:    ;;#ASMSTART
12917; GFX940-NEXT:    ; use s[8:9]
12918; GFX940-NEXT:    ;;#ASMEND
12919; GFX940-NEXT:    s_setpc_b64 s[30:31]
12920  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12921  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12922  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 poison>
12923  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12924  ret void
12925}
12926
; SGPR shuffle with mask <7, 7, 2, poison>. Mask indices 0-3 pick lanes of
; %vec0 and 4-7 pick lanes of %vec1, so the result is
; { %vec1[3], %vec1[3], %vec0[2], poison }. The result feeds an inline asm use
; pinned to s[8:9]. Expected code: the first def lands directly in s[8:9], so
; s9 already carries %vec0[2] in its low half and only s8 is rewritten, using
; s_pack_hh_b32_b16 on the high dword of the second def.
12927define void @s_shuffle_v4i16_v4i16__7_7_2_u() {
12928; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_u:
12929; GFX900:       ; %bb.0:
12930; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12931; GFX900-NEXT:    ;;#ASMSTART
12932; GFX900-NEXT:    ; def s[8:9]
12933; GFX900-NEXT:    ;;#ASMEND
12934; GFX900-NEXT:    ;;#ASMSTART
12935; GFX900-NEXT:    ; def s[4:5]
12936; GFX900-NEXT:    ;;#ASMEND
12937; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
12938; GFX900-NEXT:    ;;#ASMSTART
12939; GFX900-NEXT:    ; use s[8:9]
12940; GFX900-NEXT:    ;;#ASMEND
12941; GFX900-NEXT:    s_setpc_b64 s[30:31]
12942;
12943; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_u:
12944; GFX90A:       ; %bb.0:
12945; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12946; GFX90A-NEXT:    ;;#ASMSTART
12947; GFX90A-NEXT:    ; def s[8:9]
12948; GFX90A-NEXT:    ;;#ASMEND
12949; GFX90A-NEXT:    ;;#ASMSTART
12950; GFX90A-NEXT:    ; def s[4:5]
12951; GFX90A-NEXT:    ;;#ASMEND
12952; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
12953; GFX90A-NEXT:    ;;#ASMSTART
12954; GFX90A-NEXT:    ; use s[8:9]
12955; GFX90A-NEXT:    ;;#ASMEND
12956; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12957;
12958; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_u:
12959; GFX940:       ; %bb.0:
12960; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12961; GFX940-NEXT:    ;;#ASMSTART
12962; GFX940-NEXT:    ; def s[8:9]
12963; GFX940-NEXT:    ;;#ASMEND
12964; GFX940-NEXT:    ;;#ASMSTART
12965; GFX940-NEXT:    ; def s[0:1]
12966; GFX940-NEXT:    ;;#ASMEND
12967; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
12968; GFX940-NEXT:    ;;#ASMSTART
12969; GFX940-NEXT:    ; use s[8:9]
12970; GFX940-NEXT:    ;;#ASMEND
12971; GFX940-NEXT:    s_setpc_b64 s[30:31]
12972  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12973  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12974  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 poison>
12975  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
12976  ret void
12977}
12978
; SGPR shuffle with mask <7, 7, 3, poison>. Mask indices 0-3 pick lanes of
; %vec0 and 4-7 pick lanes of %vec1, so the result is
; { %vec1[3], %vec1[3], %vec0[3], poison }. The result feeds an inline asm use
; pinned to s[8:9]. Expected code: s8 is packed from the high half of the dword
; holding %vec1[3] (s_pack_hh_b32_b16), and s9 is a 16-bit right shift of the
; dword holding %vec0[3].
12979define void @s_shuffle_v4i16_v4i16__7_7_3_u() {
12980; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_u:
12981; GFX900:       ; %bb.0:
12982; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12983; GFX900-NEXT:    ;;#ASMSTART
12984; GFX900-NEXT:    ; def s[4:5]
12985; GFX900-NEXT:    ;;#ASMEND
12986; GFX900-NEXT:    ;;#ASMSTART
12987; GFX900-NEXT:    ; def s[6:7]
12988; GFX900-NEXT:    ;;#ASMEND
12989; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
12990; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
12991; GFX900-NEXT:    ;;#ASMSTART
12992; GFX900-NEXT:    ; use s[8:9]
12993; GFX900-NEXT:    ;;#ASMEND
12994; GFX900-NEXT:    s_setpc_b64 s[30:31]
12995;
12996; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_u:
12997; GFX90A:       ; %bb.0:
12998; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12999; GFX90A-NEXT:    ;;#ASMSTART
13000; GFX90A-NEXT:    ; def s[4:5]
13001; GFX90A-NEXT:    ;;#ASMEND
13002; GFX90A-NEXT:    ;;#ASMSTART
13003; GFX90A-NEXT:    ; def s[6:7]
13004; GFX90A-NEXT:    ;;#ASMEND
13005; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
13006; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
13007; GFX90A-NEXT:    ;;#ASMSTART
13008; GFX90A-NEXT:    ; use s[8:9]
13009; GFX90A-NEXT:    ;;#ASMEND
13010; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13011;
13012; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_u:
13013; GFX940:       ; %bb.0:
13014; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13015; GFX940-NEXT:    ;;#ASMSTART
13016; GFX940-NEXT:    ; def s[0:1]
13017; GFX940-NEXT:    ;;#ASMEND
13018; GFX940-NEXT:    ;;#ASMSTART
13019; GFX940-NEXT:    ; def s[2:3]
13020; GFX940-NEXT:    ;;#ASMEND
13021; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
13022; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
13023; GFX940-NEXT:    ;;#ASMSTART
13024; GFX940-NEXT:    ; use s[8:9]
13025; GFX940-NEXT:    ;;#ASMEND
13026; GFX940-NEXT:    s_setpc_b64 s[30:31]
13027  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13028  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13029  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 poison>
13030  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13031  ret void
13032}
13033
; SGPR shuffle with mask <7, 7, 4, poison>. Every non-poison index is in the
; 4-7 range, so only %vec1 is referenced and the result is
; { %vec1[3], %vec1[3], %vec1[0], poison }. The expected output correspondingly
; contains a single "; def" block. The result feeds an inline asm use pinned to
; s[8:9]. Expected code: s8 is packed from the high half of the high dword
; (s_pack_hh_b32_b16), and s9 is a plain copy of the low dword, whose low half
; is %vec1[0].
13034define void @s_shuffle_v4i16_v4i16__7_7_4_u() {
13035; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_u:
13036; GFX900:       ; %bb.0:
13037; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13038; GFX900-NEXT:    ;;#ASMSTART
13039; GFX900-NEXT:    ; def s[4:5]
13040; GFX900-NEXT:    ;;#ASMEND
13041; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13042; GFX900-NEXT:    s_mov_b32 s9, s4
13043; GFX900-NEXT:    ;;#ASMSTART
13044; GFX900-NEXT:    ; use s[8:9]
13045; GFX900-NEXT:    ;;#ASMEND
13046; GFX900-NEXT:    s_setpc_b64 s[30:31]
13047;
13048; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_u:
13049; GFX90A:       ; %bb.0:
13050; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13051; GFX90A-NEXT:    ;;#ASMSTART
13052; GFX90A-NEXT:    ; def s[4:5]
13053; GFX90A-NEXT:    ;;#ASMEND
13054; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13055; GFX90A-NEXT:    s_mov_b32 s9, s4
13056; GFX90A-NEXT:    ;;#ASMSTART
13057; GFX90A-NEXT:    ; use s[8:9]
13058; GFX90A-NEXT:    ;;#ASMEND
13059; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13060;
13061; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_u:
13062; GFX940:       ; %bb.0:
13063; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13064; GFX940-NEXT:    ;;#ASMSTART
13065; GFX940-NEXT:    ; def s[0:1]
13066; GFX940-NEXT:    ;;#ASMEND
13067; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
13068; GFX940-NEXT:    s_mov_b32 s9, s0
13069; GFX940-NEXT:    ;;#ASMSTART
13070; GFX940-NEXT:    ; use s[8:9]
13071; GFX940-NEXT:    ;;#ASMEND
13072; GFX940-NEXT:    s_setpc_b64 s[30:31]
13073  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13074  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13075  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 poison>
13076  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13077  ret void
13078}
13079
; SGPR shuffle with mask <7, 7, 5, poison>. Every non-poison index is in the
; 4-7 range, so only %vec1 is referenced and the result is
; { %vec1[3], %vec1[3], %vec1[1], poison }. The expected output correspondingly
; contains a single "; def" block. The result feeds an inline asm use pinned to
; s[8:9]. Expected code: s9 is a 16-bit right shift of the low dword (bringing
; %vec1[1] into the low half), and s8 is packed from the high half of the high
; dword (s_pack_hh_b32_b16).
13080define void @s_shuffle_v4i16_v4i16__7_7_5_u() {
13081; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_u:
13082; GFX900:       ; %bb.0:
13083; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13084; GFX900-NEXT:    ;;#ASMSTART
13085; GFX900-NEXT:    ; def s[4:5]
13086; GFX900-NEXT:    ;;#ASMEND
13087; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13088; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13089; GFX900-NEXT:    ;;#ASMSTART
13090; GFX900-NEXT:    ; use s[8:9]
13091; GFX900-NEXT:    ;;#ASMEND
13092; GFX900-NEXT:    s_setpc_b64 s[30:31]
13093;
13094; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_u:
13095; GFX90A:       ; %bb.0:
13096; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13097; GFX90A-NEXT:    ;;#ASMSTART
13098; GFX90A-NEXT:    ; def s[4:5]
13099; GFX90A-NEXT:    ;;#ASMEND
13100; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13101; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13102; GFX90A-NEXT:    ;;#ASMSTART
13103; GFX90A-NEXT:    ; use s[8:9]
13104; GFX90A-NEXT:    ;;#ASMEND
13105; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13106;
13107; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_u:
13108; GFX940:       ; %bb.0:
13109; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13110; GFX940-NEXT:    ;;#ASMSTART
13111; GFX940-NEXT:    ; def s[0:1]
13112; GFX940-NEXT:    ;;#ASMEND
13113; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13114; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
13115; GFX940-NEXT:    ;;#ASMSTART
13116; GFX940-NEXT:    ; use s[8:9]
13117; GFX940-NEXT:    ;;#ASMEND
13118; GFX940-NEXT:    s_setpc_b64 s[30:31]
13119  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13120  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13121  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 poison>
13122  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13123  ret void
13124}
13125
; SGPR shuffle with mask <7, 7, 6, poison>. Every non-poison index is in the
; 4-7 range, so only %vec1 is referenced and the result is
; { %vec1[3], %vec1[3], %vec1[2], poison }. The result feeds an inline asm use
; pinned to s[8:9]. All three RUN configurations share one check block here
; (common prefix): the single def lands directly in s[8:9], s9 is left
; untouched (its low half is already %vec1[2]), and only s8 is rewritten by
; packing the high half of s9 (s_pack_hh_b32_b16).
13126define void @s_shuffle_v4i16_v4i16__7_7_6_u() {
13127; GFX9-LABEL: s_shuffle_v4i16_v4i16__7_7_6_u:
13128; GFX9:       ; %bb.0:
13129; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13130; GFX9-NEXT:    ;;#ASMSTART
13131; GFX9-NEXT:    ; def s[8:9]
13132; GFX9-NEXT:    ;;#ASMEND
13133; GFX9-NEXT:    s_pack_hh_b32_b16 s8, s9, s9
13134; GFX9-NEXT:    ;;#ASMSTART
13135; GFX9-NEXT:    ; use s[8:9]
13136; GFX9-NEXT:    ;;#ASMEND
13137; GFX9-NEXT:    s_setpc_b64 s[30:31]
13138  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13139  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13140  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 poison>
13141  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13142  ret void
13143}
13144
; SGPR shuffle with mask <7, 7, 7, poison>. Every non-poison index is in the
; 4-7 range, so only %vec1 is referenced and the result is
; { %vec1[3], %vec1[3], %vec1[3], poison }. The expected output correspondingly
; contains a single "; def" block. The result feeds an inline asm use pinned to
; s[8:9]. Expected code: s9 is a 16-bit right shift of the high dword, and s8
; is packed from the high half of that same dword (s_pack_hh_b32_b16).
13145define void @s_shuffle_v4i16_v4i16__7_7_7_u() {
13146; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_u:
13147; GFX900:       ; %bb.0:
13148; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13149; GFX900-NEXT:    ;;#ASMSTART
13150; GFX900-NEXT:    ; def s[4:5]
13151; GFX900-NEXT:    ;;#ASMEND
13152; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
13153; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13154; GFX900-NEXT:    ;;#ASMSTART
13155; GFX900-NEXT:    ; use s[8:9]
13156; GFX900-NEXT:    ;;#ASMEND
13157; GFX900-NEXT:    s_setpc_b64 s[30:31]
13158;
13159; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_u:
13160; GFX90A:       ; %bb.0:
13161; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13162; GFX90A-NEXT:    ;;#ASMSTART
13163; GFX90A-NEXT:    ; def s[4:5]
13164; GFX90A-NEXT:    ;;#ASMEND
13165; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
13166; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13167; GFX90A-NEXT:    ;;#ASMSTART
13168; GFX90A-NEXT:    ; use s[8:9]
13169; GFX90A-NEXT:    ;;#ASMEND
13170; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13171;
13172; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_u:
13173; GFX940:       ; %bb.0:
13174; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13175; GFX940-NEXT:    ;;#ASMSTART
13176; GFX940-NEXT:    ; def s[0:1]
13177; GFX940-NEXT:    ;;#ASMEND
13178; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
13179; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
13180; GFX940-NEXT:    ;;#ASMSTART
13181; GFX940-NEXT:    ; use s[8:9]
13182; GFX940-NEXT:    ;;#ASMEND
13183; GFX940-NEXT:    s_setpc_b64 s[30:31]
13184  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13185  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13186  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 poison>
13187  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13188  ret void
13189}
13190
; SGPR shuffle with mask <7, 7, 7, 0>. Mask indices 0-3 pick lanes of %vec0
; and 4-7 pick lanes of %vec1, so the result is
; { %vec1[3], %vec1[3], %vec1[3], %vec0[0] }. The result feeds an inline asm
; use pinned to s[8:9]. Expected code: the high dword of the second def is
; shifted right by 16 into a scratch SGPR, s9 is built with s_pack_ll_b32_b16
; from that scratch value and the low dword of the first def, and s8 is packed
; from the high half of the second def's high dword (s_pack_hh_b32_b16).
13191define void @s_shuffle_v4i16_v4i16__7_7_7_0() {
13192; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_0:
13193; GFX900:       ; %bb.0:
13194; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13195; GFX900-NEXT:    ;;#ASMSTART
13196; GFX900-NEXT:    ; def s[4:5]
13197; GFX900-NEXT:    ;;#ASMEND
13198; GFX900-NEXT:    ;;#ASMSTART
13199; GFX900-NEXT:    ; def s[6:7]
13200; GFX900-NEXT:    ;;#ASMEND
13201; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
13202; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
13203; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
13204; GFX900-NEXT:    ;;#ASMSTART
13205; GFX900-NEXT:    ; use s[8:9]
13206; GFX900-NEXT:    ;;#ASMEND
13207; GFX900-NEXT:    s_setpc_b64 s[30:31]
13208;
13209; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_0:
13210; GFX90A:       ; %bb.0:
13211; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13212; GFX90A-NEXT:    ;;#ASMSTART
13213; GFX90A-NEXT:    ; def s[4:5]
13214; GFX90A-NEXT:    ;;#ASMEND
13215; GFX90A-NEXT:    ;;#ASMSTART
13216; GFX90A-NEXT:    ; def s[6:7]
13217; GFX90A-NEXT:    ;;#ASMEND
13218; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
13219; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
13220; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
13221; GFX90A-NEXT:    ;;#ASMSTART
13222; GFX90A-NEXT:    ; use s[8:9]
13223; GFX90A-NEXT:    ;;#ASMEND
13224; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13225;
13226; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_0:
13227; GFX940:       ; %bb.0:
13228; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13229; GFX940-NEXT:    ;;#ASMSTART
13230; GFX940-NEXT:    ; def s[0:1]
13231; GFX940-NEXT:    ;;#ASMEND
13232; GFX940-NEXT:    ;;#ASMSTART
13233; GFX940-NEXT:    ; def s[2:3]
13234; GFX940-NEXT:    ;;#ASMEND
13235; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
13236; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
13237; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
13238; GFX940-NEXT:    ;;#ASMSTART
13239; GFX940-NEXT:    ; use s[8:9]
13240; GFX940-NEXT:    ;;#ASMEND
13241; GFX940-NEXT:    s_setpc_b64 s[30:31]
13242  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13243  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13244  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 0>
13245  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13246  ret void
13247}
13248
; SGPR shuffle with mask <7, 7, 7, 1>. Mask indices 0-3 pick lanes of %vec0
; and 4-7 pick lanes of %vec1, so the result is
; { %vec1[3], %vec1[3], %vec1[3], %vec0[1] }. The result feeds an inline asm
; use pinned to s[8:9]. Expected code: both lanes wanted in s9 sit in the high
; halves of their source dwords, so s9 is a single s_pack_hh_b32_b16 of the
; second def's high dword and the first def's low dword; s8 is
; s_pack_hh_b32_b16 of the second def's high dword with itself.
13249define void @s_shuffle_v4i16_v4i16__7_7_7_1() {
13250; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_1:
13251; GFX900:       ; %bb.0:
13252; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13253; GFX900-NEXT:    ;;#ASMSTART
13254; GFX900-NEXT:    ; def s[4:5]
13255; GFX900-NEXT:    ;;#ASMEND
13256; GFX900-NEXT:    ;;#ASMSTART
13257; GFX900-NEXT:    ; def s[6:7]
13258; GFX900-NEXT:    ;;#ASMEND
13259; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s4
13260; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
13261; GFX900-NEXT:    ;;#ASMSTART
13262; GFX900-NEXT:    ; use s[8:9]
13263; GFX900-NEXT:    ;;#ASMEND
13264; GFX900-NEXT:    s_setpc_b64 s[30:31]
13265;
13266; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_1:
13267; GFX90A:       ; %bb.0:
13268; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13269; GFX90A-NEXT:    ;;#ASMSTART
13270; GFX90A-NEXT:    ; def s[4:5]
13271; GFX90A-NEXT:    ;;#ASMEND
13272; GFX90A-NEXT:    ;;#ASMSTART
13273; GFX90A-NEXT:    ; def s[6:7]
13274; GFX90A-NEXT:    ;;#ASMEND
13275; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s4
13276; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
13277; GFX90A-NEXT:    ;;#ASMSTART
13278; GFX90A-NEXT:    ; use s[8:9]
13279; GFX90A-NEXT:    ;;#ASMEND
13280; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13281;
13282; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_1:
13283; GFX940:       ; %bb.0:
13284; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13285; GFX940-NEXT:    ;;#ASMSTART
13286; GFX940-NEXT:    ; def s[0:1]
13287; GFX940-NEXT:    ;;#ASMEND
13288; GFX940-NEXT:    ;;#ASMSTART
13289; GFX940-NEXT:    ; def s[2:3]
13290; GFX940-NEXT:    ;;#ASMEND
13291; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s0
13292; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
13293; GFX940-NEXT:    ;;#ASMSTART
13294; GFX940-NEXT:    ; use s[8:9]
13295; GFX940-NEXT:    ;;#ASMEND
13296; GFX940-NEXT:    s_setpc_b64 s[30:31]
13297  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13298  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13299  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 1>
13300  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13301  ret void
13302}
13303
; SGPR shuffle with mask <7, 7, 7, 2>. Mask indices 0-3 pick lanes of %vec0
; and 4-7 pick lanes of %vec1, so the result is
; { %vec1[3], %vec1[3], %vec1[3], %vec0[2] }. The result feeds an inline asm
; use pinned to s[8:9]. Expected code: the high dword of the second def is
; shifted right by 16 into a scratch SGPR, s9 is built with s_pack_ll_b32_b16
; from that scratch value and the high dword of the first def, and s8 is packed
; from the high half of the second def's high dword (s_pack_hh_b32_b16).
13304define void @s_shuffle_v4i16_v4i16__7_7_7_2() {
13305; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_2:
13306; GFX900:       ; %bb.0:
13307; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13308; GFX900-NEXT:    ;;#ASMSTART
13309; GFX900-NEXT:    ; def s[4:5]
13310; GFX900-NEXT:    ;;#ASMEND
13311; GFX900-NEXT:    ;;#ASMSTART
13312; GFX900-NEXT:    ; def s[6:7]
13313; GFX900-NEXT:    ;;#ASMEND
13314; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
13315; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
13316; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
13317; GFX900-NEXT:    ;;#ASMSTART
13318; GFX900-NEXT:    ; use s[8:9]
13319; GFX900-NEXT:    ;;#ASMEND
13320; GFX900-NEXT:    s_setpc_b64 s[30:31]
13321;
13322; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_2:
13323; GFX90A:       ; %bb.0:
13324; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13325; GFX90A-NEXT:    ;;#ASMSTART
13326; GFX90A-NEXT:    ; def s[4:5]
13327; GFX90A-NEXT:    ;;#ASMEND
13328; GFX90A-NEXT:    ;;#ASMSTART
13329; GFX90A-NEXT:    ; def s[6:7]
13330; GFX90A-NEXT:    ;;#ASMEND
13331; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
13332; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
13333; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
13334; GFX90A-NEXT:    ;;#ASMSTART
13335; GFX90A-NEXT:    ; use s[8:9]
13336; GFX90A-NEXT:    ;;#ASMEND
13337; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13338;
13339; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_2:
13340; GFX940:       ; %bb.0:
13341; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13342; GFX940-NEXT:    ;;#ASMSTART
13343; GFX940-NEXT:    ; def s[0:1]
13344; GFX940-NEXT:    ;;#ASMEND
13345; GFX940-NEXT:    ;;#ASMSTART
13346; GFX940-NEXT:    ; def s[2:3]
13347; GFX940-NEXT:    ;;#ASMEND
13348; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
13349; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
13350; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
13351; GFX940-NEXT:    ;;#ASMSTART
13352; GFX940-NEXT:    ; use s[8:9]
13353; GFX940-NEXT:    ;;#ASMEND
13354; GFX940-NEXT:    s_setpc_b64 s[30:31]
13355  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13356  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13357  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 2>
13358  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13359  ret void
13360}
13361
; SGPR shuffle with mask <7, 7, 7, 3>. Mask indices 0-3 pick lanes of %vec0
; and 4-7 pick lanes of %vec1, so the result is
; { %vec1[3], %vec1[3], %vec1[3], %vec0[3] }. The result feeds an inline asm
; use pinned to s[8:9]. Expected code: both lanes wanted in s9 sit in the high
; halves of their source dwords, so s9 is a single s_pack_hh_b32_b16 of the
; second def's high dword and the first def's high dword; s8 is
; s_pack_hh_b32_b16 of the second def's high dword with itself.
13362define void @s_shuffle_v4i16_v4i16__7_7_7_3() {
13363; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_3:
13364; GFX900:       ; %bb.0:
13365; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13366; GFX900-NEXT:    ;;#ASMSTART
13367; GFX900-NEXT:    ; def s[4:5]
13368; GFX900-NEXT:    ;;#ASMEND
13369; GFX900-NEXT:    ;;#ASMSTART
13370; GFX900-NEXT:    ; def s[6:7]
13371; GFX900-NEXT:    ;;#ASMEND
13372; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s5
13373; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
13374; GFX900-NEXT:    ;;#ASMSTART
13375; GFX900-NEXT:    ; use s[8:9]
13376; GFX900-NEXT:    ;;#ASMEND
13377; GFX900-NEXT:    s_setpc_b64 s[30:31]
13378;
13379; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_3:
13380; GFX90A:       ; %bb.0:
13381; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13382; GFX90A-NEXT:    ;;#ASMSTART
13383; GFX90A-NEXT:    ; def s[4:5]
13384; GFX90A-NEXT:    ;;#ASMEND
13385; GFX90A-NEXT:    ;;#ASMSTART
13386; GFX90A-NEXT:    ; def s[6:7]
13387; GFX90A-NEXT:    ;;#ASMEND
13388; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s5
13389; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
13390; GFX90A-NEXT:    ;;#ASMSTART
13391; GFX90A-NEXT:    ; use s[8:9]
13392; GFX90A-NEXT:    ;;#ASMEND
13393; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13394;
13395; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_3:
13396; GFX940:       ; %bb.0:
13397; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13398; GFX940-NEXT:    ;;#ASMSTART
13399; GFX940-NEXT:    ; def s[0:1]
13400; GFX940-NEXT:    ;;#ASMEND
13401; GFX940-NEXT:    ;;#ASMSTART
13402; GFX940-NEXT:    ; def s[2:3]
13403; GFX940-NEXT:    ;;#ASMEND
13404; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s1
13405; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
13406; GFX940-NEXT:    ;;#ASMSTART
13407; GFX940-NEXT:    ; use s[8:9]
13408; GFX940-NEXT:    ;;#ASMEND
13409; GFX940-NEXT:    s_setpc_b64 s[30:31]
13410  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13411  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13412  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 3>
13413  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13414  ret void
13415}
13416
; SGPR shuffle with mask <7, 7, 7, 4>. Every index is in the 4-7 range, so only
; %vec1 is referenced and the result is
; { %vec1[3], %vec1[3], %vec1[3], %vec1[0] }. The expected output
; correspondingly contains a single "; def" block. The result feeds an inline
; asm use pinned to s[8:9]. Expected code: the high dword is shifted right by
; 16 into a scratch SGPR, s9 is built with s_pack_ll_b32_b16 from that scratch
; value and the low dword, and s8 is packed from the high half of the high
; dword (s_pack_hh_b32_b16).
13417define void @s_shuffle_v4i16_v4i16__7_7_7_4() {
13418; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_4:
13419; GFX900:       ; %bb.0:
13420; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13421; GFX900-NEXT:    ;;#ASMSTART
13422; GFX900-NEXT:    ; def s[4:5]
13423; GFX900-NEXT:    ;;#ASMEND
13424; GFX900-NEXT:    s_lshr_b32 s6, s5, 16
13425; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
13426; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13427; GFX900-NEXT:    ;;#ASMSTART
13428; GFX900-NEXT:    ; use s[8:9]
13429; GFX900-NEXT:    ;;#ASMEND
13430; GFX900-NEXT:    s_setpc_b64 s[30:31]
13431;
13432; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_4:
13433; GFX90A:       ; %bb.0:
13434; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13435; GFX90A-NEXT:    ;;#ASMSTART
13436; GFX90A-NEXT:    ; def s[4:5]
13437; GFX90A-NEXT:    ;;#ASMEND
13438; GFX90A-NEXT:    s_lshr_b32 s6, s5, 16
13439; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
13440; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13441; GFX90A-NEXT:    ;;#ASMSTART
13442; GFX90A-NEXT:    ; use s[8:9]
13443; GFX90A-NEXT:    ;;#ASMEND
13444; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13445;
13446; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_4:
13447; GFX940:       ; %bb.0:
13448; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13449; GFX940-NEXT:    ;;#ASMSTART
13450; GFX940-NEXT:    ; def s[0:1]
13451; GFX940-NEXT:    ;;#ASMEND
13452; GFX940-NEXT:    s_lshr_b32 s2, s1, 16
13453; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s0
13454; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
13455; GFX940-NEXT:    ;;#ASMSTART
13456; GFX940-NEXT:    ; use s[8:9]
13457; GFX940-NEXT:    ;;#ASMEND
13458; GFX940-NEXT:    s_setpc_b64 s[30:31]
13459  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13460  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13461  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 4>
13462  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13463  ret void
13464}
13465
; SGPR shuffle with mask <7, 7, 7, 5>. Every index is in the 4-7 range, so only
; %vec1 is referenced and the result is
; { %vec1[3], %vec1[3], %vec1[3], %vec1[1] }. The expected output
; correspondingly contains a single "; def" block. The result feeds an inline
; asm use pinned to s[8:9]. Expected code: both lanes wanted in s9 sit in the
; high halves of their source dwords, so s9 is a single s_pack_hh_b32_b16 of
; the high dword and the low dword; s8 is s_pack_hh_b32_b16 of the high dword
; with itself.
13466define void @s_shuffle_v4i16_v4i16__7_7_7_5() {
13467; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_5:
13468; GFX900:       ; %bb.0:
13469; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13470; GFX900-NEXT:    ;;#ASMSTART
13471; GFX900-NEXT:    ; def s[4:5]
13472; GFX900-NEXT:    ;;#ASMEND
13473; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s4
13474; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13475; GFX900-NEXT:    ;;#ASMSTART
13476; GFX900-NEXT:    ; use s[8:9]
13477; GFX900-NEXT:    ;;#ASMEND
13478; GFX900-NEXT:    s_setpc_b64 s[30:31]
13479;
13480; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_5:
13481; GFX90A:       ; %bb.0:
13482; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13483; GFX90A-NEXT:    ;;#ASMSTART
13484; GFX90A-NEXT:    ; def s[4:5]
13485; GFX90A-NEXT:    ;;#ASMEND
13486; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s4
13487; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13488; GFX90A-NEXT:    ;;#ASMSTART
13489; GFX90A-NEXT:    ; use s[8:9]
13490; GFX90A-NEXT:    ;;#ASMEND
13491; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13492;
13493; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_5:
13494; GFX940:       ; %bb.0:
13495; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13496; GFX940-NEXT:    ;;#ASMSTART
13497; GFX940-NEXT:    ; def s[0:1]
13498; GFX940-NEXT:    ;;#ASMEND
13499; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s0
13500; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
13501; GFX940-NEXT:    ;;#ASMSTART
13502; GFX940-NEXT:    ; use s[8:9]
13503; GFX940-NEXT:    ;;#ASMEND
13504; GFX940-NEXT:    s_setpc_b64 s[30:31]
13505  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13506  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13507  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 5>
13508  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13509  ret void
13510}
13511
; SGPR shuffle with mask <7, 7, 7, 6>. Every index is in the 4-7 range, so only
; %vec1 is referenced and the result is
; { %vec1[3], %vec1[3], %vec1[3], %vec1[2] }. The expected output
; correspondingly contains a single "; def" block. The result feeds an inline
; asm use pinned to s[8:9]. Expected code: the high dword is shifted right by
; 16 into a scratch SGPR (reusing the low dword's register), s9 is built with
; s_pack_ll_b32_b16 from that scratch value and the unshifted high dword, and
; s8 is packed from the high half of the high dword (s_pack_hh_b32_b16).
13512define void @s_shuffle_v4i16_v4i16__7_7_7_6() {
13513; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_6:
13514; GFX900:       ; %bb.0:
13515; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13516; GFX900-NEXT:    ;;#ASMSTART
13517; GFX900-NEXT:    ; def s[4:5]
13518; GFX900-NEXT:    ;;#ASMEND
13519; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
13520; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
13521; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13522; GFX900-NEXT:    ;;#ASMSTART
13523; GFX900-NEXT:    ; use s[8:9]
13524; GFX900-NEXT:    ;;#ASMEND
13525; GFX900-NEXT:    s_setpc_b64 s[30:31]
13526;
13527; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_6:
13528; GFX90A:       ; %bb.0:
13529; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13530; GFX90A-NEXT:    ;;#ASMSTART
13531; GFX90A-NEXT:    ; def s[4:5]
13532; GFX90A-NEXT:    ;;#ASMEND
13533; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
13534; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
13535; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13536; GFX90A-NEXT:    ;;#ASMSTART
13537; GFX90A-NEXT:    ; use s[8:9]
13538; GFX90A-NEXT:    ;;#ASMEND
13539; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13540;
13541; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_6:
13542; GFX940:       ; %bb.0:
13543; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13544; GFX940-NEXT:    ;;#ASMSTART
13545; GFX940-NEXT:    ; def s[0:1]
13546; GFX940-NEXT:    ;;#ASMEND
13547; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
13548; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
13549; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
13550; GFX940-NEXT:    ;;#ASMSTART
13551; GFX940-NEXT:    ; use s[8:9]
13552; GFX940-NEXT:    ;;#ASMEND
13553; GFX940-NEXT:    s_setpc_b64 s[30:31]
13554  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13555  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13556  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 6>
13557  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13558  ret void
13559}
13560
; SGPR shuffle with mask <7, 7, 7, 7>, i.e. a splat of %vec1[3] (indices 4-7
; select lanes of the second operand). Only %vec1 is referenced, and the
; expected output correspondingly contains a single "; def" block. The result
; feeds an inline asm use pinned to s[8:9]. Expected code: s8 is packed from
; the high half of the high dword (s_pack_hh_b32_b16), and s9 is a copy of s8.
13561define void @s_shuffle_v4i16_v4i16__7_7_7_7() {
13562; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_7_7:
13563; GFX900:       ; %bb.0:
13564; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13565; GFX900-NEXT:    ;;#ASMSTART
13566; GFX900-NEXT:    ; def s[4:5]
13567; GFX900-NEXT:    ;;#ASMEND
13568; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13569; GFX900-NEXT:    s_mov_b32 s9, s8
13570; GFX900-NEXT:    ;;#ASMSTART
13571; GFX900-NEXT:    ; use s[8:9]
13572; GFX900-NEXT:    ;;#ASMEND
13573; GFX900-NEXT:    s_setpc_b64 s[30:31]
13574;
13575; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_7_7:
13576; GFX90A:       ; %bb.0:
13577; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13578; GFX90A-NEXT:    ;;#ASMSTART
13579; GFX90A-NEXT:    ; def s[4:5]
13580; GFX90A-NEXT:    ;;#ASMEND
13581; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
13582; GFX90A-NEXT:    s_mov_b32 s9, s8
13583; GFX90A-NEXT:    ;;#ASMSTART
13584; GFX90A-NEXT:    ; use s[8:9]
13585; GFX90A-NEXT:    ;;#ASMEND
13586; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13587;
13588; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_7_7:
13589; GFX940:       ; %bb.0:
13590; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13591; GFX940-NEXT:    ;;#ASMSTART
13592; GFX940-NEXT:    ; def s[0:1]
13593; GFX940-NEXT:    ;;#ASMEND
13594; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
13595; GFX940-NEXT:    s_mov_b32 s9, s8
13596; GFX940-NEXT:    ;;#ASMSTART
13597; GFX940-NEXT:    ; use s[8:9]
13598; GFX940-NEXT:    ;;#ASMEND
13599; GFX940-NEXT:    s_setpc_b64 s[30:31]
13600  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13601  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13602  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
13603  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13604  ret void
13605}
13606
; Single-source SGPR shuffle (second operand is poison) with mask
; <poison, 0, 0, 0>, giving { poison, %vec0[0], %vec0[0], %vec0[0] }. The
; result feeds an inline asm use pinned to s[8:9]. Expected code: s9 is
; s_pack_ll_b32_b16 of the low dword with itself, and s8 is the low dword
; shifted left by 16, which places %vec0[0] in the high half (lane 1) while the
; low half corresponds to the poison lane.
13607define void @s_shuffle_v4i16_v4i16__u_0_0_0() {
13608; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_0_0_0:
13609; GFX900:       ; %bb.0:
13610; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13611; GFX900-NEXT:    ;;#ASMSTART
13612; GFX900-NEXT:    ; def s[4:5]
13613; GFX900-NEXT:    ;;#ASMEND
13614; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13615; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
13616; GFX900-NEXT:    ;;#ASMSTART
13617; GFX900-NEXT:    ; use s[8:9]
13618; GFX900-NEXT:    ;;#ASMEND
13619; GFX900-NEXT:    s_setpc_b64 s[30:31]
13620;
13621; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_0_0_0:
13622; GFX90A:       ; %bb.0:
13623; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13624; GFX90A-NEXT:    ;;#ASMSTART
13625; GFX90A-NEXT:    ; def s[4:5]
13626; GFX90A-NEXT:    ;;#ASMEND
13627; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13628; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
13629; GFX90A-NEXT:    ;;#ASMSTART
13630; GFX90A-NEXT:    ; use s[8:9]
13631; GFX90A-NEXT:    ;;#ASMEND
13632; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13633;
13634; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_0_0_0:
13635; GFX940:       ; %bb.0:
13636; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13637; GFX940-NEXT:    ;;#ASMSTART
13638; GFX940-NEXT:    ; def s[0:1]
13639; GFX940-NEXT:    ;;#ASMEND
13640; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
13641; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
13642; GFX940-NEXT:    ;;#ASMSTART
13643; GFX940-NEXT:    ; use s[8:9]
13644; GFX940-NEXT:    ;;#ASMEND
13645; GFX940-NEXT:    s_setpc_b64 s[30:31]
13646  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13647  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
13648  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13649  ret void
13650}
13651
; Single-source SGPR shuffle (second operand is poison) with an all-zero mask
; (written as zeroinitializer), i.e. a splat of %vec0[0]. The result feeds an
; inline asm use pinned to s[8:9]. Expected code: s8 is s_pack_ll_b32_b16 of
; the low dword with itself, and s9 is a copy of s8.
13652define void @s_shuffle_v4i16_v4i16__0_0_0_0() {
13653; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_0_0_0:
13654; GFX900:       ; %bb.0:
13655; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13656; GFX900-NEXT:    ;;#ASMSTART
13657; GFX900-NEXT:    ; def s[4:5]
13658; GFX900-NEXT:    ;;#ASMEND
13659; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
13660; GFX900-NEXT:    s_mov_b32 s9, s8
13661; GFX900-NEXT:    ;;#ASMSTART
13662; GFX900-NEXT:    ; use s[8:9]
13663; GFX900-NEXT:    ;;#ASMEND
13664; GFX900-NEXT:    s_setpc_b64 s[30:31]
13665;
13666; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_0_0_0:
13667; GFX90A:       ; %bb.0:
13668; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13669; GFX90A-NEXT:    ;;#ASMSTART
13670; GFX90A-NEXT:    ; def s[4:5]
13671; GFX90A-NEXT:    ;;#ASMEND
13672; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
13673; GFX90A-NEXT:    s_mov_b32 s9, s8
13674; GFX90A-NEXT:    ;;#ASMSTART
13675; GFX90A-NEXT:    ; use s[8:9]
13676; GFX90A-NEXT:    ;;#ASMEND
13677; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13678;
13679; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_0_0_0:
13680; GFX940:       ; %bb.0:
13681; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13682; GFX940-NEXT:    ;;#ASMSTART
13683; GFX940-NEXT:    ; def s[0:1]
13684; GFX940-NEXT:    ;;#ASMEND
13685; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
13686; GFX940-NEXT:    s_mov_b32 s9, s8
13687; GFX940-NEXT:    ;;#ASMSTART
13688; GFX940-NEXT:    ; use s[8:9]
13689; GFX940-NEXT:    ;;#ASMEND
13690; GFX940-NEXT:    s_setpc_b64 s[30:31]
13691  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13692  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> zeroinitializer
13693  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13694  ret void
13695}
13696
; Single-source SGPR shuffle (second operand is poison) with mask
; <1, 0, 0, 0>, giving { %vec0[1], %vec0[0], %vec0[0], %vec0[0] }. The result
; feeds an inline asm use pinned to s[8:9]. Expected code: the low dword is
; shifted right by 16 into a scratch SGPR (reusing the high dword's register),
; s8 is s_pack_ll_b32_b16 of that scratch value and the low dword, and s9 is
; s_pack_ll_b32_b16 of the low dword with itself.
13697define void @s_shuffle_v4i16_v4i16__1_0_0_0() {
13698; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_0_0_0:
13699; GFX900:       ; %bb.0:
13700; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13701; GFX900-NEXT:    ;;#ASMSTART
13702; GFX900-NEXT:    ; def s[4:5]
13703; GFX900-NEXT:    ;;#ASMEND
13704; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
13705; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13706; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13707; GFX900-NEXT:    ;;#ASMSTART
13708; GFX900-NEXT:    ; use s[8:9]
13709; GFX900-NEXT:    ;;#ASMEND
13710; GFX900-NEXT:    s_setpc_b64 s[30:31]
13711;
13712; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_0_0_0:
13713; GFX90A:       ; %bb.0:
13714; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13715; GFX90A-NEXT:    ;;#ASMSTART
13716; GFX90A-NEXT:    ; def s[4:5]
13717; GFX90A-NEXT:    ;;#ASMEND
13718; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
13719; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13720; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13721; GFX90A-NEXT:    ;;#ASMSTART
13722; GFX90A-NEXT:    ; use s[8:9]
13723; GFX90A-NEXT:    ;;#ASMEND
13724; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13725;
13726; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_0_0_0:
13727; GFX940:       ; %bb.0:
13728; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13729; GFX940-NEXT:    ;;#ASMSTART
13730; GFX940-NEXT:    ; def s[0:1]
13731; GFX940-NEXT:    ;;#ASMEND
13732; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
13733; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13734; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
13735; GFX940-NEXT:    ;;#ASMSTART
13736; GFX940-NEXT:    ; use s[8:9]
13737; GFX940-NEXT:    ;;#ASMEND
13738; GFX940-NEXT:    s_setpc_b64 s[30:31]
13739  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13740  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
13741  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13742  ret void
13743}
13744
; Scalar-register shuffle with mask <2, 0, 0, 0>. The source vector is produced
; by inline asm with an "=s" constraint and the second shuffle operand is
; poison. The shuffled value is handed to inline asm constrained to s[8:9].
13745define void @s_shuffle_v4i16_v4i16__2_0_0_0() {
13746; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_0_0_0:
13747; GFX900:       ; %bb.0:
13748; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13749; GFX900-NEXT:    ;;#ASMSTART
13750; GFX900-NEXT:    ; def s[4:5]
13751; GFX900-NEXT:    ;;#ASMEND
13752; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13753; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13754; GFX900-NEXT:    ;;#ASMSTART
13755; GFX900-NEXT:    ; use s[8:9]
13756; GFX900-NEXT:    ;;#ASMEND
13757; GFX900-NEXT:    s_setpc_b64 s[30:31]
13758;
13759; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_0_0_0:
13760; GFX90A:       ; %bb.0:
13761; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13762; GFX90A-NEXT:    ;;#ASMSTART
13763; GFX90A-NEXT:    ; def s[4:5]
13764; GFX90A-NEXT:    ;;#ASMEND
13765; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13766; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13767; GFX90A-NEXT:    ;;#ASMSTART
13768; GFX90A-NEXT:    ; use s[8:9]
13769; GFX90A-NEXT:    ;;#ASMEND
13770; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13771;
13772; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_0_0_0:
13773; GFX940:       ; %bb.0:
13774; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13775; GFX940-NEXT:    ;;#ASMSTART
13776; GFX940-NEXT:    ; def s[0:1]
13777; GFX940-NEXT:    ;;#ASMEND
13778; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13779; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
13780; GFX940-NEXT:    ;;#ASMSTART
13781; GFX940-NEXT:    ; use s[8:9]
13782; GFX940-NEXT:    ;;#ASMEND
13783; GFX940-NEXT:    s_setpc_b64 s[30:31]
13784  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13785  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
13786  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13787  ret void
13788}
13789
; Scalar-register shuffle with mask <3, 0, 0, 0>. The source vector is produced
; by inline asm with an "=s" constraint and the second shuffle operand is
; poison. The shuffled value is handed to inline asm constrained to s[8:9].
13790define void @s_shuffle_v4i16_v4i16__3_0_0_0() {
13791; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_0_0_0:
13792; GFX900:       ; %bb.0:
13793; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13794; GFX900-NEXT:    ;;#ASMSTART
13795; GFX900-NEXT:    ; def s[4:5]
13796; GFX900-NEXT:    ;;#ASMEND
13797; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
13798; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13799; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13800; GFX900-NEXT:    ;;#ASMSTART
13801; GFX900-NEXT:    ; use s[8:9]
13802; GFX900-NEXT:    ;;#ASMEND
13803; GFX900-NEXT:    s_setpc_b64 s[30:31]
13804;
13805; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_0_0_0:
13806; GFX90A:       ; %bb.0:
13807; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13808; GFX90A-NEXT:    ;;#ASMSTART
13809; GFX90A-NEXT:    ; def s[4:5]
13810; GFX90A-NEXT:    ;;#ASMEND
13811; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
13812; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13813; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13814; GFX90A-NEXT:    ;;#ASMSTART
13815; GFX90A-NEXT:    ; use s[8:9]
13816; GFX90A-NEXT:    ;;#ASMEND
13817; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13818;
13819; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_0_0_0:
13820; GFX940:       ; %bb.0:
13821; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13822; GFX940-NEXT:    ;;#ASMSTART
13823; GFX940-NEXT:    ; def s[0:1]
13824; GFX940-NEXT:    ;;#ASMEND
13825; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
13826; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13827; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
13828; GFX940-NEXT:    ;;#ASMSTART
13829; GFX940-NEXT:    ; use s[8:9]
13830; GFX940-NEXT:    ;;#ASMEND
13831; GFX940-NEXT:    s_setpc_b64 s[30:31]
13832  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13833  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
13834  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13835  ret void
13836}
13837
; Scalar-register shuffle with mask <4, 0, 0, 0> whose second operand is
; poison. Mask index 4 names element 0 of that poison operand, so only result
; elements 1-3 (each element 0 of %vec0) come from the asm-defined source.
; The shuffled value is handed to inline asm constrained to s[8:9].
13838define void @s_shuffle_v4i16_v4i16__4_0_0_0() {
13839; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_0_0_0:
13840; GFX900:       ; %bb.0:
13841; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13842; GFX900-NEXT:    ;;#ASMSTART
13843; GFX900-NEXT:    ; def s[4:5]
13844; GFX900-NEXT:    ;;#ASMEND
13845; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13846; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
13847; GFX900-NEXT:    ;;#ASMSTART
13848; GFX900-NEXT:    ; use s[8:9]
13849; GFX900-NEXT:    ;;#ASMEND
13850; GFX900-NEXT:    s_setpc_b64 s[30:31]
13851;
13852; GFX90A-LABEL: s_shuffle_v4i16_v4i16__4_0_0_0:
13853; GFX90A:       ; %bb.0:
13854; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13855; GFX90A-NEXT:    ;;#ASMSTART
13856; GFX90A-NEXT:    ; def s[4:5]
13857; GFX90A-NEXT:    ;;#ASMEND
13858; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13859; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
13860; GFX90A-NEXT:    ;;#ASMSTART
13861; GFX90A-NEXT:    ; use s[8:9]
13862; GFX90A-NEXT:    ;;#ASMEND
13863; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13864;
13865; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_0_0_0:
13866; GFX940:       ; %bb.0:
13867; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13868; GFX940-NEXT:    ;;#ASMSTART
13869; GFX940-NEXT:    ; def s[0:1]
13870; GFX940-NEXT:    ;;#ASMEND
13871; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
13872; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
13873; GFX940-NEXT:    ;;#ASMSTART
13874; GFX940-NEXT:    ; use s[8:9]
13875; GFX940-NEXT:    ;;#ASMEND
13876; GFX940-NEXT:    s_setpc_b64 s[30:31]
13877  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13878  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
13879  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13880  ret void
13881}
13882
; Scalar-register shuffle with mask <5, 0, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the first result element is element 1 of %vec1.
; The shuffled value is handed to inline asm constrained to s[8:9].
13883define void @s_shuffle_v4i16_v4i16__5_0_0_0() {
13884; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_0_0_0:
13885; GFX900:       ; %bb.0:
13886; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13887; GFX900-NEXT:    ;;#ASMSTART
13888; GFX900-NEXT:    ; def s[4:5]
13889; GFX900-NEXT:    ;;#ASMEND
13890; GFX900-NEXT:    ;;#ASMSTART
13891; GFX900-NEXT:    ; def s[6:7]
13892; GFX900-NEXT:    ;;#ASMEND
13893; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
13894; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13895; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13896; GFX900-NEXT:    ;;#ASMSTART
13897; GFX900-NEXT:    ; use s[8:9]
13898; GFX900-NEXT:    ;;#ASMEND
13899; GFX900-NEXT:    s_setpc_b64 s[30:31]
13900;
13901; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_0_0_0:
13902; GFX90A:       ; %bb.0:
13903; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13904; GFX90A-NEXT:    ;;#ASMSTART
13905; GFX90A-NEXT:    ; def s[4:5]
13906; GFX90A-NEXT:    ;;#ASMEND
13907; GFX90A-NEXT:    ;;#ASMSTART
13908; GFX90A-NEXT:    ; def s[6:7]
13909; GFX90A-NEXT:    ;;#ASMEND
13910; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
13911; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13912; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13913; GFX90A-NEXT:    ;;#ASMSTART
13914; GFX90A-NEXT:    ; use s[8:9]
13915; GFX90A-NEXT:    ;;#ASMEND
13916; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13917;
13918; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_0_0_0:
13919; GFX940:       ; %bb.0:
13920; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13921; GFX940-NEXT:    ;;#ASMSTART
13922; GFX940-NEXT:    ; def s[0:1]
13923; GFX940-NEXT:    ;;#ASMEND
13924; GFX940-NEXT:    ;;#ASMSTART
13925; GFX940-NEXT:    ; def s[2:3]
13926; GFX940-NEXT:    ;;#ASMEND
13927; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
13928; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13929; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
13930; GFX940-NEXT:    ;;#ASMSTART
13931; GFX940-NEXT:    ; use s[8:9]
13932; GFX940-NEXT:    ;;#ASMEND
13933; GFX940-NEXT:    s_setpc_b64 s[30:31]
13934  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13935  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13936  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
13937  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13938  ret void
13939}
13940
; Scalar-register shuffle with mask <6, 0, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the first result element is element 2 of %vec1.
; The shuffled value is handed to inline asm constrained to s[8:9].
13941define void @s_shuffle_v4i16_v4i16__6_0_0_0() {
13942; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_0_0_0:
13943; GFX900:       ; %bb.0:
13944; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13945; GFX900-NEXT:    ;;#ASMSTART
13946; GFX900-NEXT:    ; def s[4:5]
13947; GFX900-NEXT:    ;;#ASMEND
13948; GFX900-NEXT:    ;;#ASMSTART
13949; GFX900-NEXT:    ; def s[6:7]
13950; GFX900-NEXT:    ;;#ASMEND
13951; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
13952; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13953; GFX900-NEXT:    ;;#ASMSTART
13954; GFX900-NEXT:    ; use s[8:9]
13955; GFX900-NEXT:    ;;#ASMEND
13956; GFX900-NEXT:    s_setpc_b64 s[30:31]
13957;
13958; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_0_0_0:
13959; GFX90A:       ; %bb.0:
13960; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13961; GFX90A-NEXT:    ;;#ASMSTART
13962; GFX90A-NEXT:    ; def s[4:5]
13963; GFX90A-NEXT:    ;;#ASMEND
13964; GFX90A-NEXT:    ;;#ASMSTART
13965; GFX90A-NEXT:    ; def s[6:7]
13966; GFX90A-NEXT:    ;;#ASMEND
13967; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
13968; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
13969; GFX90A-NEXT:    ;;#ASMSTART
13970; GFX90A-NEXT:    ; use s[8:9]
13971; GFX90A-NEXT:    ;;#ASMEND
13972; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13973;
13974; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_0_0_0:
13975; GFX940:       ; %bb.0:
13976; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13977; GFX940-NEXT:    ;;#ASMSTART
13978; GFX940-NEXT:    ; def s[0:1]
13979; GFX940-NEXT:    ;;#ASMEND
13980; GFX940-NEXT:    ;;#ASMSTART
13981; GFX940-NEXT:    ; def s[2:3]
13982; GFX940-NEXT:    ;;#ASMEND
13983; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
13984; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
13985; GFX940-NEXT:    ;;#ASMSTART
13986; GFX940-NEXT:    ; use s[8:9]
13987; GFX940-NEXT:    ;;#ASMEND
13988; GFX940-NEXT:    s_setpc_b64 s[30:31]
13989  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13990  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13991  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 0, i32 0, i32 0>
13992  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
13993  ret void
13994}
13995
; Scalar-register shuffle with mask <7, 0, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the first result element is element 3 of %vec1.
; The shuffled value is handed to inline asm constrained to s[8:9].
13996define void @s_shuffle_v4i16_v4i16__7_0_0_0() {
13997; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_0_0:
13998; GFX900:       ; %bb.0:
13999; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14000; GFX900-NEXT:    ;;#ASMSTART
14001; GFX900-NEXT:    ; def s[4:5]
14002; GFX900-NEXT:    ;;#ASMEND
14003; GFX900-NEXT:    ;;#ASMSTART
14004; GFX900-NEXT:    ; def s[6:7]
14005; GFX900-NEXT:    ;;#ASMEND
14006; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
14007; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14008; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14009; GFX900-NEXT:    ;;#ASMSTART
14010; GFX900-NEXT:    ; use s[8:9]
14011; GFX900-NEXT:    ;;#ASMEND
14012; GFX900-NEXT:    s_setpc_b64 s[30:31]
14013;
14014; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_0_0:
14015; GFX90A:       ; %bb.0:
14016; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14017; GFX90A-NEXT:    ;;#ASMSTART
14018; GFX90A-NEXT:    ; def s[4:5]
14019; GFX90A-NEXT:    ;;#ASMEND
14020; GFX90A-NEXT:    ;;#ASMSTART
14021; GFX90A-NEXT:    ; def s[6:7]
14022; GFX90A-NEXT:    ;;#ASMEND
14023; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
14024; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14025; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14026; GFX90A-NEXT:    ;;#ASMSTART
14027; GFX90A-NEXT:    ; use s[8:9]
14028; GFX90A-NEXT:    ;;#ASMEND
14029; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14030;
14031; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_0_0:
14032; GFX940:       ; %bb.0:
14033; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14034; GFX940-NEXT:    ;;#ASMSTART
14035; GFX940-NEXT:    ; def s[0:1]
14036; GFX940-NEXT:    ;;#ASMEND
14037; GFX940-NEXT:    ;;#ASMSTART
14038; GFX940-NEXT:    ; def s[2:3]
14039; GFX940-NEXT:    ;;#ASMEND
14040; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
14041; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
14042; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
14043; GFX940-NEXT:    ;;#ASMSTART
14044; GFX940-NEXT:    ; use s[8:9]
14045; GFX940-NEXT:    ;;#ASMEND
14046; GFX940-NEXT:    s_setpc_b64 s[30:31]
14047  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14048  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14049  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 0, i32 0>
14050  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14051  ret void
14052}
14053
; Scalar-register shuffle with mask <7, poison, 0, 0> over two source vectors,
; each produced by inline asm with an "=s" constraint. The second result
; element is left unspecified by the poison mask entry; the first is element 3
; of %vec1 and the last two are element 0 of %vec0.
; The shuffled value is handed to inline asm constrained to s[8:9].
14054define void @s_shuffle_v4i16_v4i16__7_u_0_0() {
14055; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_0_0:
14056; GFX900:       ; %bb.0:
14057; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14058; GFX900-NEXT:    ;;#ASMSTART
14059; GFX900-NEXT:    ; def s[4:5]
14060; GFX900-NEXT:    ;;#ASMEND
14061; GFX900-NEXT:    ;;#ASMSTART
14062; GFX900-NEXT:    ; def s[6:7]
14063; GFX900-NEXT:    ;;#ASMEND
14064; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
14065; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14066; GFX900-NEXT:    ;;#ASMSTART
14067; GFX900-NEXT:    ; use s[8:9]
14068; GFX900-NEXT:    ;;#ASMEND
14069; GFX900-NEXT:    s_setpc_b64 s[30:31]
14070;
14071; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_0_0:
14072; GFX90A:       ; %bb.0:
14073; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14074; GFX90A-NEXT:    ;;#ASMSTART
14075; GFX90A-NEXT:    ; def s[4:5]
14076; GFX90A-NEXT:    ;;#ASMEND
14077; GFX90A-NEXT:    ;;#ASMSTART
14078; GFX90A-NEXT:    ; def s[6:7]
14079; GFX90A-NEXT:    ;;#ASMEND
14080; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
14081; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14082; GFX90A-NEXT:    ;;#ASMSTART
14083; GFX90A-NEXT:    ; use s[8:9]
14084; GFX90A-NEXT:    ;;#ASMEND
14085; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14086;
14087; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_0_0:
14088; GFX940:       ; %bb.0:
14089; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14090; GFX940-NEXT:    ;;#ASMSTART
14091; GFX940-NEXT:    ; def s[0:1]
14092; GFX940-NEXT:    ;;#ASMEND
14093; GFX940-NEXT:    ;;#ASMSTART
14094; GFX940-NEXT:    ; def s[2:3]
14095; GFX940-NEXT:    ;;#ASMEND
14096; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
14097; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
14098; GFX940-NEXT:    ;;#ASMSTART
14099; GFX940-NEXT:    ; use s[8:9]
14100; GFX940-NEXT:    ;;#ASMEND
14101; GFX940-NEXT:    s_setpc_b64 s[30:31]
14102  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14103  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14104  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 0, i32 0>
14105  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14106  ret void
14107}
14108
; Scalar-register shuffle with mask <7, 1, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the result is {vec1[3], vec0[1], vec0[0], vec0[0]}.
; The shuffled value is handed to inline asm constrained to s[8:9].
14109define void @s_shuffle_v4i16_v4i16__7_1_0_0() {
14110; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_0_0:
14111; GFX900:       ; %bb.0:
14112; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14113; GFX900-NEXT:    ;;#ASMSTART
14114; GFX900-NEXT:    ; def s[4:5]
14115; GFX900-NEXT:    ;;#ASMEND
14116; GFX900-NEXT:    ;;#ASMSTART
14117; GFX900-NEXT:    ; def s[6:7]
14118; GFX900-NEXT:    ;;#ASMEND
14119; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
14120; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14121; GFX900-NEXT:    ;;#ASMSTART
14122; GFX900-NEXT:    ; use s[8:9]
14123; GFX900-NEXT:    ;;#ASMEND
14124; GFX900-NEXT:    s_setpc_b64 s[30:31]
14125;
14126; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_0_0:
14127; GFX90A:       ; %bb.0:
14128; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14129; GFX90A-NEXT:    ;;#ASMSTART
14130; GFX90A-NEXT:    ; def s[4:5]
14131; GFX90A-NEXT:    ;;#ASMEND
14132; GFX90A-NEXT:    ;;#ASMSTART
14133; GFX90A-NEXT:    ; def s[6:7]
14134; GFX90A-NEXT:    ;;#ASMEND
14135; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
14136; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14137; GFX90A-NEXT:    ;;#ASMSTART
14138; GFX90A-NEXT:    ; use s[8:9]
14139; GFX90A-NEXT:    ;;#ASMEND
14140; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14141;
14142; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_0_0:
14143; GFX940:       ; %bb.0:
14144; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14145; GFX940-NEXT:    ;;#ASMSTART
14146; GFX940-NEXT:    ; def s[0:1]
14147; GFX940-NEXT:    ;;#ASMEND
14148; GFX940-NEXT:    ;;#ASMSTART
14149; GFX940-NEXT:    ; def s[2:3]
14150; GFX940-NEXT:    ;;#ASMEND
14151; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
14152; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
14153; GFX940-NEXT:    ;;#ASMSTART
14154; GFX940-NEXT:    ; use s[8:9]
14155; GFX940-NEXT:    ;;#ASMEND
14156; GFX940-NEXT:    s_setpc_b64 s[30:31]
14157  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14158  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14159  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 0, i32 0>
14160  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14161  ret void
14162}
14163
; Scalar-register shuffle with mask <7, 2, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the result is {vec1[3], vec0[2], vec0[0], vec0[0]}.
; The shuffled value is handed to inline asm constrained to s[8:9].
14164define void @s_shuffle_v4i16_v4i16__7_2_0_0() {
14165; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_0_0:
14166; GFX900:       ; %bb.0:
14167; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14168; GFX900-NEXT:    ;;#ASMSTART
14169; GFX900-NEXT:    ; def s[6:7]
14170; GFX900-NEXT:    ;;#ASMEND
14171; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
14172; GFX900-NEXT:    ;;#ASMSTART
14173; GFX900-NEXT:    ; def s[4:5]
14174; GFX900-NEXT:    ;;#ASMEND
14175; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
14176; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14177; GFX900-NEXT:    ;;#ASMSTART
14178; GFX900-NEXT:    ; use s[8:9]
14179; GFX900-NEXT:    ;;#ASMEND
14180; GFX900-NEXT:    s_setpc_b64 s[30:31]
14181;
14182; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_0_0:
14183; GFX90A:       ; %bb.0:
14184; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14185; GFX90A-NEXT:    ;;#ASMSTART
14186; GFX90A-NEXT:    ; def s[6:7]
14187; GFX90A-NEXT:    ;;#ASMEND
14188; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
14189; GFX90A-NEXT:    ;;#ASMSTART
14190; GFX90A-NEXT:    ; def s[4:5]
14191; GFX90A-NEXT:    ;;#ASMEND
14192; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
14193; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14194; GFX90A-NEXT:    ;;#ASMSTART
14195; GFX90A-NEXT:    ; use s[8:9]
14196; GFX90A-NEXT:    ;;#ASMEND
14197; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14198;
14199; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_0_0:
14200; GFX940:       ; %bb.0:
14201; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14202; GFX940-NEXT:    ;;#ASMSTART
14203; GFX940-NEXT:    ; def s[2:3]
14204; GFX940-NEXT:    ;;#ASMEND
14205; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
14206; GFX940-NEXT:    ;;#ASMSTART
14207; GFX940-NEXT:    ; def s[0:1]
14208; GFX940-NEXT:    ;;#ASMEND
14209; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
14210; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
14211; GFX940-NEXT:    ;;#ASMSTART
14212; GFX940-NEXT:    ; use s[8:9]
14213; GFX940-NEXT:    ;;#ASMEND
14214; GFX940-NEXT:    s_setpc_b64 s[30:31]
14215  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14216  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14217  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 0, i32 0>
14218  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14219  ret void
14220}
14221
; Scalar-register shuffle with mask <7, 3, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the result is {vec1[3], vec0[3], vec0[0], vec0[0]}.
; The shuffled value is handed to inline asm constrained to s[8:9].
14222define void @s_shuffle_v4i16_v4i16__7_3_0_0() {
14223; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_0_0:
14224; GFX900:       ; %bb.0:
14225; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14226; GFX900-NEXT:    ;;#ASMSTART
14227; GFX900-NEXT:    ; def s[4:5]
14228; GFX900-NEXT:    ;;#ASMEND
14229; GFX900-NEXT:    ;;#ASMSTART
14230; GFX900-NEXT:    ; def s[6:7]
14231; GFX900-NEXT:    ;;#ASMEND
14232; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
14233; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14234; GFX900-NEXT:    ;;#ASMSTART
14235; GFX900-NEXT:    ; use s[8:9]
14236; GFX900-NEXT:    ;;#ASMEND
14237; GFX900-NEXT:    s_setpc_b64 s[30:31]
14238;
14239; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_0_0:
14240; GFX90A:       ; %bb.0:
14241; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14242; GFX90A-NEXT:    ;;#ASMSTART
14243; GFX90A-NEXT:    ; def s[4:5]
14244; GFX90A-NEXT:    ;;#ASMEND
14245; GFX90A-NEXT:    ;;#ASMSTART
14246; GFX90A-NEXT:    ; def s[6:7]
14247; GFX90A-NEXT:    ;;#ASMEND
14248; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
14249; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14250; GFX90A-NEXT:    ;;#ASMSTART
14251; GFX90A-NEXT:    ; use s[8:9]
14252; GFX90A-NEXT:    ;;#ASMEND
14253; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14254;
14255; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_0_0:
14256; GFX940:       ; %bb.0:
14257; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14258; GFX940-NEXT:    ;;#ASMSTART
14259; GFX940-NEXT:    ; def s[0:1]
14260; GFX940-NEXT:    ;;#ASMEND
14261; GFX940-NEXT:    ;;#ASMSTART
14262; GFX940-NEXT:    ; def s[2:3]
14263; GFX940-NEXT:    ;;#ASMEND
14264; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
14265; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
14266; GFX940-NEXT:    ;;#ASMSTART
14267; GFX940-NEXT:    ; use s[8:9]
14268; GFX940-NEXT:    ;;#ASMEND
14269; GFX940-NEXT:    s_setpc_b64 s[30:31]
14270  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14271  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14272  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 0, i32 0>
14273  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14274  ret void
14275}
14276
; Scalar-register shuffle with mask <7, 4, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the result is {vec1[3], vec1[0], vec0[0], vec0[0]}.
; The shuffled value is handed to inline asm constrained to s[8:9].
14277define void @s_shuffle_v4i16_v4i16__7_4_0_0() {
14278; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_0_0:
14279; GFX900:       ; %bb.0:
14280; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14281; GFX900-NEXT:    ;;#ASMSTART
14282; GFX900-NEXT:    ; def s[4:5]
14283; GFX900-NEXT:    ;;#ASMEND
14284; GFX900-NEXT:    ;;#ASMSTART
14285; GFX900-NEXT:    ; def s[6:7]
14286; GFX900-NEXT:    ;;#ASMEND
14287; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
14288; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
14289; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14290; GFX900-NEXT:    ;;#ASMSTART
14291; GFX900-NEXT:    ; use s[8:9]
14292; GFX900-NEXT:    ;;#ASMEND
14293; GFX900-NEXT:    s_setpc_b64 s[30:31]
14294;
14295; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_0_0:
14296; GFX90A:       ; %bb.0:
14297; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14298; GFX90A-NEXT:    ;;#ASMSTART
14299; GFX90A-NEXT:    ; def s[4:5]
14300; GFX90A-NEXT:    ;;#ASMEND
14301; GFX90A-NEXT:    ;;#ASMSTART
14302; GFX90A-NEXT:    ; def s[6:7]
14303; GFX90A-NEXT:    ;;#ASMEND
14304; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
14305; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
14306; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14307; GFX90A-NEXT:    ;;#ASMSTART
14308; GFX90A-NEXT:    ; use s[8:9]
14309; GFX90A-NEXT:    ;;#ASMEND
14310; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14311;
14312; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_0_0:
14313; GFX940:       ; %bb.0:
14314; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14315; GFX940-NEXT:    ;;#ASMSTART
14316; GFX940-NEXT:    ; def s[0:1]
14317; GFX940-NEXT:    ;;#ASMEND
14318; GFX940-NEXT:    ;;#ASMSTART
14319; GFX940-NEXT:    ; def s[2:3]
14320; GFX940-NEXT:    ;;#ASMEND
14321; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
14322; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s2
14323; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
14324; GFX940-NEXT:    ;;#ASMSTART
14325; GFX940-NEXT:    ; use s[8:9]
14326; GFX940-NEXT:    ;;#ASMEND
14327; GFX940-NEXT:    s_setpc_b64 s[30:31]
14328  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14329  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14330  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 0, i32 0>
14331  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14332  ret void
14333}
14334
; Scalar-register shuffle with mask <7, 5, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the result is {vec1[3], vec1[1], vec0[0], vec0[0]}.
; The shuffled value is handed to inline asm constrained to s[8:9].
14335define void @s_shuffle_v4i16_v4i16__7_5_0_0() {
14336; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_0_0:
14337; GFX900:       ; %bb.0:
14338; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14339; GFX900-NEXT:    ;;#ASMSTART
14340; GFX900-NEXT:    ; def s[4:5]
14341; GFX900-NEXT:    ;;#ASMEND
14342; GFX900-NEXT:    ;;#ASMSTART
14343; GFX900-NEXT:    ; def s[6:7]
14344; GFX900-NEXT:    ;;#ASMEND
14345; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
14346; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14347; GFX900-NEXT:    ;;#ASMSTART
14348; GFX900-NEXT:    ; use s[8:9]
14349; GFX900-NEXT:    ;;#ASMEND
14350; GFX900-NEXT:    s_setpc_b64 s[30:31]
14351;
14352; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_0_0:
14353; GFX90A:       ; %bb.0:
14354; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14355; GFX90A-NEXT:    ;;#ASMSTART
14356; GFX90A-NEXT:    ; def s[4:5]
14357; GFX90A-NEXT:    ;;#ASMEND
14358; GFX90A-NEXT:    ;;#ASMSTART
14359; GFX90A-NEXT:    ; def s[6:7]
14360; GFX90A-NEXT:    ;;#ASMEND
14361; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
14362; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14363; GFX90A-NEXT:    ;;#ASMSTART
14364; GFX90A-NEXT:    ; use s[8:9]
14365; GFX90A-NEXT:    ;;#ASMEND
14366; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14367;
14368; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_0_0:
14369; GFX940:       ; %bb.0:
14370; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14371; GFX940-NEXT:    ;;#ASMSTART
14372; GFX940-NEXT:    ; def s[0:1]
14373; GFX940-NEXT:    ;;#ASMEND
14374; GFX940-NEXT:    ;;#ASMSTART
14375; GFX940-NEXT:    ; def s[2:3]
14376; GFX940-NEXT:    ;;#ASMEND
14377; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s2
14378; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
14379; GFX940-NEXT:    ;;#ASMSTART
14380; GFX940-NEXT:    ; use s[8:9]
14381; GFX940-NEXT:    ;;#ASMEND
14382; GFX940-NEXT:    s_setpc_b64 s[30:31]
14383  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14384  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14385  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 0, i32 0>
14386  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14387  ret void
14388}
14389
; Scalar-register shuffle with mask <7, 6, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the result is {vec1[3], vec1[2], vec0[0], vec0[0]}.
; The shuffled value is handed to inline asm constrained to s[8:9].
14390define void @s_shuffle_v4i16_v4i16__7_6_0_0() {
14391; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_0_0:
14392; GFX900:       ; %bb.0:
14393; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14394; GFX900-NEXT:    ;;#ASMSTART
14395; GFX900-NEXT:    ; def s[4:5]
14396; GFX900-NEXT:    ;;#ASMEND
14397; GFX900-NEXT:    ;;#ASMSTART
14398; GFX900-NEXT:    ; def s[6:7]
14399; GFX900-NEXT:    ;;#ASMEND
14400; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
14401; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
14402; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14403; GFX900-NEXT:    ;;#ASMSTART
14404; GFX900-NEXT:    ; use s[8:9]
14405; GFX900-NEXT:    ;;#ASMEND
14406; GFX900-NEXT:    s_setpc_b64 s[30:31]
14407;
14408; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_0_0:
14409; GFX90A:       ; %bb.0:
14410; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14411; GFX90A-NEXT:    ;;#ASMSTART
14412; GFX90A-NEXT:    ; def s[4:5]
14413; GFX90A-NEXT:    ;;#ASMEND
14414; GFX90A-NEXT:    ;;#ASMSTART
14415; GFX90A-NEXT:    ; def s[6:7]
14416; GFX90A-NEXT:    ;;#ASMEND
14417; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
14418; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
14419; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14420; GFX90A-NEXT:    ;;#ASMSTART
14421; GFX90A-NEXT:    ; use s[8:9]
14422; GFX90A-NEXT:    ;;#ASMEND
14423; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14424;
14425; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_0_0:
14426; GFX940:       ; %bb.0:
14427; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14428; GFX940-NEXT:    ;;#ASMSTART
14429; GFX940-NEXT:    ; def s[0:1]
14430; GFX940-NEXT:    ;;#ASMEND
14431; GFX940-NEXT:    ;;#ASMSTART
14432; GFX940-NEXT:    ; def s[2:3]
14433; GFX940-NEXT:    ;;#ASMEND
14434; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
14435; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s3
14436; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
14437; GFX940-NEXT:    ;;#ASMSTART
14438; GFX940-NEXT:    ; use s[8:9]
14439; GFX940-NEXT:    ;;#ASMEND
14440; GFX940-NEXT:    s_setpc_b64 s[30:31]
14441  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14442  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14443  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 0, i32 0>
14444  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14445  ret void
14446}
14447
; Scalar-register shuffle with mask <7, 7, 0, 0> over two source vectors, each
; produced by inline asm with an "=s" constraint. Mask indices 0-3 pick from
; %vec0 and 4-7 from %vec1, so the result is {vec1[3], vec1[3], vec0[0], vec0[0]}.
; The shuffled value is handed to inline asm constrained to s[8:9].
14448define void @s_shuffle_v4i16_v4i16__7_7_0_0() {
14449; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_0:
14450; GFX900:       ; %bb.0:
14451; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14452; GFX900-NEXT:    ;;#ASMSTART
14453; GFX900-NEXT:    ; def s[4:5]
14454; GFX900-NEXT:    ;;#ASMEND
14455; GFX900-NEXT:    ;;#ASMSTART
14456; GFX900-NEXT:    ; def s[6:7]
14457; GFX900-NEXT:    ;;#ASMEND
14458; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14459; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14460; GFX900-NEXT:    ;;#ASMSTART
14461; GFX900-NEXT:    ; use s[8:9]
14462; GFX900-NEXT:    ;;#ASMEND
14463; GFX900-NEXT:    s_setpc_b64 s[30:31]
14464;
14465; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_0:
14466; GFX90A:       ; %bb.0:
14467; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14468; GFX90A-NEXT:    ;;#ASMSTART
14469; GFX90A-NEXT:    ; def s[4:5]
14470; GFX90A-NEXT:    ;;#ASMEND
14471; GFX90A-NEXT:    ;;#ASMSTART
14472; GFX90A-NEXT:    ; def s[6:7]
14473; GFX90A-NEXT:    ;;#ASMEND
14474; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
14475; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14476; GFX90A-NEXT:    ;;#ASMSTART
14477; GFX90A-NEXT:    ; use s[8:9]
14478; GFX90A-NEXT:    ;;#ASMEND
14479; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14480;
14481; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_0:
14482; GFX940:       ; %bb.0:
14483; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14484; GFX940-NEXT:    ;;#ASMSTART
14485; GFX940-NEXT:    ; def s[0:1]
14486; GFX940-NEXT:    ;;#ASMEND
14487; GFX940-NEXT:    ;;#ASMSTART
14488; GFX940-NEXT:    ; def s[2:3]
14489; GFX940-NEXT:    ;;#ASMEND
14490; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
14491; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
14492; GFX940-NEXT:    ;;#ASMSTART
14493; GFX940-NEXT:    ; use s[8:9]
14494; GFX940-NEXT:    ;;#ASMEND
14495; GFX940-NEXT:    s_setpc_b64 s[30:31]
14496  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14497  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14498  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 0>
14499  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14500  ret void
14501}
14502
; Scalar-register shuffle with mask <7, 7, poison, 0> over two source vectors,
; each produced by inline asm with an "=s" constraint. The third result element
; is left unspecified by the poison mask entry; the first two are element 3 of
; %vec1 and the last is element 0 of %vec0.
; The shuffled value is handed to inline asm constrained to s[8:9].
14503define void @s_shuffle_v4i16_v4i16__7_7_u_0() {
14504; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_0:
14505; GFX900:       ; %bb.0:
14506; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14507; GFX900-NEXT:    ;;#ASMSTART
14508; GFX900-NEXT:    ; def s[4:5]
14509; GFX900-NEXT:    ;;#ASMEND
14510; GFX900-NEXT:    ;;#ASMSTART
14511; GFX900-NEXT:    ; def s[6:7]
14512; GFX900-NEXT:    ;;#ASMEND
14513; GFX900-NEXT:    s_lshl_b32 s9, s4, 16
14514; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14515; GFX900-NEXT:    ;;#ASMSTART
14516; GFX900-NEXT:    ; use s[8:9]
14517; GFX900-NEXT:    ;;#ASMEND
14518; GFX900-NEXT:    s_setpc_b64 s[30:31]
14519;
14520; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_0:
14521; GFX90A:       ; %bb.0:
14522; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14523; GFX90A-NEXT:    ;;#ASMSTART
14524; GFX90A-NEXT:    ; def s[4:5]
14525; GFX90A-NEXT:    ;;#ASMEND
14526; GFX90A-NEXT:    ;;#ASMSTART
14527; GFX90A-NEXT:    ; def s[6:7]
14528; GFX90A-NEXT:    ;;#ASMEND
14529; GFX90A-NEXT:    s_lshl_b32 s9, s4, 16
14530; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14531; GFX90A-NEXT:    ;;#ASMSTART
14532; GFX90A-NEXT:    ; use s[8:9]
14533; GFX90A-NEXT:    ;;#ASMEND
14534; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14535;
14536; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_0:
14537; GFX940:       ; %bb.0:
14538; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14539; GFX940-NEXT:    ;;#ASMSTART
14540; GFX940-NEXT:    ; def s[0:1]
14541; GFX940-NEXT:    ;;#ASMEND
14542; GFX940-NEXT:    ;;#ASMSTART
14543; GFX940-NEXT:    ; def s[2:3]
14544; GFX940-NEXT:    ;;#ASMEND
14545; GFX940-NEXT:    s_lshl_b32 s9, s0, 16
14546; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
14547; GFX940-NEXT:    ;;#ASMSTART
14548; GFX940-NEXT:    ; use s[8:9]
14549; GFX940-NEXT:    ;;#ASMEND
14550; GFX940-NEXT:    s_setpc_b64 s[30:31]
14551  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14552  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14553  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 0>
14554  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14555  ret void
14556}
14557
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <7, 7, 1, 0>. Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so the result is <%vec1[3], %vec1[3], %vec0[1], %vec0[0]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
14558define void @s_shuffle_v4i16_v4i16__7_7_1_0() {
14559; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_0:
14560; GFX900:       ; %bb.0:
14561; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14562; GFX900-NEXT:    ;;#ASMSTART
14563; GFX900-NEXT:    ; def s[4:5]
14564; GFX900-NEXT:    ;;#ASMEND
14565; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
14566; GFX900-NEXT:    ;;#ASMSTART
14567; GFX900-NEXT:    ; def s[6:7]
14568; GFX900-NEXT:    ;;#ASMEND
14569; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
14570; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14571; GFX900-NEXT:    ;;#ASMSTART
14572; GFX900-NEXT:    ; use s[8:9]
14573; GFX900-NEXT:    ;;#ASMEND
14574; GFX900-NEXT:    s_setpc_b64 s[30:31]
14575;
14576; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_0:
14577; GFX90A:       ; %bb.0:
14578; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14579; GFX90A-NEXT:    ;;#ASMSTART
14580; GFX90A-NEXT:    ; def s[4:5]
14581; GFX90A-NEXT:    ;;#ASMEND
14582; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
14583; GFX90A-NEXT:    ;;#ASMSTART
14584; GFX90A-NEXT:    ; def s[6:7]
14585; GFX90A-NEXT:    ;;#ASMEND
14586; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
14587; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14588; GFX90A-NEXT:    ;;#ASMSTART
14589; GFX90A-NEXT:    ; use s[8:9]
14590; GFX90A-NEXT:    ;;#ASMEND
14591; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14592;
14593; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_0:
14594; GFX940:       ; %bb.0:
14595; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14596; GFX940-NEXT:    ;;#ASMSTART
14597; GFX940-NEXT:    ; def s[0:1]
14598; GFX940-NEXT:    ;;#ASMEND
14599; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
14600; GFX940-NEXT:    ;;#ASMSTART
14601; GFX940-NEXT:    ; def s[2:3]
14602; GFX940-NEXT:    ;;#ASMEND
14603; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
14604; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
14605; GFX940-NEXT:    ;;#ASMSTART
14606; GFX940-NEXT:    ; use s[8:9]
14607; GFX940-NEXT:    ;;#ASMEND
14608; GFX940-NEXT:    s_setpc_b64 s[30:31]
14609  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14610  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14611  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 0>
14612  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14613  ret void
14614}
14615
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <7, 7, 2, 0>. Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so the result is <%vec1[3], %vec1[3], %vec0[2], %vec0[0]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
14616define void @s_shuffle_v4i16_v4i16__7_7_2_0() {
14617; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_0:
14618; GFX900:       ; %bb.0:
14619; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14620; GFX900-NEXT:    ;;#ASMSTART
14621; GFX900-NEXT:    ; def s[4:5]
14622; GFX900-NEXT:    ;;#ASMEND
14623; GFX900-NEXT:    ;;#ASMSTART
14624; GFX900-NEXT:    ; def s[6:7]
14625; GFX900-NEXT:    ;;#ASMEND
14626; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
14627; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14628; GFX900-NEXT:    ;;#ASMSTART
14629; GFX900-NEXT:    ; use s[8:9]
14630; GFX900-NEXT:    ;;#ASMEND
14631; GFX900-NEXT:    s_setpc_b64 s[30:31]
14632;
14633; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_0:
14634; GFX90A:       ; %bb.0:
14635; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14636; GFX90A-NEXT:    ;;#ASMSTART
14637; GFX90A-NEXT:    ; def s[4:5]
14638; GFX90A-NEXT:    ;;#ASMEND
14639; GFX90A-NEXT:    ;;#ASMSTART
14640; GFX90A-NEXT:    ; def s[6:7]
14641; GFX90A-NEXT:    ;;#ASMEND
14642; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
14643; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14644; GFX90A-NEXT:    ;;#ASMSTART
14645; GFX90A-NEXT:    ; use s[8:9]
14646; GFX90A-NEXT:    ;;#ASMEND
14647; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14648;
14649; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_0:
14650; GFX940:       ; %bb.0:
14651; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14652; GFX940-NEXT:    ;;#ASMSTART
14653; GFX940-NEXT:    ; def s[0:1]
14654; GFX940-NEXT:    ;;#ASMEND
14655; GFX940-NEXT:    ;;#ASMSTART
14656; GFX940-NEXT:    ; def s[2:3]
14657; GFX940-NEXT:    ;;#ASMEND
14658; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
14659; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
14660; GFX940-NEXT:    ;;#ASMSTART
14661; GFX940-NEXT:    ; use s[8:9]
14662; GFX940-NEXT:    ;;#ASMEND
14663; GFX940-NEXT:    s_setpc_b64 s[30:31]
14664  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14665  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14666  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 0>
14667  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14668  ret void
14669}
14670
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <7, 7, 3, 0>. Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so the result is <%vec1[3], %vec1[3], %vec0[3], %vec0[0]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
14671define void @s_shuffle_v4i16_v4i16__7_7_3_0() {
14672; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_0:
14673; GFX900:       ; %bb.0:
14674; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14675; GFX900-NEXT:    ;;#ASMSTART
14676; GFX900-NEXT:    ; def s[4:5]
14677; GFX900-NEXT:    ;;#ASMEND
14678; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
14679; GFX900-NEXT:    ;;#ASMSTART
14680; GFX900-NEXT:    ; def s[6:7]
14681; GFX900-NEXT:    ;;#ASMEND
14682; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
14683; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14684; GFX900-NEXT:    ;;#ASMSTART
14685; GFX900-NEXT:    ; use s[8:9]
14686; GFX900-NEXT:    ;;#ASMEND
14687; GFX900-NEXT:    s_setpc_b64 s[30:31]
14688;
14689; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_0:
14690; GFX90A:       ; %bb.0:
14691; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14692; GFX90A-NEXT:    ;;#ASMSTART
14693; GFX90A-NEXT:    ; def s[4:5]
14694; GFX90A-NEXT:    ;;#ASMEND
14695; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
14696; GFX90A-NEXT:    ;;#ASMSTART
14697; GFX90A-NEXT:    ; def s[6:7]
14698; GFX90A-NEXT:    ;;#ASMEND
14699; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
14700; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14701; GFX90A-NEXT:    ;;#ASMSTART
14702; GFX90A-NEXT:    ; use s[8:9]
14703; GFX90A-NEXT:    ;;#ASMEND
14704; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14705;
14706; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_0:
14707; GFX940:       ; %bb.0:
14708; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14709; GFX940-NEXT:    ;;#ASMSTART
14710; GFX940-NEXT:    ; def s[0:1]
14711; GFX940-NEXT:    ;;#ASMEND
14712; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
14713; GFX940-NEXT:    ;;#ASMSTART
14714; GFX940-NEXT:    ; def s[2:3]
14715; GFX940-NEXT:    ;;#ASMEND
14716; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
14717; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
14718; GFX940-NEXT:    ;;#ASMSTART
14719; GFX940-NEXT:    ; use s[8:9]
14720; GFX940-NEXT:    ;;#ASMEND
14721; GFX940-NEXT:    s_setpc_b64 s[30:31]
14722  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14723  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14724  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 0>
14725  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14726  ret void
14727}
14728
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <7, 7, 4, 0>. Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so the result is <%vec1[3], %vec1[3], %vec1[0], %vec0[0]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
14729define void @s_shuffle_v4i16_v4i16__7_7_4_0() {
14730; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_0:
14731; GFX900:       ; %bb.0:
14732; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14733; GFX900-NEXT:    ;;#ASMSTART
14734; GFX900-NEXT:    ; def s[4:5]
14735; GFX900-NEXT:    ;;#ASMEND
14736; GFX900-NEXT:    ;;#ASMSTART
14737; GFX900-NEXT:    ; def s[6:7]
14738; GFX900-NEXT:    ;;#ASMEND
14739; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
14740; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14741; GFX900-NEXT:    ;;#ASMSTART
14742; GFX900-NEXT:    ; use s[8:9]
14743; GFX900-NEXT:    ;;#ASMEND
14744; GFX900-NEXT:    s_setpc_b64 s[30:31]
14745;
14746; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_0:
14747; GFX90A:       ; %bb.0:
14748; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14749; GFX90A-NEXT:    ;;#ASMSTART
14750; GFX90A-NEXT:    ; def s[4:5]
14751; GFX90A-NEXT:    ;;#ASMEND
14752; GFX90A-NEXT:    ;;#ASMSTART
14753; GFX90A-NEXT:    ; def s[6:7]
14754; GFX90A-NEXT:    ;;#ASMEND
14755; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
14756; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14757; GFX90A-NEXT:    ;;#ASMSTART
14758; GFX90A-NEXT:    ; use s[8:9]
14759; GFX90A-NEXT:    ;;#ASMEND
14760; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14761;
14762; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_0:
14763; GFX940:       ; %bb.0:
14764; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14765; GFX940-NEXT:    ;;#ASMSTART
14766; GFX940-NEXT:    ; def s[0:1]
14767; GFX940-NEXT:    ;;#ASMEND
14768; GFX940-NEXT:    ;;#ASMSTART
14769; GFX940-NEXT:    ; def s[2:3]
14770; GFX940-NEXT:    ;;#ASMEND
14771; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s0
14772; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
14773; GFX940-NEXT:    ;;#ASMSTART
14774; GFX940-NEXT:    ; use s[8:9]
14775; GFX940-NEXT:    ;;#ASMEND
14776; GFX940-NEXT:    s_setpc_b64 s[30:31]
14777  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14778  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14779  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 0>
14780  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14781  ret void
14782}
14783
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <7, 7, 5, 0>. Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so the result is <%vec1[3], %vec1[3], %vec1[1], %vec0[0]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
14784define void @s_shuffle_v4i16_v4i16__7_7_5_0() {
14785; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_0:
14786; GFX900:       ; %bb.0:
14787; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14788; GFX900-NEXT:    ;;#ASMSTART
14789; GFX900-NEXT:    ; def s[4:5]
14790; GFX900-NEXT:    ;;#ASMEND
14791; GFX900-NEXT:    ;;#ASMSTART
14792; GFX900-NEXT:    ; def s[6:7]
14793; GFX900-NEXT:    ;;#ASMEND
14794; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
14795; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
14796; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14797; GFX900-NEXT:    ;;#ASMSTART
14798; GFX900-NEXT:    ; use s[8:9]
14799; GFX900-NEXT:    ;;#ASMEND
14800; GFX900-NEXT:    s_setpc_b64 s[30:31]
14801;
14802; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_0:
14803; GFX90A:       ; %bb.0:
14804; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14805; GFX90A-NEXT:    ;;#ASMSTART
14806; GFX90A-NEXT:    ; def s[4:5]
14807; GFX90A-NEXT:    ;;#ASMEND
14808; GFX90A-NEXT:    ;;#ASMSTART
14809; GFX90A-NEXT:    ; def s[6:7]
14810; GFX90A-NEXT:    ;;#ASMEND
14811; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
14812; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
14813; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14814; GFX90A-NEXT:    ;;#ASMSTART
14815; GFX90A-NEXT:    ; use s[8:9]
14816; GFX90A-NEXT:    ;;#ASMEND
14817; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14818;
14819; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_0:
14820; GFX940:       ; %bb.0:
14821; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14822; GFX940-NEXT:    ;;#ASMSTART
14823; GFX940-NEXT:    ; def s[0:1]
14824; GFX940-NEXT:    ;;#ASMEND
14825; GFX940-NEXT:    ;;#ASMSTART
14826; GFX940-NEXT:    ; def s[2:3]
14827; GFX940-NEXT:    ;;#ASMEND
14828; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
14829; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
14830; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
14831; GFX940-NEXT:    ;;#ASMSTART
14832; GFX940-NEXT:    ; use s[8:9]
14833; GFX940-NEXT:    ;;#ASMEND
14834; GFX940-NEXT:    s_setpc_b64 s[30:31]
14835  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14836  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14837  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 0>
14838  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14839  ret void
14840}
14841
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <7, 7, 6, 0>. Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so the result is <%vec1[3], %vec1[3], %vec1[2], %vec0[0]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
14842define void @s_shuffle_v4i16_v4i16__7_7_6_0() {
14843; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_0:
14844; GFX900:       ; %bb.0:
14845; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14846; GFX900-NEXT:    ;;#ASMSTART
14847; GFX900-NEXT:    ; def s[4:5]
14848; GFX900-NEXT:    ;;#ASMEND
14849; GFX900-NEXT:    ;;#ASMSTART
14850; GFX900-NEXT:    ; def s[6:7]
14851; GFX900-NEXT:    ;;#ASMEND
14852; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s4
14853; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14854; GFX900-NEXT:    ;;#ASMSTART
14855; GFX900-NEXT:    ; use s[8:9]
14856; GFX900-NEXT:    ;;#ASMEND
14857; GFX900-NEXT:    s_setpc_b64 s[30:31]
14858;
14859; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_0:
14860; GFX90A:       ; %bb.0:
14861; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14862; GFX90A-NEXT:    ;;#ASMSTART
14863; GFX90A-NEXT:    ; def s[4:5]
14864; GFX90A-NEXT:    ;;#ASMEND
14865; GFX90A-NEXT:    ;;#ASMSTART
14866; GFX90A-NEXT:    ; def s[6:7]
14867; GFX90A-NEXT:    ;;#ASMEND
14868; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s4
14869; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
14870; GFX90A-NEXT:    ;;#ASMSTART
14871; GFX90A-NEXT:    ; use s[8:9]
14872; GFX90A-NEXT:    ;;#ASMEND
14873; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14874;
14875; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_0:
14876; GFX940:       ; %bb.0:
14877; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14878; GFX940-NEXT:    ;;#ASMSTART
14879; GFX940-NEXT:    ; def s[0:1]
14880; GFX940-NEXT:    ;;#ASMEND
14881; GFX940-NEXT:    ;;#ASMSTART
14882; GFX940-NEXT:    ; def s[2:3]
14883; GFX940-NEXT:    ;;#ASMEND
14884; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s0
14885; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
14886; GFX940-NEXT:    ;;#ASMSTART
14887; GFX940-NEXT:    ; use s[8:9]
14888; GFX940-NEXT:    ;;#ASMEND
14889; GFX940-NEXT:    s_setpc_b64 s[30:31]
14890  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14891  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14892  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 0>
14893  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14894  ret void
14895}
14896
; Scalar-register shuffle of a single <4 x i16> inline-asm value ("=s" output)
; with mask <poison, 1, 1, 1>; the second shuffle operand is poison. The
; result is <poison, %vec0[1], %vec0[1], %vec0[1]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; All three RUN configurations share one set of assertions here. They are
; generated (see the file header); regenerate them with the update script
; rather than editing them by hand.
14897define void @s_shuffle_v4i16_v4i16__u_1_1_1() {
14898; GFX9-LABEL: s_shuffle_v4i16_v4i16__u_1_1_1:
14899; GFX9:       ; %bb.0:
14900; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14901; GFX9-NEXT:    ;;#ASMSTART
14902; GFX9-NEXT:    ; def s[8:9]
14903; GFX9-NEXT:    ;;#ASMEND
14904; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
14905; GFX9-NEXT:    ;;#ASMSTART
14906; GFX9-NEXT:    ; use s[8:9]
14907; GFX9-NEXT:    ;;#ASMEND
14908; GFX9-NEXT:    s_setpc_b64 s[30:31]
14909  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14910  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
14911  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14912  ret void
14913}
14914
; Scalar-register shuffle of a single <4 x i16> inline-asm value ("=s" output)
; with mask <0, 1, 1, 1>; the second shuffle operand is poison. The result is
; <%vec0[0], %vec0[1], %vec0[1], %vec0[1]>, i.e. the first two lanes are
; passed through unchanged.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; All three RUN configurations share one set of assertions here. They are
; generated (see the file header); regenerate them with the update script
; rather than editing them by hand.
14915define void @s_shuffle_v4i16_v4i16__0_1_1_1() {
14916; GFX9-LABEL: s_shuffle_v4i16_v4i16__0_1_1_1:
14917; GFX9:       ; %bb.0:
14918; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14919; GFX9-NEXT:    ;;#ASMSTART
14920; GFX9-NEXT:    ; def s[8:9]
14921; GFX9-NEXT:    ;;#ASMEND
14922; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
14923; GFX9-NEXT:    ;;#ASMSTART
14924; GFX9-NEXT:    ; use s[8:9]
14925; GFX9-NEXT:    ;;#ASMEND
14926; GFX9-NEXT:    s_setpc_b64 s[30:31]
14927  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14928  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
14929  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14930  ret void
14931}
14932
; Scalar-register shuffle of a single <4 x i16> inline-asm value ("=s" output)
; with mask <1, 1, 1, 1>; the second shuffle operand is poison. Every result
; lane is %vec0[1] (a splat of element 1).
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
14933define void @s_shuffle_v4i16_v4i16__1_1_1_1() {
14934; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_1_1_1:
14935; GFX900:       ; %bb.0:
14936; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14937; GFX900-NEXT:    ;;#ASMSTART
14938; GFX900-NEXT:    ; def s[4:5]
14939; GFX900-NEXT:    ;;#ASMEND
14940; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
14941; GFX900-NEXT:    s_mov_b32 s9, s8
14942; GFX900-NEXT:    ;;#ASMSTART
14943; GFX900-NEXT:    ; use s[8:9]
14944; GFX900-NEXT:    ;;#ASMEND
14945; GFX900-NEXT:    s_setpc_b64 s[30:31]
14946;
14947; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_1_1_1:
14948; GFX90A:       ; %bb.0:
14949; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14950; GFX90A-NEXT:    ;;#ASMSTART
14951; GFX90A-NEXT:    ; def s[4:5]
14952; GFX90A-NEXT:    ;;#ASMEND
14953; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
14954; GFX90A-NEXT:    s_mov_b32 s9, s8
14955; GFX90A-NEXT:    ;;#ASMSTART
14956; GFX90A-NEXT:    ; use s[8:9]
14957; GFX90A-NEXT:    ;;#ASMEND
14958; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14959;
14960; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_1_1_1:
14961; GFX940:       ; %bb.0:
14962; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14963; GFX940-NEXT:    ;;#ASMSTART
14964; GFX940-NEXT:    ; def s[0:1]
14965; GFX940-NEXT:    ;;#ASMEND
14966; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s0
14967; GFX940-NEXT:    s_mov_b32 s9, s8
14968; GFX940-NEXT:    ;;#ASMSTART
14969; GFX940-NEXT:    ; use s[8:9]
14970; GFX940-NEXT:    ;;#ASMEND
14971; GFX940-NEXT:    s_setpc_b64 s[30:31]
14972  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14973  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
14974  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
14975  ret void
14976}
14977
; Scalar-register shuffle of a single <4 x i16> inline-asm value ("=s" output)
; with mask <2, 1, 1, 1>; the second shuffle operand is poison. The result is
; <%vec0[2], %vec0[1], %vec0[1], %vec0[1]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
14978define void @s_shuffle_v4i16_v4i16__2_1_1_1() {
14979; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_1_1_1:
14980; GFX900:       ; %bb.0:
14981; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14982; GFX900-NEXT:    ;;#ASMSTART
14983; GFX900-NEXT:    ; def s[4:5]
14984; GFX900-NEXT:    ;;#ASMEND
14985; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
14986; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
14987; GFX900-NEXT:    ;;#ASMSTART
14988; GFX900-NEXT:    ; use s[8:9]
14989; GFX900-NEXT:    ;;#ASMEND
14990; GFX900-NEXT:    s_setpc_b64 s[30:31]
14991;
14992; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_1_1_1:
14993; GFX90A:       ; %bb.0:
14994; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14995; GFX90A-NEXT:    ;;#ASMSTART
14996; GFX90A-NEXT:    ; def s[4:5]
14997; GFX90A-NEXT:    ;;#ASMEND
14998; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
14999; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15000; GFX90A-NEXT:    ;;#ASMSTART
15001; GFX90A-NEXT:    ; use s[8:9]
15002; GFX90A-NEXT:    ;;#ASMEND
15003; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15004;
15005; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_1_1_1:
15006; GFX940:       ; %bb.0:
15007; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15008; GFX940-NEXT:    ;;#ASMSTART
15009; GFX940-NEXT:    ; def s[0:1]
15010; GFX940-NEXT:    ;;#ASMEND
15011; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
15012; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15013; GFX940-NEXT:    ;;#ASMSTART
15014; GFX940-NEXT:    ; use s[8:9]
15015; GFX940-NEXT:    ;;#ASMEND
15016; GFX940-NEXT:    s_setpc_b64 s[30:31]
15017  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15018  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
15019  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15020  ret void
15021}
15022
; Scalar-register shuffle of a single <4 x i16> inline-asm value ("=s" output)
; with mask <3, 1, 1, 1>; the second shuffle operand is poison. The result is
; <%vec0[3], %vec0[1], %vec0[1], %vec0[1]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
15023define void @s_shuffle_v4i16_v4i16__3_1_1_1() {
15024; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_1_1_1:
15025; GFX900:       ; %bb.0:
15026; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15027; GFX900-NEXT:    ;;#ASMSTART
15028; GFX900-NEXT:    ; def s[4:5]
15029; GFX900-NEXT:    ;;#ASMEND
15030; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
15031; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15032; GFX900-NEXT:    ;;#ASMSTART
15033; GFX900-NEXT:    ; use s[8:9]
15034; GFX900-NEXT:    ;;#ASMEND
15035; GFX900-NEXT:    s_setpc_b64 s[30:31]
15036;
15037; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_1_1_1:
15038; GFX90A:       ; %bb.0:
15039; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15040; GFX90A-NEXT:    ;;#ASMSTART
15041; GFX90A-NEXT:    ; def s[4:5]
15042; GFX90A-NEXT:    ;;#ASMEND
15043; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
15044; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15045; GFX90A-NEXT:    ;;#ASMSTART
15046; GFX90A-NEXT:    ; use s[8:9]
15047; GFX90A-NEXT:    ;;#ASMEND
15048; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15049;
15050; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_1_1_1:
15051; GFX940:       ; %bb.0:
15052; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15053; GFX940-NEXT:    ;;#ASMSTART
15054; GFX940-NEXT:    ; def s[0:1]
15055; GFX940-NEXT:    ;;#ASMEND
15056; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
15057; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15058; GFX940-NEXT:    ;;#ASMSTART
15059; GFX940-NEXT:    ; use s[8:9]
15060; GFX940-NEXT:    ;;#ASMEND
15061; GFX940-NEXT:    s_setpc_b64 s[30:31]
15062  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15063  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
15064  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15065  ret void
15066}
15067
; Scalar-register shuffle of a single <4 x i16> inline-asm value ("=s" output)
; with mask <4, 1, 1, 1>. Mask index 4 addresses element 0 of the second
; shuffle operand, which is poison in this function, so lane 0 carries no
; defined value; lanes 1-3 are %vec0[1].
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; All three RUN configurations share one set of assertions here. They are
; generated (see the file header); regenerate them with the update script
; rather than editing them by hand.
15068define void @s_shuffle_v4i16_v4i16__4_1_1_1() {
15069; GFX9-LABEL: s_shuffle_v4i16_v4i16__4_1_1_1:
15070; GFX9:       ; %bb.0:
15071; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15072; GFX9-NEXT:    ;;#ASMSTART
15073; GFX9-NEXT:    ; def s[8:9]
15074; GFX9-NEXT:    ;;#ASMEND
15075; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
15076; GFX9-NEXT:    ;;#ASMSTART
15077; GFX9-NEXT:    ; use s[8:9]
15078; GFX9-NEXT:    ;;#ASMEND
15079; GFX9-NEXT:    s_setpc_b64 s[30:31]
15080  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15081  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
15082  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15083  ret void
15084}
15085
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <5, 1, 1, 1>. Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so the result is <%vec1[1], %vec0[1], %vec0[1], %vec0[1]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
15086define void @s_shuffle_v4i16_v4i16__5_1_1_1() {
15087; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_1_1_1:
15088; GFX900:       ; %bb.0:
15089; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15090; GFX900-NEXT:    ;;#ASMSTART
15091; GFX900-NEXT:    ; def s[4:5]
15092; GFX900-NEXT:    ;;#ASMEND
15093; GFX900-NEXT:    ;;#ASMSTART
15094; GFX900-NEXT:    ; def s[6:7]
15095; GFX900-NEXT:    ;;#ASMEND
15096; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s6, s4
15097; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15098; GFX900-NEXT:    ;;#ASMSTART
15099; GFX900-NEXT:    ; use s[8:9]
15100; GFX900-NEXT:    ;;#ASMEND
15101; GFX900-NEXT:    s_setpc_b64 s[30:31]
15102;
15103; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_1_1_1:
15104; GFX90A:       ; %bb.0:
15105; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15106; GFX90A-NEXT:    ;;#ASMSTART
15107; GFX90A-NEXT:    ; def s[4:5]
15108; GFX90A-NEXT:    ;;#ASMEND
15109; GFX90A-NEXT:    ;;#ASMSTART
15110; GFX90A-NEXT:    ; def s[6:7]
15111; GFX90A-NEXT:    ;;#ASMEND
15112; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s6, s4
15113; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15114; GFX90A-NEXT:    ;;#ASMSTART
15115; GFX90A-NEXT:    ; use s[8:9]
15116; GFX90A-NEXT:    ;;#ASMEND
15117; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15118;
15119; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_1_1_1:
15120; GFX940:       ; %bb.0:
15121; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15122; GFX940-NEXT:    ;;#ASMSTART
15123; GFX940-NEXT:    ; def s[0:1]
15124; GFX940-NEXT:    ;;#ASMEND
15125; GFX940-NEXT:    ;;#ASMSTART
15126; GFX940-NEXT:    ; def s[2:3]
15127; GFX940-NEXT:    ;;#ASMEND
15128; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s2, s0
15129; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15130; GFX940-NEXT:    ;;#ASMSTART
15131; GFX940-NEXT:    ; use s[8:9]
15132; GFX940-NEXT:    ;;#ASMEND
15133; GFX940-NEXT:    s_setpc_b64 s[30:31]
15134  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15135  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15136  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
15137  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15138  ret void
15139}
15140
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <6, 1, 1, 1>. Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so the result is <%vec1[2], %vec0[1], %vec0[1], %vec0[1]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
15141define void @s_shuffle_v4i16_v4i16__6_1_1_1() {
15142; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_1_1_1:
15143; GFX900:       ; %bb.0:
15144; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15145; GFX900-NEXT:    ;;#ASMSTART
15146; GFX900-NEXT:    ; def s[4:5]
15147; GFX900-NEXT:    ;;#ASMEND
15148; GFX900-NEXT:    ;;#ASMSTART
15149; GFX900-NEXT:    ; def s[6:7]
15150; GFX900-NEXT:    ;;#ASMEND
15151; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
15152; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15153; GFX900-NEXT:    ;;#ASMSTART
15154; GFX900-NEXT:    ; use s[8:9]
15155; GFX900-NEXT:    ;;#ASMEND
15156; GFX900-NEXT:    s_setpc_b64 s[30:31]
15157;
15158; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_1_1_1:
15159; GFX90A:       ; %bb.0:
15160; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15161; GFX90A-NEXT:    ;;#ASMSTART
15162; GFX90A-NEXT:    ; def s[4:5]
15163; GFX90A-NEXT:    ;;#ASMEND
15164; GFX90A-NEXT:    ;;#ASMSTART
15165; GFX90A-NEXT:    ; def s[6:7]
15166; GFX90A-NEXT:    ;;#ASMEND
15167; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
15168; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15169; GFX90A-NEXT:    ;;#ASMSTART
15170; GFX90A-NEXT:    ; use s[8:9]
15171; GFX90A-NEXT:    ;;#ASMEND
15172; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15173;
15174; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_1_1_1:
15175; GFX940:       ; %bb.0:
15176; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15177; GFX940-NEXT:    ;;#ASMSTART
15178; GFX940-NEXT:    ; def s[0:1]
15179; GFX940-NEXT:    ;;#ASMEND
15180; GFX940-NEXT:    ;;#ASMSTART
15181; GFX940-NEXT:    ; def s[2:3]
15182; GFX940-NEXT:    ;;#ASMEND
15183; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
15184; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15185; GFX940-NEXT:    ;;#ASMSTART
15186; GFX940-NEXT:    ; use s[8:9]
15187; GFX940-NEXT:    ;;#ASMEND
15188; GFX940-NEXT:    s_setpc_b64 s[30:31]
15189  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15190  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15191  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 1, i32 1, i32 1>
15192  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15193  ret void
15194}
15195
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <7, 1, 1, 1>. Mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so the result is <%vec1[3], %vec0[1], %vec0[1], %vec0[1]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
15196define void @s_shuffle_v4i16_v4i16__7_1_1_1() {
15197; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_1_1:
15198; GFX900:       ; %bb.0:
15199; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15200; GFX900-NEXT:    ;;#ASMSTART
15201; GFX900-NEXT:    ; def s[4:5]
15202; GFX900-NEXT:    ;;#ASMEND
15203; GFX900-NEXT:    ;;#ASMSTART
15204; GFX900-NEXT:    ; def s[6:7]
15205; GFX900-NEXT:    ;;#ASMEND
15206; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
15207; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15208; GFX900-NEXT:    ;;#ASMSTART
15209; GFX900-NEXT:    ; use s[8:9]
15210; GFX900-NEXT:    ;;#ASMEND
15211; GFX900-NEXT:    s_setpc_b64 s[30:31]
15212;
15213; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_1_1:
15214; GFX90A:       ; %bb.0:
15215; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15216; GFX90A-NEXT:    ;;#ASMSTART
15217; GFX90A-NEXT:    ; def s[4:5]
15218; GFX90A-NEXT:    ;;#ASMEND
15219; GFX90A-NEXT:    ;;#ASMSTART
15220; GFX90A-NEXT:    ; def s[6:7]
15221; GFX90A-NEXT:    ;;#ASMEND
15222; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
15223; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15224; GFX90A-NEXT:    ;;#ASMSTART
15225; GFX90A-NEXT:    ; use s[8:9]
15226; GFX90A-NEXT:    ;;#ASMEND
15227; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15228;
15229; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_1_1:
15230; GFX940:       ; %bb.0:
15231; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15232; GFX940-NEXT:    ;;#ASMSTART
15233; GFX940-NEXT:    ; def s[0:1]
15234; GFX940-NEXT:    ;;#ASMEND
15235; GFX940-NEXT:    ;;#ASMSTART
15236; GFX940-NEXT:    ; def s[2:3]
15237; GFX940-NEXT:    ;;#ASMEND
15238; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
15239; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15240; GFX940-NEXT:    ;;#ASMSTART
15241; GFX940-NEXT:    ; use s[8:9]
15242; GFX940-NEXT:    ;;#ASMEND
15243; GFX940-NEXT:    s_setpc_b64 s[30:31]
15244  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15245  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15246  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 1, i32 1>
15247  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15248  ret void
15249}
15250
; Scalar-register shuffle of two <4 x i16> inline-asm values ("=s" outputs)
; with mask <7, poison, 1, 1>. Mask indices 0-3 select from %vec0 and 4-7
; from %vec1, so the result is <%vec1[3], poison, %vec0[1], %vec0[1]>.
; The "{s[8:9]}" constraint on the use pins the shuffled value to s[8:9].
; The assertion lines below are generated (see the file header); regenerate
; them with the update script rather than editing them by hand.
15251define void @s_shuffle_v4i16_v4i16__7_u_1_1() {
15252; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_1_1:
15253; GFX900:       ; %bb.0:
15254; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15255; GFX900-NEXT:    ;;#ASMSTART
15256; GFX900-NEXT:    ; def s[4:5]
15257; GFX900-NEXT:    ;;#ASMEND
15258; GFX900-NEXT:    ;;#ASMSTART
15259; GFX900-NEXT:    ; def s[6:7]
15260; GFX900-NEXT:    ;;#ASMEND
15261; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
15262; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15263; GFX900-NEXT:    ;;#ASMSTART
15264; GFX900-NEXT:    ; use s[8:9]
15265; GFX900-NEXT:    ;;#ASMEND
15266; GFX900-NEXT:    s_setpc_b64 s[30:31]
15267;
15268; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_1_1:
15269; GFX90A:       ; %bb.0:
15270; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15271; GFX90A-NEXT:    ;;#ASMSTART
15272; GFX90A-NEXT:    ; def s[4:5]
15273; GFX90A-NEXT:    ;;#ASMEND
15274; GFX90A-NEXT:    ;;#ASMSTART
15275; GFX90A-NEXT:    ; def s[6:7]
15276; GFX90A-NEXT:    ;;#ASMEND
15277; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
15278; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15279; GFX90A-NEXT:    ;;#ASMSTART
15280; GFX90A-NEXT:    ; use s[8:9]
15281; GFX90A-NEXT:    ;;#ASMEND
15282; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15283;
15284; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_1_1:
15285; GFX940:       ; %bb.0:
15286; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15287; GFX940-NEXT:    ;;#ASMSTART
15288; GFX940-NEXT:    ; def s[0:1]
15289; GFX940-NEXT:    ;;#ASMEND
15290; GFX940-NEXT:    ;;#ASMSTART
15291; GFX940-NEXT:    ; def s[2:3]
15292; GFX940-NEXT:    ;;#ASMEND
15293; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
15294; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15295; GFX940-NEXT:    ;;#ASMSTART
15296; GFX940-NEXT:    ; use s[8:9]
15297; GFX940-NEXT:    ;;#ASMEND
15298; GFX940-NEXT:    s_setpc_b64 s[30:31]
15299  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15300  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15301  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 1, i32 1>
15302  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15303  ret void
15304}
15305
; Scalar-register ("=s") shuffle case with mask <7, 0, 1, 1>: result lane 0 is
; element 3 of %vec1, lane 1 is element 0 of %vec0, and lanes 2-3 both repeat
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
15306define void @s_shuffle_v4i16_v4i16__7_0_1_1() {
15307; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_1_1:
15308; GFX900:       ; %bb.0:
15309; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15310; GFX900-NEXT:    ;;#ASMSTART
15311; GFX900-NEXT:    ; def s[4:5]
15312; GFX900-NEXT:    ;;#ASMEND
15313; GFX900-NEXT:    ;;#ASMSTART
15314; GFX900-NEXT:    ; def s[6:7]
15315; GFX900-NEXT:    ;;#ASMEND
15316; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
15317; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
15318; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15319; GFX900-NEXT:    ;;#ASMSTART
15320; GFX900-NEXT:    ; use s[8:9]
15321; GFX900-NEXT:    ;;#ASMEND
15322; GFX900-NEXT:    s_setpc_b64 s[30:31]
15323;
15324; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_1_1:
15325; GFX90A:       ; %bb.0:
15326; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15327; GFX90A-NEXT:    ;;#ASMSTART
15328; GFX90A-NEXT:    ; def s[4:5]
15329; GFX90A-NEXT:    ;;#ASMEND
15330; GFX90A-NEXT:    ;;#ASMSTART
15331; GFX90A-NEXT:    ; def s[6:7]
15332; GFX90A-NEXT:    ;;#ASMEND
15333; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
15334; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
15335; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15336; GFX90A-NEXT:    ;;#ASMSTART
15337; GFX90A-NEXT:    ; use s[8:9]
15338; GFX90A-NEXT:    ;;#ASMEND
15339; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15340;
15341; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_1_1:
15342; GFX940:       ; %bb.0:
15343; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15344; GFX940-NEXT:    ;;#ASMSTART
15345; GFX940-NEXT:    ; def s[0:1]
15346; GFX940-NEXT:    ;;#ASMEND
15347; GFX940-NEXT:    ;;#ASMSTART
15348; GFX940-NEXT:    ; def s[2:3]
15349; GFX940-NEXT:    ;;#ASMEND
15350; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
15351; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
15352; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15353; GFX940-NEXT:    ;;#ASMSTART
15354; GFX940-NEXT:    ; use s[8:9]
15355; GFX940-NEXT:    ;;#ASMEND
15356; GFX940-NEXT:    s_setpc_b64 s[30:31]
15357  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15358  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15359  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 1, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15360  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15361  ret void
15362}
15363
; Scalar-register ("=s") shuffle case with mask <7, 2, 1, 1>: result lane 0 is
; element 3 of %vec1, lane 1 is element 2 of %vec0, and lanes 2-3 both repeat
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
; NOTE(review): unlike the neighbouring cases, the expected output here emits
; the s[6:7] (s[2:3] on the third target) def first and shifts it before the
; other def appears; the two def asm calls are not marked sideeffect.
15364define void @s_shuffle_v4i16_v4i16__7_2_1_1() {
15365; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_1_1:
15366; GFX900:       ; %bb.0:
15367; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15368; GFX900-NEXT:    ;;#ASMSTART
15369; GFX900-NEXT:    ; def s[6:7]
15370; GFX900-NEXT:    ;;#ASMEND
15371; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
15372; GFX900-NEXT:    ;;#ASMSTART
15373; GFX900-NEXT:    ; def s[4:5]
15374; GFX900-NEXT:    ;;#ASMEND
15375; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
15376; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15377; GFX900-NEXT:    ;;#ASMSTART
15378; GFX900-NEXT:    ; use s[8:9]
15379; GFX900-NEXT:    ;;#ASMEND
15380; GFX900-NEXT:    s_setpc_b64 s[30:31]
15381;
15382; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_1_1:
15383; GFX90A:       ; %bb.0:
15384; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15385; GFX90A-NEXT:    ;;#ASMSTART
15386; GFX90A-NEXT:    ; def s[6:7]
15387; GFX90A-NEXT:    ;;#ASMEND
15388; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
15389; GFX90A-NEXT:    ;;#ASMSTART
15390; GFX90A-NEXT:    ; def s[4:5]
15391; GFX90A-NEXT:    ;;#ASMEND
15392; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
15393; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15394; GFX90A-NEXT:    ;;#ASMSTART
15395; GFX90A-NEXT:    ; use s[8:9]
15396; GFX90A-NEXT:    ;;#ASMEND
15397; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15398;
15399; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_1_1:
15400; GFX940:       ; %bb.0:
15401; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15402; GFX940-NEXT:    ;;#ASMSTART
15403; GFX940-NEXT:    ; def s[2:3]
15404; GFX940-NEXT:    ;;#ASMEND
15405; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
15406; GFX940-NEXT:    ;;#ASMSTART
15407; GFX940-NEXT:    ; def s[0:1]
15408; GFX940-NEXT:    ;;#ASMEND
15409; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
15410; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15411; GFX940-NEXT:    ;;#ASMSTART
15412; GFX940-NEXT:    ; use s[8:9]
15413; GFX940-NEXT:    ;;#ASMEND
15414; GFX940-NEXT:    s_setpc_b64 s[30:31]
15415  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15416  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15417  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 1, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15418  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15419  ret void
15420}
15421
; Scalar-register ("=s") shuffle case with mask <7, 3, 1, 1>: result lane 0 is
; element 3 of %vec1, lane 1 is element 3 of %vec0, and lanes 2-3 both repeat
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
15422define void @s_shuffle_v4i16_v4i16__7_3_1_1() {
15423; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_1_1:
15424; GFX900:       ; %bb.0:
15425; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15426; GFX900-NEXT:    ;;#ASMSTART
15427; GFX900-NEXT:    ; def s[4:5]
15428; GFX900-NEXT:    ;;#ASMEND
15429; GFX900-NEXT:    ;;#ASMSTART
15430; GFX900-NEXT:    ; def s[6:7]
15431; GFX900-NEXT:    ;;#ASMEND
15432; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
15433; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15434; GFX900-NEXT:    ;;#ASMSTART
15435; GFX900-NEXT:    ; use s[8:9]
15436; GFX900-NEXT:    ;;#ASMEND
15437; GFX900-NEXT:    s_setpc_b64 s[30:31]
15438;
15439; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_1_1:
15440; GFX90A:       ; %bb.0:
15441; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15442; GFX90A-NEXT:    ;;#ASMSTART
15443; GFX90A-NEXT:    ; def s[4:5]
15444; GFX90A-NEXT:    ;;#ASMEND
15445; GFX90A-NEXT:    ;;#ASMSTART
15446; GFX90A-NEXT:    ; def s[6:7]
15447; GFX90A-NEXT:    ;;#ASMEND
15448; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
15449; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15450; GFX90A-NEXT:    ;;#ASMSTART
15451; GFX90A-NEXT:    ; use s[8:9]
15452; GFX90A-NEXT:    ;;#ASMEND
15453; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15454;
15455; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_1_1:
15456; GFX940:       ; %bb.0:
15457; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15458; GFX940-NEXT:    ;;#ASMSTART
15459; GFX940-NEXT:    ; def s[0:1]
15460; GFX940-NEXT:    ;;#ASMEND
15461; GFX940-NEXT:    ;;#ASMSTART
15462; GFX940-NEXT:    ; def s[2:3]
15463; GFX940-NEXT:    ;;#ASMEND
15464; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
15465; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15466; GFX940-NEXT:    ;;#ASMSTART
15467; GFX940-NEXT:    ; use s[8:9]
15468; GFX940-NEXT:    ;;#ASMEND
15469; GFX940-NEXT:    s_setpc_b64 s[30:31]
15470  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15471  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15472  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 1, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15473  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15474  ret void
15475}
15476
; Scalar-register ("=s") shuffle case with mask <7, 4, 1, 1>: result lane 0 is
; element 3 of %vec1, lane 1 is element 0 of %vec1, and lanes 2-3 both repeat
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
15477define void @s_shuffle_v4i16_v4i16__7_4_1_1() {
15478; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_1_1:
15479; GFX900:       ; %bb.0:
15480; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15481; GFX900-NEXT:    ;;#ASMSTART
15482; GFX900-NEXT:    ; def s[4:5]
15483; GFX900-NEXT:    ;;#ASMEND
15484; GFX900-NEXT:    ;;#ASMSTART
15485; GFX900-NEXT:    ; def s[6:7]
15486; GFX900-NEXT:    ;;#ASMEND
15487; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
15488; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
15489; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15490; GFX900-NEXT:    ;;#ASMSTART
15491; GFX900-NEXT:    ; use s[8:9]
15492; GFX900-NEXT:    ;;#ASMEND
15493; GFX900-NEXT:    s_setpc_b64 s[30:31]
15494;
15495; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_1_1:
15496; GFX90A:       ; %bb.0:
15497; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15498; GFX90A-NEXT:    ;;#ASMSTART
15499; GFX90A-NEXT:    ; def s[4:5]
15500; GFX90A-NEXT:    ;;#ASMEND
15501; GFX90A-NEXT:    ;;#ASMSTART
15502; GFX90A-NEXT:    ; def s[6:7]
15503; GFX90A-NEXT:    ;;#ASMEND
15504; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
15505; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
15506; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15507; GFX90A-NEXT:    ;;#ASMSTART
15508; GFX90A-NEXT:    ; use s[8:9]
15509; GFX90A-NEXT:    ;;#ASMEND
15510; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15511;
15512; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_1_1:
15513; GFX940:       ; %bb.0:
15514; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15515; GFX940-NEXT:    ;;#ASMSTART
15516; GFX940-NEXT:    ; def s[0:1]
15517; GFX940-NEXT:    ;;#ASMEND
15518; GFX940-NEXT:    ;;#ASMSTART
15519; GFX940-NEXT:    ; def s[2:3]
15520; GFX940-NEXT:    ;;#ASMEND
15521; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
15522; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s2
15523; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15524; GFX940-NEXT:    ;;#ASMSTART
15525; GFX940-NEXT:    ; use s[8:9]
15526; GFX940-NEXT:    ;;#ASMEND
15527; GFX940-NEXT:    s_setpc_b64 s[30:31]
15528  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15529  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15530  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 1, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15531  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15532  ret void
15533}
15534
; Scalar-register ("=s") shuffle case with mask <7, 5, 1, 1>: result lane 0 is
; element 3 of %vec1, lane 1 is element 1 of %vec1, and lanes 2-3 both repeat
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
15535define void @s_shuffle_v4i16_v4i16__7_5_1_1() {
15536; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_1_1:
15537; GFX900:       ; %bb.0:
15538; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15539; GFX900-NEXT:    ;;#ASMSTART
15540; GFX900-NEXT:    ; def s[4:5]
15541; GFX900-NEXT:    ;;#ASMEND
15542; GFX900-NEXT:    ;;#ASMSTART
15543; GFX900-NEXT:    ; def s[6:7]
15544; GFX900-NEXT:    ;;#ASMEND
15545; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
15546; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15547; GFX900-NEXT:    ;;#ASMSTART
15548; GFX900-NEXT:    ; use s[8:9]
15549; GFX900-NEXT:    ;;#ASMEND
15550; GFX900-NEXT:    s_setpc_b64 s[30:31]
15551;
15552; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_1_1:
15553; GFX90A:       ; %bb.0:
15554; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15555; GFX90A-NEXT:    ;;#ASMSTART
15556; GFX90A-NEXT:    ; def s[4:5]
15557; GFX90A-NEXT:    ;;#ASMEND
15558; GFX90A-NEXT:    ;;#ASMSTART
15559; GFX90A-NEXT:    ; def s[6:7]
15560; GFX90A-NEXT:    ;;#ASMEND
15561; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
15562; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15563; GFX90A-NEXT:    ;;#ASMSTART
15564; GFX90A-NEXT:    ; use s[8:9]
15565; GFX90A-NEXT:    ;;#ASMEND
15566; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15567;
15568; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_1_1:
15569; GFX940:       ; %bb.0:
15570; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15571; GFX940-NEXT:    ;;#ASMSTART
15572; GFX940-NEXT:    ; def s[0:1]
15573; GFX940-NEXT:    ;;#ASMEND
15574; GFX940-NEXT:    ;;#ASMSTART
15575; GFX940-NEXT:    ; def s[2:3]
15576; GFX940-NEXT:    ;;#ASMEND
15577; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s2
15578; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15579; GFX940-NEXT:    ;;#ASMSTART
15580; GFX940-NEXT:    ; use s[8:9]
15581; GFX940-NEXT:    ;;#ASMEND
15582; GFX940-NEXT:    s_setpc_b64 s[30:31]
15583  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15584  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15585  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 1, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15586  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15587  ret void
15588}
15589
; Scalar-register ("=s") shuffle case with mask <7, 6, 1, 1>: result lane 0 is
; element 3 of %vec1, lane 1 is element 2 of %vec1, and lanes 2-3 both repeat
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
15590define void @s_shuffle_v4i16_v4i16__7_6_1_1() {
15591; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_1_1:
15592; GFX900:       ; %bb.0:
15593; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15594; GFX900-NEXT:    ;;#ASMSTART
15595; GFX900-NEXT:    ; def s[4:5]
15596; GFX900-NEXT:    ;;#ASMEND
15597; GFX900-NEXT:    ;;#ASMSTART
15598; GFX900-NEXT:    ; def s[6:7]
15599; GFX900-NEXT:    ;;#ASMEND
15600; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
15601; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
15602; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15603; GFX900-NEXT:    ;;#ASMSTART
15604; GFX900-NEXT:    ; use s[8:9]
15605; GFX900-NEXT:    ;;#ASMEND
15606; GFX900-NEXT:    s_setpc_b64 s[30:31]
15607;
15608; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_1_1:
15609; GFX90A:       ; %bb.0:
15610; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15611; GFX90A-NEXT:    ;;#ASMSTART
15612; GFX90A-NEXT:    ; def s[4:5]
15613; GFX90A-NEXT:    ;;#ASMEND
15614; GFX90A-NEXT:    ;;#ASMSTART
15615; GFX90A-NEXT:    ; def s[6:7]
15616; GFX90A-NEXT:    ;;#ASMEND
15617; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
15618; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
15619; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15620; GFX90A-NEXT:    ;;#ASMSTART
15621; GFX90A-NEXT:    ; use s[8:9]
15622; GFX90A-NEXT:    ;;#ASMEND
15623; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15624;
15625; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_1_1:
15626; GFX940:       ; %bb.0:
15627; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15628; GFX940-NEXT:    ;;#ASMSTART
15629; GFX940-NEXT:    ; def s[0:1]
15630; GFX940-NEXT:    ;;#ASMEND
15631; GFX940-NEXT:    ;;#ASMSTART
15632; GFX940-NEXT:    ; def s[2:3]
15633; GFX940-NEXT:    ;;#ASMEND
15634; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
15635; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s3
15636; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15637; GFX940-NEXT:    ;;#ASMSTART
15638; GFX940-NEXT:    ; use s[8:9]
15639; GFX940-NEXT:    ;;#ASMEND
15640; GFX940-NEXT:    s_setpc_b64 s[30:31]
15641  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15642  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15643  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 1, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15644  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15645  ret void
15646}
15647
; Scalar-register ("=s") shuffle case with mask <7, 7, 1, 1>: result lanes 0-1
; both repeat element 3 of %vec1 and lanes 2-3 both repeat element 1 of %vec0.
; The check lines are autogenerated; regenerate them with the update script
; instead of editing them by hand.
15648define void @s_shuffle_v4i16_v4i16__7_7_1_1() {
15649; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_1:
15650; GFX900:       ; %bb.0:
15651; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15652; GFX900-NEXT:    ;;#ASMSTART
15653; GFX900-NEXT:    ; def s[4:5]
15654; GFX900-NEXT:    ;;#ASMEND
15655; GFX900-NEXT:    ;;#ASMSTART
15656; GFX900-NEXT:    ; def s[6:7]
15657; GFX900-NEXT:    ;;#ASMEND
15658; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15659; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15660; GFX900-NEXT:    ;;#ASMSTART
15661; GFX900-NEXT:    ; use s[8:9]
15662; GFX900-NEXT:    ;;#ASMEND
15663; GFX900-NEXT:    s_setpc_b64 s[30:31]
15664;
15665; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_1:
15666; GFX90A:       ; %bb.0:
15667; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15668; GFX90A-NEXT:    ;;#ASMSTART
15669; GFX90A-NEXT:    ; def s[4:5]
15670; GFX90A-NEXT:    ;;#ASMEND
15671; GFX90A-NEXT:    ;;#ASMSTART
15672; GFX90A-NEXT:    ; def s[6:7]
15673; GFX90A-NEXT:    ;;#ASMEND
15674; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
15675; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15676; GFX90A-NEXT:    ;;#ASMSTART
15677; GFX90A-NEXT:    ; use s[8:9]
15678; GFX90A-NEXT:    ;;#ASMEND
15679; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15680;
15681; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_1:
15682; GFX940:       ; %bb.0:
15683; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15684; GFX940-NEXT:    ;;#ASMSTART
15685; GFX940-NEXT:    ; def s[0:1]
15686; GFX940-NEXT:    ;;#ASMEND
15687; GFX940-NEXT:    ;;#ASMSTART
15688; GFX940-NEXT:    ; def s[2:3]
15689; GFX940-NEXT:    ;;#ASMEND
15690; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
15691; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
15692; GFX940-NEXT:    ;;#ASMSTART
15693; GFX940-NEXT:    ; use s[8:9]
15694; GFX940-NEXT:    ;;#ASMEND
15695; GFX940-NEXT:    s_setpc_b64 s[30:31]
15696  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15697  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15698  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15699  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15700  ret void
15701}
15702
; Scalar-register ("=s") shuffle case with mask <7, 7, poison, 1>: result lanes
; 0-1 both repeat element 3 of %vec1, lane 2 is unconstrained (poison), and
; lane 3 is element 1 of %vec0. The expected output fills s9 with a plain
; s_mov_b32 instead of a pack instruction. The check lines are autogenerated;
; regenerate them with the update script instead of editing them by hand.
15703define void @s_shuffle_v4i16_v4i16__7_7_u_1() {
15704; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_1:
15705; GFX900:       ; %bb.0:
15706; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15707; GFX900-NEXT:    ;;#ASMSTART
15708; GFX900-NEXT:    ; def s[4:5]
15709; GFX900-NEXT:    ;;#ASMEND
15710; GFX900-NEXT:    ;;#ASMSTART
15711; GFX900-NEXT:    ; def s[6:7]
15712; GFX900-NEXT:    ;;#ASMEND
15713; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15714; GFX900-NEXT:    s_mov_b32 s9, s4
15715; GFX900-NEXT:    ;;#ASMSTART
15716; GFX900-NEXT:    ; use s[8:9]
15717; GFX900-NEXT:    ;;#ASMEND
15718; GFX900-NEXT:    s_setpc_b64 s[30:31]
15719;
15720; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_1:
15721; GFX90A:       ; %bb.0:
15722; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15723; GFX90A-NEXT:    ;;#ASMSTART
15724; GFX90A-NEXT:    ; def s[4:5]
15725; GFX90A-NEXT:    ;;#ASMEND
15726; GFX90A-NEXT:    ;;#ASMSTART
15727; GFX90A-NEXT:    ; def s[6:7]
15728; GFX90A-NEXT:    ;;#ASMEND
15729; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15730; GFX90A-NEXT:    s_mov_b32 s9, s4
15731; GFX90A-NEXT:    ;;#ASMSTART
15732; GFX90A-NEXT:    ; use s[8:9]
15733; GFX90A-NEXT:    ;;#ASMEND
15734; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15735;
15736; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_1:
15737; GFX940:       ; %bb.0:
15738; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15739; GFX940-NEXT:    ;;#ASMSTART
15740; GFX940-NEXT:    ; def s[0:1]
15741; GFX940-NEXT:    ;;#ASMEND
15742; GFX940-NEXT:    ;;#ASMSTART
15743; GFX940-NEXT:    ; def s[2:3]
15744; GFX940-NEXT:    ;;#ASMEND
15745; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
15746; GFX940-NEXT:    s_mov_b32 s9, s0
15747; GFX940-NEXT:    ;;#ASMSTART
15748; GFX940-NEXT:    ; use s[8:9]
15749; GFX940-NEXT:    ;;#ASMEND
15750; GFX940-NEXT:    s_setpc_b64 s[30:31]
15751  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15752  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15753  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15754  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15755  ret void
15756}
15757
; Scalar-register ("=s") shuffle case with mask <7, 7, 0, 1>: result lanes 0-1
; both repeat element 3 of %vec1, and lanes 2-3 are elements 0 and 1 of %vec0
; in their original order. The expected output fills s9 with a plain s_mov_b32
; instead of a pack instruction. The check lines are autogenerated; regenerate
; them with the update script instead of editing them by hand.
15758define void @s_shuffle_v4i16_v4i16__7_7_0_1() {
15759; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_1:
15760; GFX900:       ; %bb.0:
15761; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15762; GFX900-NEXT:    ;;#ASMSTART
15763; GFX900-NEXT:    ; def s[4:5]
15764; GFX900-NEXT:    ;;#ASMEND
15765; GFX900-NEXT:    ;;#ASMSTART
15766; GFX900-NEXT:    ; def s[6:7]
15767; GFX900-NEXT:    ;;#ASMEND
15768; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15769; GFX900-NEXT:    s_mov_b32 s9, s4
15770; GFX900-NEXT:    ;;#ASMSTART
15771; GFX900-NEXT:    ; use s[8:9]
15772; GFX900-NEXT:    ;;#ASMEND
15773; GFX900-NEXT:    s_setpc_b64 s[30:31]
15774;
15775; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_1:
15776; GFX90A:       ; %bb.0:
15777; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15778; GFX90A-NEXT:    ;;#ASMSTART
15779; GFX90A-NEXT:    ; def s[4:5]
15780; GFX90A-NEXT:    ;;#ASMEND
15781; GFX90A-NEXT:    ;;#ASMSTART
15782; GFX90A-NEXT:    ; def s[6:7]
15783; GFX90A-NEXT:    ;;#ASMEND
15784; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15785; GFX90A-NEXT:    s_mov_b32 s9, s4
15786; GFX90A-NEXT:    ;;#ASMSTART
15787; GFX90A-NEXT:    ; use s[8:9]
15788; GFX90A-NEXT:    ;;#ASMEND
15789; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15790;
15791; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_1:
15792; GFX940:       ; %bb.0:
15793; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15794; GFX940-NEXT:    ;;#ASMSTART
15795; GFX940-NEXT:    ; def s[0:1]
15796; GFX940-NEXT:    ;;#ASMEND
15797; GFX940-NEXT:    ;;#ASMSTART
15798; GFX940-NEXT:    ; def s[2:3]
15799; GFX940-NEXT:    ;;#ASMEND
15800; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
15801; GFX940-NEXT:    s_mov_b32 s9, s0
15802; GFX940-NEXT:    ;;#ASMSTART
15803; GFX940-NEXT:    ; use s[8:9]
15804; GFX940-NEXT:    ;;#ASMEND
15805; GFX940-NEXT:    s_setpc_b64 s[30:31]
15806  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15807  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15808  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15809  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15810  ret void
15811}
15812
; Scalar-register ("=s") shuffle case with mask <7, 7, 2, 1>: result lanes 0-1
; both repeat element 3 of %vec1, lane 2 is element 2 of %vec0, and lane 3 is
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
15813define void @s_shuffle_v4i16_v4i16__7_7_2_1() {
15814; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_1:
15815; GFX900:       ; %bb.0:
15816; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15817; GFX900-NEXT:    ;;#ASMSTART
15818; GFX900-NEXT:    ; def s[4:5]
15819; GFX900-NEXT:    ;;#ASMEND
15820; GFX900-NEXT:    ;;#ASMSTART
15821; GFX900-NEXT:    ; def s[6:7]
15822; GFX900-NEXT:    ;;#ASMEND
15823; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s5, s4
15824; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15825; GFX900-NEXT:    ;;#ASMSTART
15826; GFX900-NEXT:    ; use s[8:9]
15827; GFX900-NEXT:    ;;#ASMEND
15828; GFX900-NEXT:    s_setpc_b64 s[30:31]
15829;
15830; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_1:
15831; GFX90A:       ; %bb.0:
15832; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15833; GFX90A-NEXT:    ;;#ASMSTART
15834; GFX90A-NEXT:    ; def s[4:5]
15835; GFX90A-NEXT:    ;;#ASMEND
15836; GFX90A-NEXT:    ;;#ASMSTART
15837; GFX90A-NEXT:    ; def s[6:7]
15838; GFX90A-NEXT:    ;;#ASMEND
15839; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s5, s4
15840; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15841; GFX90A-NEXT:    ;;#ASMSTART
15842; GFX90A-NEXT:    ; use s[8:9]
15843; GFX90A-NEXT:    ;;#ASMEND
15844; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15845;
15846; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_1:
15847; GFX940:       ; %bb.0:
15848; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15849; GFX940-NEXT:    ;;#ASMSTART
15850; GFX940-NEXT:    ; def s[0:1]
15851; GFX940-NEXT:    ;;#ASMEND
15852; GFX940-NEXT:    ;;#ASMSTART
15853; GFX940-NEXT:    ; def s[2:3]
15854; GFX940-NEXT:    ;;#ASMEND
15855; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s1, s0
15856; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
15857; GFX940-NEXT:    ;;#ASMSTART
15858; GFX940-NEXT:    ; use s[8:9]
15859; GFX940-NEXT:    ;;#ASMEND
15860; GFX940-NEXT:    s_setpc_b64 s[30:31]
15861  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15862  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15863  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15864  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15865  ret void
15866}
15867
; Scalar-register ("=s") shuffle case with mask <7, 7, 3, 1>: result lanes 0-1
; both repeat element 3 of %vec1, lane 2 is element 3 of %vec0, and lane 3 is
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
15868define void @s_shuffle_v4i16_v4i16__7_7_3_1() {
15869; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_1:
15870; GFX900:       ; %bb.0:
15871; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15872; GFX900-NEXT:    ;;#ASMSTART
15873; GFX900-NEXT:    ; def s[4:5]
15874; GFX900-NEXT:    ;;#ASMEND
15875; GFX900-NEXT:    ;;#ASMSTART
15876; GFX900-NEXT:    ; def s[6:7]
15877; GFX900-NEXT:    ;;#ASMEND
15878; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s4
15879; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15880; GFX900-NEXT:    ;;#ASMSTART
15881; GFX900-NEXT:    ; use s[8:9]
15882; GFX900-NEXT:    ;;#ASMEND
15883; GFX900-NEXT:    s_setpc_b64 s[30:31]
15884;
15885; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_1:
15886; GFX90A:       ; %bb.0:
15887; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15888; GFX90A-NEXT:    ;;#ASMSTART
15889; GFX90A-NEXT:    ; def s[4:5]
15890; GFX90A-NEXT:    ;;#ASMEND
15891; GFX90A-NEXT:    ;;#ASMSTART
15892; GFX90A-NEXT:    ; def s[6:7]
15893; GFX90A-NEXT:    ;;#ASMEND
15894; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s4
15895; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15896; GFX90A-NEXT:    ;;#ASMSTART
15897; GFX90A-NEXT:    ; use s[8:9]
15898; GFX90A-NEXT:    ;;#ASMEND
15899; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15900;
15901; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_1:
15902; GFX940:       ; %bb.0:
15903; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15904; GFX940-NEXT:    ;;#ASMSTART
15905; GFX940-NEXT:    ; def s[0:1]
15906; GFX940-NEXT:    ;;#ASMEND
15907; GFX940-NEXT:    ;;#ASMSTART
15908; GFX940-NEXT:    ; def s[2:3]
15909; GFX940-NEXT:    ;;#ASMEND
15910; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s0
15911; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
15912; GFX940-NEXT:    ;;#ASMSTART
15913; GFX940-NEXT:    ; use s[8:9]
15914; GFX940-NEXT:    ;;#ASMEND
15915; GFX940-NEXT:    s_setpc_b64 s[30:31]
15916  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15917  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15918  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15919  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15920  ret void
15921}
15922
; Scalar-register ("=s") shuffle case with mask <7, 7, 4, 1>: result lanes 0-1
; both repeat element 3 of %vec1, lane 2 is element 0 of %vec1, and lane 3 is
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
15923define void @s_shuffle_v4i16_v4i16__7_7_4_1() {
15924; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_1:
15925; GFX900:       ; %bb.0:
15926; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15927; GFX900-NEXT:    ;;#ASMSTART
15928; GFX900-NEXT:    ; def s[4:5]
15929; GFX900-NEXT:    ;;#ASMEND
15930; GFX900-NEXT:    ;;#ASMSTART
15931; GFX900-NEXT:    ; def s[6:7]
15932; GFX900-NEXT:    ;;#ASMEND
15933; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s6, s4
15934; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15935; GFX900-NEXT:    ;;#ASMSTART
15936; GFX900-NEXT:    ; use s[8:9]
15937; GFX900-NEXT:    ;;#ASMEND
15938; GFX900-NEXT:    s_setpc_b64 s[30:31]
15939;
15940; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_1:
15941; GFX90A:       ; %bb.0:
15942; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15943; GFX90A-NEXT:    ;;#ASMSTART
15944; GFX90A-NEXT:    ; def s[4:5]
15945; GFX90A-NEXT:    ;;#ASMEND
15946; GFX90A-NEXT:    ;;#ASMSTART
15947; GFX90A-NEXT:    ; def s[6:7]
15948; GFX90A-NEXT:    ;;#ASMEND
15949; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s6, s4
15950; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15951; GFX90A-NEXT:    ;;#ASMSTART
15952; GFX90A-NEXT:    ; use s[8:9]
15953; GFX90A-NEXT:    ;;#ASMEND
15954; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15955;
15956; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_1:
15957; GFX940:       ; %bb.0:
15958; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15959; GFX940-NEXT:    ;;#ASMSTART
15960; GFX940-NEXT:    ; def s[0:1]
15961; GFX940-NEXT:    ;;#ASMEND
15962; GFX940-NEXT:    ;;#ASMSTART
15963; GFX940-NEXT:    ; def s[2:3]
15964; GFX940-NEXT:    ;;#ASMEND
15965; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s2, s0
15966; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
15967; GFX940-NEXT:    ;;#ASMSTART
15968; GFX940-NEXT:    ; use s[8:9]
15969; GFX940-NEXT:    ;;#ASMEND
15970; GFX940-NEXT:    s_setpc_b64 s[30:31]
15971  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15972  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15973  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
15974  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
15975  ret void
15976}
15977
; Scalar-register ("=s") shuffle case with mask <7, 7, 5, 1>: result lanes 0-1
; both repeat element 3 of %vec1, lane 2 is element 1 of %vec1, and lane 3 is
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
15978define void @s_shuffle_v4i16_v4i16__7_7_5_1() {
15979; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_1:
15980; GFX900:       ; %bb.0:
15981; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15982; GFX900-NEXT:    ;;#ASMSTART
15983; GFX900-NEXT:    ; def s[4:5]
15984; GFX900-NEXT:    ;;#ASMEND
15985; GFX900-NEXT:    ;;#ASMSTART
15986; GFX900-NEXT:    ; def s[6:7]
15987; GFX900-NEXT:    ;;#ASMEND
15988; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s4
15989; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
15990; GFX900-NEXT:    ;;#ASMSTART
15991; GFX900-NEXT:    ; use s[8:9]
15992; GFX900-NEXT:    ;;#ASMEND
15993; GFX900-NEXT:    s_setpc_b64 s[30:31]
15994;
15995; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_1:
15996; GFX90A:       ; %bb.0:
15997; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15998; GFX90A-NEXT:    ;;#ASMSTART
15999; GFX90A-NEXT:    ; def s[4:5]
16000; GFX90A-NEXT:    ;;#ASMEND
16001; GFX90A-NEXT:    ;;#ASMSTART
16002; GFX90A-NEXT:    ; def s[6:7]
16003; GFX90A-NEXT:    ;;#ASMEND
16004; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s4
16005; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
16006; GFX90A-NEXT:    ;;#ASMSTART
16007; GFX90A-NEXT:    ; use s[8:9]
16008; GFX90A-NEXT:    ;;#ASMEND
16009; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16010;
16011; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_1:
16012; GFX940:       ; %bb.0:
16013; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16014; GFX940-NEXT:    ;;#ASMSTART
16015; GFX940-NEXT:    ; def s[0:1]
16016; GFX940-NEXT:    ;;#ASMEND
16017; GFX940-NEXT:    ;;#ASMSTART
16018; GFX940-NEXT:    ; def s[2:3]
16019; GFX940-NEXT:    ;;#ASMEND
16020; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s0
16021; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
16022; GFX940-NEXT:    ;;#ASMSTART
16023; GFX940-NEXT:    ; use s[8:9]
16024; GFX940-NEXT:    ;;#ASMEND
16025; GFX940-NEXT:    s_setpc_b64 s[30:31]
16026  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16027  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16028  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
16029  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16030  ret void
16031}
16032
; Scalar-register ("=s") shuffle case with mask <7, 7, 6, 1>: result lanes 0-1
; both repeat element 3 of %vec1, lane 2 is element 2 of %vec1, and lane 3 is
; element 1 of %vec0. The check lines are autogenerated; regenerate them with
; the update script instead of editing them by hand.
16033define void @s_shuffle_v4i16_v4i16__7_7_6_1() {
16034; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_1:
16035; GFX900:       ; %bb.0:
16036; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16037; GFX900-NEXT:    ;;#ASMSTART
16038; GFX900-NEXT:    ; def s[4:5]
16039; GFX900-NEXT:    ;;#ASMEND
16040; GFX900-NEXT:    ;;#ASMSTART
16041; GFX900-NEXT:    ; def s[6:7]
16042; GFX900-NEXT:    ;;#ASMEND
16043; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s7, s4
16044; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
16045; GFX900-NEXT:    ;;#ASMSTART
16046; GFX900-NEXT:    ; use s[8:9]
16047; GFX900-NEXT:    ;;#ASMEND
16048; GFX900-NEXT:    s_setpc_b64 s[30:31]
16049;
16050; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_1:
16051; GFX90A:       ; %bb.0:
16052; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16053; GFX90A-NEXT:    ;;#ASMSTART
16054; GFX90A-NEXT:    ; def s[4:5]
16055; GFX90A-NEXT:    ;;#ASMEND
16056; GFX90A-NEXT:    ;;#ASMSTART
16057; GFX90A-NEXT:    ; def s[6:7]
16058; GFX90A-NEXT:    ;;#ASMEND
16059; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s7, s4
16060; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
16061; GFX90A-NEXT:    ;;#ASMSTART
16062; GFX90A-NEXT:    ; use s[8:9]
16063; GFX90A-NEXT:    ;;#ASMEND
16064; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16065;
16066; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_1:
16067; GFX940:       ; %bb.0:
16068; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16069; GFX940-NEXT:    ;;#ASMSTART
16070; GFX940-NEXT:    ; def s[0:1]
16071; GFX940-NEXT:    ;;#ASMEND
16072; GFX940-NEXT:    ;;#ASMSTART
16073; GFX940-NEXT:    ; def s[2:3]
16074; GFX940-NEXT:    ;;#ASMEND
16075; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s3, s0
16076; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
16077; GFX940-NEXT:    ;;#ASMSTART
16078; GFX940-NEXT:    ; use s[8:9]
16079; GFX940-NEXT:    ;;#ASMEND
16080; GFX940-NEXT:    s_setpc_b64 s[30:31]
16081  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16082  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16083  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 1>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
16084  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16085  ret void
16086}
16087
; Scalar-register ("=s") shuffle case with a single live input: the second
; shuffle operand is poison and the mask is <poison, 2, 2, 2>, so result lane 0
; is unconstrained and lanes 1-3 all repeat element 2 of %vec0. The check lines
; are autogenerated; regenerate them with the update script instead of editing
; them by hand.
16088define void @s_shuffle_v4i16_v4i16__u_2_2_2() {
16089; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_2_2_2:
16090; GFX900:       ; %bb.0:
16091; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16092; GFX900-NEXT:    ;;#ASMSTART
16093; GFX900-NEXT:    ; def s[4:5]
16094; GFX900-NEXT:    ;;#ASMEND
16095; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16096; GFX900-NEXT:    s_lshl_b32 s8, s5, 16
16097; GFX900-NEXT:    ;;#ASMSTART
16098; GFX900-NEXT:    ; use s[8:9]
16099; GFX900-NEXT:    ;;#ASMEND
16100; GFX900-NEXT:    s_setpc_b64 s[30:31]
16101;
16102; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_2_2_2:
16103; GFX90A:       ; %bb.0:
16104; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16105; GFX90A-NEXT:    ;;#ASMSTART
16106; GFX90A-NEXT:    ; def s[4:5]
16107; GFX90A-NEXT:    ;;#ASMEND
16108; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16109; GFX90A-NEXT:    s_lshl_b32 s8, s5, 16
16110; GFX90A-NEXT:    ;;#ASMSTART
16111; GFX90A-NEXT:    ; use s[8:9]
16112; GFX90A-NEXT:    ;;#ASMEND
16113; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16114;
16115; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_2_2_2:
16116; GFX940:       ; %bb.0:
16117; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16118; GFX940-NEXT:    ;;#ASMSTART
16119; GFX940-NEXT:    ; def s[0:1]
16120; GFX940-NEXT:    ;;#ASMEND
16121; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16122; GFX940-NEXT:    s_lshl_b32 s8, s1, 16
16123; GFX940-NEXT:    ;;#ASMSTART
16124; GFX940-NEXT:    ; use s[8:9]
16125; GFX940-NEXT:    ;;#ASMEND
16126; GFX940-NEXT:    s_setpc_b64 s[30:31]
16127  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16128  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
  ; The "{s[8:9]}" constraint ties the shuffled value to s[8:9], the register
  ; pair that appears in the expected "use" lines above.
16129  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16130  ret void
16131}
16132
; Scalar shuffle of %vec0 with mask <0, 2, 2, 2>: lane 0 reads %vec0[0] and
; lanes 1-3 read %vec0[2]. The input is produced by "=s" inline asm and the
; result is handed to inline asm pinned to s[8:9]. The checks expect two
; s_pack_ll_b32_b16 instructions and no shifts.
16133define void @s_shuffle_v4i16_v4i16__0_2_2_2() {
16134; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_2_2_2:
16135; GFX900:       ; %bb.0:
16136; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16137; GFX900-NEXT:    ;;#ASMSTART
16138; GFX900-NEXT:    ; def s[4:5]
16139; GFX900-NEXT:    ;;#ASMEND
16140; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16141; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16142; GFX900-NEXT:    ;;#ASMSTART
16143; GFX900-NEXT:    ; use s[8:9]
16144; GFX900-NEXT:    ;;#ASMEND
16145; GFX900-NEXT:    s_setpc_b64 s[30:31]
16146;
16147; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_2_2_2:
16148; GFX90A:       ; %bb.0:
16149; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16150; GFX90A-NEXT:    ;;#ASMSTART
16151; GFX90A-NEXT:    ; def s[4:5]
16152; GFX90A-NEXT:    ;;#ASMEND
16153; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16154; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16155; GFX90A-NEXT:    ;;#ASMSTART
16156; GFX90A-NEXT:    ; use s[8:9]
16157; GFX90A-NEXT:    ;;#ASMEND
16158; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16159;
16160; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_2_2_2:
16161; GFX940:       ; %bb.0:
16162; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16163; GFX940-NEXT:    ;;#ASMSTART
16164; GFX940-NEXT:    ; def s[0:1]
16165; GFX940-NEXT:    ;;#ASMEND
16166; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
16167; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16168; GFX940-NEXT:    ;;#ASMSTART
16169; GFX940-NEXT:    ; use s[8:9]
16170; GFX940-NEXT:    ;;#ASMEND
16171; GFX940-NEXT:    s_setpc_b64 s[30:31]
16172  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16173  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
16174  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16175  ret void
16176}
16177
; Scalar shuffle of %vec0 with mask <1, 2, 2, 2>: lane 0 reads %vec0[1] and
; lanes 1-3 read %vec0[2]. The input is produced by "=s" inline asm and the
; result is handed to inline asm pinned to s[8:9]. The checks expect a 16-bit
; right shift of the first input register ahead of two s_pack_ll_b32_b16.
16178define void @s_shuffle_v4i16_v4i16__1_2_2_2() {
16179; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_2_2_2:
16180; GFX900:       ; %bb.0:
16181; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16182; GFX900-NEXT:    ;;#ASMSTART
16183; GFX900-NEXT:    ; def s[4:5]
16184; GFX900-NEXT:    ;;#ASMEND
16185; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
16186; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16187; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16188; GFX900-NEXT:    ;;#ASMSTART
16189; GFX900-NEXT:    ; use s[8:9]
16190; GFX900-NEXT:    ;;#ASMEND
16191; GFX900-NEXT:    s_setpc_b64 s[30:31]
16192;
16193; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_2_2_2:
16194; GFX90A:       ; %bb.0:
16195; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16196; GFX90A-NEXT:    ;;#ASMSTART
16197; GFX90A-NEXT:    ; def s[4:5]
16198; GFX90A-NEXT:    ;;#ASMEND
16199; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
16200; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16201; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16202; GFX90A-NEXT:    ;;#ASMSTART
16203; GFX90A-NEXT:    ; use s[8:9]
16204; GFX90A-NEXT:    ;;#ASMEND
16205; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16206;
16207; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_2_2_2:
16208; GFX940:       ; %bb.0:
16209; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16210; GFX940-NEXT:    ;;#ASMSTART
16211; GFX940-NEXT:    ; def s[0:1]
16212; GFX940-NEXT:    ;;#ASMEND
16213; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
16214; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
16215; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16216; GFX940-NEXT:    ;;#ASMSTART
16217; GFX940-NEXT:    ; use s[8:9]
16218; GFX940-NEXT:    ;;#ASMEND
16219; GFX940-NEXT:    s_setpc_b64 s[30:31]
16220  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16221  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
16222  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16223  ret void
16224}
16225
; Scalar splat of %vec0[2]: mask <2, 2, 2, 2>. The input is produced by "=s"
; inline asm and the result is handed to inline asm pinned to s[8:9]. The
; checks expect a single s_pack_ll_b32_b16 into s8 followed by an s_mov_b32
; that copies s8 into s9.
16226define void @s_shuffle_v4i16_v4i16__2_2_2_2() {
16227; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_2_2_2:
16228; GFX900:       ; %bb.0:
16229; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16230; GFX900-NEXT:    ;;#ASMSTART
16231; GFX900-NEXT:    ; def s[4:5]
16232; GFX900-NEXT:    ;;#ASMEND
16233; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
16234; GFX900-NEXT:    s_mov_b32 s9, s8
16235; GFX900-NEXT:    ;;#ASMSTART
16236; GFX900-NEXT:    ; use s[8:9]
16237; GFX900-NEXT:    ;;#ASMEND
16238; GFX900-NEXT:    s_setpc_b64 s[30:31]
16239;
16240; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_2_2_2:
16241; GFX90A:       ; %bb.0:
16242; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16243; GFX90A-NEXT:    ;;#ASMSTART
16244; GFX90A-NEXT:    ; def s[4:5]
16245; GFX90A-NEXT:    ;;#ASMEND
16246; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
16247; GFX90A-NEXT:    s_mov_b32 s9, s8
16248; GFX90A-NEXT:    ;;#ASMSTART
16249; GFX90A-NEXT:    ; use s[8:9]
16250; GFX90A-NEXT:    ;;#ASMEND
16251; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16252;
16253; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_2_2_2:
16254; GFX940:       ; %bb.0:
16255; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16256; GFX940-NEXT:    ;;#ASMSTART
16257; GFX940-NEXT:    ; def s[0:1]
16258; GFX940-NEXT:    ;;#ASMEND
16259; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
16260; GFX940-NEXT:    s_mov_b32 s9, s8
16261; GFX940-NEXT:    ;;#ASMSTART
16262; GFX940-NEXT:    ; use s[8:9]
16263; GFX940-NEXT:    ;;#ASMEND
16264; GFX940-NEXT:    s_setpc_b64 s[30:31]
16265  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16266  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
16267  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16268  ret void
16269}
16270
; Scalar shuffle of %vec0 with mask <3, 2, 2, 2>: lane 0 reads %vec0[3] and
; lanes 1-3 read %vec0[2]. The input is produced by "=s" inline asm and the
; result is handed to inline asm pinned to s[8:9]. The checks expect a 16-bit
; right shift of the second input register ahead of two s_pack_ll_b32_b16.
16271define void @s_shuffle_v4i16_v4i16__3_2_2_2() {
16272; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_2_2_2:
16273; GFX900:       ; %bb.0:
16274; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16275; GFX900-NEXT:    ;;#ASMSTART
16276; GFX900-NEXT:    ; def s[4:5]
16277; GFX900-NEXT:    ;;#ASMEND
16278; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
16279; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16280; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16281; GFX900-NEXT:    ;;#ASMSTART
16282; GFX900-NEXT:    ; use s[8:9]
16283; GFX900-NEXT:    ;;#ASMEND
16284; GFX900-NEXT:    s_setpc_b64 s[30:31]
16285;
16286; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_2_2_2:
16287; GFX90A:       ; %bb.0:
16288; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16289; GFX90A-NEXT:    ;;#ASMSTART
16290; GFX90A-NEXT:    ; def s[4:5]
16291; GFX90A-NEXT:    ;;#ASMEND
16292; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
16293; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16294; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16295; GFX90A-NEXT:    ;;#ASMSTART
16296; GFX90A-NEXT:    ; use s[8:9]
16297; GFX90A-NEXT:    ;;#ASMEND
16298; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16299;
16300; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_2_2_2:
16301; GFX940:       ; %bb.0:
16302; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16303; GFX940-NEXT:    ;;#ASMSTART
16304; GFX940-NEXT:    ; def s[0:1]
16305; GFX940-NEXT:    ;;#ASMEND
16306; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
16307; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
16308; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16309; GFX940-NEXT:    ;;#ASMSTART
16310; GFX940-NEXT:    ; use s[8:9]
16311; GFX940-NEXT:    ;;#ASMEND
16312; GFX940-NEXT:    s_setpc_b64 s[30:31]
16313  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16314  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
16315  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16316  ret void
16317}
16318
; Scalar shuffle with mask <4, 2, 2, 2>. Mask index 4 selects element 0 of the
; second shuffle operand, which is poison in this test, so lane 0 carries no
; defined value; lanes 1-3 read %vec0[2]. The expected instruction sequence is
; the same as in s_shuffle_v4i16_v4i16__u_2_2_2 (one pack plus one left shift).
16319define void @s_shuffle_v4i16_v4i16__4_2_2_2() {
16320; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_2_2_2:
16321; GFX900:       ; %bb.0:
16322; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16323; GFX900-NEXT:    ;;#ASMSTART
16324; GFX900-NEXT:    ; def s[4:5]
16325; GFX900-NEXT:    ;;#ASMEND
16326; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16327; GFX900-NEXT:    s_lshl_b32 s8, s5, 16
16328; GFX900-NEXT:    ;;#ASMSTART
16329; GFX900-NEXT:    ; use s[8:9]
16330; GFX900-NEXT:    ;;#ASMEND
16331; GFX900-NEXT:    s_setpc_b64 s[30:31]
16332;
16333; GFX90A-LABEL: s_shuffle_v4i16_v4i16__4_2_2_2:
16334; GFX90A:       ; %bb.0:
16335; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16336; GFX90A-NEXT:    ;;#ASMSTART
16337; GFX90A-NEXT:    ; def s[4:5]
16338; GFX90A-NEXT:    ;;#ASMEND
16339; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16340; GFX90A-NEXT:    s_lshl_b32 s8, s5, 16
16341; GFX90A-NEXT:    ;;#ASMSTART
16342; GFX90A-NEXT:    ; use s[8:9]
16343; GFX90A-NEXT:    ;;#ASMEND
16344; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16345;
16346; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_2_2_2:
16347; GFX940:       ; %bb.0:
16348; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16349; GFX940-NEXT:    ;;#ASMSTART
16350; GFX940-NEXT:    ; def s[0:1]
16351; GFX940-NEXT:    ;;#ASMEND
16352; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16353; GFX940-NEXT:    s_lshl_b32 s8, s1, 16
16354; GFX940-NEXT:    ;;#ASMSTART
16355; GFX940-NEXT:    ; use s[8:9]
16356; GFX940-NEXT:    ;;#ASMEND
16357; GFX940-NEXT:    s_setpc_b64 s[30:31]
16358  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16359  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
16360  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16361  ret void
16362}
16363
; Two-input scalar shuffle with mask <5, 2, 2, 2>: lane 0 reads %vec1[1]
; (mask indices 4-7 address the second operand) and lanes 1-3 read %vec0[2].
; Both inputs are produced by "=s" inline asm and the result is handed to
; inline asm pinned to s[8:9]. The checks expect a 16-bit right shift of the
; first %vec1 register ahead of two s_pack_ll_b32_b16.
16364define void @s_shuffle_v4i16_v4i16__5_2_2_2() {
16365; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_2_2_2:
16366; GFX900:       ; %bb.0:
16367; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16368; GFX900-NEXT:    ;;#ASMSTART
16369; GFX900-NEXT:    ; def s[4:5]
16370; GFX900-NEXT:    ;;#ASMEND
16371; GFX900-NEXT:    ;;#ASMSTART
16372; GFX900-NEXT:    ; def s[6:7]
16373; GFX900-NEXT:    ;;#ASMEND
16374; GFX900-NEXT:    s_lshr_b32 s4, s6, 16
16375; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16376; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16377; GFX900-NEXT:    ;;#ASMSTART
16378; GFX900-NEXT:    ; use s[8:9]
16379; GFX900-NEXT:    ;;#ASMEND
16380; GFX900-NEXT:    s_setpc_b64 s[30:31]
16381;
16382; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_2_2_2:
16383; GFX90A:       ; %bb.0:
16384; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16385; GFX90A-NEXT:    ;;#ASMSTART
16386; GFX90A-NEXT:    ; def s[4:5]
16387; GFX90A-NEXT:    ;;#ASMEND
16388; GFX90A-NEXT:    ;;#ASMSTART
16389; GFX90A-NEXT:    ; def s[6:7]
16390; GFX90A-NEXT:    ;;#ASMEND
16391; GFX90A-NEXT:    s_lshr_b32 s4, s6, 16
16392; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16393; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16394; GFX90A-NEXT:    ;;#ASMSTART
16395; GFX90A-NEXT:    ; use s[8:9]
16396; GFX90A-NEXT:    ;;#ASMEND
16397; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16398;
16399; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_2_2_2:
16400; GFX940:       ; %bb.0:
16401; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16402; GFX940-NEXT:    ;;#ASMSTART
16403; GFX940-NEXT:    ; def s[0:1]
16404; GFX940-NEXT:    ;;#ASMEND
16405; GFX940-NEXT:    ;;#ASMSTART
16406; GFX940-NEXT:    ; def s[2:3]
16407; GFX940-NEXT:    ;;#ASMEND
16408; GFX940-NEXT:    s_lshr_b32 s0, s2, 16
16409; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
16410; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16411; GFX940-NEXT:    ;;#ASMSTART
16412; GFX940-NEXT:    ; use s[8:9]
16413; GFX940-NEXT:    ;;#ASMEND
16414; GFX940-NEXT:    s_setpc_b64 s[30:31]
16415  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16416  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16417  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
16418  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16419  ret void
16420}
16421
; Two-input scalar shuffle with mask <6, 2, 2, 2>: lane 0 reads %vec1[2]
; (mask indices 4-7 address the second operand) and lanes 1-3 read %vec0[2].
; Both inputs are produced by "=s" inline asm and the result is handed to
; inline asm pinned to s[8:9]. The checks expect two s_pack_ll_b32_b16
; instructions and no shifts.
16422define void @s_shuffle_v4i16_v4i16__6_2_2_2() {
16423; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_2_2_2:
16424; GFX900:       ; %bb.0:
16425; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16426; GFX900-NEXT:    ;;#ASMSTART
16427; GFX900-NEXT:    ; def s[4:5]
16428; GFX900-NEXT:    ;;#ASMEND
16429; GFX900-NEXT:    ;;#ASMSTART
16430; GFX900-NEXT:    ; def s[6:7]
16431; GFX900-NEXT:    ;;#ASMEND
16432; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
16433; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16434; GFX900-NEXT:    ;;#ASMSTART
16435; GFX900-NEXT:    ; use s[8:9]
16436; GFX900-NEXT:    ;;#ASMEND
16437; GFX900-NEXT:    s_setpc_b64 s[30:31]
16438;
16439; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_2_2_2:
16440; GFX90A:       ; %bb.0:
16441; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16442; GFX90A-NEXT:    ;;#ASMSTART
16443; GFX90A-NEXT:    ; def s[4:5]
16444; GFX90A-NEXT:    ;;#ASMEND
16445; GFX90A-NEXT:    ;;#ASMSTART
16446; GFX90A-NEXT:    ; def s[6:7]
16447; GFX90A-NEXT:    ;;#ASMEND
16448; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
16449; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16450; GFX90A-NEXT:    ;;#ASMSTART
16451; GFX90A-NEXT:    ; use s[8:9]
16452; GFX90A-NEXT:    ;;#ASMEND
16453; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16454;
16455; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_2_2_2:
16456; GFX940:       ; %bb.0:
16457; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16458; GFX940-NEXT:    ;;#ASMSTART
16459; GFX940-NEXT:    ; def s[0:1]
16460; GFX940-NEXT:    ;;#ASMEND
16461; GFX940-NEXT:    ;;#ASMSTART
16462; GFX940-NEXT:    ; def s[2:3]
16463; GFX940-NEXT:    ;;#ASMEND
16464; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
16465; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16466; GFX940-NEXT:    ;;#ASMSTART
16467; GFX940-NEXT:    ; use s[8:9]
16468; GFX940-NEXT:    ;;#ASMEND
16469; GFX940-NEXT:    s_setpc_b64 s[30:31]
16470  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16471  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16472  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 2, i32 2, i32 2>
16473  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16474  ret void
16475}
16476
; Two-input scalar shuffle with mask <7, 2, 2, 2>: lane 0 reads %vec1[3]
; (mask indices 4-7 address the second operand) and lanes 1-3 read %vec0[2].
; Both inputs are produced by "=s" inline asm and the result is handed to
; inline asm pinned to s[8:9]. The checks expect a 16-bit right shift of the
; second %vec1 register ahead of two s_pack_ll_b32_b16.
16477define void @s_shuffle_v4i16_v4i16__7_2_2_2() {
16478; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_2_2:
16479; GFX900:       ; %bb.0:
16480; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16481; GFX900-NEXT:    ;;#ASMSTART
16482; GFX900-NEXT:    ; def s[4:5]
16483; GFX900-NEXT:    ;;#ASMEND
16484; GFX900-NEXT:    ;;#ASMSTART
16485; GFX900-NEXT:    ; def s[6:7]
16486; GFX900-NEXT:    ;;#ASMEND
16487; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
16488; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16489; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16490; GFX900-NEXT:    ;;#ASMSTART
16491; GFX900-NEXT:    ; use s[8:9]
16492; GFX900-NEXT:    ;;#ASMEND
16493; GFX900-NEXT:    s_setpc_b64 s[30:31]
16494;
16495; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_2_2:
16496; GFX90A:       ; %bb.0:
16497; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16498; GFX90A-NEXT:    ;;#ASMSTART
16499; GFX90A-NEXT:    ; def s[4:5]
16500; GFX90A-NEXT:    ;;#ASMEND
16501; GFX90A-NEXT:    ;;#ASMSTART
16502; GFX90A-NEXT:    ; def s[6:7]
16503; GFX90A-NEXT:    ;;#ASMEND
16504; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
16505; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
16506; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16507; GFX90A-NEXT:    ;;#ASMSTART
16508; GFX90A-NEXT:    ; use s[8:9]
16509; GFX90A-NEXT:    ;;#ASMEND
16510; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16511;
16512; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_2_2:
16513; GFX940:       ; %bb.0:
16514; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16515; GFX940-NEXT:    ;;#ASMSTART
16516; GFX940-NEXT:    ; def s[0:1]
16517; GFX940-NEXT:    ;;#ASMEND
16518; GFX940-NEXT:    ;;#ASMSTART
16519; GFX940-NEXT:    ; def s[2:3]
16520; GFX940-NEXT:    ;;#ASMEND
16521; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
16522; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
16523; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16524; GFX940-NEXT:    ;;#ASMSTART
16525; GFX940-NEXT:    ; use s[8:9]
16526; GFX940-NEXT:    ;;#ASMEND
16527; GFX940-NEXT:    s_setpc_b64 s[30:31]
16528  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16529  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16530  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 2, i32 2>
16531  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16532  ret void
16533}
16534
; Two-input scalar shuffle with mask <7, poison, 2, 2>: lane 0 reads %vec1[3],
; lane 1 is unconstrained, and lanes 2-3 read %vec0[2]. Both inputs are
; produced by "=s" inline asm and the result is handed to inline asm pinned to
; s[8:9]. The checks expect a 16-bit right shift written straight into s8 and
; one s_pack_ll_b32_b16 for s9.
16535define void @s_shuffle_v4i16_v4i16__7_u_2_2() {
16536; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_2_2:
16537; GFX900:       ; %bb.0:
16538; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16539; GFX900-NEXT:    ;;#ASMSTART
16540; GFX900-NEXT:    ; def s[4:5]
16541; GFX900-NEXT:    ;;#ASMEND
16542; GFX900-NEXT:    ;;#ASMSTART
16543; GFX900-NEXT:    ; def s[6:7]
16544; GFX900-NEXT:    ;;#ASMEND
16545; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
16546; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16547; GFX900-NEXT:    ;;#ASMSTART
16548; GFX900-NEXT:    ; use s[8:9]
16549; GFX900-NEXT:    ;;#ASMEND
16550; GFX900-NEXT:    s_setpc_b64 s[30:31]
16551;
16552; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_2_2:
16553; GFX90A:       ; %bb.0:
16554; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16555; GFX90A-NEXT:    ;;#ASMSTART
16556; GFX90A-NEXT:    ; def s[4:5]
16557; GFX90A-NEXT:    ;;#ASMEND
16558; GFX90A-NEXT:    ;;#ASMSTART
16559; GFX90A-NEXT:    ; def s[6:7]
16560; GFX90A-NEXT:    ;;#ASMEND
16561; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
16562; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16563; GFX90A-NEXT:    ;;#ASMSTART
16564; GFX90A-NEXT:    ; use s[8:9]
16565; GFX90A-NEXT:    ;;#ASMEND
16566; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16567;
16568; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_2_2:
16569; GFX940:       ; %bb.0:
16570; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16571; GFX940-NEXT:    ;;#ASMSTART
16572; GFX940-NEXT:    ; def s[0:1]
16573; GFX940-NEXT:    ;;#ASMEND
16574; GFX940-NEXT:    ;;#ASMSTART
16575; GFX940-NEXT:    ; def s[2:3]
16576; GFX940-NEXT:    ;;#ASMEND
16577; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
16578; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16579; GFX940-NEXT:    ;;#ASMSTART
16580; GFX940-NEXT:    ; use s[8:9]
16581; GFX940-NEXT:    ;;#ASMEND
16582; GFX940-NEXT:    s_setpc_b64 s[30:31]
16583  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16584  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16585  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 2, i32 2>
16586  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16587  ret void
16588}
16589
; Two-input scalar shuffle with mask <7, 0, 2, 2>: lane 0 reads %vec1[3],
; lane 1 reads %vec0[0], and lanes 2-3 read %vec0[2]. Both inputs are produced
; by "=s" inline asm and the result is handed to inline asm pinned to s[8:9].
; Unlike the neighbouring tests, the expected output here has the s[6:7] (or
; s[2:3]) def and its 16-bit right shift ahead of the other def block.
16590define void @s_shuffle_v4i16_v4i16__7_0_2_2() {
16591; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_2_2:
16592; GFX900:       ; %bb.0:
16593; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16594; GFX900-NEXT:    ;;#ASMSTART
16595; GFX900-NEXT:    ; def s[6:7]
16596; GFX900-NEXT:    ;;#ASMEND
16597; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
16598; GFX900-NEXT:    ;;#ASMSTART
16599; GFX900-NEXT:    ; def s[4:5]
16600; GFX900-NEXT:    ;;#ASMEND
16601; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
16602; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16603; GFX900-NEXT:    ;;#ASMSTART
16604; GFX900-NEXT:    ; use s[8:9]
16605; GFX900-NEXT:    ;;#ASMEND
16606; GFX900-NEXT:    s_setpc_b64 s[30:31]
16607;
16608; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_2_2:
16609; GFX90A:       ; %bb.0:
16610; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16611; GFX90A-NEXT:    ;;#ASMSTART
16612; GFX90A-NEXT:    ; def s[6:7]
16613; GFX90A-NEXT:    ;;#ASMEND
16614; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
16615; GFX90A-NEXT:    ;;#ASMSTART
16616; GFX90A-NEXT:    ; def s[4:5]
16617; GFX90A-NEXT:    ;;#ASMEND
16618; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
16619; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16620; GFX90A-NEXT:    ;;#ASMSTART
16621; GFX90A-NEXT:    ; use s[8:9]
16622; GFX90A-NEXT:    ;;#ASMEND
16623; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16624;
16625; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_2_2:
16626; GFX940:       ; %bb.0:
16627; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16628; GFX940-NEXT:    ;;#ASMSTART
16629; GFX940-NEXT:    ; def s[2:3]
16630; GFX940-NEXT:    ;;#ASMEND
16631; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
16632; GFX940-NEXT:    ;;#ASMSTART
16633; GFX940-NEXT:    ; def s[0:1]
16634; GFX940-NEXT:    ;;#ASMEND
16635; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s0
16636; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16637; GFX940-NEXT:    ;;#ASMSTART
16638; GFX940-NEXT:    ; use s[8:9]
16639; GFX940-NEXT:    ;;#ASMEND
16640; GFX940-NEXT:    s_setpc_b64 s[30:31]
16641  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16642  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16643  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 2, i32 2>
16644  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16645  ret void
16646}
16647
; Two-input scalar shuffle with mask <7, 1, 2, 2>: lane 0 reads %vec1[3],
; lane 1 reads %vec0[1], and lanes 2-3 read %vec0[2]. Both inputs are produced
; by "=s" inline asm and the result is handed to inline asm pinned to s[8:9].
; The checks expect one s_pack_hh_b32_b16 for s8 (no separate shift) and one
; s_pack_ll_b32_b16 for s9.
16648define void @s_shuffle_v4i16_v4i16__7_1_2_2() {
16649; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_2_2:
16650; GFX900:       ; %bb.0:
16651; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16652; GFX900-NEXT:    ;;#ASMSTART
16653; GFX900-NEXT:    ; def s[4:5]
16654; GFX900-NEXT:    ;;#ASMEND
16655; GFX900-NEXT:    ;;#ASMSTART
16656; GFX900-NEXT:    ; def s[6:7]
16657; GFX900-NEXT:    ;;#ASMEND
16658; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
16659; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16660; GFX900-NEXT:    ;;#ASMSTART
16661; GFX900-NEXT:    ; use s[8:9]
16662; GFX900-NEXT:    ;;#ASMEND
16663; GFX900-NEXT:    s_setpc_b64 s[30:31]
16664;
16665; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_2_2:
16666; GFX90A:       ; %bb.0:
16667; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16668; GFX90A-NEXT:    ;;#ASMSTART
16669; GFX90A-NEXT:    ; def s[4:5]
16670; GFX90A-NEXT:    ;;#ASMEND
16671; GFX90A-NEXT:    ;;#ASMSTART
16672; GFX90A-NEXT:    ; def s[6:7]
16673; GFX90A-NEXT:    ;;#ASMEND
16674; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
16675; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16676; GFX90A-NEXT:    ;;#ASMSTART
16677; GFX90A-NEXT:    ; use s[8:9]
16678; GFX90A-NEXT:    ;;#ASMEND
16679; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16680;
16681; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_2_2:
16682; GFX940:       ; %bb.0:
16683; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16684; GFX940-NEXT:    ;;#ASMSTART
16685; GFX940-NEXT:    ; def s[0:1]
16686; GFX940-NEXT:    ;;#ASMEND
16687; GFX940-NEXT:    ;;#ASMSTART
16688; GFX940-NEXT:    ; def s[2:3]
16689; GFX940-NEXT:    ;;#ASMEND
16690; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
16691; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16692; GFX940-NEXT:    ;;#ASMSTART
16693; GFX940-NEXT:    ; use s[8:9]
16694; GFX940-NEXT:    ;;#ASMEND
16695; GFX940-NEXT:    s_setpc_b64 s[30:31]
16696  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16697  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16698  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 2, i32 2>
16699  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16700  ret void
16701}
16702
; Two-input scalar shuffle with mask <7, 3, 2, 2>: lane 0 reads %vec1[3],
; lane 1 reads %vec0[3], and lanes 2-3 read %vec0[2]. Both inputs are produced
; by "=s" inline asm and the result is handed to inline asm pinned to s[8:9].
; The checks expect one s_pack_hh_b32_b16 for s8 (no separate shift) and one
; s_pack_ll_b32_b16 for s9.
16703define void @s_shuffle_v4i16_v4i16__7_3_2_2() {
16704; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_2_2:
16705; GFX900:       ; %bb.0:
16706; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16707; GFX900-NEXT:    ;;#ASMSTART
16708; GFX900-NEXT:    ; def s[4:5]
16709; GFX900-NEXT:    ;;#ASMEND
16710; GFX900-NEXT:    ;;#ASMSTART
16711; GFX900-NEXT:    ; def s[6:7]
16712; GFX900-NEXT:    ;;#ASMEND
16713; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
16714; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16715; GFX900-NEXT:    ;;#ASMSTART
16716; GFX900-NEXT:    ; use s[8:9]
16717; GFX900-NEXT:    ;;#ASMEND
16718; GFX900-NEXT:    s_setpc_b64 s[30:31]
16719;
16720; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_2_2:
16721; GFX90A:       ; %bb.0:
16722; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16723; GFX90A-NEXT:    ;;#ASMSTART
16724; GFX90A-NEXT:    ; def s[4:5]
16725; GFX90A-NEXT:    ;;#ASMEND
16726; GFX90A-NEXT:    ;;#ASMSTART
16727; GFX90A-NEXT:    ; def s[6:7]
16728; GFX90A-NEXT:    ;;#ASMEND
16729; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
16730; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16731; GFX90A-NEXT:    ;;#ASMSTART
16732; GFX90A-NEXT:    ; use s[8:9]
16733; GFX90A-NEXT:    ;;#ASMEND
16734; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16735;
16736; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_2_2:
16737; GFX940:       ; %bb.0:
16738; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16739; GFX940-NEXT:    ;;#ASMSTART
16740; GFX940-NEXT:    ; def s[0:1]
16741; GFX940-NEXT:    ;;#ASMEND
16742; GFX940-NEXT:    ;;#ASMSTART
16743; GFX940-NEXT:    ; def s[2:3]
16744; GFX940-NEXT:    ;;#ASMEND
16745; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
16746; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16747; GFX940-NEXT:    ;;#ASMSTART
16748; GFX940-NEXT:    ; use s[8:9]
16749; GFX940-NEXT:    ;;#ASMEND
16750; GFX940-NEXT:    s_setpc_b64 s[30:31]
16751  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16752  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16753  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 2, i32 2>
16754  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16755  ret void
16756}
16757
; Two-input scalar shuffle with mask <7, 4, 2, 2>: lane 0 reads %vec1[3],
; lane 1 reads %vec1[0], and lanes 2-3 read %vec0[2]. Both inputs are produced
; by "=s" inline asm and the result is handed to inline asm pinned to s[8:9].
; The checks expect a 16-bit right shift of the second %vec1 register ahead of
; two s_pack_ll_b32_b16.
16758define void @s_shuffle_v4i16_v4i16__7_4_2_2() {
16759; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_2_2:
16760; GFX900:       ; %bb.0:
16761; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16762; GFX900-NEXT:    ;;#ASMSTART
16763; GFX900-NEXT:    ; def s[4:5]
16764; GFX900-NEXT:    ;;#ASMEND
16765; GFX900-NEXT:    ;;#ASMSTART
16766; GFX900-NEXT:    ; def s[6:7]
16767; GFX900-NEXT:    ;;#ASMEND
16768; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
16769; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s6
16770; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16771; GFX900-NEXT:    ;;#ASMSTART
16772; GFX900-NEXT:    ; use s[8:9]
16773; GFX900-NEXT:    ;;#ASMEND
16774; GFX900-NEXT:    s_setpc_b64 s[30:31]
16775;
16776; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_2_2:
16777; GFX90A:       ; %bb.0:
16778; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16779; GFX90A-NEXT:    ;;#ASMSTART
16780; GFX90A-NEXT:    ; def s[4:5]
16781; GFX90A-NEXT:    ;;#ASMEND
16782; GFX90A-NEXT:    ;;#ASMSTART
16783; GFX90A-NEXT:    ; def s[6:7]
16784; GFX90A-NEXT:    ;;#ASMEND
16785; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
16786; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s6
16787; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16788; GFX90A-NEXT:    ;;#ASMSTART
16789; GFX90A-NEXT:    ; use s[8:9]
16790; GFX90A-NEXT:    ;;#ASMEND
16791; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16792;
16793; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_2_2:
16794; GFX940:       ; %bb.0:
16795; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16796; GFX940-NEXT:    ;;#ASMSTART
16797; GFX940-NEXT:    ; def s[0:1]
16798; GFX940-NEXT:    ;;#ASMEND
16799; GFX940-NEXT:    ;;#ASMSTART
16800; GFX940-NEXT:    ; def s[2:3]
16801; GFX940-NEXT:    ;;#ASMEND
16802; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
16803; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s2
16804; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16805; GFX940-NEXT:    ;;#ASMSTART
16806; GFX940-NEXT:    ; use s[8:9]
16807; GFX940-NEXT:    ;;#ASMEND
16808; GFX940-NEXT:    s_setpc_b64 s[30:31]
16809  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16810  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16811  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 2, i32 2>
16812  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16813  ret void
16814}
16815
; Two-input scalar shuffle with mask <7, 5, 2, 2>: lane 0 reads %vec1[3],
; lane 1 reads %vec1[1], and lanes 2-3 read %vec0[2]. Both inputs are produced
; by "=s" inline asm and the result is handed to inline asm pinned to s[8:9].
; The checks expect one s_pack_hh_b32_b16 over the two %vec1 registers for s8
; and one s_pack_ll_b32_b16 for s9.
16816define void @s_shuffle_v4i16_v4i16__7_5_2_2() {
16817; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_2_2:
16818; GFX900:       ; %bb.0:
16819; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16820; GFX900-NEXT:    ;;#ASMSTART
16821; GFX900-NEXT:    ; def s[4:5]
16822; GFX900-NEXT:    ;;#ASMEND
16823; GFX900-NEXT:    ;;#ASMSTART
16824; GFX900-NEXT:    ; def s[6:7]
16825; GFX900-NEXT:    ;;#ASMEND
16826; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
16827; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16828; GFX900-NEXT:    ;;#ASMSTART
16829; GFX900-NEXT:    ; use s[8:9]
16830; GFX900-NEXT:    ;;#ASMEND
16831; GFX900-NEXT:    s_setpc_b64 s[30:31]
16832;
16833; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_2_2:
16834; GFX90A:       ; %bb.0:
16835; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16836; GFX90A-NEXT:    ;;#ASMSTART
16837; GFX90A-NEXT:    ; def s[4:5]
16838; GFX90A-NEXT:    ;;#ASMEND
16839; GFX90A-NEXT:    ;;#ASMSTART
16840; GFX90A-NEXT:    ; def s[6:7]
16841; GFX90A-NEXT:    ;;#ASMEND
16842; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
16843; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16844; GFX90A-NEXT:    ;;#ASMSTART
16845; GFX90A-NEXT:    ; use s[8:9]
16846; GFX90A-NEXT:    ;;#ASMEND
16847; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16848;
16849; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_2_2:
16850; GFX940:       ; %bb.0:
16851; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16852; GFX940-NEXT:    ;;#ASMSTART
16853; GFX940-NEXT:    ; def s[0:1]
16854; GFX940-NEXT:    ;;#ASMEND
16855; GFX940-NEXT:    ;;#ASMSTART
16856; GFX940-NEXT:    ; def s[2:3]
16857; GFX940-NEXT:    ;;#ASMEND
16858; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s2
16859; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16860; GFX940-NEXT:    ;;#ASMSTART
16861; GFX940-NEXT:    ; use s[8:9]
16862; GFX940-NEXT:    ;;#ASMEND
16863; GFX940-NEXT:    s_setpc_b64 s[30:31]
16864  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16865  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16866  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 2, i32 2>
16867  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16868  ret void
16869}
16870
; Two-input scalar shuffle with mask <7, 6, 2, 2>: lane 0 reads %vec1[3],
; lane 1 reads %vec1[2], and lanes 2-3 read %vec0[2]. Both inputs are produced
; by "=s" inline asm and the result is handed to inline asm pinned to s[8:9].
; The checks expect a 16-bit right shift of the second %vec1 register, then a
; s_pack_ll_b32_b16 of the shifted value with that same register for s8.
16871define void @s_shuffle_v4i16_v4i16__7_6_2_2() {
16872; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_2_2:
16873; GFX900:       ; %bb.0:
16874; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16875; GFX900-NEXT:    ;;#ASMSTART
16876; GFX900-NEXT:    ; def s[4:5]
16877; GFX900-NEXT:    ;;#ASMEND
16878; GFX900-NEXT:    ;;#ASMSTART
16879; GFX900-NEXT:    ; def s[6:7]
16880; GFX900-NEXT:    ;;#ASMEND
16881; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
16882; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
16883; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16884; GFX900-NEXT:    ;;#ASMSTART
16885; GFX900-NEXT:    ; use s[8:9]
16886; GFX900-NEXT:    ;;#ASMEND
16887; GFX900-NEXT:    s_setpc_b64 s[30:31]
16888;
16889; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_2_2:
16890; GFX90A:       ; %bb.0:
16891; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16892; GFX90A-NEXT:    ;;#ASMSTART
16893; GFX90A-NEXT:    ; def s[4:5]
16894; GFX90A-NEXT:    ;;#ASMEND
16895; GFX90A-NEXT:    ;;#ASMSTART
16896; GFX90A-NEXT:    ; def s[6:7]
16897; GFX90A-NEXT:    ;;#ASMEND
16898; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
16899; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
16900; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16901; GFX90A-NEXT:    ;;#ASMSTART
16902; GFX90A-NEXT:    ; use s[8:9]
16903; GFX90A-NEXT:    ;;#ASMEND
16904; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16905;
16906; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_2_2:
16907; GFX940:       ; %bb.0:
16908; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16909; GFX940-NEXT:    ;;#ASMSTART
16910; GFX940-NEXT:    ; def s[0:1]
16911; GFX940-NEXT:    ;;#ASMEND
16912; GFX940-NEXT:    ;;#ASMSTART
16913; GFX940-NEXT:    ; def s[2:3]
16914; GFX940-NEXT:    ;;#ASMEND
16915; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
16916; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
16917; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16918; GFX940-NEXT:    ;;#ASMSTART
16919; GFX940-NEXT:    ; use s[8:9]
16920; GFX940-NEXT:    ;;#ASMEND
16921; GFX940-NEXT:    s_setpc_b64 s[30:31]
16922  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16923  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16924  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 2, i32 2>
16925  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16926  ret void
16927}
16928
; Scalar-register shuffle with mask <7,7,2,2>: result = { vec1[3], vec1[3], vec0[2], vec0[2] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). The checks expect one
; high-half/high-half pack for the low result dword and one low-half/low-half
; pack for the high result dword, with no extra shift.
16929define void @s_shuffle_v4i16_v4i16__7_7_2_2() {
16930; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_2:
16931; GFX900:       ; %bb.0:
16932; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16933; GFX900-NEXT:    ;;#ASMSTART
16934; GFX900-NEXT:    ; def s[4:5]
16935; GFX900-NEXT:    ;;#ASMEND
16936; GFX900-NEXT:    ;;#ASMSTART
16937; GFX900-NEXT:    ; def s[6:7]
16938; GFX900-NEXT:    ;;#ASMEND
16939; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
16940; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16941; GFX900-NEXT:    ;;#ASMSTART
16942; GFX900-NEXT:    ; use s[8:9]
16943; GFX900-NEXT:    ;;#ASMEND
16944; GFX900-NEXT:    s_setpc_b64 s[30:31]
16945;
16946; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_2:
16947; GFX90A:       ; %bb.0:
16948; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16949; GFX90A-NEXT:    ;;#ASMSTART
16950; GFX90A-NEXT:    ; def s[4:5]
16951; GFX90A-NEXT:    ;;#ASMEND
16952; GFX90A-NEXT:    ;;#ASMSTART
16953; GFX90A-NEXT:    ; def s[6:7]
16954; GFX90A-NEXT:    ;;#ASMEND
16955; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
16956; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
16957; GFX90A-NEXT:    ;;#ASMSTART
16958; GFX90A-NEXT:    ; use s[8:9]
16959; GFX90A-NEXT:    ;;#ASMEND
16960; GFX90A-NEXT:    s_setpc_b64 s[30:31]
16961;
16962; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_2:
16963; GFX940:       ; %bb.0:
16964; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16965; GFX940-NEXT:    ;;#ASMSTART
16966; GFX940-NEXT:    ; def s[0:1]
16967; GFX940-NEXT:    ;;#ASMEND
16968; GFX940-NEXT:    ;;#ASMSTART
16969; GFX940-NEXT:    ; def s[2:3]
16970; GFX940-NEXT:    ;;#ASMEND
16971; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
16972; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
16973; GFX940-NEXT:    ;;#ASMSTART
16974; GFX940-NEXT:    ; use s[8:9]
16975; GFX940-NEXT:    ;;#ASMEND
16976; GFX940-NEXT:    s_setpc_b64 s[30:31]
16977  %vec0 = call <4 x i16> asm "; def $0", "=s"()
16978  %vec1 = call <4 x i16> asm "; def $0", "=s"()
16979  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 2>
16980  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
16981  ret void
16982}
16983
; Scalar-register shuffle with mask <7,7,poison,2>: result = { vec1[3], vec1[3], <any>, vec0[2] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). Because lane 2 is
; unconstrained, the checks expect the high result dword to come from a single
; left shift that moves vec0[2] into the upper 16 bits, rather than a pack.
16984define void @s_shuffle_v4i16_v4i16__7_7_u_2() {
16985; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_2:
16986; GFX900:       ; %bb.0:
16987; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16988; GFX900-NEXT:    ;;#ASMSTART
16989; GFX900-NEXT:    ; def s[4:5]
16990; GFX900-NEXT:    ;;#ASMEND
16991; GFX900-NEXT:    ;;#ASMSTART
16992; GFX900-NEXT:    ; def s[6:7]
16993; GFX900-NEXT:    ;;#ASMEND
16994; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
16995; GFX900-NEXT:    s_lshl_b32 s9, s5, 16
16996; GFX900-NEXT:    ;;#ASMSTART
16997; GFX900-NEXT:    ; use s[8:9]
16998; GFX900-NEXT:    ;;#ASMEND
16999; GFX900-NEXT:    s_setpc_b64 s[30:31]
17000;
17001; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_2:
17002; GFX90A:       ; %bb.0:
17003; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17004; GFX90A-NEXT:    ;;#ASMSTART
17005; GFX90A-NEXT:    ; def s[4:5]
17006; GFX90A-NEXT:    ;;#ASMEND
17007; GFX90A-NEXT:    ;;#ASMSTART
17008; GFX90A-NEXT:    ; def s[6:7]
17009; GFX90A-NEXT:    ;;#ASMEND
17010; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17011; GFX90A-NEXT:    s_lshl_b32 s9, s5, 16
17012; GFX90A-NEXT:    ;;#ASMSTART
17013; GFX90A-NEXT:    ; use s[8:9]
17014; GFX90A-NEXT:    ;;#ASMEND
17015; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17016;
17017; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_2:
17018; GFX940:       ; %bb.0:
17019; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17020; GFX940-NEXT:    ;;#ASMSTART
17021; GFX940-NEXT:    ; def s[0:1]
17022; GFX940-NEXT:    ;;#ASMEND
17023; GFX940-NEXT:    ;;#ASMSTART
17024; GFX940-NEXT:    ; def s[2:3]
17025; GFX940-NEXT:    ;;#ASMEND
17026; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
17027; GFX940-NEXT:    s_lshl_b32 s9, s1, 16
17028; GFX940-NEXT:    ;;#ASMSTART
17029; GFX940-NEXT:    ; use s[8:9]
17030; GFX940-NEXT:    ;;#ASMEND
17031; GFX940-NEXT:    s_setpc_b64 s[30:31]
17032  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17033  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17034  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 2>
17035  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17036  ret void
17037}
17038
; Scalar-register shuffle with mask <7,7,0,2>: result = { vec1[3], vec1[3], vec0[0], vec0[2] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). The checks expect the
; high result dword to pack the low halves of vec0's two dwords, and the low
; result dword to pack the high half of vec1's second dword with itself.
17039define void @s_shuffle_v4i16_v4i16__7_7_0_2() {
17040; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_2:
17041; GFX900:       ; %bb.0:
17042; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17043; GFX900-NEXT:    ;;#ASMSTART
17044; GFX900-NEXT:    ; def s[4:5]
17045; GFX900-NEXT:    ;;#ASMEND
17046; GFX900-NEXT:    ;;#ASMSTART
17047; GFX900-NEXT:    ; def s[6:7]
17048; GFX900-NEXT:    ;;#ASMEND
17049; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
17050; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17051; GFX900-NEXT:    ;;#ASMSTART
17052; GFX900-NEXT:    ; use s[8:9]
17053; GFX900-NEXT:    ;;#ASMEND
17054; GFX900-NEXT:    s_setpc_b64 s[30:31]
17055;
17056; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_2:
17057; GFX90A:       ; %bb.0:
17058; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17059; GFX90A-NEXT:    ;;#ASMSTART
17060; GFX90A-NEXT:    ; def s[4:5]
17061; GFX90A-NEXT:    ;;#ASMEND
17062; GFX90A-NEXT:    ;;#ASMSTART
17063; GFX90A-NEXT:    ; def s[6:7]
17064; GFX90A-NEXT:    ;;#ASMEND
17065; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
17066; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17067; GFX90A-NEXT:    ;;#ASMSTART
17068; GFX90A-NEXT:    ; use s[8:9]
17069; GFX90A-NEXT:    ;;#ASMEND
17070; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17071;
17072; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_2:
17073; GFX940:       ; %bb.0:
17074; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17075; GFX940-NEXT:    ;;#ASMSTART
17076; GFX940-NEXT:    ; def s[0:1]
17077; GFX940-NEXT:    ;;#ASMEND
17078; GFX940-NEXT:    ;;#ASMSTART
17079; GFX940-NEXT:    ; def s[2:3]
17080; GFX940-NEXT:    ;;#ASMEND
17081; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
17082; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
17083; GFX940-NEXT:    ;;#ASMSTART
17084; GFX940-NEXT:    ; use s[8:9]
17085; GFX940-NEXT:    ;;#ASMEND
17086; GFX940-NEXT:    s_setpc_b64 s[30:31]
17087  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17088  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17089  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 2>
17090  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17091  ret void
17092}
17093
; Scalar-register shuffle with mask <7,7,1,2>: result = { vec1[3], vec1[3], vec0[1], vec0[2] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). vec0[1] sits in the
; upper half of vec0's first dword, so the checks expect a right shift by 16
; before the low-half/low-half pack; the shift is expected between the two
; inline-asm defs.
17094define void @s_shuffle_v4i16_v4i16__7_7_1_2() {
17095; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_2:
17096; GFX900:       ; %bb.0:
17097; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17098; GFX900-NEXT:    ;;#ASMSTART
17099; GFX900-NEXT:    ; def s[4:5]
17100; GFX900-NEXT:    ;;#ASMEND
17101; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
17102; GFX900-NEXT:    ;;#ASMSTART
17103; GFX900-NEXT:    ; def s[6:7]
17104; GFX900-NEXT:    ;;#ASMEND
17105; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
17106; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17107; GFX900-NEXT:    ;;#ASMSTART
17108; GFX900-NEXT:    ; use s[8:9]
17109; GFX900-NEXT:    ;;#ASMEND
17110; GFX900-NEXT:    s_setpc_b64 s[30:31]
17111;
17112; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_2:
17113; GFX90A:       ; %bb.0:
17114; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17115; GFX90A-NEXT:    ;;#ASMSTART
17116; GFX90A-NEXT:    ; def s[4:5]
17117; GFX90A-NEXT:    ;;#ASMEND
17118; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
17119; GFX90A-NEXT:    ;;#ASMSTART
17120; GFX90A-NEXT:    ; def s[6:7]
17121; GFX90A-NEXT:    ;;#ASMEND
17122; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
17123; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17124; GFX90A-NEXT:    ;;#ASMSTART
17125; GFX90A-NEXT:    ; use s[8:9]
17126; GFX90A-NEXT:    ;;#ASMEND
17127; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17128;
17129; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_2:
17130; GFX940:       ; %bb.0:
17131; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17132; GFX940-NEXT:    ;;#ASMSTART
17133; GFX940-NEXT:    ; def s[0:1]
17134; GFX940-NEXT:    ;;#ASMEND
17135; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
17136; GFX940-NEXT:    ;;#ASMSTART
17137; GFX940-NEXT:    ; def s[2:3]
17138; GFX940-NEXT:    ;;#ASMEND
17139; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
17140; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
17141; GFX940-NEXT:    ;;#ASMSTART
17142; GFX940-NEXT:    ; use s[8:9]
17143; GFX940-NEXT:    ;;#ASMEND
17144; GFX940-NEXT:    s_setpc_b64 s[30:31]
17145  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17146  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17147  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 2>
17148  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17149  ret void
17150}
17151
; Scalar-register shuffle with mask <7,7,3,2>: result = { vec1[3], vec1[3], vec0[3], vec0[2] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). The high result dword
; is vec0's second dword with its two halves swapped; the checks expect this as
; a right shift by 16 followed by a low-half/low-half pack.
17152define void @s_shuffle_v4i16_v4i16__7_7_3_2() {
17153; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_2:
17154; GFX900:       ; %bb.0:
17155; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17156; GFX900-NEXT:    ;;#ASMSTART
17157; GFX900-NEXT:    ; def s[4:5]
17158; GFX900-NEXT:    ;;#ASMEND
17159; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
17160; GFX900-NEXT:    ;;#ASMSTART
17161; GFX900-NEXT:    ; def s[6:7]
17162; GFX900-NEXT:    ;;#ASMEND
17163; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
17164; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17165; GFX900-NEXT:    ;;#ASMSTART
17166; GFX900-NEXT:    ; use s[8:9]
17167; GFX900-NEXT:    ;;#ASMEND
17168; GFX900-NEXT:    s_setpc_b64 s[30:31]
17169;
17170; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_2:
17171; GFX90A:       ; %bb.0:
17172; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17173; GFX90A-NEXT:    ;;#ASMSTART
17174; GFX90A-NEXT:    ; def s[4:5]
17175; GFX90A-NEXT:    ;;#ASMEND
17176; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
17177; GFX90A-NEXT:    ;;#ASMSTART
17178; GFX90A-NEXT:    ; def s[6:7]
17179; GFX90A-NEXT:    ;;#ASMEND
17180; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
17181; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17182; GFX90A-NEXT:    ;;#ASMSTART
17183; GFX90A-NEXT:    ; use s[8:9]
17184; GFX90A-NEXT:    ;;#ASMEND
17185; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17186;
17187; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_2:
17188; GFX940:       ; %bb.0:
17189; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17190; GFX940-NEXT:    ;;#ASMSTART
17191; GFX940-NEXT:    ; def s[0:1]
17192; GFX940-NEXT:    ;;#ASMEND
17193; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
17194; GFX940-NEXT:    ;;#ASMSTART
17195; GFX940-NEXT:    ; def s[2:3]
17196; GFX940-NEXT:    ;;#ASMEND
17197; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
17198; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
17199; GFX940-NEXT:    ;;#ASMSTART
17200; GFX940-NEXT:    ; use s[8:9]
17201; GFX940-NEXT:    ;;#ASMEND
17202; GFX940-NEXT:    s_setpc_b64 s[30:31]
17203  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17204  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17205  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 2>
17206  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17207  ret void
17208}
17209
; Scalar-register shuffle with mask <7,7,4,2>: result = { vec1[3], vec1[3], vec1[0], vec0[2] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). The high result dword
; mixes both inputs: the checks expect a low-half/low-half pack of vec1's first
; dword with vec0's second dword.
17210define void @s_shuffle_v4i16_v4i16__7_7_4_2() {
17211; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_2:
17212; GFX900:       ; %bb.0:
17213; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17214; GFX900-NEXT:    ;;#ASMSTART
17215; GFX900-NEXT:    ; def s[4:5]
17216; GFX900-NEXT:    ;;#ASMEND
17217; GFX900-NEXT:    ;;#ASMSTART
17218; GFX900-NEXT:    ; def s[6:7]
17219; GFX900-NEXT:    ;;#ASMEND
17220; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s5
17221; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17222; GFX900-NEXT:    ;;#ASMSTART
17223; GFX900-NEXT:    ; use s[8:9]
17224; GFX900-NEXT:    ;;#ASMEND
17225; GFX900-NEXT:    s_setpc_b64 s[30:31]
17226;
17227; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_2:
17228; GFX90A:       ; %bb.0:
17229; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17230; GFX90A-NEXT:    ;;#ASMSTART
17231; GFX90A-NEXT:    ; def s[4:5]
17232; GFX90A-NEXT:    ;;#ASMEND
17233; GFX90A-NEXT:    ;;#ASMSTART
17234; GFX90A-NEXT:    ; def s[6:7]
17235; GFX90A-NEXT:    ;;#ASMEND
17236; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s5
17237; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17238; GFX90A-NEXT:    ;;#ASMSTART
17239; GFX90A-NEXT:    ; use s[8:9]
17240; GFX90A-NEXT:    ;;#ASMEND
17241; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17242;
17243; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_2:
17244; GFX940:       ; %bb.0:
17245; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17246; GFX940-NEXT:    ;;#ASMSTART
17247; GFX940-NEXT:    ; def s[0:1]
17248; GFX940-NEXT:    ;;#ASMEND
17249; GFX940-NEXT:    ;;#ASMSTART
17250; GFX940-NEXT:    ; def s[2:3]
17251; GFX940-NEXT:    ;;#ASMEND
17252; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s1
17253; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
17254; GFX940-NEXT:    ;;#ASMSTART
17255; GFX940-NEXT:    ; use s[8:9]
17256; GFX940-NEXT:    ;;#ASMEND
17257; GFX940-NEXT:    s_setpc_b64 s[30:31]
17258  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17259  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17260  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 2>
17261  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17262  ret void
17263}
17264
; Scalar-register shuffle with mask <7,7,5,2>: result = { vec1[3], vec1[3], vec1[1], vec0[2] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). vec1[1] is the upper
; half of vec1's first dword, so the checks expect a right shift by 16 and then
; a low-half/low-half pack with vec0's second dword.
17265define void @s_shuffle_v4i16_v4i16__7_7_5_2() {
17266; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_2:
17267; GFX900:       ; %bb.0:
17268; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17269; GFX900-NEXT:    ;;#ASMSTART
17270; GFX900-NEXT:    ; def s[4:5]
17271; GFX900-NEXT:    ;;#ASMEND
17272; GFX900-NEXT:    ;;#ASMSTART
17273; GFX900-NEXT:    ; def s[6:7]
17274; GFX900-NEXT:    ;;#ASMEND
17275; GFX900-NEXT:    s_lshr_b32 s4, s6, 16
17276; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
17277; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17278; GFX900-NEXT:    ;;#ASMSTART
17279; GFX900-NEXT:    ; use s[8:9]
17280; GFX900-NEXT:    ;;#ASMEND
17281; GFX900-NEXT:    s_setpc_b64 s[30:31]
17282;
17283; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_2:
17284; GFX90A:       ; %bb.0:
17285; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17286; GFX90A-NEXT:    ;;#ASMSTART
17287; GFX90A-NEXT:    ; def s[4:5]
17288; GFX90A-NEXT:    ;;#ASMEND
17289; GFX90A-NEXT:    ;;#ASMSTART
17290; GFX90A-NEXT:    ; def s[6:7]
17291; GFX90A-NEXT:    ;;#ASMEND
17292; GFX90A-NEXT:    s_lshr_b32 s4, s6, 16
17293; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
17294; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17295; GFX90A-NEXT:    ;;#ASMSTART
17296; GFX90A-NEXT:    ; use s[8:9]
17297; GFX90A-NEXT:    ;;#ASMEND
17298; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17299;
17300; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_2:
17301; GFX940:       ; %bb.0:
17302; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17303; GFX940-NEXT:    ;;#ASMSTART
17304; GFX940-NEXT:    ; def s[0:1]
17305; GFX940-NEXT:    ;;#ASMEND
17306; GFX940-NEXT:    ;;#ASMSTART
17307; GFX940-NEXT:    ; def s[2:3]
17308; GFX940-NEXT:    ;;#ASMEND
17309; GFX940-NEXT:    s_lshr_b32 s0, s2, 16
17310; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
17311; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
17312; GFX940-NEXT:    ;;#ASMSTART
17313; GFX940-NEXT:    ; use s[8:9]
17314; GFX940-NEXT:    ;;#ASMEND
17315; GFX940-NEXT:    s_setpc_b64 s[30:31]
17316  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17317  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17318  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 2>
17319  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17320  ret void
17321}
17322
; Scalar-register shuffle with mask <7,7,6,2>: result = { vec1[3], vec1[3], vec1[2], vec0[2] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). Both result dwords read
; vec1's second dword: the checks expect a low-half/low-half pack with vec0's
; second dword for the high result dword and a high-half/high-half pack for the
; low one.
17323define void @s_shuffle_v4i16_v4i16__7_7_6_2() {
17324; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_2:
17325; GFX900:       ; %bb.0:
17326; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17327; GFX900-NEXT:    ;;#ASMSTART
17328; GFX900-NEXT:    ; def s[4:5]
17329; GFX900-NEXT:    ;;#ASMEND
17330; GFX900-NEXT:    ;;#ASMSTART
17331; GFX900-NEXT:    ; def s[6:7]
17332; GFX900-NEXT:    ;;#ASMEND
17333; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s5
17334; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17335; GFX900-NEXT:    ;;#ASMSTART
17336; GFX900-NEXT:    ; use s[8:9]
17337; GFX900-NEXT:    ;;#ASMEND
17338; GFX900-NEXT:    s_setpc_b64 s[30:31]
17339;
17340; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_2:
17341; GFX90A:       ; %bb.0:
17342; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17343; GFX90A-NEXT:    ;;#ASMSTART
17344; GFX90A-NEXT:    ; def s[4:5]
17345; GFX90A-NEXT:    ;;#ASMEND
17346; GFX90A-NEXT:    ;;#ASMSTART
17347; GFX90A-NEXT:    ; def s[6:7]
17348; GFX90A-NEXT:    ;;#ASMEND
17349; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s5
17350; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
17351; GFX90A-NEXT:    ;;#ASMSTART
17352; GFX90A-NEXT:    ; use s[8:9]
17353; GFX90A-NEXT:    ;;#ASMEND
17354; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17355;
17356; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_2:
17357; GFX940:       ; %bb.0:
17358; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17359; GFX940-NEXT:    ;;#ASMSTART
17360; GFX940-NEXT:    ; def s[0:1]
17361; GFX940-NEXT:    ;;#ASMEND
17362; GFX940-NEXT:    ;;#ASMSTART
17363; GFX940-NEXT:    ; def s[2:3]
17364; GFX940-NEXT:    ;;#ASMEND
17365; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s1
17366; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
17367; GFX940-NEXT:    ;;#ASMSTART
17368; GFX940-NEXT:    ; use s[8:9]
17369; GFX940-NEXT:    ;;#ASMEND
17370; GFX940-NEXT:    s_setpc_b64 s[30:31]
17371  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17372  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17373  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 2>
17374  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17375  ret void
17376}
17377
; Single-input scalar-register shuffle with mask <poison,3,3,3>:
; result = { <any>, vec0[3], vec0[3], vec0[3] }. Because lane 0 is unconstrained,
; the checks expect the low result dword to be a plain copy of vec0's second
; dword (whose upper half is already vec0[3]) and the high result dword to be a
; high-half/high-half pack of that same dword.
17378define void @s_shuffle_v4i16_v4i16__u_3_3_3() {
17379; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_3_3_3:
17380; GFX900:       ; %bb.0:
17381; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17382; GFX900-NEXT:    ;;#ASMSTART
17383; GFX900-NEXT:    ; def s[4:5]
17384; GFX900-NEXT:    ;;#ASMEND
17385; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17386; GFX900-NEXT:    s_mov_b32 s8, s5
17387; GFX900-NEXT:    ;;#ASMSTART
17388; GFX900-NEXT:    ; use s[8:9]
17389; GFX900-NEXT:    ;;#ASMEND
17390; GFX900-NEXT:    s_setpc_b64 s[30:31]
17391;
17392; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_3_3_3:
17393; GFX90A:       ; %bb.0:
17394; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17395; GFX90A-NEXT:    ;;#ASMSTART
17396; GFX90A-NEXT:    ; def s[4:5]
17397; GFX90A-NEXT:    ;;#ASMEND
17398; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17399; GFX90A-NEXT:    s_mov_b32 s8, s5
17400; GFX90A-NEXT:    ;;#ASMSTART
17401; GFX90A-NEXT:    ; use s[8:9]
17402; GFX90A-NEXT:    ;;#ASMEND
17403; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17404;
17405; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_3_3_3:
17406; GFX940:       ; %bb.0:
17407; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17408; GFX940-NEXT:    ;;#ASMSTART
17409; GFX940-NEXT:    ; def s[0:1]
17410; GFX940-NEXT:    ;;#ASMEND
17411; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17412; GFX940-NEXT:    s_mov_b32 s8, s1
17413; GFX940-NEXT:    ;;#ASMSTART
17414; GFX940-NEXT:    ; use s[8:9]
17415; GFX940-NEXT:    ;;#ASMEND
17416; GFX940-NEXT:    s_setpc_b64 s[30:31]
17417  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17418  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
17419  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17420  ret void
17421}
17422
; Single-input scalar-register shuffle with mask <0,3,3,3>:
; result = { vec0[0], vec0[3], vec0[3], vec0[3] }. The checks expect the low
; result dword from a low-half/high-half pack across vec0's two dwords and the
; high result dword from a high-half/high-half pack of vec0's second dword.
17423define void @s_shuffle_v4i16_v4i16__0_3_3_3() {
17424; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_3_3_3:
17425; GFX900:       ; %bb.0:
17426; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17427; GFX900-NEXT:    ;;#ASMSTART
17428; GFX900-NEXT:    ; def s[4:5]
17429; GFX900-NEXT:    ;;#ASMEND
17430; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
17431; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17432; GFX900-NEXT:    ;;#ASMSTART
17433; GFX900-NEXT:    ; use s[8:9]
17434; GFX900-NEXT:    ;;#ASMEND
17435; GFX900-NEXT:    s_setpc_b64 s[30:31]
17436;
17437; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_3_3_3:
17438; GFX90A:       ; %bb.0:
17439; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17440; GFX90A-NEXT:    ;;#ASMSTART
17441; GFX90A-NEXT:    ; def s[4:5]
17442; GFX90A-NEXT:    ;;#ASMEND
17443; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
17444; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17445; GFX90A-NEXT:    ;;#ASMSTART
17446; GFX90A-NEXT:    ; use s[8:9]
17447; GFX90A-NEXT:    ;;#ASMEND
17448; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17449;
17450; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_3_3_3:
17451; GFX940:       ; %bb.0:
17452; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17453; GFX940-NEXT:    ;;#ASMSTART
17454; GFX940-NEXT:    ; def s[0:1]
17455; GFX940-NEXT:    ;;#ASMEND
17456; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s1
17457; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17458; GFX940-NEXT:    ;;#ASMSTART
17459; GFX940-NEXT:    ; use s[8:9]
17460; GFX940-NEXT:    ;;#ASMEND
17461; GFX940-NEXT:    s_setpc_b64 s[30:31]
17462  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17463  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
17464  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17465  ret void
17466}
17467
; Single-input scalar-register shuffle with mask <1,3,3,3>:
; result = { vec0[1], vec0[3], vec0[3], vec0[3] }. Every selected element is an
; upper half, so the checks expect two high-half/high-half packs and no shifts.
17468define void @s_shuffle_v4i16_v4i16__1_3_3_3() {
17469; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_3_3_3:
17470; GFX900:       ; %bb.0:
17471; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17472; GFX900-NEXT:    ;;#ASMSTART
17473; GFX900-NEXT:    ; def s[4:5]
17474; GFX900-NEXT:    ;;#ASMEND
17475; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
17476; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17477; GFX900-NEXT:    ;;#ASMSTART
17478; GFX900-NEXT:    ; use s[8:9]
17479; GFX900-NEXT:    ;;#ASMEND
17480; GFX900-NEXT:    s_setpc_b64 s[30:31]
17481;
17482; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_3_3_3:
17483; GFX90A:       ; %bb.0:
17484; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17485; GFX90A-NEXT:    ;;#ASMSTART
17486; GFX90A-NEXT:    ; def s[4:5]
17487; GFX90A-NEXT:    ;;#ASMEND
17488; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
17489; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17490; GFX90A-NEXT:    ;;#ASMSTART
17491; GFX90A-NEXT:    ; use s[8:9]
17492; GFX90A-NEXT:    ;;#ASMEND
17493; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17494;
17495; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_3_3_3:
17496; GFX940:       ; %bb.0:
17497; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17498; GFX940-NEXT:    ;;#ASMSTART
17499; GFX940-NEXT:    ; def s[0:1]
17500; GFX940-NEXT:    ;;#ASMEND
17501; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s1
17502; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17503; GFX940-NEXT:    ;;#ASMSTART
17504; GFX940-NEXT:    ; use s[8:9]
17505; GFX940-NEXT:    ;;#ASMEND
17506; GFX940-NEXT:    s_setpc_b64 s[30:31]
17507  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17508  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
17509  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17510  ret void
17511}
17512
; Single-input scalar-register shuffle with mask <2,3,3,3>:
; result = { vec0[2], vec0[3], vec0[3], vec0[3] }. Lanes 0-1 are exactly vec0's
; second dword, so the checks expect a plain register copy for the low result
; dword plus a high-half/high-half pack for the high result dword.
17513define void @s_shuffle_v4i16_v4i16__2_3_3_3() {
17514; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_3_3_3:
17515; GFX900:       ; %bb.0:
17516; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17517; GFX900-NEXT:    ;;#ASMSTART
17518; GFX900-NEXT:    ; def s[4:5]
17519; GFX900-NEXT:    ;;#ASMEND
17520; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17521; GFX900-NEXT:    s_mov_b32 s8, s5
17522; GFX900-NEXT:    ;;#ASMSTART
17523; GFX900-NEXT:    ; use s[8:9]
17524; GFX900-NEXT:    ;;#ASMEND
17525; GFX900-NEXT:    s_setpc_b64 s[30:31]
17526;
17527; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_3_3_3:
17528; GFX90A:       ; %bb.0:
17529; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17530; GFX90A-NEXT:    ;;#ASMSTART
17531; GFX90A-NEXT:    ; def s[4:5]
17532; GFX90A-NEXT:    ;;#ASMEND
17533; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17534; GFX90A-NEXT:    s_mov_b32 s8, s5
17535; GFX90A-NEXT:    ;;#ASMSTART
17536; GFX90A-NEXT:    ; use s[8:9]
17537; GFX90A-NEXT:    ;;#ASMEND
17538; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17539;
17540; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_3_3_3:
17541; GFX940:       ; %bb.0:
17542; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17543; GFX940-NEXT:    ;;#ASMSTART
17544; GFX940-NEXT:    ; def s[0:1]
17545; GFX940-NEXT:    ;;#ASMEND
17546; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17547; GFX940-NEXT:    s_mov_b32 s8, s1
17548; GFX940-NEXT:    ;;#ASMSTART
17549; GFX940-NEXT:    ; use s[8:9]
17550; GFX940-NEXT:    ;;#ASMEND
17551; GFX940-NEXT:    s_setpc_b64 s[30:31]
17552  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17553  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
17554  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17555  ret void
17556}
17557
; Single-input scalar-register splat, mask <3,3,3,3>: every result lane is vec0[3].
; The checks expect one high-half/high-half pack to form the splatted dword and
; a register copy to duplicate it into the second result dword.
17558define void @s_shuffle_v4i16_v4i16__3_3_3_3() {
17559; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_3_3_3:
17560; GFX900:       ; %bb.0:
17561; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17562; GFX900-NEXT:    ;;#ASMSTART
17563; GFX900-NEXT:    ; def s[4:5]
17564; GFX900-NEXT:    ;;#ASMEND
17565; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
17566; GFX900-NEXT:    s_mov_b32 s9, s8
17567; GFX900-NEXT:    ;;#ASMSTART
17568; GFX900-NEXT:    ; use s[8:9]
17569; GFX900-NEXT:    ;;#ASMEND
17570; GFX900-NEXT:    s_setpc_b64 s[30:31]
17571;
17572; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_3_3_3:
17573; GFX90A:       ; %bb.0:
17574; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17575; GFX90A-NEXT:    ;;#ASMSTART
17576; GFX90A-NEXT:    ; def s[4:5]
17577; GFX90A-NEXT:    ;;#ASMEND
17578; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
17579; GFX90A-NEXT:    s_mov_b32 s9, s8
17580; GFX90A-NEXT:    ;;#ASMSTART
17581; GFX90A-NEXT:    ; use s[8:9]
17582; GFX90A-NEXT:    ;;#ASMEND
17583; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17584;
17585; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_3_3_3:
17586; GFX940:       ; %bb.0:
17587; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17588; GFX940-NEXT:    ;;#ASMSTART
17589; GFX940-NEXT:    ; def s[0:1]
17590; GFX940-NEXT:    ;;#ASMEND
17591; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
17592; GFX940-NEXT:    s_mov_b32 s9, s8
17593; GFX940-NEXT:    ;;#ASMSTART
17594; GFX940-NEXT:    ; use s[8:9]
17595; GFX940-NEXT:    ;;#ASMEND
17596; GFX940-NEXT:    s_setpc_b64 s[30:31]
17597  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17598  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
17599  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17600  ret void
17601}
17602
; Scalar-register shuffle with mask <4,3,3,3> where the second operand is poison.
; Index 4 would select element 0 of that poison operand, so lane 0 is
; unconstrained; lanes 1-3 are vec0[3]. The checks expect a plain copy of
; vec0's second dword for the low result dword and a high-half/high-half pack
; for the high result dword. Only one inline-asm def is expected.
17603define void @s_shuffle_v4i16_v4i16__4_3_3_3() {
17604; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_3_3_3:
17605; GFX900:       ; %bb.0:
17606; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17607; GFX900-NEXT:    ;;#ASMSTART
17608; GFX900-NEXT:    ; def s[4:5]
17609; GFX900-NEXT:    ;;#ASMEND
17610; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17611; GFX900-NEXT:    s_mov_b32 s8, s5
17612; GFX900-NEXT:    ;;#ASMSTART
17613; GFX900-NEXT:    ; use s[8:9]
17614; GFX900-NEXT:    ;;#ASMEND
17615; GFX900-NEXT:    s_setpc_b64 s[30:31]
17616;
17617; GFX90A-LABEL: s_shuffle_v4i16_v4i16__4_3_3_3:
17618; GFX90A:       ; %bb.0:
17619; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17620; GFX90A-NEXT:    ;;#ASMSTART
17621; GFX90A-NEXT:    ; def s[4:5]
17622; GFX90A-NEXT:    ;;#ASMEND
17623; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17624; GFX90A-NEXT:    s_mov_b32 s8, s5
17625; GFX90A-NEXT:    ;;#ASMSTART
17626; GFX90A-NEXT:    ; use s[8:9]
17627; GFX90A-NEXT:    ;;#ASMEND
17628; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17629;
17630; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_3_3_3:
17631; GFX940:       ; %bb.0:
17632; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17633; GFX940-NEXT:    ;;#ASMSTART
17634; GFX940-NEXT:    ; def s[0:1]
17635; GFX940-NEXT:    ;;#ASMEND
17636; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17637; GFX940-NEXT:    s_mov_b32 s8, s1
17638; GFX940-NEXT:    ;;#ASMSTART
17639; GFX940-NEXT:    ; use s[8:9]
17640; GFX940-NEXT:    ;;#ASMEND
17641; GFX940-NEXT:    s_setpc_b64 s[30:31]
17642  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17643  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
17644  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17645  ret void
17646}
17647
; Scalar-register shuffle with mask <5,3,3,3>: result = { vec1[1], vec0[3], vec0[3], vec0[3] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). Every selected element
; is an upper half, so the checks expect two high-half/high-half packs: one
; mixing vec1's first dword with vec0's second dword, and one of vec0's second
; dword with itself.
17648define void @s_shuffle_v4i16_v4i16__5_3_3_3() {
17649; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_3_3_3:
17650; GFX900:       ; %bb.0:
17651; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17652; GFX900-NEXT:    ;;#ASMSTART
17653; GFX900-NEXT:    ; def s[4:5]
17654; GFX900-NEXT:    ;;#ASMEND
17655; GFX900-NEXT:    ;;#ASMSTART
17656; GFX900-NEXT:    ; def s[6:7]
17657; GFX900-NEXT:    ;;#ASMEND
17658; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s6, s5
17659; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17660; GFX900-NEXT:    ;;#ASMSTART
17661; GFX900-NEXT:    ; use s[8:9]
17662; GFX900-NEXT:    ;;#ASMEND
17663; GFX900-NEXT:    s_setpc_b64 s[30:31]
17664;
17665; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_3_3_3:
17666; GFX90A:       ; %bb.0:
17667; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17668; GFX90A-NEXT:    ;;#ASMSTART
17669; GFX90A-NEXT:    ; def s[4:5]
17670; GFX90A-NEXT:    ;;#ASMEND
17671; GFX90A-NEXT:    ;;#ASMSTART
17672; GFX90A-NEXT:    ; def s[6:7]
17673; GFX90A-NEXT:    ;;#ASMEND
17674; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s6, s5
17675; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17676; GFX90A-NEXT:    ;;#ASMSTART
17677; GFX90A-NEXT:    ; use s[8:9]
17678; GFX90A-NEXT:    ;;#ASMEND
17679; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17680;
17681; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_3_3_3:
17682; GFX940:       ; %bb.0:
17683; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17684; GFX940-NEXT:    ;;#ASMSTART
17685; GFX940-NEXT:    ; def s[0:1]
17686; GFX940-NEXT:    ;;#ASMEND
17687; GFX940-NEXT:    ;;#ASMSTART
17688; GFX940-NEXT:    ; def s[2:3]
17689; GFX940-NEXT:    ;;#ASMEND
17690; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s2, s1
17691; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17692; GFX940-NEXT:    ;;#ASMSTART
17693; GFX940-NEXT:    ; use s[8:9]
17694; GFX940-NEXT:    ;;#ASMEND
17695; GFX940-NEXT:    s_setpc_b64 s[30:31]
17696  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17697  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17698  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
17699  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17700  ret void
17701}
17702
; Scalar-register shuffle with mask <6,3,3,3>: result = { vec1[2], vec0[3], vec0[3], vec0[3] }
; (mask indices 0-3 select from %vec0, 4-7 from %vec1). The checks expect the
; low result dword from a low-half/high-half pack of vec1's second dword with
; vec0's second dword, and the high result dword from a high-half/high-half
; pack of vec0's second dword.
17703define void @s_shuffle_v4i16_v4i16__6_3_3_3() {
17704; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_3_3_3:
17705; GFX900:       ; %bb.0:
17706; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17707; GFX900-NEXT:    ;;#ASMSTART
17708; GFX900-NEXT:    ; def s[4:5]
17709; GFX900-NEXT:    ;;#ASMEND
17710; GFX900-NEXT:    ;;#ASMSTART
17711; GFX900-NEXT:    ; def s[6:7]
17712; GFX900-NEXT:    ;;#ASMEND
17713; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s5
17714; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17715; GFX900-NEXT:    ;;#ASMSTART
17716; GFX900-NEXT:    ; use s[8:9]
17717; GFX900-NEXT:    ;;#ASMEND
17718; GFX900-NEXT:    s_setpc_b64 s[30:31]
17719;
17720; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_3_3_3:
17721; GFX90A:       ; %bb.0:
17722; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17723; GFX90A-NEXT:    ;;#ASMSTART
17724; GFX90A-NEXT:    ; def s[4:5]
17725; GFX90A-NEXT:    ;;#ASMEND
17726; GFX90A-NEXT:    ;;#ASMSTART
17727; GFX90A-NEXT:    ; def s[6:7]
17728; GFX90A-NEXT:    ;;#ASMEND
17729; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s5
17730; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17731; GFX90A-NEXT:    ;;#ASMSTART
17732; GFX90A-NEXT:    ; use s[8:9]
17733; GFX90A-NEXT:    ;;#ASMEND
17734; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17735;
17736; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_3_3_3:
17737; GFX940:       ; %bb.0:
17738; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17739; GFX940-NEXT:    ;;#ASMSTART
17740; GFX940-NEXT:    ; def s[0:1]
17741; GFX940-NEXT:    ;;#ASMEND
17742; GFX940-NEXT:    ;;#ASMSTART
17743; GFX940-NEXT:    ; def s[2:3]
17744; GFX940-NEXT:    ;;#ASMEND
17745; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s1
17746; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17747; GFX940-NEXT:    ;;#ASMSTART
17748; GFX940-NEXT:    ; use s[8:9]
17749; GFX940-NEXT:    ;;#ASMEND
17750; GFX940-NEXT:    s_setpc_b64 s[30:31]
17751  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17752  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17753  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 3, i32 3, i32 3>
17754  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17755  ret void
17756}
17757
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 3, 3, 3>: lane 0 = %vec1[3]; lanes 1-3 = %vec0[3].
; Expected lowering: two s_pack_hh instructions, one per result dword, each
; taking the high halves of the inputs' upper dwords.
17758define void @s_shuffle_v4i16_v4i16__7_3_3_3() {
17759; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_3_3:
17760; GFX900:       ; %bb.0:
17761; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17762; GFX900-NEXT:    ;;#ASMSTART
17763; GFX900-NEXT:    ; def s[4:5]
17764; GFX900-NEXT:    ;;#ASMEND
17765; GFX900-NEXT:    ;;#ASMSTART
17766; GFX900-NEXT:    ; def s[6:7]
17767; GFX900-NEXT:    ;;#ASMEND
17768; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
17769; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17770; GFX900-NEXT:    ;;#ASMSTART
17771; GFX900-NEXT:    ; use s[8:9]
17772; GFX900-NEXT:    ;;#ASMEND
17773; GFX900-NEXT:    s_setpc_b64 s[30:31]
17774;
17775; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_3_3:
17776; GFX90A:       ; %bb.0:
17777; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17778; GFX90A-NEXT:    ;;#ASMSTART
17779; GFX90A-NEXT:    ; def s[4:5]
17780; GFX90A-NEXT:    ;;#ASMEND
17781; GFX90A-NEXT:    ;;#ASMSTART
17782; GFX90A-NEXT:    ; def s[6:7]
17783; GFX90A-NEXT:    ;;#ASMEND
17784; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
17785; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17786; GFX90A-NEXT:    ;;#ASMSTART
17787; GFX90A-NEXT:    ; use s[8:9]
17788; GFX90A-NEXT:    ;;#ASMEND
17789; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17790;
17791; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_3_3:
17792; GFX940:       ; %bb.0:
17793; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17794; GFX940-NEXT:    ;;#ASMSTART
17795; GFX940-NEXT:    ; def s[0:1]
17796; GFX940-NEXT:    ;;#ASMEND
17797; GFX940-NEXT:    ;;#ASMSTART
17798; GFX940-NEXT:    ; def s[2:3]
17799; GFX940-NEXT:    ;;#ASMEND
17800; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
17801; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17802; GFX940-NEXT:    ;;#ASMSTART
17803; GFX940-NEXT:    ; use s[8:9]
17804; GFX940-NEXT:    ;;#ASMEND
17805; GFX940-NEXT:    s_setpc_b64 s[30:31]
17806  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17807  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17808  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 3, i32 3>
17809  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17810  ret void
17811}
17812
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, poison, 3, 3>: lane 0 = %vec1[3]; lane 1 is unconstrained;
; lanes 2-3 = %vec0[3].
; Expected lowering: because lane 1 is poison, the low result dword needs no
; pack -- a 16-bit right shift of %vec1's upper dword is enough. The high
; result dword is still built with s_pack_hh.
17813define void @s_shuffle_v4i16_v4i16__7_u_3_3() {
17814; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_3_3:
17815; GFX900:       ; %bb.0:
17816; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17817; GFX900-NEXT:    ;;#ASMSTART
17818; GFX900-NEXT:    ; def s[4:5]
17819; GFX900-NEXT:    ;;#ASMEND
17820; GFX900-NEXT:    ;;#ASMSTART
17821; GFX900-NEXT:    ; def s[6:7]
17822; GFX900-NEXT:    ;;#ASMEND
17823; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
17824; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17825; GFX900-NEXT:    ;;#ASMSTART
17826; GFX900-NEXT:    ; use s[8:9]
17827; GFX900-NEXT:    ;;#ASMEND
17828; GFX900-NEXT:    s_setpc_b64 s[30:31]
17829;
17830; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_3_3:
17831; GFX90A:       ; %bb.0:
17832; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17833; GFX90A-NEXT:    ;;#ASMSTART
17834; GFX90A-NEXT:    ; def s[4:5]
17835; GFX90A-NEXT:    ;;#ASMEND
17836; GFX90A-NEXT:    ;;#ASMSTART
17837; GFX90A-NEXT:    ; def s[6:7]
17838; GFX90A-NEXT:    ;;#ASMEND
17839; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
17840; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17841; GFX90A-NEXT:    ;;#ASMSTART
17842; GFX90A-NEXT:    ; use s[8:9]
17843; GFX90A-NEXT:    ;;#ASMEND
17844; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17845;
17846; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_3_3:
17847; GFX940:       ; %bb.0:
17848; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17849; GFX940-NEXT:    ;;#ASMSTART
17850; GFX940-NEXT:    ; def s[0:1]
17851; GFX940-NEXT:    ;;#ASMEND
17852; GFX940-NEXT:    ;;#ASMSTART
17853; GFX940-NEXT:    ; def s[2:3]
17854; GFX940-NEXT:    ;;#ASMEND
17855; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
17856; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17857; GFX940-NEXT:    ;;#ASMSTART
17858; GFX940-NEXT:    ; use s[8:9]
17859; GFX940-NEXT:    ;;#ASMEND
17860; GFX940-NEXT:    s_setpc_b64 s[30:31]
17861  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17862  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17863  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 3, i32 3>
17864  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17865  ret void
17866}
17867
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 0, 3, 3>: lane 0 = %vec1[3]; lane 1 = %vec0[0];
; lanes 2-3 = %vec0[3].
; Expected lowering: s_lshr_b32 moves %vec1[3] into the low half of a scratch
; SGPR, then s_pack_ll joins it with the low half of %vec0's lower dword; the
; high result dword is built with s_pack_hh.
; Note: the checks expect the shift to sit between the two asm defs, with the
; def feeding the shift (the %vec1 role) emitted first.
17868define void @s_shuffle_v4i16_v4i16__7_0_3_3() {
17869; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_3_3:
17870; GFX900:       ; %bb.0:
17871; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17872; GFX900-NEXT:    ;;#ASMSTART
17873; GFX900-NEXT:    ; def s[6:7]
17874; GFX900-NEXT:    ;;#ASMEND
17875; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
17876; GFX900-NEXT:    ;;#ASMSTART
17877; GFX900-NEXT:    ; def s[4:5]
17878; GFX900-NEXT:    ;;#ASMEND
17879; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
17880; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17881; GFX900-NEXT:    ;;#ASMSTART
17882; GFX900-NEXT:    ; use s[8:9]
17883; GFX900-NEXT:    ;;#ASMEND
17884; GFX900-NEXT:    s_setpc_b64 s[30:31]
17885;
17886; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_3_3:
17887; GFX90A:       ; %bb.0:
17888; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17889; GFX90A-NEXT:    ;;#ASMSTART
17890; GFX90A-NEXT:    ; def s[6:7]
17891; GFX90A-NEXT:    ;;#ASMEND
17892; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
17893; GFX90A-NEXT:    ;;#ASMSTART
17894; GFX90A-NEXT:    ; def s[4:5]
17895; GFX90A-NEXT:    ;;#ASMEND
17896; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
17897; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17898; GFX90A-NEXT:    ;;#ASMSTART
17899; GFX90A-NEXT:    ; use s[8:9]
17900; GFX90A-NEXT:    ;;#ASMEND
17901; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17902;
17903; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_3_3:
17904; GFX940:       ; %bb.0:
17905; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17906; GFX940-NEXT:    ;;#ASMSTART
17907; GFX940-NEXT:    ; def s[2:3]
17908; GFX940-NEXT:    ;;#ASMEND
17909; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
17910; GFX940-NEXT:    ;;#ASMSTART
17911; GFX940-NEXT:    ; def s[0:1]
17912; GFX940-NEXT:    ;;#ASMEND
17913; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s0
17914; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17915; GFX940-NEXT:    ;;#ASMSTART
17916; GFX940-NEXT:    ; use s[8:9]
17917; GFX940-NEXT:    ;;#ASMEND
17918; GFX940-NEXT:    s_setpc_b64 s[30:31]
17919  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17920  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17921  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 3, i32 3>
17922  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17923  ret void
17924}
17925
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 1, 3, 3>: lane 0 = %vec1[3]; lane 1 = %vec0[1];
; lanes 2-3 = %vec0[3].
; Expected lowering: both selected low-dword elements already live in the high
; half of their source dwords, so a single s_pack_hh forms each result dword.
17926define void @s_shuffle_v4i16_v4i16__7_1_3_3() {
17927; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_3_3:
17928; GFX900:       ; %bb.0:
17929; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17930; GFX900-NEXT:    ;;#ASMSTART
17931; GFX900-NEXT:    ; def s[4:5]
17932; GFX900-NEXT:    ;;#ASMEND
17933; GFX900-NEXT:    ;;#ASMSTART
17934; GFX900-NEXT:    ; def s[6:7]
17935; GFX900-NEXT:    ;;#ASMEND
17936; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
17937; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17938; GFX900-NEXT:    ;;#ASMSTART
17939; GFX900-NEXT:    ; use s[8:9]
17940; GFX900-NEXT:    ;;#ASMEND
17941; GFX900-NEXT:    s_setpc_b64 s[30:31]
17942;
17943; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_3_3:
17944; GFX90A:       ; %bb.0:
17945; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17946; GFX90A-NEXT:    ;;#ASMSTART
17947; GFX90A-NEXT:    ; def s[4:5]
17948; GFX90A-NEXT:    ;;#ASMEND
17949; GFX90A-NEXT:    ;;#ASMSTART
17950; GFX90A-NEXT:    ; def s[6:7]
17951; GFX90A-NEXT:    ;;#ASMEND
17952; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
17953; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17954; GFX90A-NEXT:    ;;#ASMSTART
17955; GFX90A-NEXT:    ; use s[8:9]
17956; GFX90A-NEXT:    ;;#ASMEND
17957; GFX90A-NEXT:    s_setpc_b64 s[30:31]
17958;
17959; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_3_3:
17960; GFX940:       ; %bb.0:
17961; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17962; GFX940-NEXT:    ;;#ASMSTART
17963; GFX940-NEXT:    ; def s[0:1]
17964; GFX940-NEXT:    ;;#ASMEND
17965; GFX940-NEXT:    ;;#ASMSTART
17966; GFX940-NEXT:    ; def s[2:3]
17967; GFX940-NEXT:    ;;#ASMEND
17968; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
17969; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
17970; GFX940-NEXT:    ;;#ASMSTART
17971; GFX940-NEXT:    ; use s[8:9]
17972; GFX940-NEXT:    ;;#ASMEND
17973; GFX940-NEXT:    s_setpc_b64 s[30:31]
17974  %vec0 = call <4 x i16> asm "; def $0", "=s"()
17975  %vec1 = call <4 x i16> asm "; def $0", "=s"()
17976  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 3, i32 3>
17977  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
17978  ret void
17979}
17980
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 2, 3, 3>: lane 0 = %vec1[3]; lane 1 = %vec0[2];
; lanes 2-3 = %vec0[3].
; Expected lowering: s_lshr_b32 moves %vec1[3] into the low half of a scratch
; SGPR, then s_pack_ll joins it with the low half of %vec0's upper dword; the
; high result dword is built with s_pack_hh.
17981define void @s_shuffle_v4i16_v4i16__7_2_3_3() {
17982; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_3_3:
17983; GFX900:       ; %bb.0:
17984; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17985; GFX900-NEXT:    ;;#ASMSTART
17986; GFX900-NEXT:    ; def s[4:5]
17987; GFX900-NEXT:    ;;#ASMEND
17988; GFX900-NEXT:    ;;#ASMSTART
17989; GFX900-NEXT:    ; def s[6:7]
17990; GFX900-NEXT:    ;;#ASMEND
17991; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
17992; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
17993; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
17994; GFX900-NEXT:    ;;#ASMSTART
17995; GFX900-NEXT:    ; use s[8:9]
17996; GFX900-NEXT:    ;;#ASMEND
17997; GFX900-NEXT:    s_setpc_b64 s[30:31]
17998;
17999; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_3_3:
18000; GFX90A:       ; %bb.0:
18001; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18002; GFX90A-NEXT:    ;;#ASMSTART
18003; GFX90A-NEXT:    ; def s[4:5]
18004; GFX90A-NEXT:    ;;#ASMEND
18005; GFX90A-NEXT:    ;;#ASMSTART
18006; GFX90A-NEXT:    ; def s[6:7]
18007; GFX90A-NEXT:    ;;#ASMEND
18008; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
18009; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
18010; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
18011; GFX90A-NEXT:    ;;#ASMSTART
18012; GFX90A-NEXT:    ; use s[8:9]
18013; GFX90A-NEXT:    ;;#ASMEND
18014; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18015;
18016; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_3_3:
18017; GFX940:       ; %bb.0:
18018; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18019; GFX940-NEXT:    ;;#ASMSTART
18020; GFX940-NEXT:    ; def s[0:1]
18021; GFX940-NEXT:    ;;#ASMEND
18022; GFX940-NEXT:    ;;#ASMSTART
18023; GFX940-NEXT:    ; def s[2:3]
18024; GFX940-NEXT:    ;;#ASMEND
18025; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
18026; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
18027; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
18028; GFX940-NEXT:    ;;#ASMSTART
18029; GFX940-NEXT:    ; use s[8:9]
18030; GFX940-NEXT:    ;;#ASMEND
18031; GFX940-NEXT:    s_setpc_b64 s[30:31]
18032  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18033  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18034  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 3, i32 3>
18035  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18036  ret void
18037}
18038
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 4, 3, 3>: lane 0 = %vec1[3]; lane 1 = %vec1[0];
; lanes 2-3 = %vec0[3].
; Expected lowering: s_lshr_b32 moves %vec1[3] into the low half of a scratch
; SGPR, then s_pack_ll joins it with the low half of %vec1's lower dword; the
; high result dword is built with s_pack_hh.
18039define void @s_shuffle_v4i16_v4i16__7_4_3_3() {
18040; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_3_3:
18041; GFX900:       ; %bb.0:
18042; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18043; GFX900-NEXT:    ;;#ASMSTART
18044; GFX900-NEXT:    ; def s[4:5]
18045; GFX900-NEXT:    ;;#ASMEND
18046; GFX900-NEXT:    ;;#ASMSTART
18047; GFX900-NEXT:    ; def s[6:7]
18048; GFX900-NEXT:    ;;#ASMEND
18049; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
18050; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s6
18051; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
18052; GFX900-NEXT:    ;;#ASMSTART
18053; GFX900-NEXT:    ; use s[8:9]
18054; GFX900-NEXT:    ;;#ASMEND
18055; GFX900-NEXT:    s_setpc_b64 s[30:31]
18056;
18057; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_3_3:
18058; GFX90A:       ; %bb.0:
18059; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18060; GFX90A-NEXT:    ;;#ASMSTART
18061; GFX90A-NEXT:    ; def s[4:5]
18062; GFX90A-NEXT:    ;;#ASMEND
18063; GFX90A-NEXT:    ;;#ASMSTART
18064; GFX90A-NEXT:    ; def s[6:7]
18065; GFX90A-NEXT:    ;;#ASMEND
18066; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
18067; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s6
18068; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
18069; GFX90A-NEXT:    ;;#ASMSTART
18070; GFX90A-NEXT:    ; use s[8:9]
18071; GFX90A-NEXT:    ;;#ASMEND
18072; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18073;
18074; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_3_3:
18075; GFX940:       ; %bb.0:
18076; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18077; GFX940-NEXT:    ;;#ASMSTART
18078; GFX940-NEXT:    ; def s[0:1]
18079; GFX940-NEXT:    ;;#ASMEND
18080; GFX940-NEXT:    ;;#ASMSTART
18081; GFX940-NEXT:    ; def s[2:3]
18082; GFX940-NEXT:    ;;#ASMEND
18083; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
18084; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s2
18085; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
18086; GFX940-NEXT:    ;;#ASMSTART
18087; GFX940-NEXT:    ; use s[8:9]
18088; GFX940-NEXT:    ;;#ASMEND
18089; GFX940-NEXT:    s_setpc_b64 s[30:31]
18090  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18091  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18092  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 3, i32 3>
18093  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18094  ret void
18095}
18096
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 5, 3, 3>: lane 0 = %vec1[3]; lane 1 = %vec1[1];
; lanes 2-3 = %vec0[3].
; Expected lowering: the low result dword takes the high halves of both %vec1
; dwords with one s_pack_hh; the high result dword is a second s_pack_hh on
; %vec0's upper dword.
18097define void @s_shuffle_v4i16_v4i16__7_5_3_3() {
18098; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_3_3:
18099; GFX900:       ; %bb.0:
18100; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18101; GFX900-NEXT:    ;;#ASMSTART
18102; GFX900-NEXT:    ; def s[4:5]
18103; GFX900-NEXT:    ;;#ASMEND
18104; GFX900-NEXT:    ;;#ASMSTART
18105; GFX900-NEXT:    ; def s[6:7]
18106; GFX900-NEXT:    ;;#ASMEND
18107; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
18108; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
18109; GFX900-NEXT:    ;;#ASMSTART
18110; GFX900-NEXT:    ; use s[8:9]
18111; GFX900-NEXT:    ;;#ASMEND
18112; GFX900-NEXT:    s_setpc_b64 s[30:31]
18113;
18114; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_3_3:
18115; GFX90A:       ; %bb.0:
18116; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18117; GFX90A-NEXT:    ;;#ASMSTART
18118; GFX90A-NEXT:    ; def s[4:5]
18119; GFX90A-NEXT:    ;;#ASMEND
18120; GFX90A-NEXT:    ;;#ASMSTART
18121; GFX90A-NEXT:    ; def s[6:7]
18122; GFX90A-NEXT:    ;;#ASMEND
18123; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
18124; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
18125; GFX90A-NEXT:    ;;#ASMSTART
18126; GFX90A-NEXT:    ; use s[8:9]
18127; GFX90A-NEXT:    ;;#ASMEND
18128; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18129;
18130; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_3_3:
18131; GFX940:       ; %bb.0:
18132; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18133; GFX940-NEXT:    ;;#ASMSTART
18134; GFX940-NEXT:    ; def s[0:1]
18135; GFX940-NEXT:    ;;#ASMEND
18136; GFX940-NEXT:    ;;#ASMSTART
18137; GFX940-NEXT:    ; def s[2:3]
18138; GFX940-NEXT:    ;;#ASMEND
18139; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s2
18140; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
18141; GFX940-NEXT:    ;;#ASMSTART
18142; GFX940-NEXT:    ; use s[8:9]
18143; GFX940-NEXT:    ;;#ASMEND
18144; GFX940-NEXT:    s_setpc_b64 s[30:31]
18145  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18146  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18147  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 3, i32 3>
18148  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18149  ret void
18150}
18151
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 6, 3, 3>: lane 0 = %vec1[3]; lane 1 = %vec1[2] (the two halves of
; %vec1's upper dword, swapped); lanes 2-3 = %vec0[3].
; Expected lowering: the half-swap is done as s_lshr_b32 into a scratch SGPR
; followed by s_pack_ll with the unshifted dword; the high result dword is
; built with s_pack_hh.
18152define void @s_shuffle_v4i16_v4i16__7_6_3_3() {
18153; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_3_3:
18154; GFX900:       ; %bb.0:
18155; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18156; GFX900-NEXT:    ;;#ASMSTART
18157; GFX900-NEXT:    ; def s[4:5]
18158; GFX900-NEXT:    ;;#ASMEND
18159; GFX900-NEXT:    ;;#ASMSTART
18160; GFX900-NEXT:    ; def s[6:7]
18161; GFX900-NEXT:    ;;#ASMEND
18162; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
18163; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
18164; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
18165; GFX900-NEXT:    ;;#ASMSTART
18166; GFX900-NEXT:    ; use s[8:9]
18167; GFX900-NEXT:    ;;#ASMEND
18168; GFX900-NEXT:    s_setpc_b64 s[30:31]
18169;
18170; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_3_3:
18171; GFX90A:       ; %bb.0:
18172; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18173; GFX90A-NEXT:    ;;#ASMSTART
18174; GFX90A-NEXT:    ; def s[4:5]
18175; GFX90A-NEXT:    ;;#ASMEND
18176; GFX90A-NEXT:    ;;#ASMSTART
18177; GFX90A-NEXT:    ; def s[6:7]
18178; GFX90A-NEXT:    ;;#ASMEND
18179; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
18180; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
18181; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
18182; GFX90A-NEXT:    ;;#ASMSTART
18183; GFX90A-NEXT:    ; use s[8:9]
18184; GFX90A-NEXT:    ;;#ASMEND
18185; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18186;
18187; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_3_3:
18188; GFX940:       ; %bb.0:
18189; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18190; GFX940-NEXT:    ;;#ASMSTART
18191; GFX940-NEXT:    ; def s[0:1]
18192; GFX940-NEXT:    ;;#ASMEND
18193; GFX940-NEXT:    ;;#ASMSTART
18194; GFX940-NEXT:    ; def s[2:3]
18195; GFX940-NEXT:    ;;#ASMEND
18196; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
18197; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
18198; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
18199; GFX940-NEXT:    ;;#ASMSTART
18200; GFX940-NEXT:    ; use s[8:9]
18201; GFX940-NEXT:    ;;#ASMEND
18202; GFX940-NEXT:    s_setpc_b64 s[30:31]
18203  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18204  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18205  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 3, i32 3>
18206  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18207  ret void
18208}
18209
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 7, 3, 3>: lanes 0-1 = %vec1[3]; lanes 2-3 = %vec0[3].
; Expected lowering: each result dword is a splat of the high half of one
; input's upper dword, formed with s_pack_hh. The checks expect the high
; result dword (s9) to be written before the low one (s8).
18210define void @s_shuffle_v4i16_v4i16__7_7_3_3() {
18211; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_3:
18212; GFX900:       ; %bb.0:
18213; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18214; GFX900-NEXT:    ;;#ASMSTART
18215; GFX900-NEXT:    ; def s[4:5]
18216; GFX900-NEXT:    ;;#ASMEND
18217; GFX900-NEXT:    ;;#ASMSTART
18218; GFX900-NEXT:    ; def s[6:7]
18219; GFX900-NEXT:    ;;#ASMEND
18220; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
18221; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18222; GFX900-NEXT:    ;;#ASMSTART
18223; GFX900-NEXT:    ; use s[8:9]
18224; GFX900-NEXT:    ;;#ASMEND
18225; GFX900-NEXT:    s_setpc_b64 s[30:31]
18226;
18227; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_3:
18228; GFX90A:       ; %bb.0:
18229; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18230; GFX90A-NEXT:    ;;#ASMSTART
18231; GFX90A-NEXT:    ; def s[4:5]
18232; GFX90A-NEXT:    ;;#ASMEND
18233; GFX90A-NEXT:    ;;#ASMSTART
18234; GFX90A-NEXT:    ; def s[6:7]
18235; GFX90A-NEXT:    ;;#ASMEND
18236; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
18237; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18238; GFX90A-NEXT:    ;;#ASMSTART
18239; GFX90A-NEXT:    ; use s[8:9]
18240; GFX90A-NEXT:    ;;#ASMEND
18241; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18242;
18243; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_3:
18244; GFX940:       ; %bb.0:
18245; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18246; GFX940-NEXT:    ;;#ASMSTART
18247; GFX940-NEXT:    ; def s[0:1]
18248; GFX940-NEXT:    ;;#ASMEND
18249; GFX940-NEXT:    ;;#ASMSTART
18250; GFX940-NEXT:    ; def s[2:3]
18251; GFX940-NEXT:    ;;#ASMEND
18252; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
18253; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
18254; GFX940-NEXT:    ;;#ASMSTART
18255; GFX940-NEXT:    ; use s[8:9]
18256; GFX940-NEXT:    ;;#ASMEND
18257; GFX940-NEXT:    s_setpc_b64 s[30:31]
18258  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18259  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18260  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 3>
18261  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18262  ret void
18263}
18264
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 7, poison, 3>: lanes 0-1 = %vec1[3]; lane 2 is unconstrained;
; lane 3 = %vec0[3].
; Expected lowering: one asm def is allocated directly into s[8:9] and its
; upper dword is left untouched as the high result dword (so it acts as %vec0,
; and the poison lane simply keeps %vec0[2]). Only the low result dword is
; rewritten, with a single s_pack_hh splat of %vec1[3].
18265define void @s_shuffle_v4i16_v4i16__7_7_u_3() {
18266; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_3:
18267; GFX900:       ; %bb.0:
18268; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18269; GFX900-NEXT:    ;;#ASMSTART
18270; GFX900-NEXT:    ; def s[8:9]
18271; GFX900-NEXT:    ;;#ASMEND
18272; GFX900-NEXT:    ;;#ASMSTART
18273; GFX900-NEXT:    ; def s[4:5]
18274; GFX900-NEXT:    ;;#ASMEND
18275; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
18276; GFX900-NEXT:    ;;#ASMSTART
18277; GFX900-NEXT:    ; use s[8:9]
18278; GFX900-NEXT:    ;;#ASMEND
18279; GFX900-NEXT:    s_setpc_b64 s[30:31]
18280;
18281; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_3:
18282; GFX90A:       ; %bb.0:
18283; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18284; GFX90A-NEXT:    ;;#ASMSTART
18285; GFX90A-NEXT:    ; def s[8:9]
18286; GFX90A-NEXT:    ;;#ASMEND
18287; GFX90A-NEXT:    ;;#ASMSTART
18288; GFX90A-NEXT:    ; def s[4:5]
18289; GFX90A-NEXT:    ;;#ASMEND
18290; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
18291; GFX90A-NEXT:    ;;#ASMSTART
18292; GFX90A-NEXT:    ; use s[8:9]
18293; GFX90A-NEXT:    ;;#ASMEND
18294; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18295;
18296; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_3:
18297; GFX940:       ; %bb.0:
18298; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18299; GFX940-NEXT:    ;;#ASMSTART
18300; GFX940-NEXT:    ; def s[8:9]
18301; GFX940-NEXT:    ;;#ASMEND
18302; GFX940-NEXT:    ;;#ASMSTART
18303; GFX940-NEXT:    ; def s[0:1]
18304; GFX940-NEXT:    ;;#ASMEND
18305; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
18306; GFX940-NEXT:    ;;#ASMSTART
18307; GFX940-NEXT:    ; use s[8:9]
18308; GFX940-NEXT:    ;;#ASMEND
18309; GFX940-NEXT:    s_setpc_b64 s[30:31]
18310  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18311  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18312  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 3>
18313  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18314  ret void
18315}
18316
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 7, 0, 3>: lanes 0-1 = %vec1[3]; lane 2 = %vec0[0];
; lane 3 = %vec0[3].
; Expected lowering: the high result dword pairs the low half of %vec0's lower
; dword with the high half of its upper dword (s_pack_lh); the low result
; dword is an s_pack_hh splat of %vec1[3].
18317define void @s_shuffle_v4i16_v4i16__7_7_0_3() {
18318; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_3:
18319; GFX900:       ; %bb.0:
18320; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18321; GFX900-NEXT:    ;;#ASMSTART
18322; GFX900-NEXT:    ; def s[4:5]
18323; GFX900-NEXT:    ;;#ASMEND
18324; GFX900-NEXT:    ;;#ASMSTART
18325; GFX900-NEXT:    ; def s[6:7]
18326; GFX900-NEXT:    ;;#ASMEND
18327; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s4, s5
18328; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18329; GFX900-NEXT:    ;;#ASMSTART
18330; GFX900-NEXT:    ; use s[8:9]
18331; GFX900-NEXT:    ;;#ASMEND
18332; GFX900-NEXT:    s_setpc_b64 s[30:31]
18333;
18334; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_3:
18335; GFX90A:       ; %bb.0:
18336; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18337; GFX90A-NEXT:    ;;#ASMSTART
18338; GFX90A-NEXT:    ; def s[4:5]
18339; GFX90A-NEXT:    ;;#ASMEND
18340; GFX90A-NEXT:    ;;#ASMSTART
18341; GFX90A-NEXT:    ; def s[6:7]
18342; GFX90A-NEXT:    ;;#ASMEND
18343; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s4, s5
18344; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18345; GFX90A-NEXT:    ;;#ASMSTART
18346; GFX90A-NEXT:    ; use s[8:9]
18347; GFX90A-NEXT:    ;;#ASMEND
18348; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18349;
18350; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_3:
18351; GFX940:       ; %bb.0:
18352; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18353; GFX940-NEXT:    ;;#ASMSTART
18354; GFX940-NEXT:    ; def s[0:1]
18355; GFX940-NEXT:    ;;#ASMEND
18356; GFX940-NEXT:    ;;#ASMSTART
18357; GFX940-NEXT:    ; def s[2:3]
18358; GFX940-NEXT:    ;;#ASMEND
18359; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s0, s1
18360; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
18361; GFX940-NEXT:    ;;#ASMSTART
18362; GFX940-NEXT:    ; use s[8:9]
18363; GFX940-NEXT:    ;;#ASMEND
18364; GFX940-NEXT:    s_setpc_b64 s[30:31]
18365  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18366  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18367  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 3>
18368  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18369  ret void
18370}
18371
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 7, 1, 3>: lanes 0-1 = %vec1[3]; lane 2 = %vec0[1];
; lane 3 = %vec0[3].
; Expected lowering: the high result dword takes the high halves of both %vec0
; dwords with one s_pack_hh; the low result dword is an s_pack_hh splat of
; %vec1[3].
18372define void @s_shuffle_v4i16_v4i16__7_7_1_3() {
18373; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_3:
18374; GFX900:       ; %bb.0:
18375; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18376; GFX900-NEXT:    ;;#ASMSTART
18377; GFX900-NEXT:    ; def s[4:5]
18378; GFX900-NEXT:    ;;#ASMEND
18379; GFX900-NEXT:    ;;#ASMSTART
18380; GFX900-NEXT:    ; def s[6:7]
18381; GFX900-NEXT:    ;;#ASMEND
18382; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s5
18383; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18384; GFX900-NEXT:    ;;#ASMSTART
18385; GFX900-NEXT:    ; use s[8:9]
18386; GFX900-NEXT:    ;;#ASMEND
18387; GFX900-NEXT:    s_setpc_b64 s[30:31]
18388;
18389; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_3:
18390; GFX90A:       ; %bb.0:
18391; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18392; GFX90A-NEXT:    ;;#ASMSTART
18393; GFX90A-NEXT:    ; def s[4:5]
18394; GFX90A-NEXT:    ;;#ASMEND
18395; GFX90A-NEXT:    ;;#ASMSTART
18396; GFX90A-NEXT:    ; def s[6:7]
18397; GFX90A-NEXT:    ;;#ASMEND
18398; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s5
18399; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18400; GFX90A-NEXT:    ;;#ASMSTART
18401; GFX90A-NEXT:    ; use s[8:9]
18402; GFX90A-NEXT:    ;;#ASMEND
18403; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18404;
18405; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_3:
18406; GFX940:       ; %bb.0:
18407; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18408; GFX940-NEXT:    ;;#ASMSTART
18409; GFX940-NEXT:    ; def s[0:1]
18410; GFX940-NEXT:    ;;#ASMEND
18411; GFX940-NEXT:    ;;#ASMSTART
18412; GFX940-NEXT:    ; def s[2:3]
18413; GFX940-NEXT:    ;;#ASMEND
18414; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s1
18415; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
18416; GFX940-NEXT:    ;;#ASMSTART
18417; GFX940-NEXT:    ; use s[8:9]
18418; GFX940-NEXT:    ;;#ASMEND
18419; GFX940-NEXT:    s_setpc_b64 s[30:31]
18420  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18421  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18422  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 3>
18423  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18424  ret void
18425}
18426
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 7, 2, 3>: lanes 0-1 = %vec1[3]; lanes 2-3 = %vec0[2], %vec0[3]
; (the upper dword of %vec0, unchanged).
; Expected lowering: one asm def is allocated directly into s[8:9], so its
; upper dword already holds the high result dword (it acts as %vec0). Only the
; low result dword is rewritten, with a single s_pack_hh splat of %vec1[3].
18427define void @s_shuffle_v4i16_v4i16__7_7_2_3() {
18428; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_3:
18429; GFX900:       ; %bb.0:
18430; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18431; GFX900-NEXT:    ;;#ASMSTART
18432; GFX900-NEXT:    ; def s[8:9]
18433; GFX900-NEXT:    ;;#ASMEND
18434; GFX900-NEXT:    ;;#ASMSTART
18435; GFX900-NEXT:    ; def s[4:5]
18436; GFX900-NEXT:    ;;#ASMEND
18437; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
18438; GFX900-NEXT:    ;;#ASMSTART
18439; GFX900-NEXT:    ; use s[8:9]
18440; GFX900-NEXT:    ;;#ASMEND
18441; GFX900-NEXT:    s_setpc_b64 s[30:31]
18442;
18443; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_3:
18444; GFX90A:       ; %bb.0:
18445; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18446; GFX90A-NEXT:    ;;#ASMSTART
18447; GFX90A-NEXT:    ; def s[8:9]
18448; GFX90A-NEXT:    ;;#ASMEND
18449; GFX90A-NEXT:    ;;#ASMSTART
18450; GFX90A-NEXT:    ; def s[4:5]
18451; GFX90A-NEXT:    ;;#ASMEND
18452; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
18453; GFX90A-NEXT:    ;;#ASMSTART
18454; GFX90A-NEXT:    ; use s[8:9]
18455; GFX90A-NEXT:    ;;#ASMEND
18456; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18457;
18458; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_3:
18459; GFX940:       ; %bb.0:
18460; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18461; GFX940-NEXT:    ;;#ASMSTART
18462; GFX940-NEXT:    ; def s[8:9]
18463; GFX940-NEXT:    ;;#ASMEND
18464; GFX940-NEXT:    ;;#ASMSTART
18465; GFX940-NEXT:    ; def s[0:1]
18466; GFX940-NEXT:    ;;#ASMEND
18467; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
18468; GFX940-NEXT:    ;;#ASMSTART
18469; GFX940-NEXT:    ; use s[8:9]
18470; GFX940-NEXT:    ;;#ASMEND
18471; GFX940-NEXT:    s_setpc_b64 s[30:31]
18472  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18473  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18474  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 3>
18475  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18476  ret void
18477}
18478
; SGPR variant: both inputs come from "=s" inline-asm defs and the shuffled
; value is consumed through the fixed s[8:9] asm operand.
; Mask <7, 7, 4, 3>: lanes 0-1 = %vec1[3]; lane 2 = %vec1[0];
; lane 3 = %vec0[3].
; Expected lowering: the high result dword pairs the low half of %vec1's lower
; dword with the high half of %vec0's upper dword (s_pack_lh); the low result
; dword is an s_pack_hh splat of %vec1[3].
18479define void @s_shuffle_v4i16_v4i16__7_7_4_3() {
18480; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_3:
18481; GFX900:       ; %bb.0:
18482; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18483; GFX900-NEXT:    ;;#ASMSTART
18484; GFX900-NEXT:    ; def s[4:5]
18485; GFX900-NEXT:    ;;#ASMEND
18486; GFX900-NEXT:    ;;#ASMSTART
18487; GFX900-NEXT:    ; def s[6:7]
18488; GFX900-NEXT:    ;;#ASMEND
18489; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s6, s5
18490; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18491; GFX900-NEXT:    ;;#ASMSTART
18492; GFX900-NEXT:    ; use s[8:9]
18493; GFX900-NEXT:    ;;#ASMEND
18494; GFX900-NEXT:    s_setpc_b64 s[30:31]
18495;
18496; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_3:
18497; GFX90A:       ; %bb.0:
18498; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18499; GFX90A-NEXT:    ;;#ASMSTART
18500; GFX90A-NEXT:    ; def s[4:5]
18501; GFX90A-NEXT:    ;;#ASMEND
18502; GFX90A-NEXT:    ;;#ASMSTART
18503; GFX90A-NEXT:    ; def s[6:7]
18504; GFX90A-NEXT:    ;;#ASMEND
18505; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s6, s5
18506; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18507; GFX90A-NEXT:    ;;#ASMSTART
18508; GFX90A-NEXT:    ; use s[8:9]
18509; GFX90A-NEXT:    ;;#ASMEND
18510; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18511;
18512; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_3:
18513; GFX940:       ; %bb.0:
18514; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18515; GFX940-NEXT:    ;;#ASMSTART
18516; GFX940-NEXT:    ; def s[0:1]
18517; GFX940-NEXT:    ;;#ASMEND
18518; GFX940-NEXT:    ;;#ASMSTART
18519; GFX940-NEXT:    ; def s[2:3]
18520; GFX940-NEXT:    ;;#ASMEND
18521; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s2, s1
18522; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
18523; GFX940-NEXT:    ;;#ASMSTART
18524; GFX940-NEXT:    ; use s[8:9]
18525; GFX940-NEXT:    ;;#ASMEND
18526; GFX940-NEXT:    s_setpc_b64 s[30:31]
18527  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18528  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18529  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 3>
18530  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18531  ret void
18532}
18533
; SGPR shuffle of (%vec0, %vec1) with mask <7,7,5,3>; indices 4-7 select from %vec1.
; Lanes 0-1 = %vec1[3], lane 2 = %vec1[1], lane 3 = %vec0[3].
; The result is pinned to s[8:9] by the "use" asm; the checks show two
; s_pack_hh_b32_b16 instructions building it from the high halves of the inputs.
18534define void @s_shuffle_v4i16_v4i16__7_7_5_3() {
18535; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_3:
18536; GFX900:       ; %bb.0:
18537; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18538; GFX900-NEXT:    ;;#ASMSTART
18539; GFX900-NEXT:    ; def s[4:5]
18540; GFX900-NEXT:    ;;#ASMEND
18541; GFX900-NEXT:    ;;#ASMSTART
18542; GFX900-NEXT:    ; def s[6:7]
18543; GFX900-NEXT:    ;;#ASMEND
18544; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s5
18545; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18546; GFX900-NEXT:    ;;#ASMSTART
18547; GFX900-NEXT:    ; use s[8:9]
18548; GFX900-NEXT:    ;;#ASMEND
18549; GFX900-NEXT:    s_setpc_b64 s[30:31]
18550;
18551; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_3:
18552; GFX90A:       ; %bb.0:
18553; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18554; GFX90A-NEXT:    ;;#ASMSTART
18555; GFX90A-NEXT:    ; def s[4:5]
18556; GFX90A-NEXT:    ;;#ASMEND
18557; GFX90A-NEXT:    ;;#ASMSTART
18558; GFX90A-NEXT:    ; def s[6:7]
18559; GFX90A-NEXT:    ;;#ASMEND
18560; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s5
18561; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18562; GFX90A-NEXT:    ;;#ASMSTART
18563; GFX90A-NEXT:    ; use s[8:9]
18564; GFX90A-NEXT:    ;;#ASMEND
18565; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18566;
18567; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_3:
18568; GFX940:       ; %bb.0:
18569; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18570; GFX940-NEXT:    ;;#ASMSTART
18571; GFX940-NEXT:    ; def s[0:1]
18572; GFX940-NEXT:    ;;#ASMEND
18573; GFX940-NEXT:    ;;#ASMSTART
18574; GFX940-NEXT:    ; def s[2:3]
18575; GFX940-NEXT:    ;;#ASMEND
18576; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s1
18577; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
18578; GFX940-NEXT:    ;;#ASMSTART
18579; GFX940-NEXT:    ; use s[8:9]
18580; GFX940-NEXT:    ;;#ASMEND
18581; GFX940-NEXT:    s_setpc_b64 s[30:31]
18582  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18583  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18584  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 3>
18585  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18586  ret void
18587}
18588
; SGPR shuffle of (%vec0, %vec1) with mask <7,7,6,3>; indices 4-7 select from %vec1.
; Lanes 0-1 = %vec1[3], lane 2 = %vec1[2], lane 3 = %vec0[3].
; The result is pinned to s[8:9] by the "use" asm; the checks show one
; s_pack_lh_b32_b16 and one s_pack_hh_b32_b16 building the two result dwords.
18589define void @s_shuffle_v4i16_v4i16__7_7_6_3() {
18590; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_3:
18591; GFX900:       ; %bb.0:
18592; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18593; GFX900-NEXT:    ;;#ASMSTART
18594; GFX900-NEXT:    ; def s[4:5]
18595; GFX900-NEXT:    ;;#ASMEND
18596; GFX900-NEXT:    ;;#ASMSTART
18597; GFX900-NEXT:    ; def s[6:7]
18598; GFX900-NEXT:    ;;#ASMEND
18599; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s7, s5
18600; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18601; GFX900-NEXT:    ;;#ASMSTART
18602; GFX900-NEXT:    ; use s[8:9]
18603; GFX900-NEXT:    ;;#ASMEND
18604; GFX900-NEXT:    s_setpc_b64 s[30:31]
18605;
18606; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_3:
18607; GFX90A:       ; %bb.0:
18608; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18609; GFX90A-NEXT:    ;;#ASMSTART
18610; GFX90A-NEXT:    ; def s[4:5]
18611; GFX90A-NEXT:    ;;#ASMEND
18612; GFX90A-NEXT:    ;;#ASMSTART
18613; GFX90A-NEXT:    ; def s[6:7]
18614; GFX90A-NEXT:    ;;#ASMEND
18615; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s7, s5
18616; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
18617; GFX90A-NEXT:    ;;#ASMSTART
18618; GFX90A-NEXT:    ; use s[8:9]
18619; GFX90A-NEXT:    ;;#ASMEND
18620; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18621;
18622; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_3:
18623; GFX940:       ; %bb.0:
18624; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18625; GFX940-NEXT:    ;;#ASMSTART
18626; GFX940-NEXT:    ; def s[0:1]
18627; GFX940-NEXT:    ;;#ASMEND
18628; GFX940-NEXT:    ;;#ASMSTART
18629; GFX940-NEXT:    ; def s[2:3]
18630; GFX940-NEXT:    ;;#ASMEND
18631; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s3, s1
18632; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
18633; GFX940-NEXT:    ;;#ASMSTART
18634; GFX940-NEXT:    ; use s[8:9]
18635; GFX940-NEXT:    ;;#ASMEND
18636; GFX940-NEXT:    s_setpc_b64 s[30:31]
18637  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18638  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18639  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 3>
18640  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18641  ret void
18642}
18643
; Mask <poison,4,4,4> with a poison second operand: index 4 selects element 0 of
; that poison operand, so no lane reads %vec0. The checks contain no `; def` and
; no instructions between the waitcnt and the use of s[8:9].
; NOTE(review): sibling tests whose mask starts at 5 or above pass %vec1 as the
; second operand; confirm that poison (rather than %vec1) is intended here.
18644define void @s_shuffle_v4i16_v4i16__u_4_4_4() {
18645; GFX9-LABEL: s_shuffle_v4i16_v4i16__u_4_4_4:
18646; GFX9:       ; %bb.0:
18647; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18648; GFX9-NEXT:    ;;#ASMSTART
18649; GFX9-NEXT:    ; use s[8:9]
18650; GFX9-NEXT:    ;;#ASMEND
18651; GFX9-NEXT:    s_setpc_b64 s[30:31]
18652  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18653  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
18654  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18655  ret void
18656}
18657
; Mask <0,4,4,4> with a poison second operand: only lane 0 (= %vec0[0]) is
; defined; lanes 1-3 select element 0 of the poison operand.
; The checks show the def landing directly in s[8:9] with no shuffle
; instructions; the GFX940 output additionally has an `s_nop 0` between def and use.
; NOTE(review): sibling tests whose mask starts at 5 or above pass %vec1 as the
; second operand; confirm that poison (rather than %vec1) is intended here.
18658define void @s_shuffle_v4i16_v4i16__0_4_4_4() {
18659; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_4_4_4:
18660; GFX900:       ; %bb.0:
18661; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18662; GFX900-NEXT:    ;;#ASMSTART
18663; GFX900-NEXT:    ; def s[8:9]
18664; GFX900-NEXT:    ;;#ASMEND
18665; GFX900-NEXT:    ;;#ASMSTART
18666; GFX900-NEXT:    ; use s[8:9]
18667; GFX900-NEXT:    ;;#ASMEND
18668; GFX900-NEXT:    s_setpc_b64 s[30:31]
18669;
18670; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_4_4_4:
18671; GFX90A:       ; %bb.0:
18672; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18673; GFX90A-NEXT:    ;;#ASMSTART
18674; GFX90A-NEXT:    ; def s[8:9]
18675; GFX90A-NEXT:    ;;#ASMEND
18676; GFX90A-NEXT:    ;;#ASMSTART
18677; GFX90A-NEXT:    ; use s[8:9]
18678; GFX90A-NEXT:    ;;#ASMEND
18679; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18680;
18681; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_4_4_4:
18682; GFX940:       ; %bb.0:
18683; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18684; GFX940-NEXT:    ;;#ASMSTART
18685; GFX940-NEXT:    ; def s[8:9]
18686; GFX940-NEXT:    ;;#ASMEND
18687; GFX940-NEXT:    s_nop 0
18688; GFX940-NEXT:    ;;#ASMSTART
18689; GFX940-NEXT:    ; use s[8:9]
18690; GFX940-NEXT:    ;;#ASMEND
18691; GFX940-NEXT:    s_setpc_b64 s[30:31]
18692  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18693  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
18694  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18695  ret void
18696}
18697
; Mask <1,4,4,4> with a poison second operand: only lane 0 (= %vec0[1]) is
; defined; lanes 1-3 select element 0 of the poison operand.
; The checks show a single s_lshr_b32 by 16 of the first def register into s8;
; nothing writes s9.
; NOTE(review): sibling tests whose mask starts at 5 or above pass %vec1 as the
; second operand; confirm that poison (rather than %vec1) is intended here.
18698define void @s_shuffle_v4i16_v4i16__1_4_4_4() {
18699; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_4_4_4:
18700; GFX900:       ; %bb.0:
18701; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18702; GFX900-NEXT:    ;;#ASMSTART
18703; GFX900-NEXT:    ; def s[4:5]
18704; GFX900-NEXT:    ;;#ASMEND
18705; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
18706; GFX900-NEXT:    ;;#ASMSTART
18707; GFX900-NEXT:    ; use s[8:9]
18708; GFX900-NEXT:    ;;#ASMEND
18709; GFX900-NEXT:    s_setpc_b64 s[30:31]
18710;
18711; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_4_4_4:
18712; GFX90A:       ; %bb.0:
18713; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18714; GFX90A-NEXT:    ;;#ASMSTART
18715; GFX90A-NEXT:    ; def s[4:5]
18716; GFX90A-NEXT:    ;;#ASMEND
18717; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
18718; GFX90A-NEXT:    ;;#ASMSTART
18719; GFX90A-NEXT:    ; use s[8:9]
18720; GFX90A-NEXT:    ;;#ASMEND
18721; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18722;
18723; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_4_4_4:
18724; GFX940:       ; %bb.0:
18725; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18726; GFX940-NEXT:    ;;#ASMSTART
18727; GFX940-NEXT:    ; def s[0:1]
18728; GFX940-NEXT:    ;;#ASMEND
18729; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
18730; GFX940-NEXT:    ;;#ASMSTART
18731; GFX940-NEXT:    ; use s[8:9]
18732; GFX940-NEXT:    ;;#ASMEND
18733; GFX940-NEXT:    s_setpc_b64 s[30:31]
18734  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18735  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
18736  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18737  ret void
18738}
18739
; Mask <2,4,4,4> with a poison second operand: only lane 0 (= %vec0[2]) is
; defined; lanes 1-3 select element 0 of the poison operand.
; The checks show a single s_mov_b32 copying the second def register into s8;
; nothing writes s9.
; NOTE(review): sibling tests whose mask starts at 5 or above pass %vec1 as the
; second operand; confirm that poison (rather than %vec1) is intended here.
18740define void @s_shuffle_v4i16_v4i16__2_4_4_4() {
18741; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4:
18742; GFX900:       ; %bb.0:
18743; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18744; GFX900-NEXT:    ;;#ASMSTART
18745; GFX900-NEXT:    ; def s[4:5]
18746; GFX900-NEXT:    ;;#ASMEND
18747; GFX900-NEXT:    s_mov_b32 s8, s5
18748; GFX900-NEXT:    ;;#ASMSTART
18749; GFX900-NEXT:    ; use s[8:9]
18750; GFX900-NEXT:    ;;#ASMEND
18751; GFX900-NEXT:    s_setpc_b64 s[30:31]
18752;
18753; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4:
18754; GFX90A:       ; %bb.0:
18755; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18756; GFX90A-NEXT:    ;;#ASMSTART
18757; GFX90A-NEXT:    ; def s[4:5]
18758; GFX90A-NEXT:    ;;#ASMEND
18759; GFX90A-NEXT:    s_mov_b32 s8, s5
18760; GFX90A-NEXT:    ;;#ASMSTART
18761; GFX90A-NEXT:    ; use s[8:9]
18762; GFX90A-NEXT:    ;;#ASMEND
18763; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18764;
18765; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_4_4_4:
18766; GFX940:       ; %bb.0:
18767; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18768; GFX940-NEXT:    ;;#ASMSTART
18769; GFX940-NEXT:    ; def s[0:1]
18770; GFX940-NEXT:    ;;#ASMEND
18771; GFX940-NEXT:    s_mov_b32 s8, s1
18772; GFX940-NEXT:    ;;#ASMSTART
18773; GFX940-NEXT:    ; use s[8:9]
18774; GFX940-NEXT:    ;;#ASMEND
18775; GFX940-NEXT:    s_setpc_b64 s[30:31]
18776  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18777  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
18778  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18779  ret void
18780}
18781
; Mask <3,4,4,4> with a poison second operand: only lane 0 (= %vec0[3]) is
; defined; lanes 1-3 select element 0 of the poison operand.
; The checks show a single s_lshr_b32 by 16 of the second def register into s8;
; nothing writes s9.
; NOTE(review): sibling tests whose mask starts at 5 or above pass %vec1 as the
; second operand; confirm that poison (rather than %vec1) is intended here.
18782define void @s_shuffle_v4i16_v4i16__3_4_4_4() {
18783; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_4_4_4:
18784; GFX900:       ; %bb.0:
18785; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18786; GFX900-NEXT:    ;;#ASMSTART
18787; GFX900-NEXT:    ; def s[4:5]
18788; GFX900-NEXT:    ;;#ASMEND
18789; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
18790; GFX900-NEXT:    ;;#ASMSTART
18791; GFX900-NEXT:    ; use s[8:9]
18792; GFX900-NEXT:    ;;#ASMEND
18793; GFX900-NEXT:    s_setpc_b64 s[30:31]
18794;
18795; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_4_4_4:
18796; GFX90A:       ; %bb.0:
18797; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18798; GFX90A-NEXT:    ;;#ASMSTART
18799; GFX90A-NEXT:    ; def s[4:5]
18800; GFX90A-NEXT:    ;;#ASMEND
18801; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
18802; GFX90A-NEXT:    ;;#ASMSTART
18803; GFX90A-NEXT:    ; use s[8:9]
18804; GFX90A-NEXT:    ;;#ASMEND
18805; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18806;
18807; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_4_4_4:
18808; GFX940:       ; %bb.0:
18809; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18810; GFX940-NEXT:    ;;#ASMSTART
18811; GFX940-NEXT:    ; def s[0:1]
18812; GFX940-NEXT:    ;;#ASMEND
18813; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
18814; GFX940-NEXT:    ;;#ASMSTART
18815; GFX940-NEXT:    ; use s[8:9]
18816; GFX940-NEXT:    ;;#ASMEND
18817; GFX940-NEXT:    s_setpc_b64 s[30:31]
18818  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18819  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
18820  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18821  ret void
18822}
18823
; Mask <4,4,4,4> with a poison second operand: every lane selects element 0 of
; that poison operand, so no lane reads %vec0. The checks contain no `; def` and
; no instructions between the waitcnt and the use of s[8:9].
; NOTE(review): sibling tests whose mask starts at 5 or above pass %vec1 as the
; second operand; confirm that poison (rather than %vec1) is intended here.
18824define void @s_shuffle_v4i16_v4i16__4_4_4_4() {
18825; GFX9-LABEL: s_shuffle_v4i16_v4i16__4_4_4_4:
18826; GFX9:       ; %bb.0:
18827; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18828; GFX9-NEXT:    ;;#ASMSTART
18829; GFX9-NEXT:    ; use s[8:9]
18830; GFX9-NEXT:    ;;#ASMEND
18831; GFX9-NEXT:    s_setpc_b64 s[30:31]
18832  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18833  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
18834  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18835  ret void
18836}
18837
; SGPR shuffle of (%vec0, %vec1) with mask <5,4,4,4>; every index selects from %vec1.
; Lane 0 = %vec1[1], lanes 1-3 = %vec1[0].
; %vec0 is not referenced by the mask, and the checks contain only one `; def`
; (presumably the unused non-sideeffect asm is removed as dead).
18838define void @s_shuffle_v4i16_v4i16__5_4_4_4() {
18839; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_4_4_4:
18840; GFX900:       ; %bb.0:
18841; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18842; GFX900-NEXT:    ;;#ASMSTART
18843; GFX900-NEXT:    ; def s[4:5]
18844; GFX900-NEXT:    ;;#ASMEND
18845; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
18846; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
18847; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
18848; GFX900-NEXT:    ;;#ASMSTART
18849; GFX900-NEXT:    ; use s[8:9]
18850; GFX900-NEXT:    ;;#ASMEND
18851; GFX900-NEXT:    s_setpc_b64 s[30:31]
18852;
18853; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_4_4_4:
18854; GFX90A:       ; %bb.0:
18855; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18856; GFX90A-NEXT:    ;;#ASMSTART
18857; GFX90A-NEXT:    ; def s[4:5]
18858; GFX90A-NEXT:    ;;#ASMEND
18859; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
18860; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
18861; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
18862; GFX90A-NEXT:    ;;#ASMSTART
18863; GFX90A-NEXT:    ; use s[8:9]
18864; GFX90A-NEXT:    ;;#ASMEND
18865; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18866;
18867; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_4_4_4:
18868; GFX940:       ; %bb.0:
18869; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18870; GFX940-NEXT:    ;;#ASMSTART
18871; GFX940-NEXT:    ; def s[0:1]
18872; GFX940-NEXT:    ;;#ASMEND
18873; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
18874; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
18875; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
18876; GFX940-NEXT:    ;;#ASMSTART
18877; GFX940-NEXT:    ; use s[8:9]
18878; GFX940-NEXT:    ;;#ASMEND
18879; GFX940-NEXT:    s_setpc_b64 s[30:31]
18880  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18881  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18882  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
18883  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18884  ret void
18885}
18886
; SGPR shuffle of (%vec0, %vec1) with mask <6,4,4,4>; every index selects from %vec1.
; Lane 0 = %vec1[2], lanes 1-3 = %vec1[0].
; %vec0 is not referenced by the mask, and the checks contain only one `; def`
; (presumably the unused non-sideeffect asm is removed as dead).
18887define void @s_shuffle_v4i16_v4i16__6_4_4_4() {
18888; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_4_4_4:
18889; GFX900:       ; %bb.0:
18890; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18891; GFX900-NEXT:    ;;#ASMSTART
18892; GFX900-NEXT:    ; def s[4:5]
18893; GFX900-NEXT:    ;;#ASMEND
18894; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
18895; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
18896; GFX900-NEXT:    ;;#ASMSTART
18897; GFX900-NEXT:    ; use s[8:9]
18898; GFX900-NEXT:    ;;#ASMEND
18899; GFX900-NEXT:    s_setpc_b64 s[30:31]
18900;
18901; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_4_4_4:
18902; GFX90A:       ; %bb.0:
18903; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18904; GFX90A-NEXT:    ;;#ASMSTART
18905; GFX90A-NEXT:    ; def s[4:5]
18906; GFX90A-NEXT:    ;;#ASMEND
18907; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
18908; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
18909; GFX90A-NEXT:    ;;#ASMSTART
18910; GFX90A-NEXT:    ; use s[8:9]
18911; GFX90A-NEXT:    ;;#ASMEND
18912; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18913;
18914; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_4_4_4:
18915; GFX940:       ; %bb.0:
18916; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18917; GFX940-NEXT:    ;;#ASMSTART
18918; GFX940-NEXT:    ; def s[0:1]
18919; GFX940-NEXT:    ;;#ASMEND
18920; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
18921; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
18922; GFX940-NEXT:    ;;#ASMSTART
18923; GFX940-NEXT:    ; use s[8:9]
18924; GFX940-NEXT:    ;;#ASMEND
18925; GFX940-NEXT:    s_setpc_b64 s[30:31]
18926  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18927  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18928  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 4, i32 4, i32 4>
18929  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18930  ret void
18931}
18932
; SGPR shuffle of (%vec0, %vec1) with mask <7,4,4,4>; every index selects from %vec1.
; Lane 0 = %vec1[3], lanes 1-3 = %vec1[0].
; %vec0 is not referenced by the mask, and the checks contain only one `; def`
; (presumably the unused non-sideeffect asm is removed as dead).
18933define void @s_shuffle_v4i16_v4i16__7_4_4_4() {
18934; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_4_4:
18935; GFX900:       ; %bb.0:
18936; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18937; GFX900-NEXT:    ;;#ASMSTART
18938; GFX900-NEXT:    ; def s[4:5]
18939; GFX900-NEXT:    ;;#ASMEND
18940; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
18941; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
18942; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
18943; GFX900-NEXT:    ;;#ASMSTART
18944; GFX900-NEXT:    ; use s[8:9]
18945; GFX900-NEXT:    ;;#ASMEND
18946; GFX900-NEXT:    s_setpc_b64 s[30:31]
18947;
18948; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_4_4:
18949; GFX90A:       ; %bb.0:
18950; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18951; GFX90A-NEXT:    ;;#ASMSTART
18952; GFX90A-NEXT:    ; def s[4:5]
18953; GFX90A-NEXT:    ;;#ASMEND
18954; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
18955; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
18956; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
18957; GFX90A-NEXT:    ;;#ASMSTART
18958; GFX90A-NEXT:    ; use s[8:9]
18959; GFX90A-NEXT:    ;;#ASMEND
18960; GFX90A-NEXT:    s_setpc_b64 s[30:31]
18961;
18962; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_4_4:
18963; GFX940:       ; %bb.0:
18964; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18965; GFX940-NEXT:    ;;#ASMSTART
18966; GFX940-NEXT:    ; def s[0:1]
18967; GFX940-NEXT:    ;;#ASMEND
18968; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
18969; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
18970; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
18971; GFX940-NEXT:    ;;#ASMSTART
18972; GFX940-NEXT:    ; use s[8:9]
18973; GFX940-NEXT:    ;;#ASMEND
18974; GFX940-NEXT:    s_setpc_b64 s[30:31]
18975  %vec0 = call <4 x i16> asm "; def $0", "=s"()
18976  %vec1 = call <4 x i16> asm "; def $0", "=s"()
18977  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 4, i32 4>
18978  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
18979  ret void
18980}
18981
; SGPR shuffle of (%vec0, %vec1) with mask <7,poison,4,4>; defined indices all select from %vec1.
; Lane 0 = %vec1[3], lane 1 is poison, lanes 2-3 = %vec1[0].
; %vec0 is not referenced by the mask, and the checks contain only one `; def`.
; The checks show s8 produced by a plain s_lshr_b32 by 16 rather than a pack.
18982define void @s_shuffle_v4i16_v4i16__7_u_4_4() {
18983; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_4_4:
18984; GFX900:       ; %bb.0:
18985; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18986; GFX900-NEXT:    ;;#ASMSTART
18987; GFX900-NEXT:    ; def s[4:5]
18988; GFX900-NEXT:    ;;#ASMEND
18989; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
18990; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
18991; GFX900-NEXT:    ;;#ASMSTART
18992; GFX900-NEXT:    ; use s[8:9]
18993; GFX900-NEXT:    ;;#ASMEND
18994; GFX900-NEXT:    s_setpc_b64 s[30:31]
18995;
18996; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_4_4:
18997; GFX90A:       ; %bb.0:
18998; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18999; GFX90A-NEXT:    ;;#ASMSTART
19000; GFX90A-NEXT:    ; def s[4:5]
19001; GFX90A-NEXT:    ;;#ASMEND
19002; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
19003; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
19004; GFX90A-NEXT:    ;;#ASMSTART
19005; GFX90A-NEXT:    ; use s[8:9]
19006; GFX90A-NEXT:    ;;#ASMEND
19007; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19008;
19009; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_4_4:
19010; GFX940:       ; %bb.0:
19011; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19012; GFX940-NEXT:    ;;#ASMSTART
19013; GFX940-NEXT:    ; def s[0:1]
19014; GFX940-NEXT:    ;;#ASMEND
19015; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
19016; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
19017; GFX940-NEXT:    ;;#ASMSTART
19018; GFX940-NEXT:    ; use s[8:9]
19019; GFX940-NEXT:    ;;#ASMEND
19020; GFX940-NEXT:    s_setpc_b64 s[30:31]
19021  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19022  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19023  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 4, i32 4>
19024  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19025  ret void
19026}
19027
; SGPR shuffle of (%vec0, %vec1) with mask <7,0,4,4>; indices 4-7 select from %vec1.
; Lane 0 = %vec1[3], lane 1 = %vec0[0], lanes 2-3 = %vec1[0].
; The result is pinned to s[8:9] by the "use" asm; the checks show a shift of the
; %vec1 high half followed by two s_pack_ll_b32_b16 instructions.
19028define void @s_shuffle_v4i16_v4i16__7_0_4_4() {
19029; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_4_4:
19030; GFX900:       ; %bb.0:
19031; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19032; GFX900-NEXT:    ;;#ASMSTART
19033; GFX900-NEXT:    ; def s[4:5]
19034; GFX900-NEXT:    ;;#ASMEND
19035; GFX900-NEXT:    ;;#ASMSTART
19036; GFX900-NEXT:    ; def s[6:7]
19037; GFX900-NEXT:    ;;#ASMEND
19038; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
19039; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
19040; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
19041; GFX900-NEXT:    ;;#ASMSTART
19042; GFX900-NEXT:    ; use s[8:9]
19043; GFX900-NEXT:    ;;#ASMEND
19044; GFX900-NEXT:    s_setpc_b64 s[30:31]
19045;
19046; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_4_4:
19047; GFX90A:       ; %bb.0:
19048; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19049; GFX90A-NEXT:    ;;#ASMSTART
19050; GFX90A-NEXT:    ; def s[4:5]
19051; GFX90A-NEXT:    ;;#ASMEND
19052; GFX90A-NEXT:    ;;#ASMSTART
19053; GFX90A-NEXT:    ; def s[6:7]
19054; GFX90A-NEXT:    ;;#ASMEND
19055; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
19056; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
19057; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
19058; GFX90A-NEXT:    ;;#ASMSTART
19059; GFX90A-NEXT:    ; use s[8:9]
19060; GFX90A-NEXT:    ;;#ASMEND
19061; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19062;
19063; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_4_4:
19064; GFX940:       ; %bb.0:
19065; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19066; GFX940-NEXT:    ;;#ASMSTART
19067; GFX940-NEXT:    ; def s[0:1]
19068; GFX940-NEXT:    ;;#ASMEND
19069; GFX940-NEXT:    ;;#ASMSTART
19070; GFX940-NEXT:    ; def s[2:3]
19071; GFX940-NEXT:    ;;#ASMEND
19072; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
19073; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
19074; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s2
19075; GFX940-NEXT:    ;;#ASMSTART
19076; GFX940-NEXT:    ; use s[8:9]
19077; GFX940-NEXT:    ;;#ASMEND
19078; GFX940-NEXT:    s_setpc_b64 s[30:31]
19079  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19080  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19081  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 4, i32 4>
19082  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19083  ret void
19084}
19085
; SGPR shuffle of (%vec0, %vec1) with mask <7,1,4,4>; indices 4-7 select from %vec1.
; Lane 0 = %vec1[3], lane 1 = %vec0[1], lanes 2-3 = %vec1[0].
; The result is pinned to s[8:9] by the "use" asm; the checks show s8 built by a
; single s_pack_hh_b32_b16 and s9 by an s_pack_ll_b32_b16.
19086define void @s_shuffle_v4i16_v4i16__7_1_4_4() {
19087; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_4_4:
19088; GFX900:       ; %bb.0:
19089; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19090; GFX900-NEXT:    ;;#ASMSTART
19091; GFX900-NEXT:    ; def s[4:5]
19092; GFX900-NEXT:    ;;#ASMEND
19093; GFX900-NEXT:    ;;#ASMSTART
19094; GFX900-NEXT:    ; def s[6:7]
19095; GFX900-NEXT:    ;;#ASMEND
19096; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
19097; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
19098; GFX900-NEXT:    ;;#ASMSTART
19099; GFX900-NEXT:    ; use s[8:9]
19100; GFX900-NEXT:    ;;#ASMEND
19101; GFX900-NEXT:    s_setpc_b64 s[30:31]
19102;
19103; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_4_4:
19104; GFX90A:       ; %bb.0:
19105; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19106; GFX90A-NEXT:    ;;#ASMSTART
19107; GFX90A-NEXT:    ; def s[4:5]
19108; GFX90A-NEXT:    ;;#ASMEND
19109; GFX90A-NEXT:    ;;#ASMSTART
19110; GFX90A-NEXT:    ; def s[6:7]
19111; GFX90A-NEXT:    ;;#ASMEND
19112; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
19113; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
19114; GFX90A-NEXT:    ;;#ASMSTART
19115; GFX90A-NEXT:    ; use s[8:9]
19116; GFX90A-NEXT:    ;;#ASMEND
19117; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19118;
19119; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_4_4:
19120; GFX940:       ; %bb.0:
19121; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19122; GFX940-NEXT:    ;;#ASMSTART
19123; GFX940-NEXT:    ; def s[0:1]
19124; GFX940-NEXT:    ;;#ASMEND
19125; GFX940-NEXT:    ;;#ASMSTART
19126; GFX940-NEXT:    ; def s[2:3]
19127; GFX940-NEXT:    ;;#ASMEND
19128; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
19129; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s2
19130; GFX940-NEXT:    ;;#ASMSTART
19131; GFX940-NEXT:    ; use s[8:9]
19132; GFX940-NEXT:    ;;#ASMEND
19133; GFX940-NEXT:    s_setpc_b64 s[30:31]
19134  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19135  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19136  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 4, i32 4>
19137  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19138  ret void
19139}
19140
; SGPR shuffle of (%vec0, %vec1) with mask <7,2,4,4>; indices 4-7 select from %vec1.
; Lane 0 = %vec1[3], lane 1 = %vec0[2], lanes 2-3 = %vec1[0].
; The result is pinned to s[8:9] by the "use" asm; the checks show a shift of the
; %vec1 high half followed by two s_pack_ll_b32_b16 instructions.
19141define void @s_shuffle_v4i16_v4i16__7_2_4_4() {
19142; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_4_4:
19143; GFX900:       ; %bb.0:
19144; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19145; GFX900-NEXT:    ;;#ASMSTART
19146; GFX900-NEXT:    ; def s[4:5]
19147; GFX900-NEXT:    ;;#ASMEND
19148; GFX900-NEXT:    ;;#ASMSTART
19149; GFX900-NEXT:    ; def s[6:7]
19150; GFX900-NEXT:    ;;#ASMEND
19151; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
19152; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
19153; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
19154; GFX900-NEXT:    ;;#ASMSTART
19155; GFX900-NEXT:    ; use s[8:9]
19156; GFX900-NEXT:    ;;#ASMEND
19157; GFX900-NEXT:    s_setpc_b64 s[30:31]
19158;
19159; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_4_4:
19160; GFX90A:       ; %bb.0:
19161; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19162; GFX90A-NEXT:    ;;#ASMSTART
19163; GFX90A-NEXT:    ; def s[4:5]
19164; GFX90A-NEXT:    ;;#ASMEND
19165; GFX90A-NEXT:    ;;#ASMSTART
19166; GFX90A-NEXT:    ; def s[6:7]
19167; GFX90A-NEXT:    ;;#ASMEND
19168; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
19169; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
19170; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
19171; GFX90A-NEXT:    ;;#ASMSTART
19172; GFX90A-NEXT:    ; use s[8:9]
19173; GFX90A-NEXT:    ;;#ASMEND
19174; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19175;
19176; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_4_4:
19177; GFX940:       ; %bb.0:
19178; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19179; GFX940-NEXT:    ;;#ASMSTART
19180; GFX940-NEXT:    ; def s[0:1]
19181; GFX940-NEXT:    ;;#ASMEND
19182; GFX940-NEXT:    ;;#ASMSTART
19183; GFX940-NEXT:    ; def s[2:3]
19184; GFX940-NEXT:    ;;#ASMEND
19185; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
19186; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
19187; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s2
19188; GFX940-NEXT:    ;;#ASMSTART
19189; GFX940-NEXT:    ; use s[8:9]
19190; GFX940-NEXT:    ;;#ASMEND
19191; GFX940-NEXT:    s_setpc_b64 s[30:31]
19192  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19193  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19194  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 4, i32 4>
19195  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19196  ret void
19197}
19198
; SGPR shuffle of (%vec0, %vec1) with mask <7,3,4,4>; indices 4-7 select from %vec1.
; Lane 0 = %vec1[3], lane 1 = %vec0[3], lanes 2-3 = %vec1[0].
; The result is pinned to s[8:9] by the "use" asm; the checks show s8 built by a
; single s_pack_hh_b32_b16 and s9 by an s_pack_ll_b32_b16.
19199define void @s_shuffle_v4i16_v4i16__7_3_4_4() {
19200; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_4_4:
19201; GFX900:       ; %bb.0:
19202; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19203; GFX900-NEXT:    ;;#ASMSTART
19204; GFX900-NEXT:    ; def s[4:5]
19205; GFX900-NEXT:    ;;#ASMEND
19206; GFX900-NEXT:    ;;#ASMSTART
19207; GFX900-NEXT:    ; def s[6:7]
19208; GFX900-NEXT:    ;;#ASMEND
19209; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
19210; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
19211; GFX900-NEXT:    ;;#ASMSTART
19212; GFX900-NEXT:    ; use s[8:9]
19213; GFX900-NEXT:    ;;#ASMEND
19214; GFX900-NEXT:    s_setpc_b64 s[30:31]
19215;
19216; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_4_4:
19217; GFX90A:       ; %bb.0:
19218; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19219; GFX90A-NEXT:    ;;#ASMSTART
19220; GFX90A-NEXT:    ; def s[4:5]
19221; GFX90A-NEXT:    ;;#ASMEND
19222; GFX90A-NEXT:    ;;#ASMSTART
19223; GFX90A-NEXT:    ; def s[6:7]
19224; GFX90A-NEXT:    ;;#ASMEND
19225; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
19226; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s6
19227; GFX90A-NEXT:    ;;#ASMSTART
19228; GFX90A-NEXT:    ; use s[8:9]
19229; GFX90A-NEXT:    ;;#ASMEND
19230; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19231;
19232; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_4_4:
19233; GFX940:       ; %bb.0:
19234; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19235; GFX940-NEXT:    ;;#ASMSTART
19236; GFX940-NEXT:    ; def s[0:1]
19237; GFX940-NEXT:    ;;#ASMEND
19238; GFX940-NEXT:    ;;#ASMSTART
19239; GFX940-NEXT:    ; def s[2:3]
19240; GFX940-NEXT:    ;;#ASMEND
19241; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
19242; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s2
19243; GFX940-NEXT:    ;;#ASMSTART
19244; GFX940-NEXT:    ; use s[8:9]
19245; GFX940-NEXT:    ;;#ASMEND
19246; GFX940-NEXT:    s_setpc_b64 s[30:31]
19247  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19248  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19249  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 4, i32 4>
19250  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19251  ret void
19252}
19253
; SGPR shuffle of (%vec0, %vec1) with mask <7,5,4,4>; every index selects from %vec1.
; Lane 0 = %vec1[3], lane 1 = %vec1[1], lanes 2-3 = %vec1[0].
; %vec0 is not referenced by the mask, and the checks contain only one `; def`
; (presumably the unused non-sideeffect asm is removed as dead).
19254define void @s_shuffle_v4i16_v4i16__7_5_4_4() {
19255; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_4_4:
19256; GFX900:       ; %bb.0:
19257; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19258; GFX900-NEXT:    ;;#ASMSTART
19259; GFX900-NEXT:    ; def s[4:5]
19260; GFX900-NEXT:    ;;#ASMEND
19261; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
19262; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
19263; GFX900-NEXT:    ;;#ASMSTART
19264; GFX900-NEXT:    ; use s[8:9]
19265; GFX900-NEXT:    ;;#ASMEND
19266; GFX900-NEXT:    s_setpc_b64 s[30:31]
19267;
19268; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_4_4:
19269; GFX90A:       ; %bb.0:
19270; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19271; GFX90A-NEXT:    ;;#ASMSTART
19272; GFX90A-NEXT:    ; def s[4:5]
19273; GFX90A-NEXT:    ;;#ASMEND
19274; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
19275; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
19276; GFX90A-NEXT:    ;;#ASMSTART
19277; GFX90A-NEXT:    ; use s[8:9]
19278; GFX90A-NEXT:    ;;#ASMEND
19279; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19280;
19281; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_4_4:
19282; GFX940:       ; %bb.0:
19283; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19284; GFX940-NEXT:    ;;#ASMSTART
19285; GFX940-NEXT:    ; def s[0:1]
19286; GFX940-NEXT:    ;;#ASMEND
19287; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
19288; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
19289; GFX940-NEXT:    ;;#ASMSTART
19290; GFX940-NEXT:    ; use s[8:9]
19291; GFX940-NEXT:    ;;#ASMEND
19292; GFX940-NEXT:    s_setpc_b64 s[30:31]
19293  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19294  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19295  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 4, i32 4>
19296  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19297  ret void
19298}
19299
; SGPR shuffle of (%vec0, %vec1) with mask <7,6,4,4>; every index selects from %vec1.
; Lane 0 = %vec1[3], lane 1 = %vec1[2], lanes 2-3 = %vec1[0].
; %vec0 is not referenced by the mask, and the checks contain only one `; def`
; (presumably the unused non-sideeffect asm is removed as dead).
19300define void @s_shuffle_v4i16_v4i16__7_6_4_4() {
19301; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_4_4:
19302; GFX900:       ; %bb.0:
19303; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19304; GFX900-NEXT:    ;;#ASMSTART
19305; GFX900-NEXT:    ; def s[4:5]
19306; GFX900-NEXT:    ;;#ASMEND
19307; GFX900-NEXT:    s_lshr_b32 s6, s5, 16
19308; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
19309; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
19310; GFX900-NEXT:    ;;#ASMSTART
19311; GFX900-NEXT:    ; use s[8:9]
19312; GFX900-NEXT:    ;;#ASMEND
19313; GFX900-NEXT:    s_setpc_b64 s[30:31]
19314;
19315; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_4_4:
19316; GFX90A:       ; %bb.0:
19317; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19318; GFX90A-NEXT:    ;;#ASMSTART
19319; GFX90A-NEXT:    ; def s[4:5]
19320; GFX90A-NEXT:    ;;#ASMEND
19321; GFX90A-NEXT:    s_lshr_b32 s6, s5, 16
19322; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
19323; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
19324; GFX90A-NEXT:    ;;#ASMSTART
19325; GFX90A-NEXT:    ; use s[8:9]
19326; GFX90A-NEXT:    ;;#ASMEND
19327; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19328;
19329; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_4_4:
19330; GFX940:       ; %bb.0:
19331; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19332; GFX940-NEXT:    ;;#ASMSTART
19333; GFX940-NEXT:    ; def s[0:1]
19334; GFX940-NEXT:    ;;#ASMEND
19335; GFX940-NEXT:    s_lshr_b32 s2, s1, 16
19336; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
19337; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
19338; GFX940-NEXT:    ;;#ASMSTART
19339; GFX940-NEXT:    ; use s[8:9]
19340; GFX940-NEXT:    ;;#ASMEND
19341; GFX940-NEXT:    s_setpc_b64 s[30:31]
19342  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19343  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19344  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 4, i32 4>
19345  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19346  ret void
19347}
19348
; SGPR shuffle of two <4 x i16> inline-asm values with mask <7, 7, 4, 4>.
; Every mask index is >= 4, so all result lanes come from %vec1; accordingly the
; checks contain a single "; def" of one 64-bit SGPR pair.
; Expected sequence: s_pack_ll_b32_b16 writing s9, then s_pack_hh_b32_b16 writing
; s8; s[8:9] is the pair the "use" asm is pinned to by its {s[8:9]} constraint.
; The gfx900 and gfx90a checks expect the def in s[4:5], the gfx940 checks in s[0:1].
19349define void @s_shuffle_v4i16_v4i16__7_7_4_4() {
19350; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_4:
19351; GFX900:       ; %bb.0:
19352; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19353; GFX900-NEXT:    ;;#ASMSTART
19354; GFX900-NEXT:    ; def s[4:5]
19355; GFX900-NEXT:    ;;#ASMEND
19356; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
19357; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
19358; GFX900-NEXT:    ;;#ASMSTART
19359; GFX900-NEXT:    ; use s[8:9]
19360; GFX900-NEXT:    ;;#ASMEND
19361; GFX900-NEXT:    s_setpc_b64 s[30:31]
19362;
19363; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_4:
19364; GFX90A:       ; %bb.0:
19365; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19366; GFX90A-NEXT:    ;;#ASMSTART
19367; GFX90A-NEXT:    ; def s[4:5]
19368; GFX90A-NEXT:    ;;#ASMEND
19369; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s4
19370; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
19371; GFX90A-NEXT:    ;;#ASMSTART
19372; GFX90A-NEXT:    ; use s[8:9]
19373; GFX90A-NEXT:    ;;#ASMEND
19374; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19375;
19376; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_4:
19377; GFX940:       ; %bb.0:
19378; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19379; GFX940-NEXT:    ;;#ASMSTART
19380; GFX940-NEXT:    ; def s[0:1]
19381; GFX940-NEXT:    ;;#ASMEND
19382; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s0
19383; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
19384; GFX940-NEXT:    ;;#ASMSTART
19385; GFX940-NEXT:    ; use s[8:9]
19386; GFX940-NEXT:    ;;#ASMEND
19387; GFX940-NEXT:    s_setpc_b64 s[30:31]
19388  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19389  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19390  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 4>
19391  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19392  ret void
19393}
19394
; SGPR shuffle of two <4 x i16> inline-asm values with mask <7, 7, poison, 4>.
; Result lane 2 is poison and the remaining indices are >= 4 (taken from %vec1),
; so the checks contain a single "; def" of one 64-bit SGPR pair.
; Expected sequence: s_lshl_b32 by 16 writing s9 (instead of a pack instruction),
; then s_pack_hh_b32_b16 writing s8; s[8:9] is the pair required by the "use" asm.
; The gfx900 and gfx90a checks expect the def in s[4:5], the gfx940 checks in s[0:1].
19395define void @s_shuffle_v4i16_v4i16__7_7_u_4() {
19396; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_4:
19397; GFX900:       ; %bb.0:
19398; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19399; GFX900-NEXT:    ;;#ASMSTART
19400; GFX900-NEXT:    ; def s[4:5]
19401; GFX900-NEXT:    ;;#ASMEND
19402; GFX900-NEXT:    s_lshl_b32 s9, s4, 16
19403; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
19404; GFX900-NEXT:    ;;#ASMSTART
19405; GFX900-NEXT:    ; use s[8:9]
19406; GFX900-NEXT:    ;;#ASMEND
19407; GFX900-NEXT:    s_setpc_b64 s[30:31]
19408;
19409; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_4:
19410; GFX90A:       ; %bb.0:
19411; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19412; GFX90A-NEXT:    ;;#ASMSTART
19413; GFX90A-NEXT:    ; def s[4:5]
19414; GFX90A-NEXT:    ;;#ASMEND
19415; GFX90A-NEXT:    s_lshl_b32 s9, s4, 16
19416; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
19417; GFX90A-NEXT:    ;;#ASMSTART
19418; GFX90A-NEXT:    ; use s[8:9]
19419; GFX90A-NEXT:    ;;#ASMEND
19420; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19421;
19422; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_4:
19423; GFX940:       ; %bb.0:
19424; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19425; GFX940-NEXT:    ;;#ASMSTART
19426; GFX940-NEXT:    ; def s[0:1]
19427; GFX940-NEXT:    ;;#ASMEND
19428; GFX940-NEXT:    s_lshl_b32 s9, s0, 16
19429; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
19430; GFX940-NEXT:    ;;#ASMSTART
19431; GFX940-NEXT:    ; use s[8:9]
19432; GFX940-NEXT:    ;;#ASMEND
19433; GFX940-NEXT:    s_setpc_b64 s[30:31]
19434  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19435  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19436  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 4>
19437  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19438  ret void
19439}
19440
; SGPR shuffle of two <4 x i16> inline-asm values with mask <7, 7, 0, 4>.
; Index 0 selects from %vec0 and indices 4 and 7 select from %vec1, so both
; operands are needed and the checks contain two "; def" blocks.
; Expected sequence: s_pack_ll_b32_b16 writing s9 from the first register of each
; def pair, then s_pack_hh_b32_b16 writing s8 from the second register of the
; second pair; s[8:9] is the pair required by the "use" asm constraint.
; The gfx900 and gfx90a checks expect the defs in s[4:5] and s[6:7], the gfx940
; checks in s[0:1] and s[2:3].
19441define void @s_shuffle_v4i16_v4i16__7_7_0_4() {
19442; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_4:
19443; GFX900:       ; %bb.0:
19444; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19445; GFX900-NEXT:    ;;#ASMSTART
19446; GFX900-NEXT:    ; def s[4:5]
19447; GFX900-NEXT:    ;;#ASMEND
19448; GFX900-NEXT:    ;;#ASMSTART
19449; GFX900-NEXT:    ; def s[6:7]
19450; GFX900-NEXT:    ;;#ASMEND
19451; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
19452; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
19453; GFX900-NEXT:    ;;#ASMSTART
19454; GFX900-NEXT:    ; use s[8:9]
19455; GFX900-NEXT:    ;;#ASMEND
19456; GFX900-NEXT:    s_setpc_b64 s[30:31]
19457;
19458; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_4:
19459; GFX90A:       ; %bb.0:
19460; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19461; GFX90A-NEXT:    ;;#ASMSTART
19462; GFX90A-NEXT:    ; def s[4:5]
19463; GFX90A-NEXT:    ;;#ASMEND
19464; GFX90A-NEXT:    ;;#ASMSTART
19465; GFX90A-NEXT:    ; def s[6:7]
19466; GFX90A-NEXT:    ;;#ASMEND
19467; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
19468; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
19469; GFX90A-NEXT:    ;;#ASMSTART
19470; GFX90A-NEXT:    ; use s[8:9]
19471; GFX90A-NEXT:    ;;#ASMEND
19472; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19473;
19474; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_4:
19475; GFX940:       ; %bb.0:
19476; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19477; GFX940-NEXT:    ;;#ASMSTART
19478; GFX940-NEXT:    ; def s[0:1]
19479; GFX940-NEXT:    ;;#ASMEND
19480; GFX940-NEXT:    ;;#ASMSTART
19481; GFX940-NEXT:    ; def s[2:3]
19482; GFX940-NEXT:    ;;#ASMEND
19483; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s2
19484; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
19485; GFX940-NEXT:    ;;#ASMSTART
19486; GFX940-NEXT:    ; use s[8:9]
19487; GFX940-NEXT:    ;;#ASMEND
19488; GFX940-NEXT:    s_setpc_b64 s[30:31]
19489  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19490  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19491  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 4>
19492  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19493  ret void
19494}
19495
; SGPR shuffle of two <4 x i16> inline-asm values with mask <7, 7, 1, 4>.
; Index 1 selects from %vec0 and indices 4 and 7 select from %vec1, so both
; operands are needed and the checks contain two "; def" blocks.
; Expected sequence: an s_lshr_b32 by 16 on the first register of the first def
; (emitted between the two defs), then s_pack_ll_b32_b16 writing s9 and
; s_pack_hh_b32_b16 writing s8; s[8:9] is the pair required by the "use" asm.
; The gfx900 and gfx90a checks expect the defs in s[4:5] and s[6:7], the gfx940
; checks in s[0:1] and s[2:3].
19496define void @s_shuffle_v4i16_v4i16__7_7_1_4() {
19497; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_4:
19498; GFX900:       ; %bb.0:
19499; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19500; GFX900-NEXT:    ;;#ASMSTART
19501; GFX900-NEXT:    ; def s[4:5]
19502; GFX900-NEXT:    ;;#ASMEND
19503; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
19504; GFX900-NEXT:    ;;#ASMSTART
19505; GFX900-NEXT:    ; def s[6:7]
19506; GFX900-NEXT:    ;;#ASMEND
19507; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
19508; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
19509; GFX900-NEXT:    ;;#ASMSTART
19510; GFX900-NEXT:    ; use s[8:9]
19511; GFX900-NEXT:    ;;#ASMEND
19512; GFX900-NEXT:    s_setpc_b64 s[30:31]
19513;
19514; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_4:
19515; GFX90A:       ; %bb.0:
19516; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19517; GFX90A-NEXT:    ;;#ASMSTART
19518; GFX90A-NEXT:    ; def s[4:5]
19519; GFX90A-NEXT:    ;;#ASMEND
19520; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
19521; GFX90A-NEXT:    ;;#ASMSTART
19522; GFX90A-NEXT:    ; def s[6:7]
19523; GFX90A-NEXT:    ;;#ASMEND
19524; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
19525; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
19526; GFX90A-NEXT:    ;;#ASMSTART
19527; GFX90A-NEXT:    ; use s[8:9]
19528; GFX90A-NEXT:    ;;#ASMEND
19529; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19530;
19531; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_4:
19532; GFX940:       ; %bb.0:
19533; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19534; GFX940-NEXT:    ;;#ASMSTART
19535; GFX940-NEXT:    ; def s[0:1]
19536; GFX940-NEXT:    ;;#ASMEND
19537; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
19538; GFX940-NEXT:    ;;#ASMSTART
19539; GFX940-NEXT:    ; def s[2:3]
19540; GFX940-NEXT:    ;;#ASMEND
19541; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s2
19542; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
19543; GFX940-NEXT:    ;;#ASMSTART
19544; GFX940-NEXT:    ; use s[8:9]
19545; GFX940-NEXT:    ;;#ASMEND
19546; GFX940-NEXT:    s_setpc_b64 s[30:31]
19547  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19548  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19549  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 4>
19550  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19551  ret void
19552}
19553
; SGPR shuffle of two <4 x i16> inline-asm values with mask <7, 7, 2, 4>.
; Index 2 selects from %vec0 and indices 4 and 7 select from %vec1, so both
; operands are needed and the checks contain two "; def" blocks.
; Expected sequence: s_pack_ll_b32_b16 writing s9 from the second register of the
; first def pair and the first register of the second pair, then
; s_pack_hh_b32_b16 writing s8; s[8:9] is the pair required by the "use" asm.
; The gfx900 and gfx90a checks expect the defs in s[4:5] and s[6:7], the gfx940
; checks in s[0:1] and s[2:3].
19554define void @s_shuffle_v4i16_v4i16__7_7_2_4() {
19555; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_4:
19556; GFX900:       ; %bb.0:
19557; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19558; GFX900-NEXT:    ;;#ASMSTART
19559; GFX900-NEXT:    ; def s[4:5]
19560; GFX900-NEXT:    ;;#ASMEND
19561; GFX900-NEXT:    ;;#ASMSTART
19562; GFX900-NEXT:    ; def s[6:7]
19563; GFX900-NEXT:    ;;#ASMEND
19564; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s6
19565; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
19566; GFX900-NEXT:    ;;#ASMSTART
19567; GFX900-NEXT:    ; use s[8:9]
19568; GFX900-NEXT:    ;;#ASMEND
19569; GFX900-NEXT:    s_setpc_b64 s[30:31]
19570;
19571; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_4:
19572; GFX90A:       ; %bb.0:
19573; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19574; GFX90A-NEXT:    ;;#ASMSTART
19575; GFX90A-NEXT:    ; def s[4:5]
19576; GFX90A-NEXT:    ;;#ASMEND
19577; GFX90A-NEXT:    ;;#ASMSTART
19578; GFX90A-NEXT:    ; def s[6:7]
19579; GFX90A-NEXT:    ;;#ASMEND
19580; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s6
19581; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
19582; GFX90A-NEXT:    ;;#ASMSTART
19583; GFX90A-NEXT:    ; use s[8:9]
19584; GFX90A-NEXT:    ;;#ASMEND
19585; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19586;
19587; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_4:
19588; GFX940:       ; %bb.0:
19589; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19590; GFX940-NEXT:    ;;#ASMSTART
19591; GFX940-NEXT:    ; def s[0:1]
19592; GFX940-NEXT:    ;;#ASMEND
19593; GFX940-NEXT:    ;;#ASMSTART
19594; GFX940-NEXT:    ; def s[2:3]
19595; GFX940-NEXT:    ;;#ASMEND
19596; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s2
19597; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
19598; GFX940-NEXT:    ;;#ASMSTART
19599; GFX940-NEXT:    ; use s[8:9]
19600; GFX940-NEXT:    ;;#ASMEND
19601; GFX940-NEXT:    s_setpc_b64 s[30:31]
19602  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19603  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19604  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 4>
19605  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19606  ret void
19607}
19608
; SGPR shuffle of two <4 x i16> inline-asm values with mask <7, 7, 3, 4>.
; Index 3 selects from %vec0 and indices 4 and 7 select from %vec1, so both
; operands are needed and the checks contain two "; def" blocks.
; Expected sequence: an s_lshr_b32 by 16 on the second register of the first def
; (emitted between the two defs), then s_pack_ll_b32_b16 writing s9 and
; s_pack_hh_b32_b16 writing s8; s[8:9] is the pair required by the "use" asm.
; The gfx900 and gfx90a checks expect the defs in s[4:5] and s[6:7], the gfx940
; checks in s[0:1] and s[2:3].
19609define void @s_shuffle_v4i16_v4i16__7_7_3_4() {
19610; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_4:
19611; GFX900:       ; %bb.0:
19612; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19613; GFX900-NEXT:    ;;#ASMSTART
19614; GFX900-NEXT:    ; def s[4:5]
19615; GFX900-NEXT:    ;;#ASMEND
19616; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
19617; GFX900-NEXT:    ;;#ASMSTART
19618; GFX900-NEXT:    ; def s[6:7]
19619; GFX900-NEXT:    ;;#ASMEND
19620; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
19621; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
19622; GFX900-NEXT:    ;;#ASMSTART
19623; GFX900-NEXT:    ; use s[8:9]
19624; GFX900-NEXT:    ;;#ASMEND
19625; GFX900-NEXT:    s_setpc_b64 s[30:31]
19626;
19627; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_4:
19628; GFX90A:       ; %bb.0:
19629; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19630; GFX90A-NEXT:    ;;#ASMSTART
19631; GFX90A-NEXT:    ; def s[4:5]
19632; GFX90A-NEXT:    ;;#ASMEND
19633; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
19634; GFX90A-NEXT:    ;;#ASMSTART
19635; GFX90A-NEXT:    ; def s[6:7]
19636; GFX90A-NEXT:    ;;#ASMEND
19637; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s6
19638; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
19639; GFX90A-NEXT:    ;;#ASMSTART
19640; GFX90A-NEXT:    ; use s[8:9]
19641; GFX90A-NEXT:    ;;#ASMEND
19642; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19643;
19644; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_4:
19645; GFX940:       ; %bb.0:
19646; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19647; GFX940-NEXT:    ;;#ASMSTART
19648; GFX940-NEXT:    ; def s[0:1]
19649; GFX940-NEXT:    ;;#ASMEND
19650; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
19651; GFX940-NEXT:    ;;#ASMSTART
19652; GFX940-NEXT:    ; def s[2:3]
19653; GFX940-NEXT:    ;;#ASMEND
19654; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s2
19655; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
19656; GFX940-NEXT:    ;;#ASMSTART
19657; GFX940-NEXT:    ; use s[8:9]
19658; GFX940-NEXT:    ;;#ASMEND
19659; GFX940-NEXT:    s_setpc_b64 s[30:31]
19660  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19661  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19662  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 4>
19663  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19664  ret void
19665}
19666
; SGPR shuffle of two <4 x i16> inline-asm values with mask <7, 7, 5, 4>.
; Every mask index is >= 4, so all result lanes come from %vec1; accordingly the
; checks contain a single "; def" of one 64-bit SGPR pair.
; Expected sequence: s_lshr_b32 by 16, s_pack_ll_b32_b16 writing s9, then
; s_pack_hh_b32_b16 writing s8; s[8:9] is the pair required by the "use" asm.
; The gfx900 and gfx90a checks expect the def in s[4:5], the gfx940 checks in s[0:1].
19667define void @s_shuffle_v4i16_v4i16__7_7_5_4() {
19668; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_4:
19669; GFX900:       ; %bb.0:
19670; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19671; GFX900-NEXT:    ;;#ASMSTART
19672; GFX900-NEXT:    ; def s[4:5]
19673; GFX900-NEXT:    ;;#ASMEND
19674; GFX900-NEXT:    s_lshr_b32 s6, s4, 16
19675; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
19676; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
19677; GFX900-NEXT:    ;;#ASMSTART
19678; GFX900-NEXT:    ; use s[8:9]
19679; GFX900-NEXT:    ;;#ASMEND
19680; GFX900-NEXT:    s_setpc_b64 s[30:31]
19681;
19682; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_4:
19683; GFX90A:       ; %bb.0:
19684; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19685; GFX90A-NEXT:    ;;#ASMSTART
19686; GFX90A-NEXT:    ; def s[4:5]
19687; GFX90A-NEXT:    ;;#ASMEND
19688; GFX90A-NEXT:    s_lshr_b32 s6, s4, 16
19689; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s6, s4
19690; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
19691; GFX90A-NEXT:    ;;#ASMSTART
19692; GFX90A-NEXT:    ; use s[8:9]
19693; GFX90A-NEXT:    ;;#ASMEND
19694; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19695;
19696; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_4:
19697; GFX940:       ; %bb.0:
19698; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19699; GFX940-NEXT:    ;;#ASMSTART
19700; GFX940-NEXT:    ; def s[0:1]
19701; GFX940-NEXT:    ;;#ASMEND
19702; GFX940-NEXT:    s_lshr_b32 s2, s0, 16
19703; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s2, s0
19704; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
19705; GFX940-NEXT:    ;;#ASMSTART
19706; GFX940-NEXT:    ; use s[8:9]
19707; GFX940-NEXT:    ;;#ASMEND
19708; GFX940-NEXT:    s_setpc_b64 s[30:31]
19709  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19710  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19711  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 4>
19712  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19713  ret void
19714}
19715
; SGPR shuffle of two <4 x i16> inline-asm values with mask <7, 7, 6, 4>.
; Every mask index is >= 4, so all result lanes come from %vec1; accordingly the
; checks contain a single "; def" of one 64-bit SGPR pair.
; Expected sequence: s_pack_ll_b32_b16 writing s9 from both registers of the def
; pair, then s_pack_hh_b32_b16 writing s8; s[8:9] is the pair required by the
; "use" asm constraint.
; The gfx900 and gfx90a checks expect the def in s[4:5], the gfx940 checks in s[0:1].
19716define void @s_shuffle_v4i16_v4i16__7_7_6_4() {
19717; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_4:
19718; GFX900:       ; %bb.0:
19719; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19720; GFX900-NEXT:    ;;#ASMSTART
19721; GFX900-NEXT:    ; def s[4:5]
19722; GFX900-NEXT:    ;;#ASMEND
19723; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
19724; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
19725; GFX900-NEXT:    ;;#ASMSTART
19726; GFX900-NEXT:    ; use s[8:9]
19727; GFX900-NEXT:    ;;#ASMEND
19728; GFX900-NEXT:    s_setpc_b64 s[30:31]
19729;
19730; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_4:
19731; GFX90A:       ; %bb.0:
19732; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19733; GFX90A-NEXT:    ;;#ASMSTART
19734; GFX90A-NEXT:    ; def s[4:5]
19735; GFX90A-NEXT:    ;;#ASMEND
19736; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s4
19737; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
19738; GFX90A-NEXT:    ;;#ASMSTART
19739; GFX90A-NEXT:    ; use s[8:9]
19740; GFX90A-NEXT:    ;;#ASMEND
19741; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19742;
19743; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_4:
19744; GFX940:       ; %bb.0:
19745; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19746; GFX940-NEXT:    ;;#ASMSTART
19747; GFX940-NEXT:    ; def s[0:1]
19748; GFX940-NEXT:    ;;#ASMEND
19749; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s0
19750; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
19751; GFX940-NEXT:    ;;#ASMSTART
19752; GFX940-NEXT:    ; use s[8:9]
19753; GFX940-NEXT:    ;;#ASMEND
19754; GFX940-NEXT:    s_setpc_b64 s[30:31]
19755  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19756  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19757  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 4>
19758  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19759  ret void
19760}
19761
; SGPR shuffle of two <4 x i16> inline-asm values with mask <poison, 5, 5, 5>.
; Result lane 0 is poison and the other lanes all take index 5 (from %vec1), so
; the checks contain a single "; def".
; All three RUN configurations share one check block here. The expected output
; has the def placed directly in s[8:9], the pair required by the "use" asm, and
; only s9 is rewritten (one s_pack_hh_b32_b16 reading s8); s8 is left as defined.
19762define void @s_shuffle_v4i16_v4i16__u_5_5_5() {
19763; GFX9-LABEL: s_shuffle_v4i16_v4i16__u_5_5_5:
19764; GFX9:       ; %bb.0:
19765; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19766; GFX9-NEXT:    ;;#ASMSTART
19767; GFX9-NEXT:    ; def s[8:9]
19768; GFX9-NEXT:    ;;#ASMEND
19769; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
19770; GFX9-NEXT:    ;;#ASMSTART
19771; GFX9-NEXT:    ; use s[8:9]
19772; GFX9-NEXT:    ;;#ASMEND
19773; GFX9-NEXT:    s_setpc_b64 s[30:31]
19774  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19775  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19776  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
19777  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19778  ret void
19779}
19780
; SGPR shuffle of two <4 x i16> inline-asm values with mask <0, 5, 5, 5>.
; Index 0 selects from %vec0 and index 5 selects from %vec1, so both operands are
; needed and the checks contain two "; def" blocks.
; Expected sequence: s_pack_lh_b32_b16 writing s8 from the first register of each
; def pair, then s_pack_hh_b32_b16 writing s9 from the first register of the
; second pair; s[8:9] is the pair required by the "use" asm constraint.
; The gfx900 and gfx90a checks expect the defs in s[4:5] and s[6:7], the gfx940
; checks in s[0:1] and s[2:3].
19781define void @s_shuffle_v4i16_v4i16__0_5_5_5() {
19782; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_5_5_5:
19783; GFX900:       ; %bb.0:
19784; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19785; GFX900-NEXT:    ;;#ASMSTART
19786; GFX900-NEXT:    ; def s[4:5]
19787; GFX900-NEXT:    ;;#ASMEND
19788; GFX900-NEXT:    ;;#ASMSTART
19789; GFX900-NEXT:    ; def s[6:7]
19790; GFX900-NEXT:    ;;#ASMEND
19791; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s6
19792; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
19793; GFX900-NEXT:    ;;#ASMSTART
19794; GFX900-NEXT:    ; use s[8:9]
19795; GFX900-NEXT:    ;;#ASMEND
19796; GFX900-NEXT:    s_setpc_b64 s[30:31]
19797;
19798; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_5_5_5:
19799; GFX90A:       ; %bb.0:
19800; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19801; GFX90A-NEXT:    ;;#ASMSTART
19802; GFX90A-NEXT:    ; def s[4:5]
19803; GFX90A-NEXT:    ;;#ASMEND
19804; GFX90A-NEXT:    ;;#ASMSTART
19805; GFX90A-NEXT:    ; def s[6:7]
19806; GFX90A-NEXT:    ;;#ASMEND
19807; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s6
19808; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
19809; GFX90A-NEXT:    ;;#ASMSTART
19810; GFX90A-NEXT:    ; use s[8:9]
19811; GFX90A-NEXT:    ;;#ASMEND
19812; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19813;
19814; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_5_5_5:
19815; GFX940:       ; %bb.0:
19816; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19817; GFX940-NEXT:    ;;#ASMSTART
19818; GFX940-NEXT:    ; def s[0:1]
19819; GFX940-NEXT:    ;;#ASMEND
19820; GFX940-NEXT:    ;;#ASMSTART
19821; GFX940-NEXT:    ; def s[2:3]
19822; GFX940-NEXT:    ;;#ASMEND
19823; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s2
19824; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
19825; GFX940-NEXT:    ;;#ASMSTART
19826; GFX940-NEXT:    ; use s[8:9]
19827; GFX940-NEXT:    ;;#ASMEND
19828; GFX940-NEXT:    s_setpc_b64 s[30:31]
19829  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19830  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19831  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
19832  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19833  ret void
19834}
19835
; SGPR shuffle of two <4 x i16> inline-asm values with mask <1, 5, 5, 5>.
; Index 1 selects from %vec0 and index 5 selects from %vec1, so both operands are
; needed and the checks contain two "; def" blocks.
; Expected sequence: two s_pack_hh_b32_b16, the first writing s8 from the first
; register of each def pair, the second writing s9 from the first register of the
; second pair; s[8:9] is the pair required by the "use" asm constraint.
; The gfx900 and gfx90a checks expect the defs in s[4:5] and s[6:7], the gfx940
; checks in s[0:1] and s[2:3].
19836define void @s_shuffle_v4i16_v4i16__1_5_5_5() {
19837; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_5_5_5:
19838; GFX900:       ; %bb.0:
19839; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19840; GFX900-NEXT:    ;;#ASMSTART
19841; GFX900-NEXT:    ; def s[4:5]
19842; GFX900-NEXT:    ;;#ASMEND
19843; GFX900-NEXT:    ;;#ASMSTART
19844; GFX900-NEXT:    ; def s[6:7]
19845; GFX900-NEXT:    ;;#ASMEND
19846; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s6
19847; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
19848; GFX900-NEXT:    ;;#ASMSTART
19849; GFX900-NEXT:    ; use s[8:9]
19850; GFX900-NEXT:    ;;#ASMEND
19851; GFX900-NEXT:    s_setpc_b64 s[30:31]
19852;
19853; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_5_5_5:
19854; GFX90A:       ; %bb.0:
19855; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19856; GFX90A-NEXT:    ;;#ASMSTART
19857; GFX90A-NEXT:    ; def s[4:5]
19858; GFX90A-NEXT:    ;;#ASMEND
19859; GFX90A-NEXT:    ;;#ASMSTART
19860; GFX90A-NEXT:    ; def s[6:7]
19861; GFX90A-NEXT:    ;;#ASMEND
19862; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s6
19863; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
19864; GFX90A-NEXT:    ;;#ASMSTART
19865; GFX90A-NEXT:    ; use s[8:9]
19866; GFX90A-NEXT:    ;;#ASMEND
19867; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19868;
19869; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_5_5_5:
19870; GFX940:       ; %bb.0:
19871; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19872; GFX940-NEXT:    ;;#ASMSTART
19873; GFX940-NEXT:    ; def s[0:1]
19874; GFX940-NEXT:    ;;#ASMEND
19875; GFX940-NEXT:    ;;#ASMSTART
19876; GFX940-NEXT:    ; def s[2:3]
19877; GFX940-NEXT:    ;;#ASMEND
19878; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s2
19879; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
19880; GFX940-NEXT:    ;;#ASMSTART
19881; GFX940-NEXT:    ; use s[8:9]
19882; GFX940-NEXT:    ;;#ASMEND
19883; GFX940-NEXT:    s_setpc_b64 s[30:31]
19884  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19885  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19886  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
19887  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19888  ret void
19889}
19890
; SGPR shuffle of two <4 x i16> inline-asm values with mask <2, 5, 5, 5>.
; Index 2 selects from %vec0 and index 5 selects from %vec1, so both operands are
; needed and the checks contain two "; def" blocks.
; Expected sequence: s_pack_lh_b32_b16 writing s8 from the second register of the
; first def pair and the first register of the second pair, then
; s_pack_hh_b32_b16 writing s9; s[8:9] is the pair required by the "use" asm.
; The gfx900 and gfx90a checks expect the defs in s[4:5] and s[6:7], the gfx940
; checks in s[0:1] and s[2:3].
19891define void @s_shuffle_v4i16_v4i16__2_5_5_5() {
19892; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_5_5_5:
19893; GFX900:       ; %bb.0:
19894; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19895; GFX900-NEXT:    ;;#ASMSTART
19896; GFX900-NEXT:    ; def s[4:5]
19897; GFX900-NEXT:    ;;#ASMEND
19898; GFX900-NEXT:    ;;#ASMSTART
19899; GFX900-NEXT:    ; def s[6:7]
19900; GFX900-NEXT:    ;;#ASMEND
19901; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s6
19902; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
19903; GFX900-NEXT:    ;;#ASMSTART
19904; GFX900-NEXT:    ; use s[8:9]
19905; GFX900-NEXT:    ;;#ASMEND
19906; GFX900-NEXT:    s_setpc_b64 s[30:31]
19907;
19908; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_5_5_5:
19909; GFX90A:       ; %bb.0:
19910; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19911; GFX90A-NEXT:    ;;#ASMSTART
19912; GFX90A-NEXT:    ; def s[4:5]
19913; GFX90A-NEXT:    ;;#ASMEND
19914; GFX90A-NEXT:    ;;#ASMSTART
19915; GFX90A-NEXT:    ; def s[6:7]
19916; GFX90A-NEXT:    ;;#ASMEND
19917; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s6
19918; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
19919; GFX90A-NEXT:    ;;#ASMSTART
19920; GFX90A-NEXT:    ; use s[8:9]
19921; GFX90A-NEXT:    ;;#ASMEND
19922; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19923;
19924; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_5_5_5:
19925; GFX940:       ; %bb.0:
19926; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19927; GFX940-NEXT:    ;;#ASMSTART
19928; GFX940-NEXT:    ; def s[0:1]
19929; GFX940-NEXT:    ;;#ASMEND
19930; GFX940-NEXT:    ;;#ASMSTART
19931; GFX940-NEXT:    ; def s[2:3]
19932; GFX940-NEXT:    ;;#ASMEND
19933; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s2
19934; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
19935; GFX940-NEXT:    ;;#ASMSTART
19936; GFX940-NEXT:    ; use s[8:9]
19937; GFX940-NEXT:    ;;#ASMEND
19938; GFX940-NEXT:    s_setpc_b64 s[30:31]
19939  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19940  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19941  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
19942  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19943  ret void
19944}
19945
; SGPR shuffle of two <4 x i16> inline-asm values with mask <3, 5, 5, 5>.
; Index 3 selects from %vec0 and index 5 selects from %vec1, so both operands are
; needed and the checks contain two "; def" blocks.
; Expected sequence: two s_pack_hh_b32_b16, the first writing s8 from the second
; register of the first def pair and the first register of the second pair, the
; second writing s9; s[8:9] is the pair required by the "use" asm constraint.
; The gfx900 and gfx90a checks expect the defs in s[4:5] and s[6:7], the gfx940
; checks in s[0:1] and s[2:3].
19946define void @s_shuffle_v4i16_v4i16__3_5_5_5() {
19947; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_5_5_5:
19948; GFX900:       ; %bb.0:
19949; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19950; GFX900-NEXT:    ;;#ASMSTART
19951; GFX900-NEXT:    ; def s[4:5]
19952; GFX900-NEXT:    ;;#ASMEND
19953; GFX900-NEXT:    ;;#ASMSTART
19954; GFX900-NEXT:    ; def s[6:7]
19955; GFX900-NEXT:    ;;#ASMEND
19956; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s6
19957; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
19958; GFX900-NEXT:    ;;#ASMSTART
19959; GFX900-NEXT:    ; use s[8:9]
19960; GFX900-NEXT:    ;;#ASMEND
19961; GFX900-NEXT:    s_setpc_b64 s[30:31]
19962;
19963; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_5_5_5:
19964; GFX90A:       ; %bb.0:
19965; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19966; GFX90A-NEXT:    ;;#ASMSTART
19967; GFX90A-NEXT:    ; def s[4:5]
19968; GFX90A-NEXT:    ;;#ASMEND
19969; GFX90A-NEXT:    ;;#ASMSTART
19970; GFX90A-NEXT:    ; def s[6:7]
19971; GFX90A-NEXT:    ;;#ASMEND
19972; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s6
19973; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
19974; GFX90A-NEXT:    ;;#ASMSTART
19975; GFX90A-NEXT:    ; use s[8:9]
19976; GFX90A-NEXT:    ;;#ASMEND
19977; GFX90A-NEXT:    s_setpc_b64 s[30:31]
19978;
19979; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_5_5_5:
19980; GFX940:       ; %bb.0:
19981; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19982; GFX940-NEXT:    ;;#ASMSTART
19983; GFX940-NEXT:    ; def s[0:1]
19984; GFX940-NEXT:    ;;#ASMEND
19985; GFX940-NEXT:    ;;#ASMSTART
19986; GFX940-NEXT:    ; def s[2:3]
19987; GFX940-NEXT:    ;;#ASMEND
19988; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s2
19989; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
19990; GFX940-NEXT:    ;;#ASMSTART
19991; GFX940-NEXT:    ; use s[8:9]
19992; GFX940-NEXT:    ;;#ASMEND
19993; GFX940-NEXT:    s_setpc_b64 s[30:31]
19994  %vec0 = call <4 x i16> asm "; def $0", "=s"()
19995  %vec1 = call <4 x i16> asm "; def $0", "=s"()
19996  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
19997  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
19998  ret void
19999}
20000
; SGPR shuffle of two <4 x i16> inline-asm values with mask <4, 5, 5, 5>.
; Every mask index is >= 4, so all result lanes come from %vec1; accordingly the
; checks contain a single "; def".
; All three RUN configurations share one check block here. The expected output
; has the def placed directly in s[8:9], the pair required by the "use" asm, and
; only s9 is rewritten (one s_pack_hh_b32_b16 reading s8); s8 is left as defined.
20001define void @s_shuffle_v4i16_v4i16__4_5_5_5() {
20002; GFX9-LABEL: s_shuffle_v4i16_v4i16__4_5_5_5:
20003; GFX9:       ; %bb.0:
20004; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20005; GFX9-NEXT:    ;;#ASMSTART
20006; GFX9-NEXT:    ; def s[8:9]
20007; GFX9-NEXT:    ;;#ASMEND
20008; GFX9-NEXT:    s_pack_hh_b32_b16 s9, s8, s8
20009; GFX9-NEXT:    ;;#ASMSTART
20010; GFX9-NEXT:    ; use s[8:9]
20011; GFX9-NEXT:    ;;#ASMEND
20012; GFX9-NEXT:    s_setpc_b64 s[30:31]
20013  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20014  %vec1 = call <4 x i16> asm "; def $0", "=s"()
20015  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
20016  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20017  ret void
20018}
20019
; SGPR shuffle of two <4 x i16> inline-asm values with mask <5, 5, 5, 5>, i.e.
; the same %vec1 lane replicated into all four result lanes; the checks contain
; a single "; def".
; Expected sequence: one s_pack_hh_b32_b16 writing s8, then s_mov_b32 copying s8
; into s9; s[8:9] is the pair required by the "use" asm constraint.
; The gfx900 and gfx90a checks expect the def in s[4:5], the gfx940 checks in s[0:1].
20020define void @s_shuffle_v4i16_v4i16__5_5_5_5() {
20021; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_5_5_5:
20022; GFX900:       ; %bb.0:
20023; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20024; GFX900-NEXT:    ;;#ASMSTART
20025; GFX900-NEXT:    ; def s[4:5]
20026; GFX900-NEXT:    ;;#ASMEND
20027; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
20028; GFX900-NEXT:    s_mov_b32 s9, s8
20029; GFX900-NEXT:    ;;#ASMSTART
20030; GFX900-NEXT:    ; use s[8:9]
20031; GFX900-NEXT:    ;;#ASMEND
20032; GFX900-NEXT:    s_setpc_b64 s[30:31]
20033;
20034; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_5_5_5:
20035; GFX90A:       ; %bb.0:
20036; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20037; GFX90A-NEXT:    ;;#ASMSTART
20038; GFX90A-NEXT:    ; def s[4:5]
20039; GFX90A-NEXT:    ;;#ASMEND
20040; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
20041; GFX90A-NEXT:    s_mov_b32 s9, s8
20042; GFX90A-NEXT:    ;;#ASMSTART
20043; GFX90A-NEXT:    ; use s[8:9]
20044; GFX90A-NEXT:    ;;#ASMEND
20045; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20046;
20047; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_5_5_5:
20048; GFX940:       ; %bb.0:
20049; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20050; GFX940-NEXT:    ;;#ASMSTART
20051; GFX940-NEXT:    ; def s[0:1]
20052; GFX940-NEXT:    ;;#ASMEND
20053; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s0
20054; GFX940-NEXT:    s_mov_b32 s9, s8
20055; GFX940-NEXT:    ;;#ASMSTART
20056; GFX940-NEXT:    ; use s[8:9]
20057; GFX940-NEXT:    ;;#ASMEND
20058; GFX940-NEXT:    s_setpc_b64 s[30:31]
20059  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20060  %vec1 = call <4 x i16> asm "; def $0", "=s"()
20061  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
20062  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20063  ret void
20064}
20065
; SGPR shuffle of two <4 x i16> inline-asm values with mask <6, 5, 5, 5>.
; Every mask index is >= 4, so all result lanes come from %vec1; accordingly the
; checks contain a single "; def" of one 64-bit SGPR pair.
; Expected sequence: s_pack_lh_b32_b16 writing s8 from both registers of the def
; pair, then s_pack_hh_b32_b16 writing s9 from the pair's first register; s[8:9]
; is the pair required by the "use" asm constraint.
; The gfx900 and gfx90a checks expect the def in s[4:5], the gfx940 checks in s[0:1].
20066define void @s_shuffle_v4i16_v4i16__6_5_5_5() {
20067; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_5_5_5:
20068; GFX900:       ; %bb.0:
20069; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20070; GFX900-NEXT:    ;;#ASMSTART
20071; GFX900-NEXT:    ; def s[4:5]
20072; GFX900-NEXT:    ;;#ASMEND
20073; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
20074; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20075; GFX900-NEXT:    ;;#ASMSTART
20076; GFX900-NEXT:    ; use s[8:9]
20077; GFX900-NEXT:    ;;#ASMEND
20078; GFX900-NEXT:    s_setpc_b64 s[30:31]
20079;
20080; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_5_5_5:
20081; GFX90A:       ; %bb.0:
20082; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20083; GFX90A-NEXT:    ;;#ASMSTART
20084; GFX90A-NEXT:    ; def s[4:5]
20085; GFX90A-NEXT:    ;;#ASMEND
20086; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
20087; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20088; GFX90A-NEXT:    ;;#ASMSTART
20089; GFX90A-NEXT:    ; use s[8:9]
20090; GFX90A-NEXT:    ;;#ASMEND
20091; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20092;
20093; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_5_5_5:
20094; GFX940:       ; %bb.0:
20095; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20096; GFX940-NEXT:    ;;#ASMSTART
20097; GFX940-NEXT:    ; def s[0:1]
20098; GFX940-NEXT:    ;;#ASMEND
20099; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
20100; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
20101; GFX940-NEXT:    ;;#ASMSTART
20102; GFX940-NEXT:    ; use s[8:9]
20103; GFX940-NEXT:    ;;#ASMEND
20104; GFX940-NEXT:    s_setpc_b64 s[30:31]
20105  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20106  %vec1 = call <4 x i16> asm "; def $0", "=s"()
20107  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 5, i32 5, i32 5>
20108  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20109  ret void
20110}
20111
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 5, 5, 5>, i.e. result = { vec1[3], vec1[1], vec1[1], vec1[1] }.
; Only %vec1 is referenced by the mask, and the expected output for gfx900,
; gfx90a and gfx940 contains a single input def
; (NOTE(review): presumably the unused %vec0 asm is eliminated - confirm).
; Expected lowering: one s_pack_hh_b32_b16 for each of s8 and s9.
20112define void @s_shuffle_v4i16_v4i16__7_5_5_5() {
20113; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_5_5:
20114; GFX900:       ; %bb.0:
20115; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20116; GFX900-NEXT:    ;;#ASMSTART
20117; GFX900-NEXT:    ; def s[4:5]
20118; GFX900-NEXT:    ;;#ASMEND
20119; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
20120; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20121; GFX900-NEXT:    ;;#ASMSTART
20122; GFX900-NEXT:    ; use s[8:9]
20123; GFX900-NEXT:    ;;#ASMEND
20124; GFX900-NEXT:    s_setpc_b64 s[30:31]
20125;
20126; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_5_5:
20127; GFX90A:       ; %bb.0:
20128; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20129; GFX90A-NEXT:    ;;#ASMSTART
20130; GFX90A-NEXT:    ; def s[4:5]
20131; GFX90A-NEXT:    ;;#ASMEND
20132; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
20133; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20134; GFX90A-NEXT:    ;;#ASMSTART
20135; GFX90A-NEXT:    ; use s[8:9]
20136; GFX90A-NEXT:    ;;#ASMEND
20137; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20138;
20139; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_5_5:
20140; GFX940:       ; %bb.0:
20141; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20142; GFX940-NEXT:    ;;#ASMSTART
20143; GFX940-NEXT:    ; def s[0:1]
20144; GFX940-NEXT:    ;;#ASMEND
20145; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
20146; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
20147; GFX940-NEXT:    ;;#ASMSTART
20148; GFX940-NEXT:    ; use s[8:9]
20149; GFX940-NEXT:    ;;#ASMEND
20150; GFX940-NEXT:    s_setpc_b64 s[30:31]
20151  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20152  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20153  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 5, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20154  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20155  ret void
20156}
20157
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, poison, 5, 5>, i.e. result = { vec1[3], poison, vec1[1], vec1[1] }.
; Only %vec1 is referenced by the mask, and the expected output for gfx900,
; gfx90a and gfx940 contains a single input def
; (NOTE(review): presumably the unused %vec0 asm is eliminated - confirm).
; Expected lowering: s_lshr_b32 by 16 into s8 (lane 1 is poison) and
; s_pack_hh_b32_b16 into s9.
20158define void @s_shuffle_v4i16_v4i16__7_u_5_5() {
20159; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_5_5:
20160; GFX900:       ; %bb.0:
20161; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20162; GFX900-NEXT:    ;;#ASMSTART
20163; GFX900-NEXT:    ; def s[4:5]
20164; GFX900-NEXT:    ;;#ASMEND
20165; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
20166; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20167; GFX900-NEXT:    ;;#ASMSTART
20168; GFX900-NEXT:    ; use s[8:9]
20169; GFX900-NEXT:    ;;#ASMEND
20170; GFX900-NEXT:    s_setpc_b64 s[30:31]
20171;
20172; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_5_5:
20173; GFX90A:       ; %bb.0:
20174; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20175; GFX90A-NEXT:    ;;#ASMSTART
20176; GFX90A-NEXT:    ; def s[4:5]
20177; GFX90A-NEXT:    ;;#ASMEND
20178; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
20179; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20180; GFX90A-NEXT:    ;;#ASMSTART
20181; GFX90A-NEXT:    ; use s[8:9]
20182; GFX90A-NEXT:    ;;#ASMEND
20183; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20184;
20185; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_5_5:
20186; GFX940:       ; %bb.0:
20187; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20188; GFX940-NEXT:    ;;#ASMSTART
20189; GFX940-NEXT:    ; def s[0:1]
20190; GFX940-NEXT:    ;;#ASMEND
20191; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
20192; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
20193; GFX940-NEXT:    ;;#ASMSTART
20194; GFX940-NEXT:    ; use s[8:9]
20195; GFX940-NEXT:    ;;#ASMEND
20196; GFX940-NEXT:    s_setpc_b64 s[30:31]
20197  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20198  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1;
  ; a poison index leaves that result lane unspecified.
20199  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 5, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20200  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20201  ret void
20202}
20203
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 0, 5, 5>, i.e. result = { vec1[3], vec0[0], vec1[1], vec1[1] }.
; Both inputs are referenced, so the expected output for gfx900, gfx90a and
; gfx940 contains two input defs (s[4:5]/s[6:7], or s[0:1]/s[2:3] on gfx940).
; Expected lowering: s_lshr_b32 by 16 followed by s_pack_ll_b32_b16 into s8,
; and s_pack_hh_b32_b16 into s9.
20204define void @s_shuffle_v4i16_v4i16__7_0_5_5() {
20205; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_5_5:
20206; GFX900:       ; %bb.0:
20207; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20208; GFX900-NEXT:    ;;#ASMSTART
20209; GFX900-NEXT:    ; def s[4:5]
20210; GFX900-NEXT:    ;;#ASMEND
20211; GFX900-NEXT:    ;;#ASMSTART
20212; GFX900-NEXT:    ; def s[6:7]
20213; GFX900-NEXT:    ;;#ASMEND
20214; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
20215; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
20216; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
20217; GFX900-NEXT:    ;;#ASMSTART
20218; GFX900-NEXT:    ; use s[8:9]
20219; GFX900-NEXT:    ;;#ASMEND
20220; GFX900-NEXT:    s_setpc_b64 s[30:31]
20221;
20222; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_5_5:
20223; GFX90A:       ; %bb.0:
20224; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20225; GFX90A-NEXT:    ;;#ASMSTART
20226; GFX90A-NEXT:    ; def s[4:5]
20227; GFX90A-NEXT:    ;;#ASMEND
20228; GFX90A-NEXT:    ;;#ASMSTART
20229; GFX90A-NEXT:    ; def s[6:7]
20230; GFX90A-NEXT:    ;;#ASMEND
20231; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
20232; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
20233; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
20234; GFX90A-NEXT:    ;;#ASMSTART
20235; GFX90A-NEXT:    ; use s[8:9]
20236; GFX90A-NEXT:    ;;#ASMEND
20237; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20238;
20239; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_5_5:
20240; GFX940:       ; %bb.0:
20241; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20242; GFX940-NEXT:    ;;#ASMSTART
20243; GFX940-NEXT:    ; def s[0:1]
20244; GFX940-NEXT:    ;;#ASMEND
20245; GFX940-NEXT:    ;;#ASMSTART
20246; GFX940-NEXT:    ; def s[2:3]
20247; GFX940-NEXT:    ;;#ASMEND
20248; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
20249; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
20250; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
20251; GFX940-NEXT:    ;;#ASMSTART
20252; GFX940-NEXT:    ; use s[8:9]
20253; GFX940-NEXT:    ;;#ASMEND
20254; GFX940-NEXT:    s_setpc_b64 s[30:31]
20255  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20256  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20257  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 5, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20258  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20259  ret void
20260}
20261
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 1, 5, 5>, i.e. result = { vec1[3], vec0[1], vec1[1], vec1[1] }.
; Both inputs are referenced, so the expected output for gfx900, gfx90a and
; gfx940 contains two input defs (s[4:5]/s[6:7], or s[0:1]/s[2:3] on gfx940).
; Expected lowering: one s_pack_hh_b32_b16 for each of s8 and s9.
20262define void @s_shuffle_v4i16_v4i16__7_1_5_5() {
20263; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_5_5:
20264; GFX900:       ; %bb.0:
20265; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20266; GFX900-NEXT:    ;;#ASMSTART
20267; GFX900-NEXT:    ; def s[4:5]
20268; GFX900-NEXT:    ;;#ASMEND
20269; GFX900-NEXT:    ;;#ASMSTART
20270; GFX900-NEXT:    ; def s[6:7]
20271; GFX900-NEXT:    ;;#ASMEND
20272; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
20273; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
20274; GFX900-NEXT:    ;;#ASMSTART
20275; GFX900-NEXT:    ; use s[8:9]
20276; GFX900-NEXT:    ;;#ASMEND
20277; GFX900-NEXT:    s_setpc_b64 s[30:31]
20278;
20279; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_5_5:
20280; GFX90A:       ; %bb.0:
20281; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20282; GFX90A-NEXT:    ;;#ASMSTART
20283; GFX90A-NEXT:    ; def s[4:5]
20284; GFX90A-NEXT:    ;;#ASMEND
20285; GFX90A-NEXT:    ;;#ASMSTART
20286; GFX90A-NEXT:    ; def s[6:7]
20287; GFX90A-NEXT:    ;;#ASMEND
20288; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
20289; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
20290; GFX90A-NEXT:    ;;#ASMSTART
20291; GFX90A-NEXT:    ; use s[8:9]
20292; GFX90A-NEXT:    ;;#ASMEND
20293; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20294;
20295; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_5_5:
20296; GFX940:       ; %bb.0:
20297; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20298; GFX940-NEXT:    ;;#ASMSTART
20299; GFX940-NEXT:    ; def s[0:1]
20300; GFX940-NEXT:    ;;#ASMEND
20301; GFX940-NEXT:    ;;#ASMSTART
20302; GFX940-NEXT:    ; def s[2:3]
20303; GFX940-NEXT:    ;;#ASMEND
20304; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
20305; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
20306; GFX940-NEXT:    ;;#ASMSTART
20307; GFX940-NEXT:    ; use s[8:9]
20308; GFX940-NEXT:    ;;#ASMEND
20309; GFX940-NEXT:    s_setpc_b64 s[30:31]
20310  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20311  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20312  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 5, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20313  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20314  ret void
20315}
20316
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 2, 5, 5>, i.e. result = { vec1[3], vec0[2], vec1[1], vec1[1] }.
; Both inputs are referenced, so the expected output for gfx900, gfx90a and
; gfx940 contains two input defs (s[4:5]/s[6:7], or s[0:1]/s[2:3] on gfx940).
; Expected lowering: s_lshr_b32 by 16 followed by s_pack_ll_b32_b16 into s8,
; and s_pack_hh_b32_b16 into s9.
20317define void @s_shuffle_v4i16_v4i16__7_2_5_5() {
20318; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_5_5:
20319; GFX900:       ; %bb.0:
20320; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20321; GFX900-NEXT:    ;;#ASMSTART
20322; GFX900-NEXT:    ; def s[4:5]
20323; GFX900-NEXT:    ;;#ASMEND
20324; GFX900-NEXT:    ;;#ASMSTART
20325; GFX900-NEXT:    ; def s[6:7]
20326; GFX900-NEXT:    ;;#ASMEND
20327; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
20328; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
20329; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
20330; GFX900-NEXT:    ;;#ASMSTART
20331; GFX900-NEXT:    ; use s[8:9]
20332; GFX900-NEXT:    ;;#ASMEND
20333; GFX900-NEXT:    s_setpc_b64 s[30:31]
20334;
20335; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_5_5:
20336; GFX90A:       ; %bb.0:
20337; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20338; GFX90A-NEXT:    ;;#ASMSTART
20339; GFX90A-NEXT:    ; def s[4:5]
20340; GFX90A-NEXT:    ;;#ASMEND
20341; GFX90A-NEXT:    ;;#ASMSTART
20342; GFX90A-NEXT:    ; def s[6:7]
20343; GFX90A-NEXT:    ;;#ASMEND
20344; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
20345; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
20346; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
20347; GFX90A-NEXT:    ;;#ASMSTART
20348; GFX90A-NEXT:    ; use s[8:9]
20349; GFX90A-NEXT:    ;;#ASMEND
20350; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20351;
20352; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_5_5:
20353; GFX940:       ; %bb.0:
20354; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20355; GFX940-NEXT:    ;;#ASMSTART
20356; GFX940-NEXT:    ; def s[0:1]
20357; GFX940-NEXT:    ;;#ASMEND
20358; GFX940-NEXT:    ;;#ASMSTART
20359; GFX940-NEXT:    ; def s[2:3]
20360; GFX940-NEXT:    ;;#ASMEND
20361; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
20362; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
20363; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
20364; GFX940-NEXT:    ;;#ASMSTART
20365; GFX940-NEXT:    ; use s[8:9]
20366; GFX940-NEXT:    ;;#ASMEND
20367; GFX940-NEXT:    s_setpc_b64 s[30:31]
20368  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20369  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20370  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 5, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20371  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20372  ret void
20373}
20374
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 3, 5, 5>, i.e. result = { vec1[3], vec0[3], vec1[1], vec1[1] }.
; Both inputs are referenced, so the expected output for gfx900, gfx90a and
; gfx940 contains two input defs (s[4:5]/s[6:7], or s[0:1]/s[2:3] on gfx940).
; Expected lowering: one s_pack_hh_b32_b16 for each of s8 and s9.
20375define void @s_shuffle_v4i16_v4i16__7_3_5_5() {
20376; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_5_5:
20377; GFX900:       ; %bb.0:
20378; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20379; GFX900-NEXT:    ;;#ASMSTART
20380; GFX900-NEXT:    ; def s[4:5]
20381; GFX900-NEXT:    ;;#ASMEND
20382; GFX900-NEXT:    ;;#ASMSTART
20383; GFX900-NEXT:    ; def s[6:7]
20384; GFX900-NEXT:    ;;#ASMEND
20385; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
20386; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
20387; GFX900-NEXT:    ;;#ASMSTART
20388; GFX900-NEXT:    ; use s[8:9]
20389; GFX900-NEXT:    ;;#ASMEND
20390; GFX900-NEXT:    s_setpc_b64 s[30:31]
20391;
20392; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_5_5:
20393; GFX90A:       ; %bb.0:
20394; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20395; GFX90A-NEXT:    ;;#ASMSTART
20396; GFX90A-NEXT:    ; def s[4:5]
20397; GFX90A-NEXT:    ;;#ASMEND
20398; GFX90A-NEXT:    ;;#ASMSTART
20399; GFX90A-NEXT:    ; def s[6:7]
20400; GFX90A-NEXT:    ;;#ASMEND
20401; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
20402; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s6, s6
20403; GFX90A-NEXT:    ;;#ASMSTART
20404; GFX90A-NEXT:    ; use s[8:9]
20405; GFX90A-NEXT:    ;;#ASMEND
20406; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20407;
20408; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_5_5:
20409; GFX940:       ; %bb.0:
20410; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20411; GFX940-NEXT:    ;;#ASMSTART
20412; GFX940-NEXT:    ; def s[0:1]
20413; GFX940-NEXT:    ;;#ASMEND
20414; GFX940-NEXT:    ;;#ASMSTART
20415; GFX940-NEXT:    ; def s[2:3]
20416; GFX940-NEXT:    ;;#ASMEND
20417; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
20418; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s2, s2
20419; GFX940-NEXT:    ;;#ASMSTART
20420; GFX940-NEXT:    ; use s[8:9]
20421; GFX940-NEXT:    ;;#ASMEND
20422; GFX940-NEXT:    s_setpc_b64 s[30:31]
20423  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20424  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20425  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 5, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20426  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20427  ret void
20428}
20429
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 4, 5, 5>, i.e. result = { vec1[3], vec1[0], vec1[1], vec1[1] }.
; Only %vec1 is referenced by the mask, and the expected output for gfx900,
; gfx90a and gfx940 contains a single input def
; (NOTE(review): presumably the unused %vec0 asm is eliminated - confirm).
; Expected lowering: s_lshr_b32 by 16 followed by s_pack_ll_b32_b16 into s8,
; and s_pack_hh_b32_b16 into s9.
20430define void @s_shuffle_v4i16_v4i16__7_4_5_5() {
20431; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_5_5:
20432; GFX900:       ; %bb.0:
20433; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20434; GFX900-NEXT:    ;;#ASMSTART
20435; GFX900-NEXT:    ; def s[4:5]
20436; GFX900-NEXT:    ;;#ASMEND
20437; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
20438; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
20439; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20440; GFX900-NEXT:    ;;#ASMSTART
20441; GFX900-NEXT:    ; use s[8:9]
20442; GFX900-NEXT:    ;;#ASMEND
20443; GFX900-NEXT:    s_setpc_b64 s[30:31]
20444;
20445; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_5_5:
20446; GFX90A:       ; %bb.0:
20447; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20448; GFX90A-NEXT:    ;;#ASMSTART
20449; GFX90A-NEXT:    ; def s[4:5]
20450; GFX90A-NEXT:    ;;#ASMEND
20451; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
20452; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
20453; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20454; GFX90A-NEXT:    ;;#ASMSTART
20455; GFX90A-NEXT:    ; use s[8:9]
20456; GFX90A-NEXT:    ;;#ASMEND
20457; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20458;
20459; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_5_5:
20460; GFX940:       ; %bb.0:
20461; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20462; GFX940-NEXT:    ;;#ASMSTART
20463; GFX940-NEXT:    ; def s[0:1]
20464; GFX940-NEXT:    ;;#ASMEND
20465; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
20466; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
20467; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
20468; GFX940-NEXT:    ;;#ASMSTART
20469; GFX940-NEXT:    ; use s[8:9]
20470; GFX940-NEXT:    ;;#ASMEND
20471; GFX940-NEXT:    s_setpc_b64 s[30:31]
20472  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20473  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20474  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 5, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20475  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20476  ret void
20477}
20478
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 6, 5, 5>, i.e. result = { vec1[3], vec1[2], vec1[1], vec1[1] }.
; Only %vec1 is referenced by the mask, and the expected output for gfx900,
; gfx90a and gfx940 contains a single input def
; (NOTE(review): presumably the unused %vec0 asm is eliminated - confirm).
; Expected lowering: s_lshr_b32 by 16 into an extra SGPR (s6, or s2 on
; gfx940), s_pack_ll_b32_b16 into s8, and s_pack_hh_b32_b16 into s9.
20479define void @s_shuffle_v4i16_v4i16__7_6_5_5() {
20480; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_5_5:
20481; GFX900:       ; %bb.0:
20482; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20483; GFX900-NEXT:    ;;#ASMSTART
20484; GFX900-NEXT:    ; def s[4:5]
20485; GFX900-NEXT:    ;;#ASMEND
20486; GFX900-NEXT:    s_lshr_b32 s6, s5, 16
20487; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
20488; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20489; GFX900-NEXT:    ;;#ASMSTART
20490; GFX900-NEXT:    ; use s[8:9]
20491; GFX900-NEXT:    ;;#ASMEND
20492; GFX900-NEXT:    s_setpc_b64 s[30:31]
20493;
20494; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_5_5:
20495; GFX90A:       ; %bb.0:
20496; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20497; GFX90A-NEXT:    ;;#ASMSTART
20498; GFX90A-NEXT:    ; def s[4:5]
20499; GFX90A-NEXT:    ;;#ASMEND
20500; GFX90A-NEXT:    s_lshr_b32 s6, s5, 16
20501; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
20502; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20503; GFX90A-NEXT:    ;;#ASMSTART
20504; GFX90A-NEXT:    ; use s[8:9]
20505; GFX90A-NEXT:    ;;#ASMEND
20506; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20507;
20508; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_5_5:
20509; GFX940:       ; %bb.0:
20510; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20511; GFX940-NEXT:    ;;#ASMSTART
20512; GFX940-NEXT:    ; def s[0:1]
20513; GFX940-NEXT:    ;;#ASMEND
20514; GFX940-NEXT:    s_lshr_b32 s2, s1, 16
20515; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
20516; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
20517; GFX940-NEXT:    ;;#ASMSTART
20518; GFX940-NEXT:    ; use s[8:9]
20519; GFX940-NEXT:    ;;#ASMEND
20520; GFX940-NEXT:    s_setpc_b64 s[30:31]
20521  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20522  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20523  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 5, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20524  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20525  ret void
20526}
20527
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 7, 5, 5>, i.e. result = { vec1[3], vec1[3], vec1[1], vec1[1] }.
; Only %vec1 is referenced by the mask, and the expected output for gfx900,
; gfx90a and gfx940 contains a single input def
; (NOTE(review): presumably the unused %vec0 asm is eliminated - confirm).
; Expected lowering: two s_pack_hh_b32_b16, with s9 written before s8.
20528define void @s_shuffle_v4i16_v4i16__7_7_5_5() {
20529; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_5:
20530; GFX900:       ; %bb.0:
20531; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20532; GFX900-NEXT:    ;;#ASMSTART
20533; GFX900-NEXT:    ; def s[4:5]
20534; GFX900-NEXT:    ;;#ASMEND
20535; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20536; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
20537; GFX900-NEXT:    ;;#ASMSTART
20538; GFX900-NEXT:    ; use s[8:9]
20539; GFX900-NEXT:    ;;#ASMEND
20540; GFX900-NEXT:    s_setpc_b64 s[30:31]
20541;
20542; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_5:
20543; GFX90A:       ; %bb.0:
20544; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20545; GFX90A-NEXT:    ;;#ASMSTART
20546; GFX90A-NEXT:    ; def s[4:5]
20547; GFX90A-NEXT:    ;;#ASMEND
20548; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s4
20549; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
20550; GFX90A-NEXT:    ;;#ASMSTART
20551; GFX90A-NEXT:    ; use s[8:9]
20552; GFX90A-NEXT:    ;;#ASMEND
20553; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20554;
20555; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_5:
20556; GFX940:       ; %bb.0:
20557; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20558; GFX940-NEXT:    ;;#ASMSTART
20559; GFX940-NEXT:    ; def s[0:1]
20560; GFX940-NEXT:    ;;#ASMEND
20561; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s0
20562; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
20563; GFX940-NEXT:    ;;#ASMSTART
20564; GFX940-NEXT:    ; use s[8:9]
20565; GFX940-NEXT:    ;;#ASMEND
20566; GFX940-NEXT:    s_setpc_b64 s[30:31]
20567  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20568  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20569  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20570  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20571  ret void
20572}
20573
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 7, poison, 5>, i.e. result = { vec1[3], vec1[3], poison, vec1[1] }.
; Only %vec1 is referenced by the mask, and the expected output for gfx900,
; gfx90a and gfx940 contains a single input def
; (NOTE(review): presumably the unused %vec0 asm is eliminated - confirm).
; Expected lowering: s_pack_hh_b32_b16 into s8 and a plain s_mov_b32 of the
; low input register into s9 (lane 2 is poison).
20574define void @s_shuffle_v4i16_v4i16__7_7_u_5() {
20575; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_5:
20576; GFX900:       ; %bb.0:
20577; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20578; GFX900-NEXT:    ;;#ASMSTART
20579; GFX900-NEXT:    ; def s[4:5]
20580; GFX900-NEXT:    ;;#ASMEND
20581; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
20582; GFX900-NEXT:    s_mov_b32 s9, s4
20583; GFX900-NEXT:    ;;#ASMSTART
20584; GFX900-NEXT:    ; use s[8:9]
20585; GFX900-NEXT:    ;;#ASMEND
20586; GFX900-NEXT:    s_setpc_b64 s[30:31]
20587;
20588; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_5:
20589; GFX90A:       ; %bb.0:
20590; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20591; GFX90A-NEXT:    ;;#ASMSTART
20592; GFX90A-NEXT:    ; def s[4:5]
20593; GFX90A-NEXT:    ;;#ASMEND
20594; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
20595; GFX90A-NEXT:    s_mov_b32 s9, s4
20596; GFX90A-NEXT:    ;;#ASMSTART
20597; GFX90A-NEXT:    ; use s[8:9]
20598; GFX90A-NEXT:    ;;#ASMEND
20599; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20600;
20601; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_5:
20602; GFX940:       ; %bb.0:
20603; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20604; GFX940-NEXT:    ;;#ASMSTART
20605; GFX940-NEXT:    ; def s[0:1]
20606; GFX940-NEXT:    ;;#ASMEND
20607; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
20608; GFX940-NEXT:    s_mov_b32 s9, s0
20609; GFX940-NEXT:    ;;#ASMSTART
20610; GFX940-NEXT:    ; use s[8:9]
20611; GFX940-NEXT:    ;;#ASMEND
20612; GFX940-NEXT:    s_setpc_b64 s[30:31]
20613  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20614  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1;
  ; a poison index leaves that result lane unspecified.
20615  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20616  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20617  ret void
20618}
20619
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 7, 0, 5>, i.e. result = { vec1[3], vec1[3], vec0[0], vec1[1] }.
; Both inputs are referenced, so the expected output for gfx900, gfx90a and
; gfx940 contains two input defs (s[4:5]/s[6:7], or s[0:1]/s[2:3] on gfx940).
; Expected lowering: s_pack_lh_b32_b16 into s9, then s_pack_hh_b32_b16 into s8.
20620define void @s_shuffle_v4i16_v4i16__7_7_0_5() {
20621; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_5:
20622; GFX900:       ; %bb.0:
20623; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20624; GFX900-NEXT:    ;;#ASMSTART
20625; GFX900-NEXT:    ; def s[4:5]
20626; GFX900-NEXT:    ;;#ASMEND
20627; GFX900-NEXT:    ;;#ASMSTART
20628; GFX900-NEXT:    ; def s[6:7]
20629; GFX900-NEXT:    ;;#ASMEND
20630; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s4, s6
20631; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
20632; GFX900-NEXT:    ;;#ASMSTART
20633; GFX900-NEXT:    ; use s[8:9]
20634; GFX900-NEXT:    ;;#ASMEND
20635; GFX900-NEXT:    s_setpc_b64 s[30:31]
20636;
20637; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_5:
20638; GFX90A:       ; %bb.0:
20639; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20640; GFX90A-NEXT:    ;;#ASMSTART
20641; GFX90A-NEXT:    ; def s[4:5]
20642; GFX90A-NEXT:    ;;#ASMEND
20643; GFX90A-NEXT:    ;;#ASMSTART
20644; GFX90A-NEXT:    ; def s[6:7]
20645; GFX90A-NEXT:    ;;#ASMEND
20646; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s4, s6
20647; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
20648; GFX90A-NEXT:    ;;#ASMSTART
20649; GFX90A-NEXT:    ; use s[8:9]
20650; GFX90A-NEXT:    ;;#ASMEND
20651; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20652;
20653; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_5:
20654; GFX940:       ; %bb.0:
20655; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20656; GFX940-NEXT:    ;;#ASMSTART
20657; GFX940-NEXT:    ; def s[0:1]
20658; GFX940-NEXT:    ;;#ASMEND
20659; GFX940-NEXT:    ;;#ASMSTART
20660; GFX940-NEXT:    ; def s[2:3]
20661; GFX940-NEXT:    ;;#ASMEND
20662; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s0, s2
20663; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
20664; GFX940-NEXT:    ;;#ASMSTART
20665; GFX940-NEXT:    ; use s[8:9]
20666; GFX940-NEXT:    ;;#ASMEND
20667; GFX940-NEXT:    s_setpc_b64 s[30:31]
20668  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20669  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20670  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20671  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20672  ret void
20673}
20674
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 7, 1, 5>, i.e. result = { vec1[3], vec1[3], vec0[1], vec1[1] }.
; Both inputs are referenced, so the expected output for gfx900, gfx90a and
; gfx940 contains two input defs (s[4:5]/s[6:7], or s[0:1]/s[2:3] on gfx940).
; Expected lowering: two s_pack_hh_b32_b16, with s9 written before s8.
20675define void @s_shuffle_v4i16_v4i16__7_7_1_5() {
20676; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_5:
20677; GFX900:       ; %bb.0:
20678; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20679; GFX900-NEXT:    ;;#ASMSTART
20680; GFX900-NEXT:    ; def s[4:5]
20681; GFX900-NEXT:    ;;#ASMEND
20682; GFX900-NEXT:    ;;#ASMSTART
20683; GFX900-NEXT:    ; def s[6:7]
20684; GFX900-NEXT:    ;;#ASMEND
20685; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s6
20686; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
20687; GFX900-NEXT:    ;;#ASMSTART
20688; GFX900-NEXT:    ; use s[8:9]
20689; GFX900-NEXT:    ;;#ASMEND
20690; GFX900-NEXT:    s_setpc_b64 s[30:31]
20691;
20692; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_5:
20693; GFX90A:       ; %bb.0:
20694; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20695; GFX90A-NEXT:    ;;#ASMSTART
20696; GFX90A-NEXT:    ; def s[4:5]
20697; GFX90A-NEXT:    ;;#ASMEND
20698; GFX90A-NEXT:    ;;#ASMSTART
20699; GFX90A-NEXT:    ; def s[6:7]
20700; GFX90A-NEXT:    ;;#ASMEND
20701; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s6
20702; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
20703; GFX90A-NEXT:    ;;#ASMSTART
20704; GFX90A-NEXT:    ; use s[8:9]
20705; GFX90A-NEXT:    ;;#ASMEND
20706; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20707;
20708; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_5:
20709; GFX940:       ; %bb.0:
20710; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20711; GFX940-NEXT:    ;;#ASMSTART
20712; GFX940-NEXT:    ; def s[0:1]
20713; GFX940-NEXT:    ;;#ASMEND
20714; GFX940-NEXT:    ;;#ASMSTART
20715; GFX940-NEXT:    ; def s[2:3]
20716; GFX940-NEXT:    ;;#ASMEND
20717; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s2
20718; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
20719; GFX940-NEXT:    ;;#ASMSTART
20720; GFX940-NEXT:    ; use s[8:9]
20721; GFX940-NEXT:    ;;#ASMEND
20722; GFX940-NEXT:    s_setpc_b64 s[30:31]
20723  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20724  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20725  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20726  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20727  ret void
20728}
20729
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 7, 2, 5>, i.e. result = { vec1[3], vec1[3], vec0[2], vec1[1] }.
; Both inputs are referenced, so the expected output for gfx900, gfx90a and
; gfx940 contains two input defs (s[4:5]/s[6:7], or s[0:1]/s[2:3] on gfx940).
; Expected lowering: s_pack_lh_b32_b16 into s9, then s_pack_hh_b32_b16 into s8.
20730define void @s_shuffle_v4i16_v4i16__7_7_2_5() {
20731; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_5:
20732; GFX900:       ; %bb.0:
20733; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20734; GFX900-NEXT:    ;;#ASMSTART
20735; GFX900-NEXT:    ; def s[4:5]
20736; GFX900-NEXT:    ;;#ASMEND
20737; GFX900-NEXT:    ;;#ASMSTART
20738; GFX900-NEXT:    ; def s[6:7]
20739; GFX900-NEXT:    ;;#ASMEND
20740; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s5, s6
20741; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
20742; GFX900-NEXT:    ;;#ASMSTART
20743; GFX900-NEXT:    ; use s[8:9]
20744; GFX900-NEXT:    ;;#ASMEND
20745; GFX900-NEXT:    s_setpc_b64 s[30:31]
20746;
20747; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_5:
20748; GFX90A:       ; %bb.0:
20749; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20750; GFX90A-NEXT:    ;;#ASMSTART
20751; GFX90A-NEXT:    ; def s[4:5]
20752; GFX90A-NEXT:    ;;#ASMEND
20753; GFX90A-NEXT:    ;;#ASMSTART
20754; GFX90A-NEXT:    ; def s[6:7]
20755; GFX90A-NEXT:    ;;#ASMEND
20756; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s5, s6
20757; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
20758; GFX90A-NEXT:    ;;#ASMSTART
20759; GFX90A-NEXT:    ; use s[8:9]
20760; GFX90A-NEXT:    ;;#ASMEND
20761; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20762;
20763; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_5:
20764; GFX940:       ; %bb.0:
20765; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20766; GFX940-NEXT:    ;;#ASMSTART
20767; GFX940-NEXT:    ; def s[0:1]
20768; GFX940-NEXT:    ;;#ASMEND
20769; GFX940-NEXT:    ;;#ASMSTART
20770; GFX940-NEXT:    ; def s[2:3]
20771; GFX940-NEXT:    ;;#ASMEND
20772; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s1, s2
20773; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
20774; GFX940-NEXT:    ;;#ASMSTART
20775; GFX940-NEXT:    ; use s[8:9]
20776; GFX940-NEXT:    ;;#ASMEND
20777; GFX940-NEXT:    s_setpc_b64 s[30:31]
20778  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20779  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20780  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20781  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20782  ret void
20783}
20784
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 7, 3, 5>, i.e. result = { vec1[3], vec1[3], vec0[3], vec1[1] }.
; Both inputs are referenced, so the expected output for gfx900, gfx90a and
; gfx940 contains two input defs (s[4:5]/s[6:7], or s[0:1]/s[2:3] on gfx940).
; Expected lowering: two s_pack_hh_b32_b16, with s9 written before s8.
20785define void @s_shuffle_v4i16_v4i16__7_7_3_5() {
20786; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_5:
20787; GFX900:       ; %bb.0:
20788; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20789; GFX900-NEXT:    ;;#ASMSTART
20790; GFX900-NEXT:    ; def s[4:5]
20791; GFX900-NEXT:    ;;#ASMEND
20792; GFX900-NEXT:    ;;#ASMSTART
20793; GFX900-NEXT:    ; def s[6:7]
20794; GFX900-NEXT:    ;;#ASMEND
20795; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s6
20796; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
20797; GFX900-NEXT:    ;;#ASMSTART
20798; GFX900-NEXT:    ; use s[8:9]
20799; GFX900-NEXT:    ;;#ASMEND
20800; GFX900-NEXT:    s_setpc_b64 s[30:31]
20801;
20802; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_5:
20803; GFX90A:       ; %bb.0:
20804; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20805; GFX90A-NEXT:    ;;#ASMSTART
20806; GFX90A-NEXT:    ; def s[4:5]
20807; GFX90A-NEXT:    ;;#ASMEND
20808; GFX90A-NEXT:    ;;#ASMSTART
20809; GFX90A-NEXT:    ; def s[6:7]
20810; GFX90A-NEXT:    ;;#ASMEND
20811; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s6
20812; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
20813; GFX90A-NEXT:    ;;#ASMSTART
20814; GFX90A-NEXT:    ; use s[8:9]
20815; GFX90A-NEXT:    ;;#ASMEND
20816; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20817;
20818; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_5:
20819; GFX940:       ; %bb.0:
20820; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20821; GFX940-NEXT:    ;;#ASMSTART
20822; GFX940-NEXT:    ; def s[0:1]
20823; GFX940-NEXT:    ;;#ASMEND
20824; GFX940-NEXT:    ;;#ASMSTART
20825; GFX940-NEXT:    ; def s[2:3]
20826; GFX940-NEXT:    ;;#ASMEND
20827; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s2
20828; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
20829; GFX940-NEXT:    ;;#ASMSTART
20830; GFX940-NEXT:    ; use s[8:9]
20831; GFX940-NEXT:    ;;#ASMEND
20832; GFX940-NEXT:    s_setpc_b64 s[30:31]
20833  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20834  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20835  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20836  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20837  ret void
20838}
20839
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 7, 4, 5>, i.e. result = { vec1[3], vec1[3], vec1[0], vec1[1] }.
; Only %vec1 is referenced by the mask, and the expected output for gfx900,
; gfx90a and gfx940 contains a single input def
; (NOTE(review): presumably the unused %vec0 asm is eliminated - confirm).
; Expected lowering: s_pack_hh_b32_b16 into s8 and a plain s_mov_b32 of the
; low input register into s9, since lanes 2-3 are vec1[0..1] unchanged.
20840define void @s_shuffle_v4i16_v4i16__7_7_4_5() {
20841; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_5:
20842; GFX900:       ; %bb.0:
20843; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20844; GFX900-NEXT:    ;;#ASMSTART
20845; GFX900-NEXT:    ; def s[4:5]
20846; GFX900-NEXT:    ;;#ASMEND
20847; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
20848; GFX900-NEXT:    s_mov_b32 s9, s4
20849; GFX900-NEXT:    ;;#ASMSTART
20850; GFX900-NEXT:    ; use s[8:9]
20851; GFX900-NEXT:    ;;#ASMEND
20852; GFX900-NEXT:    s_setpc_b64 s[30:31]
20853;
20854; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_5:
20855; GFX90A:       ; %bb.0:
20856; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20857; GFX90A-NEXT:    ;;#ASMSTART
20858; GFX90A-NEXT:    ; def s[4:5]
20859; GFX90A-NEXT:    ;;#ASMEND
20860; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
20861; GFX90A-NEXT:    s_mov_b32 s9, s4
20862; GFX90A-NEXT:    ;;#ASMSTART
20863; GFX90A-NEXT:    ; use s[8:9]
20864; GFX90A-NEXT:    ;;#ASMEND
20865; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20866;
20867; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_5:
20868; GFX940:       ; %bb.0:
20869; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20870; GFX940-NEXT:    ;;#ASMSTART
20871; GFX940-NEXT:    ; def s[0:1]
20872; GFX940-NEXT:    ;;#ASMEND
20873; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
20874; GFX940-NEXT:    s_mov_b32 s9, s0
20875; GFX940-NEXT:    ;;#ASMSTART
20876; GFX940-NEXT:    ; use s[8:9]
20877; GFX940-NEXT:    ;;#ASMEND
20878; GFX940-NEXT:    s_setpc_b64 s[30:31]
20879  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20880  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20881  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20882  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20883  ret void
20884}
20885
; SGPR shuffle of two inline-asm-defined <4 x i16> values with mask
; <7, 7, 6, 5>, i.e. result = { vec1[3], vec1[3], vec1[2], vec1[1] }.
; Only %vec1 is referenced by the mask, and the expected output for gfx900,
; gfx90a and gfx940 contains a single input def
; (NOTE(review): presumably the unused %vec0 asm is eliminated - confirm).
; Expected lowering: s_pack_lh_b32_b16 into s9, then s_pack_hh_b32_b16 into s8.
20886define void @s_shuffle_v4i16_v4i16__7_7_6_5() {
20887; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_5:
20888; GFX900:       ; %bb.0:
20889; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20890; GFX900-NEXT:    ;;#ASMSTART
20891; GFX900-NEXT:    ; def s[4:5]
20892; GFX900-NEXT:    ;;#ASMEND
20893; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s5, s4
20894; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
20895; GFX900-NEXT:    ;;#ASMSTART
20896; GFX900-NEXT:    ; use s[8:9]
20897; GFX900-NEXT:    ;;#ASMEND
20898; GFX900-NEXT:    s_setpc_b64 s[30:31]
20899;
20900; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_5:
20901; GFX90A:       ; %bb.0:
20902; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20903; GFX90A-NEXT:    ;;#ASMSTART
20904; GFX90A-NEXT:    ; def s[4:5]
20905; GFX90A-NEXT:    ;;#ASMEND
20906; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s5, s4
20907; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
20908; GFX90A-NEXT:    ;;#ASMSTART
20909; GFX90A-NEXT:    ; use s[8:9]
20910; GFX90A-NEXT:    ;;#ASMEND
20911; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20912;
20913; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_5:
20914; GFX940:       ; %bb.0:
20915; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20916; GFX940-NEXT:    ;;#ASMSTART
20917; GFX940-NEXT:    ; def s[0:1]
20918; GFX940-NEXT:    ;;#ASMEND
20919; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s1, s0
20920; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
20921; GFX940-NEXT:    ;;#ASMSTART
20922; GFX940-NEXT:    ; use s[8:9]
20923; GFX940-NEXT:    ;;#ASMEND
20924; GFX940-NEXT:    s_setpc_b64 s[30:31]
20925  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20926  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Mask indices 0-3 select lanes of %vec0, indices 4-7 select lanes of %vec1.
20927  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 5>
  ; The sideeffect asm consumes the result in the fixed SGPR pair s[8:9].
20928  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20929  ret void
20930}
20931
; Scalar-register ("=s") shuffle with mask <poison, 6, 6, 6>.
; Element 0 of the result is unconstrained; elements 1-3 are all vec1[2].
; %vec0 is never referenced, and the expected output contains a single
; "; def". The shuffled value is consumed by inline asm pinned to s[8:9].
20932define void @s_shuffle_v4i16_v4i16__u_6_6_6() {
20933; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_6_6_6:
20934; GFX900:       ; %bb.0:
20935; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20936; GFX900-NEXT:    ;;#ASMSTART
20937; GFX900-NEXT:    ; def s[4:5]
20938; GFX900-NEXT:    ;;#ASMEND
20939; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
20940; GFX900-NEXT:    s_lshl_b32 s8, s5, 16
20941; GFX900-NEXT:    ;;#ASMSTART
20942; GFX900-NEXT:    ; use s[8:9]
20943; GFX900-NEXT:    ;;#ASMEND
20944; GFX900-NEXT:    s_setpc_b64 s[30:31]
20945;
20946; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_6_6_6:
20947; GFX90A:       ; %bb.0:
20948; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20949; GFX90A-NEXT:    ;;#ASMSTART
20950; GFX90A-NEXT:    ; def s[4:5]
20951; GFX90A-NEXT:    ;;#ASMEND
20952; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
20953; GFX90A-NEXT:    s_lshl_b32 s8, s5, 16
20954; GFX90A-NEXT:    ;;#ASMSTART
20955; GFX90A-NEXT:    ; use s[8:9]
20956; GFX90A-NEXT:    ;;#ASMEND
20957; GFX90A-NEXT:    s_setpc_b64 s[30:31]
20958;
20959; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_6_6_6:
20960; GFX940:       ; %bb.0:
20961; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20962; GFX940-NEXT:    ;;#ASMSTART
20963; GFX940-NEXT:    ; def s[0:1]
20964; GFX940-NEXT:    ;;#ASMEND
20965; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
20966; GFX940-NEXT:    s_lshl_b32 s8, s1, 16
20967; GFX940-NEXT:    ;;#ASMSTART
20968; GFX940-NEXT:    ; use s[8:9]
20969; GFX940-NEXT:    ;;#ASMEND
20970; GFX940-NEXT:    s_setpc_b64 s[30:31]
20971  %vec0 = call <4 x i16> asm "; def $0", "=s"()
20972  %vec1 = call <4 x i16> asm "; def $0", "=s"()
20973  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 6, i32 6, i32 6>
20974  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
20975  ret void
20976}
20977
; Scalar-register ("=s") shuffle with mask <0, 6, 6, 6>.
; Result is <vec0[0], vec1[2], vec1[2], vec1[2]>: both inputs are
; referenced, so the expected output contains two "; def" blocks.
; The shuffled value is consumed by inline asm pinned to s[8:9].
20978define void @s_shuffle_v4i16_v4i16__0_6_6_6() {
20979; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_6_6_6:
20980; GFX900:       ; %bb.0:
20981; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20982; GFX900-NEXT:    ;;#ASMSTART
20983; GFX900-NEXT:    ; def s[4:5]
20984; GFX900-NEXT:    ;;#ASMEND
20985; GFX900-NEXT:    ;;#ASMSTART
20986; GFX900-NEXT:    ; def s[6:7]
20987; GFX900-NEXT:    ;;#ASMEND
20988; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
20989; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
20990; GFX900-NEXT:    ;;#ASMSTART
20991; GFX900-NEXT:    ; use s[8:9]
20992; GFX900-NEXT:    ;;#ASMEND
20993; GFX900-NEXT:    s_setpc_b64 s[30:31]
20994;
20995; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_6_6_6:
20996; GFX90A:       ; %bb.0:
20997; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20998; GFX90A-NEXT:    ;;#ASMSTART
20999; GFX90A-NEXT:    ; def s[4:5]
21000; GFX90A-NEXT:    ;;#ASMEND
21001; GFX90A-NEXT:    ;;#ASMSTART
21002; GFX90A-NEXT:    ; def s[6:7]
21003; GFX90A-NEXT:    ;;#ASMEND
21004; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
21005; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21006; GFX90A-NEXT:    ;;#ASMSTART
21007; GFX90A-NEXT:    ; use s[8:9]
21008; GFX90A-NEXT:    ;;#ASMEND
21009; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21010;
21011; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_6_6_6:
21012; GFX940:       ; %bb.0:
21013; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21014; GFX940-NEXT:    ;;#ASMSTART
21015; GFX940-NEXT:    ; def s[0:1]
21016; GFX940-NEXT:    ;;#ASMEND
21017; GFX940-NEXT:    ;;#ASMSTART
21018; GFX940-NEXT:    ; def s[2:3]
21019; GFX940-NEXT:    ;;#ASMEND
21020; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
21021; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
21022; GFX940-NEXT:    ;;#ASMSTART
21023; GFX940-NEXT:    ; use s[8:9]
21024; GFX940-NEXT:    ;;#ASMEND
21025; GFX940-NEXT:    s_setpc_b64 s[30:31]
21026  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21027  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21028  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 6, i32 6, i32 6>
21029  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21030  ret void
21031}
21032
; Scalar-register ("=s") shuffle with mask <1, 6, 6, 6>.
; Result is <vec0[1], vec1[2], vec1[2], vec1[2]>: both inputs are
; referenced, so the expected output contains two "; def" blocks.
; The expected code shifts the first input right by 16 before packing.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21033define void @s_shuffle_v4i16_v4i16__1_6_6_6() {
21034; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_6_6_6:
21035; GFX900:       ; %bb.0:
21036; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21037; GFX900-NEXT:    ;;#ASMSTART
21038; GFX900-NEXT:    ; def s[4:5]
21039; GFX900-NEXT:    ;;#ASMEND
21040; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
21041; GFX900-NEXT:    ;;#ASMSTART
21042; GFX900-NEXT:    ; def s[6:7]
21043; GFX900-NEXT:    ;;#ASMEND
21044; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
21045; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21046; GFX900-NEXT:    ;;#ASMSTART
21047; GFX900-NEXT:    ; use s[8:9]
21048; GFX900-NEXT:    ;;#ASMEND
21049; GFX900-NEXT:    s_setpc_b64 s[30:31]
21050;
21051; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_6_6_6:
21052; GFX90A:       ; %bb.0:
21053; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21054; GFX90A-NEXT:    ;;#ASMSTART
21055; GFX90A-NEXT:    ; def s[4:5]
21056; GFX90A-NEXT:    ;;#ASMEND
21057; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
21058; GFX90A-NEXT:    ;;#ASMSTART
21059; GFX90A-NEXT:    ; def s[6:7]
21060; GFX90A-NEXT:    ;;#ASMEND
21061; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
21062; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21063; GFX90A-NEXT:    ;;#ASMSTART
21064; GFX90A-NEXT:    ; use s[8:9]
21065; GFX90A-NEXT:    ;;#ASMEND
21066; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21067;
21068; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_6_6_6:
21069; GFX940:       ; %bb.0:
21070; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21071; GFX940-NEXT:    ;;#ASMSTART
21072; GFX940-NEXT:    ; def s[0:1]
21073; GFX940-NEXT:    ;;#ASMEND
21074; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
21075; GFX940-NEXT:    ;;#ASMSTART
21076; GFX940-NEXT:    ; def s[2:3]
21077; GFX940-NEXT:    ;;#ASMEND
21078; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
21079; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
21080; GFX940-NEXT:    ;;#ASMSTART
21081; GFX940-NEXT:    ; use s[8:9]
21082; GFX940-NEXT:    ;;#ASMEND
21083; GFX940-NEXT:    s_setpc_b64 s[30:31]
21084  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21085  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21086  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 6, i32 6, i32 6>
21087  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21088  ret void
21089}
21090
; Scalar-register ("=s") shuffle with mask <2, 6, 6, 6>.
; Result is <vec0[2], vec1[2], vec1[2], vec1[2]>: both inputs are
; referenced, so the expected output contains two "; def" blocks.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21091define void @s_shuffle_v4i16_v4i16__2_6_6_6() {
21092; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_6_6_6:
21093; GFX900:       ; %bb.0:
21094; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21095; GFX900-NEXT:    ;;#ASMSTART
21096; GFX900-NEXT:    ; def s[4:5]
21097; GFX900-NEXT:    ;;#ASMEND
21098; GFX900-NEXT:    ;;#ASMSTART
21099; GFX900-NEXT:    ; def s[6:7]
21100; GFX900-NEXT:    ;;#ASMEND
21101; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
21102; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21103; GFX900-NEXT:    ;;#ASMSTART
21104; GFX900-NEXT:    ; use s[8:9]
21105; GFX900-NEXT:    ;;#ASMEND
21106; GFX900-NEXT:    s_setpc_b64 s[30:31]
21107;
21108; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_6_6_6:
21109; GFX90A:       ; %bb.0:
21110; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21111; GFX90A-NEXT:    ;;#ASMSTART
21112; GFX90A-NEXT:    ; def s[4:5]
21113; GFX90A-NEXT:    ;;#ASMEND
21114; GFX90A-NEXT:    ;;#ASMSTART
21115; GFX90A-NEXT:    ; def s[6:7]
21116; GFX90A-NEXT:    ;;#ASMEND
21117; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
21118; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21119; GFX90A-NEXT:    ;;#ASMSTART
21120; GFX90A-NEXT:    ; use s[8:9]
21121; GFX90A-NEXT:    ;;#ASMEND
21122; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21123;
21124; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_6_6_6:
21125; GFX940:       ; %bb.0:
21126; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21127; GFX940-NEXT:    ;;#ASMSTART
21128; GFX940-NEXT:    ; def s[0:1]
21129; GFX940-NEXT:    ;;#ASMEND
21130; GFX940-NEXT:    ;;#ASMSTART
21131; GFX940-NEXT:    ; def s[2:3]
21132; GFX940-NEXT:    ;;#ASMEND
21133; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s3
21134; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
21135; GFX940-NEXT:    ;;#ASMSTART
21136; GFX940-NEXT:    ; use s[8:9]
21137; GFX940-NEXT:    ;;#ASMEND
21138; GFX940-NEXT:    s_setpc_b64 s[30:31]
21139  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21140  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21141  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 6, i32 6, i32 6>
21142  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21143  ret void
21144}
21145
; Scalar-register ("=s") shuffle with mask <3, 6, 6, 6>.
; Result is <vec0[3], vec1[2], vec1[2], vec1[2]>: both inputs are
; referenced, so the expected output contains two "; def" blocks.
; The expected code shifts the first input's second dword right by 16
; before packing. The shuffled value is consumed by inline asm pinned
; to s[8:9].
21146define void @s_shuffle_v4i16_v4i16__3_6_6_6() {
21147; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_6_6_6:
21148; GFX900:       ; %bb.0:
21149; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21150; GFX900-NEXT:    ;;#ASMSTART
21151; GFX900-NEXT:    ; def s[4:5]
21152; GFX900-NEXT:    ;;#ASMEND
21153; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
21154; GFX900-NEXT:    ;;#ASMSTART
21155; GFX900-NEXT:    ; def s[6:7]
21156; GFX900-NEXT:    ;;#ASMEND
21157; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
21158; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21159; GFX900-NEXT:    ;;#ASMSTART
21160; GFX900-NEXT:    ; use s[8:9]
21161; GFX900-NEXT:    ;;#ASMEND
21162; GFX900-NEXT:    s_setpc_b64 s[30:31]
21163;
21164; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_6_6_6:
21165; GFX90A:       ; %bb.0:
21166; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21167; GFX90A-NEXT:    ;;#ASMSTART
21168; GFX90A-NEXT:    ; def s[4:5]
21169; GFX90A-NEXT:    ;;#ASMEND
21170; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
21171; GFX90A-NEXT:    ;;#ASMSTART
21172; GFX90A-NEXT:    ; def s[6:7]
21173; GFX90A-NEXT:    ;;#ASMEND
21174; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
21175; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21176; GFX90A-NEXT:    ;;#ASMSTART
21177; GFX90A-NEXT:    ; use s[8:9]
21178; GFX90A-NEXT:    ;;#ASMEND
21179; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21180;
21181; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_6_6_6:
21182; GFX940:       ; %bb.0:
21183; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21184; GFX940-NEXT:    ;;#ASMSTART
21185; GFX940-NEXT:    ; def s[0:1]
21186; GFX940-NEXT:    ;;#ASMEND
21187; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
21188; GFX940-NEXT:    ;;#ASMSTART
21189; GFX940-NEXT:    ; def s[2:3]
21190; GFX940-NEXT:    ;;#ASMEND
21191; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
21192; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
21193; GFX940-NEXT:    ;;#ASMSTART
21194; GFX940-NEXT:    ; use s[8:9]
21195; GFX940-NEXT:    ;;#ASMEND
21196; GFX940-NEXT:    s_setpc_b64 s[30:31]
21197  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21198  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21199  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 6, i32 6, i32 6>
21200  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21201  ret void
21202}
21203
; Scalar-register ("=s") shuffle with mask <4, 6, 6, 6>.
; Result is <vec1[0], vec1[2], vec1[2], vec1[2]>; %vec0 is never
; referenced, and the expected output contains a single "; def".
; The shuffled value is consumed by inline asm pinned to s[8:9].
21204define void @s_shuffle_v4i16_v4i16__4_6_6_6() {
21205; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_6_6_6:
21206; GFX900:       ; %bb.0:
21207; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21208; GFX900-NEXT:    ;;#ASMSTART
21209; GFX900-NEXT:    ; def s[4:5]
21210; GFX900-NEXT:    ;;#ASMEND
21211; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
21212; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21213; GFX900-NEXT:    ;;#ASMSTART
21214; GFX900-NEXT:    ; use s[8:9]
21215; GFX900-NEXT:    ;;#ASMEND
21216; GFX900-NEXT:    s_setpc_b64 s[30:31]
21217;
21218; GFX90A-LABEL: s_shuffle_v4i16_v4i16__4_6_6_6:
21219; GFX90A:       ; %bb.0:
21220; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21221; GFX90A-NEXT:    ;;#ASMSTART
21222; GFX90A-NEXT:    ; def s[4:5]
21223; GFX90A-NEXT:    ;;#ASMEND
21224; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
21225; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21226; GFX90A-NEXT:    ;;#ASMSTART
21227; GFX90A-NEXT:    ; use s[8:9]
21228; GFX90A-NEXT:    ;;#ASMEND
21229; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21230;
21231; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_6_6_6:
21232; GFX940:       ; %bb.0:
21233; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21234; GFX940-NEXT:    ;;#ASMSTART
21235; GFX940-NEXT:    ; def s[0:1]
21236; GFX940-NEXT:    ;;#ASMEND
21237; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
21238; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
21239; GFX940-NEXT:    ;;#ASMSTART
21240; GFX940-NEXT:    ; use s[8:9]
21241; GFX940-NEXT:    ;;#ASMEND
21242; GFX940-NEXT:    s_setpc_b64 s[30:31]
21243  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21244  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21245  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 6, i32 6, i32 6>
21246  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21247  ret void
21248}
21249
; Scalar-register ("=s") shuffle with mask <5, 6, 6, 6>.
; Result is <vec1[1], vec1[2], vec1[2], vec1[2]>; %vec0 is never
; referenced, and the expected output contains a single "; def".
; The expected code shifts the low dword right by 16 before packing.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21250define void @s_shuffle_v4i16_v4i16__5_6_6_6() {
21251; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_6_6_6:
21252; GFX900:       ; %bb.0:
21253; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21254; GFX900-NEXT:    ;;#ASMSTART
21255; GFX900-NEXT:    ; def s[4:5]
21256; GFX900-NEXT:    ;;#ASMEND
21257; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
21258; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
21259; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21260; GFX900-NEXT:    ;;#ASMSTART
21261; GFX900-NEXT:    ; use s[8:9]
21262; GFX900-NEXT:    ;;#ASMEND
21263; GFX900-NEXT:    s_setpc_b64 s[30:31]
21264;
21265; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_6_6_6:
21266; GFX90A:       ; %bb.0:
21267; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21268; GFX90A-NEXT:    ;;#ASMSTART
21269; GFX90A-NEXT:    ; def s[4:5]
21270; GFX90A-NEXT:    ;;#ASMEND
21271; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
21272; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
21273; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21274; GFX90A-NEXT:    ;;#ASMSTART
21275; GFX90A-NEXT:    ; use s[8:9]
21276; GFX90A-NEXT:    ;;#ASMEND
21277; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21278;
21279; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_6_6_6:
21280; GFX940:       ; %bb.0:
21281; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21282; GFX940-NEXT:    ;;#ASMSTART
21283; GFX940-NEXT:    ; def s[0:1]
21284; GFX940-NEXT:    ;;#ASMEND
21285; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
21286; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
21287; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
21288; GFX940-NEXT:    ;;#ASMSTART
21289; GFX940-NEXT:    ; use s[8:9]
21290; GFX940-NEXT:    ;;#ASMEND
21291; GFX940-NEXT:    s_setpc_b64 s[30:31]
21292  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21293  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21294  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 6, i32 6, i32 6>
21295  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21296  ret void
21297}
21298
; Scalar-register ("=s") shuffle with mask <6, 6, 6, 6>.
; Every result element is vec1[2] (a splat); %vec0 is never referenced,
; and the expected output contains a single "; def". The expected code
; packs one dword and copies it into the second.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21299define void @s_shuffle_v4i16_v4i16__6_6_6_6() {
21300; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_6_6_6:
21301; GFX900:       ; %bb.0:
21302; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21303; GFX900-NEXT:    ;;#ASMSTART
21304; GFX900-NEXT:    ; def s[4:5]
21305; GFX900-NEXT:    ;;#ASMEND
21306; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
21307; GFX900-NEXT:    s_mov_b32 s9, s8
21308; GFX900-NEXT:    ;;#ASMSTART
21309; GFX900-NEXT:    ; use s[8:9]
21310; GFX900-NEXT:    ;;#ASMEND
21311; GFX900-NEXT:    s_setpc_b64 s[30:31]
21312;
21313; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_6_6_6:
21314; GFX90A:       ; %bb.0:
21315; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21316; GFX90A-NEXT:    ;;#ASMSTART
21317; GFX90A-NEXT:    ; def s[4:5]
21318; GFX90A-NEXT:    ;;#ASMEND
21319; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
21320; GFX90A-NEXT:    s_mov_b32 s9, s8
21321; GFX90A-NEXT:    ;;#ASMSTART
21322; GFX90A-NEXT:    ; use s[8:9]
21323; GFX90A-NEXT:    ;;#ASMEND
21324; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21325;
21326; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_6_6_6:
21327; GFX940:       ; %bb.0:
21328; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21329; GFX940-NEXT:    ;;#ASMSTART
21330; GFX940-NEXT:    ; def s[0:1]
21331; GFX940-NEXT:    ;;#ASMEND
21332; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
21333; GFX940-NEXT:    s_mov_b32 s9, s8
21334; GFX940-NEXT:    ;;#ASMSTART
21335; GFX940-NEXT:    ; use s[8:9]
21336; GFX940-NEXT:    ;;#ASMEND
21337; GFX940-NEXT:    s_setpc_b64 s[30:31]
21338  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21339  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21340  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 6, i32 6, i32 6>
21341  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21342  ret void
21343}
21344
; Scalar-register ("=s") shuffle with mask <7, 6, 6, 6>.
; Result is <vec1[3], vec1[2], vec1[2], vec1[2]>; %vec0 is never
; referenced, and the expected output contains a single "; def".
; The expected code shifts the high dword right by 16 before packing.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21345define void @s_shuffle_v4i16_v4i16__7_6_6_6() {
21346; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_6_6:
21347; GFX900:       ; %bb.0:
21348; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21349; GFX900-NEXT:    ;;#ASMSTART
21350; GFX900-NEXT:    ; def s[4:5]
21351; GFX900-NEXT:    ;;#ASMEND
21352; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
21353; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
21354; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21355; GFX900-NEXT:    ;;#ASMSTART
21356; GFX900-NEXT:    ; use s[8:9]
21357; GFX900-NEXT:    ;;#ASMEND
21358; GFX900-NEXT:    s_setpc_b64 s[30:31]
21359;
21360; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_6_6:
21361; GFX90A:       ; %bb.0:
21362; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21363; GFX90A-NEXT:    ;;#ASMSTART
21364; GFX90A-NEXT:    ; def s[4:5]
21365; GFX90A-NEXT:    ;;#ASMEND
21366; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
21367; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
21368; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21369; GFX90A-NEXT:    ;;#ASMSTART
21370; GFX90A-NEXT:    ; use s[8:9]
21371; GFX90A-NEXT:    ;;#ASMEND
21372; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21373;
21374; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_6_6:
21375; GFX940:       ; %bb.0:
21376; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21377; GFX940-NEXT:    ;;#ASMSTART
21378; GFX940-NEXT:    ; def s[0:1]
21379; GFX940-NEXT:    ;;#ASMEND
21380; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
21381; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
21382; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
21383; GFX940-NEXT:    ;;#ASMSTART
21384; GFX940-NEXT:    ; use s[8:9]
21385; GFX940-NEXT:    ;;#ASMEND
21386; GFX940-NEXT:    s_setpc_b64 s[30:31]
21387  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21388  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21389  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 6, i32 6>
21390  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21391  ret void
21392}
21393
; Scalar-register ("=s") shuffle with mask <7, poison, 6, 6>.
; Result is <vec1[3], (unconstrained), vec1[2], vec1[2]>; %vec0 is never
; referenced, and the expected output contains a single "; def".
; With element 1 unconstrained, the expected code forms the low dword
; with a single right shift by 16 instead of a pack.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21394define void @s_shuffle_v4i16_v4i16__7_u_6_6() {
21395; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_6_6:
21396; GFX900:       ; %bb.0:
21397; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21398; GFX900-NEXT:    ;;#ASMSTART
21399; GFX900-NEXT:    ; def s[4:5]
21400; GFX900-NEXT:    ;;#ASMEND
21401; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
21402; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21403; GFX900-NEXT:    ;;#ASMSTART
21404; GFX900-NEXT:    ; use s[8:9]
21405; GFX900-NEXT:    ;;#ASMEND
21406; GFX900-NEXT:    s_setpc_b64 s[30:31]
21407;
21408; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_6_6:
21409; GFX90A:       ; %bb.0:
21410; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21411; GFX90A-NEXT:    ;;#ASMSTART
21412; GFX90A-NEXT:    ; def s[4:5]
21413; GFX90A-NEXT:    ;;#ASMEND
21414; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
21415; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21416; GFX90A-NEXT:    ;;#ASMSTART
21417; GFX90A-NEXT:    ; use s[8:9]
21418; GFX90A-NEXT:    ;;#ASMEND
21419; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21420;
21421; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_6_6:
21422; GFX940:       ; %bb.0:
21423; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21424; GFX940-NEXT:    ;;#ASMSTART
21425; GFX940-NEXT:    ; def s[0:1]
21426; GFX940-NEXT:    ;;#ASMEND
21427; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
21428; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
21429; GFX940-NEXT:    ;;#ASMSTART
21430; GFX940-NEXT:    ; use s[8:9]
21431; GFX940-NEXT:    ;;#ASMEND
21432; GFX940-NEXT:    s_setpc_b64 s[30:31]
21433  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21434  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21435  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 6, i32 6>
21436  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21437  ret void
21438}
21439
; Scalar-register ("=s") shuffle with mask <7, 0, 6, 6>.
; Result is <vec1[3], vec0[0], vec1[2], vec1[2]>: both inputs are
; referenced, so the expected output contains two "; def" blocks.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21440define void @s_shuffle_v4i16_v4i16__7_0_6_6() {
21441; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_6_6:
21442; GFX900:       ; %bb.0:
21443; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21444; GFX900-NEXT:    ;;#ASMSTART
21445; GFX900-NEXT:    ; def s[4:5]
21446; GFX900-NEXT:    ;;#ASMEND
21447; GFX900-NEXT:    ;;#ASMSTART
21448; GFX900-NEXT:    ; def s[6:7]
21449; GFX900-NEXT:    ;;#ASMEND
21450; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
21451; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
21452; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21453; GFX900-NEXT:    ;;#ASMSTART
21454; GFX900-NEXT:    ; use s[8:9]
21455; GFX900-NEXT:    ;;#ASMEND
21456; GFX900-NEXT:    s_setpc_b64 s[30:31]
21457;
21458; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_6_6:
21459; GFX90A:       ; %bb.0:
21460; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21461; GFX90A-NEXT:    ;;#ASMSTART
21462; GFX90A-NEXT:    ; def s[4:5]
21463; GFX90A-NEXT:    ;;#ASMEND
21464; GFX90A-NEXT:    ;;#ASMSTART
21465; GFX90A-NEXT:    ; def s[6:7]
21466; GFX90A-NEXT:    ;;#ASMEND
21467; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
21468; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
21469; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21470; GFX90A-NEXT:    ;;#ASMSTART
21471; GFX90A-NEXT:    ; use s[8:9]
21472; GFX90A-NEXT:    ;;#ASMEND
21473; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21474;
21475; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_6_6:
21476; GFX940:       ; %bb.0:
21477; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21478; GFX940-NEXT:    ;;#ASMSTART
21479; GFX940-NEXT:    ; def s[0:1]
21480; GFX940-NEXT:    ;;#ASMEND
21481; GFX940-NEXT:    ;;#ASMSTART
21482; GFX940-NEXT:    ; def s[2:3]
21483; GFX940-NEXT:    ;;#ASMEND
21484; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
21485; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
21486; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
21487; GFX940-NEXT:    ;;#ASMSTART
21488; GFX940-NEXT:    ; use s[8:9]
21489; GFX940-NEXT:    ;;#ASMEND
21490; GFX940-NEXT:    s_setpc_b64 s[30:31]
21491  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21492  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21493  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 6, i32 6>
21494  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21495  ret void
21496}
21497
; Scalar-register ("=s") shuffle with mask <7, 1, 6, 6>.
; Result is <vec1[3], vec0[1], vec1[2], vec1[2]>: both inputs are
; referenced, so the expected output contains two "; def" blocks.
; The expected code forms the low dword with one s_pack_hh_b32_b16.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21498define void @s_shuffle_v4i16_v4i16__7_1_6_6() {
21499; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_6_6:
21500; GFX900:       ; %bb.0:
21501; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21502; GFX900-NEXT:    ;;#ASMSTART
21503; GFX900-NEXT:    ; def s[4:5]
21504; GFX900-NEXT:    ;;#ASMEND
21505; GFX900-NEXT:    ;;#ASMSTART
21506; GFX900-NEXT:    ; def s[6:7]
21507; GFX900-NEXT:    ;;#ASMEND
21508; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
21509; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21510; GFX900-NEXT:    ;;#ASMSTART
21511; GFX900-NEXT:    ; use s[8:9]
21512; GFX900-NEXT:    ;;#ASMEND
21513; GFX900-NEXT:    s_setpc_b64 s[30:31]
21514;
21515; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_6_6:
21516; GFX90A:       ; %bb.0:
21517; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21518; GFX90A-NEXT:    ;;#ASMSTART
21519; GFX90A-NEXT:    ; def s[4:5]
21520; GFX90A-NEXT:    ;;#ASMEND
21521; GFX90A-NEXT:    ;;#ASMSTART
21522; GFX90A-NEXT:    ; def s[6:7]
21523; GFX90A-NEXT:    ;;#ASMEND
21524; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
21525; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21526; GFX90A-NEXT:    ;;#ASMSTART
21527; GFX90A-NEXT:    ; use s[8:9]
21528; GFX90A-NEXT:    ;;#ASMEND
21529; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21530;
21531; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_6_6:
21532; GFX940:       ; %bb.0:
21533; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21534; GFX940-NEXT:    ;;#ASMSTART
21535; GFX940-NEXT:    ; def s[0:1]
21536; GFX940-NEXT:    ;;#ASMEND
21537; GFX940-NEXT:    ;;#ASMSTART
21538; GFX940-NEXT:    ; def s[2:3]
21539; GFX940-NEXT:    ;;#ASMEND
21540; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
21541; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
21542; GFX940-NEXT:    ;;#ASMSTART
21543; GFX940-NEXT:    ; use s[8:9]
21544; GFX940-NEXT:    ;;#ASMEND
21545; GFX940-NEXT:    s_setpc_b64 s[30:31]
21546  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21547  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21548  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 6, i32 6>
21549  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21550  ret void
21551}
21552
; Scalar-register ("=s") shuffle with mask <7, 2, 6, 6>.
; Result is <vec1[3], vec0[2], vec1[2], vec1[2]>: both inputs are
; referenced, so the expected output contains two "; def" blocks.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21553define void @s_shuffle_v4i16_v4i16__7_2_6_6() {
21554; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_6_6:
21555; GFX900:       ; %bb.0:
21556; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21557; GFX900-NEXT:    ;;#ASMSTART
21558; GFX900-NEXT:    ; def s[4:5]
21559; GFX900-NEXT:    ;;#ASMEND
21560; GFX900-NEXT:    ;;#ASMSTART
21561; GFX900-NEXT:    ; def s[6:7]
21562; GFX900-NEXT:    ;;#ASMEND
21563; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
21564; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
21565; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21566; GFX900-NEXT:    ;;#ASMSTART
21567; GFX900-NEXT:    ; use s[8:9]
21568; GFX900-NEXT:    ;;#ASMEND
21569; GFX900-NEXT:    s_setpc_b64 s[30:31]
21570;
21571; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_6_6:
21572; GFX90A:       ; %bb.0:
21573; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21574; GFX90A-NEXT:    ;;#ASMSTART
21575; GFX90A-NEXT:    ; def s[4:5]
21576; GFX90A-NEXT:    ;;#ASMEND
21577; GFX90A-NEXT:    ;;#ASMSTART
21578; GFX90A-NEXT:    ; def s[6:7]
21579; GFX90A-NEXT:    ;;#ASMEND
21580; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
21581; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
21582; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21583; GFX90A-NEXT:    ;;#ASMSTART
21584; GFX90A-NEXT:    ; use s[8:9]
21585; GFX90A-NEXT:    ;;#ASMEND
21586; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21587;
21588; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_6_6:
21589; GFX940:       ; %bb.0:
21590; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21591; GFX940-NEXT:    ;;#ASMSTART
21592; GFX940-NEXT:    ; def s[0:1]
21593; GFX940-NEXT:    ;;#ASMEND
21594; GFX940-NEXT:    ;;#ASMSTART
21595; GFX940-NEXT:    ; def s[2:3]
21596; GFX940-NEXT:    ;;#ASMEND
21597; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
21598; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
21599; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
21600; GFX940-NEXT:    ;;#ASMSTART
21601; GFX940-NEXT:    ; use s[8:9]
21602; GFX940-NEXT:    ;;#ASMEND
21603; GFX940-NEXT:    s_setpc_b64 s[30:31]
21604  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21605  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21606  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 6, i32 6>
21607  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21608  ret void
21609}
21610
; Scalar-register ("=s") shuffle with mask <7, 3, 6, 6>.
; Result is <vec1[3], vec0[3], vec1[2], vec1[2]>: both inputs are
; referenced, so the expected output contains two "; def" blocks.
; The expected code forms the low dword with one s_pack_hh_b32_b16.
; The shuffled value is consumed by inline asm pinned to s[8:9].
21611define void @s_shuffle_v4i16_v4i16__7_3_6_6() {
21612; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_6_6:
21613; GFX900:       ; %bb.0:
21614; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21615; GFX900-NEXT:    ;;#ASMSTART
21616; GFX900-NEXT:    ; def s[4:5]
21617; GFX900-NEXT:    ;;#ASMEND
21618; GFX900-NEXT:    ;;#ASMSTART
21619; GFX900-NEXT:    ; def s[6:7]
21620; GFX900-NEXT:    ;;#ASMEND
21621; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
21622; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21623; GFX900-NEXT:    ;;#ASMSTART
21624; GFX900-NEXT:    ; use s[8:9]
21625; GFX900-NEXT:    ;;#ASMEND
21626; GFX900-NEXT:    s_setpc_b64 s[30:31]
21627;
21628; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_6_6:
21629; GFX90A:       ; %bb.0:
21630; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21631; GFX90A-NEXT:    ;;#ASMSTART
21632; GFX90A-NEXT:    ; def s[4:5]
21633; GFX90A-NEXT:    ;;#ASMEND
21634; GFX90A-NEXT:    ;;#ASMSTART
21635; GFX90A-NEXT:    ; def s[6:7]
21636; GFX90A-NEXT:    ;;#ASMEND
21637; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
21638; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s7, s7
21639; GFX90A-NEXT:    ;;#ASMSTART
21640; GFX90A-NEXT:    ; use s[8:9]
21641; GFX90A-NEXT:    ;;#ASMEND
21642; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21643;
21644; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_6_6:
21645; GFX940:       ; %bb.0:
21646; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21647; GFX940-NEXT:    ;;#ASMSTART
21648; GFX940-NEXT:    ; def s[0:1]
21649; GFX940-NEXT:    ;;#ASMEND
21650; GFX940-NEXT:    ;;#ASMSTART
21651; GFX940-NEXT:    ; def s[2:3]
21652; GFX940-NEXT:    ;;#ASMEND
21653; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
21654; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s3, s3
21655; GFX940-NEXT:    ;;#ASMSTART
21656; GFX940-NEXT:    ; use s[8:9]
21657; GFX940-NEXT:    ;;#ASMEND
21658; GFX940-NEXT:    s_setpc_b64 s[30:31]
21659  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21660  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21661  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 6, i32 6>
21662  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21663  ret void
21664}
21665
; Scalar-register ("=s") shuffle with mask <7, 4, 6, 6>.
; Result is <vec1[3], vec1[0], vec1[2], vec1[2]>; %vec0 is never
; referenced, and the expected output contains a single "; def".
; The expected code shifts the high dword right by 16 into a scratch
; register before packing. The shuffled value is consumed by inline asm
; pinned to s[8:9].
21666define void @s_shuffle_v4i16_v4i16__7_4_6_6() {
21667; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_6_6:
21668; GFX900:       ; %bb.0:
21669; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21670; GFX900-NEXT:    ;;#ASMSTART
21671; GFX900-NEXT:    ; def s[4:5]
21672; GFX900-NEXT:    ;;#ASMEND
21673; GFX900-NEXT:    s_lshr_b32 s6, s5, 16
21674; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
21675; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21676; GFX900-NEXT:    ;;#ASMSTART
21677; GFX900-NEXT:    ; use s[8:9]
21678; GFX900-NEXT:    ;;#ASMEND
21679; GFX900-NEXT:    s_setpc_b64 s[30:31]
21680;
21681; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_6_6:
21682; GFX90A:       ; %bb.0:
21683; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21684; GFX90A-NEXT:    ;;#ASMSTART
21685; GFX90A-NEXT:    ; def s[4:5]
21686; GFX90A-NEXT:    ;;#ASMEND
21687; GFX90A-NEXT:    s_lshr_b32 s6, s5, 16
21688; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
21689; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21690; GFX90A-NEXT:    ;;#ASMSTART
21691; GFX90A-NEXT:    ; use s[8:9]
21692; GFX90A-NEXT:    ;;#ASMEND
21693; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21694;
21695; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_6_6:
21696; GFX940:       ; %bb.0:
21697; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21698; GFX940-NEXT:    ;;#ASMSTART
21699; GFX940-NEXT:    ; def s[0:1]
21700; GFX940-NEXT:    ;;#ASMEND
21701; GFX940-NEXT:    s_lshr_b32 s2, s1, 16
21702; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s0
21703; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
21704; GFX940-NEXT:    ;;#ASMSTART
21705; GFX940-NEXT:    ; use s[8:9]
21706; GFX940-NEXT:    ;;#ASMEND
21707; GFX940-NEXT:    s_setpc_b64 s[30:31]
21708  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21709  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21710  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 6, i32 6>
21711  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21712  ret void
21713}
21714
21715define void @s_shuffle_v4i16_v4i16__7_5_6_6() {
21716; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_6_6:
21717; GFX900:       ; %bb.0:
21718; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21719; GFX900-NEXT:    ;;#ASMSTART
21720; GFX900-NEXT:    ; def s[4:5]
21721; GFX900-NEXT:    ;;#ASMEND
21722; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
21723; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21724; GFX900-NEXT:    ;;#ASMSTART
21725; GFX900-NEXT:    ; use s[8:9]
21726; GFX900-NEXT:    ;;#ASMEND
21727; GFX900-NEXT:    s_setpc_b64 s[30:31]
21728;
21729; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_6_6:
21730; GFX90A:       ; %bb.0:
21731; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21732; GFX90A-NEXT:    ;;#ASMSTART
21733; GFX90A-NEXT:    ; def s[4:5]
21734; GFX90A-NEXT:    ;;#ASMEND
21735; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
21736; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21737; GFX90A-NEXT:    ;;#ASMSTART
21738; GFX90A-NEXT:    ; use s[8:9]
21739; GFX90A-NEXT:    ;;#ASMEND
21740; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21741;
21742; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_6_6:
21743; GFX940:       ; %bb.0:
21744; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21745; GFX940-NEXT:    ;;#ASMSTART
21746; GFX940-NEXT:    ; def s[0:1]
21747; GFX940-NEXT:    ;;#ASMEND
21748; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
21749; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
21750; GFX940-NEXT:    ;;#ASMSTART
21751; GFX940-NEXT:    ; use s[8:9]
21752; GFX940-NEXT:    ;;#ASMEND
21753; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <7,5,6,6>. All four indices are in the 4-7 range,
  ; i.e. they select from %vec1 only; %vec0 is never read and the expected
  ; output above contains a single "def" asm block.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
21754  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21755  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21756  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 6, i32 6>
21757  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21758  ret void
21759}
21760
21761define void @s_shuffle_v4i16_v4i16__7_7_6_6() {
21762; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_6_6:
21763; GFX900:       ; %bb.0:
21764; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21765; GFX900-NEXT:    ;;#ASMSTART
21766; GFX900-NEXT:    ; def s[4:5]
21767; GFX900-NEXT:    ;;#ASMEND
21768; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
21769; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21770; GFX900-NEXT:    ;;#ASMSTART
21771; GFX900-NEXT:    ; use s[8:9]
21772; GFX900-NEXT:    ;;#ASMEND
21773; GFX900-NEXT:    s_setpc_b64 s[30:31]
21774;
21775; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_6_6:
21776; GFX90A:       ; %bb.0:
21777; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21778; GFX90A-NEXT:    ;;#ASMSTART
21779; GFX90A-NEXT:    ; def s[4:5]
21780; GFX90A-NEXT:    ;;#ASMEND
21781; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
21782; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s5
21783; GFX90A-NEXT:    ;;#ASMSTART
21784; GFX90A-NEXT:    ; use s[8:9]
21785; GFX90A-NEXT:    ;;#ASMEND
21786; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21787;
21788; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_6_6:
21789; GFX940:       ; %bb.0:
21790; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21791; GFX940-NEXT:    ;;#ASMSTART
21792; GFX940-NEXT:    ; def s[0:1]
21793; GFX940-NEXT:    ;;#ASMEND
21794; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
21795; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s1
21796; GFX940-NEXT:    ;;#ASMSTART
21797; GFX940-NEXT:    ; use s[8:9]
21798; GFX940-NEXT:    ;;#ASMEND
21799; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <7,7,6,6>. Only indices 6 and 7 appear, i.e. the
  ; last two elements of %vec1; %vec0 is never read and the expected output
  ; above contains a single "def" asm block.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
21800  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21801  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21802  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 6>
21803  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21804  ret void
21805}
21806
21807define void @s_shuffle_v4i16_v4i16__7_7_u_6() {
21808; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_u_6:
21809; GFX900:       ; %bb.0:
21810; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21811; GFX900-NEXT:    ;;#ASMSTART
21812; GFX900-NEXT:    ; def s[4:5]
21813; GFX900-NEXT:    ;;#ASMEND
21814; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
21815; GFX900-NEXT:    s_lshl_b32 s9, s5, 16
21816; GFX900-NEXT:    ;;#ASMSTART
21817; GFX900-NEXT:    ; use s[8:9]
21818; GFX900-NEXT:    ;;#ASMEND
21819; GFX900-NEXT:    s_setpc_b64 s[30:31]
21820;
21821; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_u_6:
21822; GFX90A:       ; %bb.0:
21823; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21824; GFX90A-NEXT:    ;;#ASMSTART
21825; GFX90A-NEXT:    ; def s[4:5]
21826; GFX90A-NEXT:    ;;#ASMEND
21827; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
21828; GFX90A-NEXT:    s_lshl_b32 s9, s5, 16
21829; GFX90A-NEXT:    ;;#ASMSTART
21830; GFX90A-NEXT:    ; use s[8:9]
21831; GFX90A-NEXT:    ;;#ASMEND
21832; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21833;
21834; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_u_6:
21835; GFX940:       ; %bb.0:
21836; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21837; GFX940-NEXT:    ;;#ASMSTART
21838; GFX940-NEXT:    ; def s[0:1]
21839; GFX940-NEXT:    ;;#ASMEND
21840; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
21841; GFX940-NEXT:    s_lshl_b32 s9, s1, 16
21842; GFX940-NEXT:    ;;#ASMSTART
21843; GFX940-NEXT:    ; use s[8:9]
21844; GFX940-NEXT:    ;;#ASMEND
21845; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <7,7,poison,6> (the "u" in the function name).
  ; The defined lanes all select from %vec1; lane 2 is poison, so no
  ; particular value is required there. The expected output above fills the
  ; second result dword with a 16-bit left shift rather than a pack.
  ; %vec0 is never read, so only one "def" asm block is expected.
21846  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21847  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21848  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 6>
21849  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21850  ret void
21851}
21852
21853define void @s_shuffle_v4i16_v4i16__7_7_0_6() {
21854; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_6:
21855; GFX900:       ; %bb.0:
21856; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21857; GFX900-NEXT:    ;;#ASMSTART
21858; GFX900-NEXT:    ; def s[4:5]
21859; GFX900-NEXT:    ;;#ASMEND
21860; GFX900-NEXT:    ;;#ASMSTART
21861; GFX900-NEXT:    ; def s[6:7]
21862; GFX900-NEXT:    ;;#ASMEND
21863; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
21864; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
21865; GFX900-NEXT:    ;;#ASMSTART
21866; GFX900-NEXT:    ; use s[8:9]
21867; GFX900-NEXT:    ;;#ASMEND
21868; GFX900-NEXT:    s_setpc_b64 s[30:31]
21869;
21870; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_6:
21871; GFX90A:       ; %bb.0:
21872; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21873; GFX90A-NEXT:    ;;#ASMSTART
21874; GFX90A-NEXT:    ; def s[4:5]
21875; GFX90A-NEXT:    ;;#ASMEND
21876; GFX90A-NEXT:    ;;#ASMSTART
21877; GFX90A-NEXT:    ; def s[6:7]
21878; GFX90A-NEXT:    ;;#ASMEND
21879; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
21880; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
21881; GFX90A-NEXT:    ;;#ASMSTART
21882; GFX90A-NEXT:    ; use s[8:9]
21883; GFX90A-NEXT:    ;;#ASMEND
21884; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21885;
21886; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_6:
21887; GFX940:       ; %bb.0:
21888; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21889; GFX940-NEXT:    ;;#ASMSTART
21890; GFX940-NEXT:    ; def s[0:1]
21891; GFX940-NEXT:    ;;#ASMEND
21892; GFX940-NEXT:    ;;#ASMSTART
21893; GFX940-NEXT:    ; def s[2:3]
21894; GFX940-NEXT:    ;;#ASMEND
21895; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s3
21896; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
21897; GFX940-NEXT:    ;;#ASMSTART
21898; GFX940-NEXT:    ; use s[8:9]
21899; GFX940-NEXT:    ;;#ASMEND
21900; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <7,7,0,6>. Lane 2 takes element 0 of %vec0; the
  ; other lanes select from %vec1 (indices 4-7). Both inputs are read, so the
  ; expected output above contains two "def" asm blocks.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
21901  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21902  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21903  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 6>
21904  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21905  ret void
21906}
21907
21908define void @s_shuffle_v4i16_v4i16__7_7_1_6() {
21909; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_6:
21910; GFX900:       ; %bb.0:
21911; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21912; GFX900-NEXT:    ;;#ASMSTART
21913; GFX900-NEXT:    ; def s[4:5]
21914; GFX900-NEXT:    ;;#ASMEND
21915; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
21916; GFX900-NEXT:    ;;#ASMSTART
21917; GFX900-NEXT:    ; def s[6:7]
21918; GFX900-NEXT:    ;;#ASMEND
21919; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
21920; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
21921; GFX900-NEXT:    ;;#ASMSTART
21922; GFX900-NEXT:    ; use s[8:9]
21923; GFX900-NEXT:    ;;#ASMEND
21924; GFX900-NEXT:    s_setpc_b64 s[30:31]
21925;
21926; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_6:
21927; GFX90A:       ; %bb.0:
21928; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21929; GFX90A-NEXT:    ;;#ASMSTART
21930; GFX90A-NEXT:    ; def s[4:5]
21931; GFX90A-NEXT:    ;;#ASMEND
21932; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
21933; GFX90A-NEXT:    ;;#ASMSTART
21934; GFX90A-NEXT:    ; def s[6:7]
21935; GFX90A-NEXT:    ;;#ASMEND
21936; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
21937; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
21938; GFX90A-NEXT:    ;;#ASMSTART
21939; GFX90A-NEXT:    ; use s[8:9]
21940; GFX90A-NEXT:    ;;#ASMEND
21941; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21942;
21943; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_6:
21944; GFX940:       ; %bb.0:
21945; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21946; GFX940-NEXT:    ;;#ASMSTART
21947; GFX940-NEXT:    ; def s[0:1]
21948; GFX940-NEXT:    ;;#ASMEND
21949; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
21950; GFX940-NEXT:    ;;#ASMSTART
21951; GFX940-NEXT:    ; def s[2:3]
21952; GFX940-NEXT:    ;;#ASMEND
21953; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s3
21954; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
21955; GFX940-NEXT:    ;;#ASMSTART
21956; GFX940-NEXT:    ; use s[8:9]
21957; GFX940-NEXT:    ;;#ASMEND
21958; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <7,7,1,6>. Lane 2 takes element 1 of %vec0; the
  ; other lanes select from %vec1 (indices 4-7). Both inputs are read, so the
  ; expected output above contains two "def" asm blocks, with a 16-bit right
  ; shift of the first input's low dword scheduled between them.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
21959  %vec0 = call <4 x i16> asm "; def $0", "=s"()
21960  %vec1 = call <4 x i16> asm "; def $0", "=s"()
21961  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 6>
21962  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
21963  ret void
21964}
21965
21966define void @s_shuffle_v4i16_v4i16__7_7_2_6() {
21967; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_6:
21968; GFX900:       ; %bb.0:
21969; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21970; GFX900-NEXT:    ;;#ASMSTART
21971; GFX900-NEXT:    ; def s[4:5]
21972; GFX900-NEXT:    ;;#ASMEND
21973; GFX900-NEXT:    ;;#ASMSTART
21974; GFX900-NEXT:    ; def s[6:7]
21975; GFX900-NEXT:    ;;#ASMEND
21976; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s5, s7
21977; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
21978; GFX900-NEXT:    ;;#ASMSTART
21979; GFX900-NEXT:    ; use s[8:9]
21980; GFX900-NEXT:    ;;#ASMEND
21981; GFX900-NEXT:    s_setpc_b64 s[30:31]
21982;
21983; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_6:
21984; GFX90A:       ; %bb.0:
21985; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21986; GFX90A-NEXT:    ;;#ASMSTART
21987; GFX90A-NEXT:    ; def s[4:5]
21988; GFX90A-NEXT:    ;;#ASMEND
21989; GFX90A-NEXT:    ;;#ASMSTART
21990; GFX90A-NEXT:    ; def s[6:7]
21991; GFX90A-NEXT:    ;;#ASMEND
21992; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s5, s7
21993; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
21994; GFX90A-NEXT:    ;;#ASMSTART
21995; GFX90A-NEXT:    ; use s[8:9]
21996; GFX90A-NEXT:    ;;#ASMEND
21997; GFX90A-NEXT:    s_setpc_b64 s[30:31]
21998;
21999; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_6:
22000; GFX940:       ; %bb.0:
22001; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22002; GFX940-NEXT:    ;;#ASMSTART
22003; GFX940-NEXT:    ; def s[0:1]
22004; GFX940-NEXT:    ;;#ASMEND
22005; GFX940-NEXT:    ;;#ASMSTART
22006; GFX940-NEXT:    ; def s[2:3]
22007; GFX940-NEXT:    ;;#ASMEND
22008; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s1, s3
22009; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
22010; GFX940-NEXT:    ;;#ASMSTART
22011; GFX940-NEXT:    ; use s[8:9]
22012; GFX940-NEXT:    ;;#ASMEND
22013; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <7,7,2,6>. Lane 2 takes element 2 of %vec0; the
  ; other lanes select from %vec1 (indices 4-7). Both inputs are read, so the
  ; expected output above contains two "def" asm blocks.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
22014  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22015  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22016  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 6>
22017  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22018  ret void
22019}
22020
22021define void @s_shuffle_v4i16_v4i16__7_7_3_6() {
22022; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_6:
22023; GFX900:       ; %bb.0:
22024; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22025; GFX900-NEXT:    ;;#ASMSTART
22026; GFX900-NEXT:    ; def s[4:5]
22027; GFX900-NEXT:    ;;#ASMEND
22028; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
22029; GFX900-NEXT:    ;;#ASMSTART
22030; GFX900-NEXT:    ; def s[6:7]
22031; GFX900-NEXT:    ;;#ASMEND
22032; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
22033; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
22034; GFX900-NEXT:    ;;#ASMSTART
22035; GFX900-NEXT:    ; use s[8:9]
22036; GFX900-NEXT:    ;;#ASMEND
22037; GFX900-NEXT:    s_setpc_b64 s[30:31]
22038;
22039; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_6:
22040; GFX90A:       ; %bb.0:
22041; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22042; GFX90A-NEXT:    ;;#ASMSTART
22043; GFX90A-NEXT:    ; def s[4:5]
22044; GFX90A-NEXT:    ;;#ASMEND
22045; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
22046; GFX90A-NEXT:    ;;#ASMSTART
22047; GFX90A-NEXT:    ; def s[6:7]
22048; GFX90A-NEXT:    ;;#ASMEND
22049; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s7
22050; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
22051; GFX90A-NEXT:    ;;#ASMSTART
22052; GFX90A-NEXT:    ; use s[8:9]
22053; GFX90A-NEXT:    ;;#ASMEND
22054; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22055;
22056; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_6:
22057; GFX940:       ; %bb.0:
22058; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22059; GFX940-NEXT:    ;;#ASMSTART
22060; GFX940-NEXT:    ; def s[0:1]
22061; GFX940-NEXT:    ;;#ASMEND
22062; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
22063; GFX940-NEXT:    ;;#ASMSTART
22064; GFX940-NEXT:    ; def s[2:3]
22065; GFX940-NEXT:    ;;#ASMEND
22066; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s3
22067; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
22068; GFX940-NEXT:    ;;#ASMSTART
22069; GFX940-NEXT:    ; use s[8:9]
22070; GFX940-NEXT:    ;;#ASMEND
22071; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <7,7,3,6>. Lane 2 takes element 3 of %vec0; the
  ; other lanes select from %vec1 (indices 4-7). Both inputs are read, so the
  ; expected output above contains two "def" asm blocks, with a 16-bit right
  ; shift of the first input's high dword scheduled between them.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
22072  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22073  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22074  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 6>
22075  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22076  ret void
22077}
22078
22079define void @s_shuffle_v4i16_v4i16__7_7_4_6() {
22080; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_6:
22081; GFX900:       ; %bb.0:
22082; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22083; GFX900-NEXT:    ;;#ASMSTART
22084; GFX900-NEXT:    ; def s[4:5]
22085; GFX900-NEXT:    ;;#ASMEND
22086; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
22087; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
22088; GFX900-NEXT:    ;;#ASMSTART
22089; GFX900-NEXT:    ; use s[8:9]
22090; GFX900-NEXT:    ;;#ASMEND
22091; GFX900-NEXT:    s_setpc_b64 s[30:31]
22092;
22093; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_6:
22094; GFX90A:       ; %bb.0:
22095; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22096; GFX90A-NEXT:    ;;#ASMSTART
22097; GFX90A-NEXT:    ; def s[4:5]
22098; GFX90A-NEXT:    ;;#ASMEND
22099; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
22100; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
22101; GFX90A-NEXT:    ;;#ASMSTART
22102; GFX90A-NEXT:    ; use s[8:9]
22103; GFX90A-NEXT:    ;;#ASMEND
22104; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22105;
22106; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_6:
22107; GFX940:       ; %bb.0:
22108; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22109; GFX940-NEXT:    ;;#ASMSTART
22110; GFX940-NEXT:    ; def s[0:1]
22111; GFX940-NEXT:    ;;#ASMEND
22112; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
22113; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
22114; GFX940-NEXT:    ;;#ASMSTART
22115; GFX940-NEXT:    ; use s[8:9]
22116; GFX940-NEXT:    ;;#ASMEND
22117; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <7,7,4,6>. All four indices are in the 4-7 range,
  ; i.e. they select from %vec1 only (index 4 is its element 0); %vec0 is
  ; never read and the expected output above contains a single "def" block.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
22118  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22119  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22120  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 6>
22121  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22122  ret void
22123}
22124
22125define void @s_shuffle_v4i16_v4i16__7_7_5_6() {
22126; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_6:
22127; GFX900:       ; %bb.0:
22128; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22129; GFX900-NEXT:    ;;#ASMSTART
22130; GFX900-NEXT:    ; def s[4:5]
22131; GFX900-NEXT:    ;;#ASMEND
22132; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
22133; GFX900-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
22134; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
22135; GFX900-NEXT:    ;;#ASMSTART
22136; GFX900-NEXT:    ; use s[8:9]
22137; GFX900-NEXT:    ;;#ASMEND
22138; GFX900-NEXT:    s_setpc_b64 s[30:31]
22139;
22140; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_6:
22141; GFX90A:       ; %bb.0:
22142; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22143; GFX90A-NEXT:    ;;#ASMSTART
22144; GFX90A-NEXT:    ; def s[4:5]
22145; GFX90A-NEXT:    ;;#ASMEND
22146; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
22147; GFX90A-NEXT:    s_pack_ll_b32_b16 s9, s4, s5
22148; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
22149; GFX90A-NEXT:    ;;#ASMSTART
22150; GFX90A-NEXT:    ; use s[8:9]
22151; GFX90A-NEXT:    ;;#ASMEND
22152; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22153;
22154; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_6:
22155; GFX940:       ; %bb.0:
22156; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22157; GFX940-NEXT:    ;;#ASMSTART
22158; GFX940-NEXT:    ; def s[0:1]
22159; GFX940-NEXT:    ;;#ASMEND
22160; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
22161; GFX940-NEXT:    s_pack_ll_b32_b16 s9, s0, s1
22162; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
22163; GFX940-NEXT:    ;;#ASMSTART
22164; GFX940-NEXT:    ; use s[8:9]
22165; GFX940-NEXT:    ;;#ASMEND
22166; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <7,7,5,6>. All four indices are in the 4-7 range,
  ; i.e. they select from %vec1 only (index 5 is its element 1); %vec0 is
  ; never read and the expected output above contains a single "def" block.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
22167  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22168  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22169  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 6>
22170  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22171  ret void
22172}
22173
22174define void @s_shuffle_v4i16_v4i16__u_7_7_7() {
22175; GFX900-LABEL: s_shuffle_v4i16_v4i16__u_7_7_7:
22176; GFX900:       ; %bb.0:
22177; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22178; GFX900-NEXT:    ;;#ASMSTART
22179; GFX900-NEXT:    ; def s[4:5]
22180; GFX900-NEXT:    ;;#ASMEND
22181; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22182; GFX900-NEXT:    s_mov_b32 s8, s5
22183; GFX900-NEXT:    ;;#ASMSTART
22184; GFX900-NEXT:    ; use s[8:9]
22185; GFX900-NEXT:    ;;#ASMEND
22186; GFX900-NEXT:    s_setpc_b64 s[30:31]
22187;
22188; GFX90A-LABEL: s_shuffle_v4i16_v4i16__u_7_7_7:
22189; GFX90A:       ; %bb.0:
22190; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22191; GFX90A-NEXT:    ;;#ASMSTART
22192; GFX90A-NEXT:    ; def s[4:5]
22193; GFX90A-NEXT:    ;;#ASMEND
22194; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22195; GFX90A-NEXT:    s_mov_b32 s8, s5
22196; GFX90A-NEXT:    ;;#ASMSTART
22197; GFX90A-NEXT:    ; use s[8:9]
22198; GFX90A-NEXT:    ;;#ASMEND
22199; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22200;
22201; GFX940-LABEL: s_shuffle_v4i16_v4i16__u_7_7_7:
22202; GFX940:       ; %bb.0:
22203; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22204; GFX940-NEXT:    ;;#ASMSTART
22205; GFX940-NEXT:    ; def s[0:1]
22206; GFX940-NEXT:    ;;#ASMEND
22207; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
22208; GFX940-NEXT:    s_mov_b32 s8, s1
22209; GFX940-NEXT:    ;;#ASMSTART
22210; GFX940-NEXT:    ; use s[8:9]
22211; GFX940-NEXT:    ;;#ASMEND
22212; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <poison,7,7,7> (the "u" in the function name).
  ; Lane 0 is poison, so no particular value is required there; lanes 1-3 all
  ; replicate element 3 of %vec1 (index 7). The expected output above forms
  ; the first result dword with a plain register copy.
  ; %vec0 is never read, so only one "def" asm block is expected.
22213  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22214  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22215  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 poison, i32 7, i32 7, i32 7>
22216  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22217  ret void
22218}
22219
22220define void @s_shuffle_v4i16_v4i16__0_7_7_7() {
22221; GFX900-LABEL: s_shuffle_v4i16_v4i16__0_7_7_7:
22222; GFX900:       ; %bb.0:
22223; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22224; GFX900-NEXT:    ;;#ASMSTART
22225; GFX900-NEXT:    ; def s[4:5]
22226; GFX900-NEXT:    ;;#ASMEND
22227; GFX900-NEXT:    ;;#ASMSTART
22228; GFX900-NEXT:    ; def s[6:7]
22229; GFX900-NEXT:    ;;#ASMEND
22230; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s7
22231; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22232; GFX900-NEXT:    ;;#ASMSTART
22233; GFX900-NEXT:    ; use s[8:9]
22234; GFX900-NEXT:    ;;#ASMEND
22235; GFX900-NEXT:    s_setpc_b64 s[30:31]
22236;
22237; GFX90A-LABEL: s_shuffle_v4i16_v4i16__0_7_7_7:
22238; GFX90A:       ; %bb.0:
22239; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22240; GFX90A-NEXT:    ;;#ASMSTART
22241; GFX90A-NEXT:    ; def s[4:5]
22242; GFX90A-NEXT:    ;;#ASMEND
22243; GFX90A-NEXT:    ;;#ASMSTART
22244; GFX90A-NEXT:    ; def s[6:7]
22245; GFX90A-NEXT:    ;;#ASMEND
22246; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s7
22247; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22248; GFX90A-NEXT:    ;;#ASMSTART
22249; GFX90A-NEXT:    ; use s[8:9]
22250; GFX90A-NEXT:    ;;#ASMEND
22251; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22252;
22253; GFX940-LABEL: s_shuffle_v4i16_v4i16__0_7_7_7:
22254; GFX940:       ; %bb.0:
22255; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22256; GFX940-NEXT:    ;;#ASMSTART
22257; GFX940-NEXT:    ; def s[0:1]
22258; GFX940-NEXT:    ;;#ASMEND
22259; GFX940-NEXT:    ;;#ASMSTART
22260; GFX940-NEXT:    ; def s[2:3]
22261; GFX940-NEXT:    ;;#ASMEND
22262; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s3
22263; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s3
22264; GFX940-NEXT:    ;;#ASMSTART
22265; GFX940-NEXT:    ; use s[8:9]
22266; GFX940-NEXT:    ;;#ASMEND
22267; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <0,7,7,7>. Lane 0 takes element 0 of %vec0; lanes
  ; 1-3 all replicate element 3 of %vec1 (index 7). Both inputs are read, so
  ; the expected output above contains two "def" asm blocks.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
22268  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22269  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22270  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 0, i32 7, i32 7, i32 7>
22271  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22272  ret void
22273}
22274
22275define void @s_shuffle_v4i16_v4i16__1_7_7_7() {
22276; GFX900-LABEL: s_shuffle_v4i16_v4i16__1_7_7_7:
22277; GFX900:       ; %bb.0:
22278; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22279; GFX900-NEXT:    ;;#ASMSTART
22280; GFX900-NEXT:    ; def s[4:5]
22281; GFX900-NEXT:    ;;#ASMEND
22282; GFX900-NEXT:    ;;#ASMSTART
22283; GFX900-NEXT:    ; def s[6:7]
22284; GFX900-NEXT:    ;;#ASMEND
22285; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s7
22286; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22287; GFX900-NEXT:    ;;#ASMSTART
22288; GFX900-NEXT:    ; use s[8:9]
22289; GFX900-NEXT:    ;;#ASMEND
22290; GFX900-NEXT:    s_setpc_b64 s[30:31]
22291;
22292; GFX90A-LABEL: s_shuffle_v4i16_v4i16__1_7_7_7:
22293; GFX90A:       ; %bb.0:
22294; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22295; GFX90A-NEXT:    ;;#ASMSTART
22296; GFX90A-NEXT:    ; def s[4:5]
22297; GFX90A-NEXT:    ;;#ASMEND
22298; GFX90A-NEXT:    ;;#ASMSTART
22299; GFX90A-NEXT:    ; def s[6:7]
22300; GFX90A-NEXT:    ;;#ASMEND
22301; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s7
22302; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22303; GFX90A-NEXT:    ;;#ASMSTART
22304; GFX90A-NEXT:    ; use s[8:9]
22305; GFX90A-NEXT:    ;;#ASMEND
22306; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22307;
22308; GFX940-LABEL: s_shuffle_v4i16_v4i16__1_7_7_7:
22309; GFX940:       ; %bb.0:
22310; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22311; GFX940-NEXT:    ;;#ASMSTART
22312; GFX940-NEXT:    ; def s[0:1]
22313; GFX940-NEXT:    ;;#ASMEND
22314; GFX940-NEXT:    ;;#ASMSTART
22315; GFX940-NEXT:    ; def s[2:3]
22316; GFX940-NEXT:    ;;#ASMEND
22317; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s3
22318; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s3
22319; GFX940-NEXT:    ;;#ASMSTART
22320; GFX940-NEXT:    ; use s[8:9]
22321; GFX940-NEXT:    ;;#ASMEND
22322; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <1,7,7,7>. Lane 0 takes element 1 of %vec0; lanes
  ; 1-3 all replicate element 3 of %vec1 (index 7). Both inputs are read, so
  ; the expected output above contains two "def" asm blocks.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
22323  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22324  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22325  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 1, i32 7, i32 7, i32 7>
22326  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22327  ret void
22328}
22329
22330define void @s_shuffle_v4i16_v4i16__2_7_7_7() {
22331; GFX900-LABEL: s_shuffle_v4i16_v4i16__2_7_7_7:
22332; GFX900:       ; %bb.0:
22333; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22334; GFX900-NEXT:    ;;#ASMSTART
22335; GFX900-NEXT:    ; def s[4:5]
22336; GFX900-NEXT:    ;;#ASMEND
22337; GFX900-NEXT:    ;;#ASMSTART
22338; GFX900-NEXT:    ; def s[6:7]
22339; GFX900-NEXT:    ;;#ASMEND
22340; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s7
22341; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22342; GFX900-NEXT:    ;;#ASMSTART
22343; GFX900-NEXT:    ; use s[8:9]
22344; GFX900-NEXT:    ;;#ASMEND
22345; GFX900-NEXT:    s_setpc_b64 s[30:31]
22346;
22347; GFX90A-LABEL: s_shuffle_v4i16_v4i16__2_7_7_7:
22348; GFX90A:       ; %bb.0:
22349; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22350; GFX90A-NEXT:    ;;#ASMSTART
22351; GFX90A-NEXT:    ; def s[4:5]
22352; GFX90A-NEXT:    ;;#ASMEND
22353; GFX90A-NEXT:    ;;#ASMSTART
22354; GFX90A-NEXT:    ; def s[6:7]
22355; GFX90A-NEXT:    ;;#ASMEND
22356; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s7
22357; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22358; GFX90A-NEXT:    ;;#ASMSTART
22359; GFX90A-NEXT:    ; use s[8:9]
22360; GFX90A-NEXT:    ;;#ASMEND
22361; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22362;
22363; GFX940-LABEL: s_shuffle_v4i16_v4i16__2_7_7_7:
22364; GFX940:       ; %bb.0:
22365; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22366; GFX940-NEXT:    ;;#ASMSTART
22367; GFX940-NEXT:    ; def s[0:1]
22368; GFX940-NEXT:    ;;#ASMEND
22369; GFX940-NEXT:    ;;#ASMSTART
22370; GFX940-NEXT:    ; def s[2:3]
22371; GFX940-NEXT:    ;;#ASMEND
22372; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s3
22373; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s3
22374; GFX940-NEXT:    ;;#ASMSTART
22375; GFX940-NEXT:    ; use s[8:9]
22376; GFX940-NEXT:    ;;#ASMEND
22377; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <2,7,7,7>. Lane 0 takes element 2 of %vec0; lanes
  ; 1-3 all replicate element 3 of %vec1 (index 7). Both inputs are read, so
  ; the expected output above contains two "def" asm blocks.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
22378  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22379  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22380  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 2, i32 7, i32 7, i32 7>
22381  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22382  ret void
22383}
22384
22385define void @s_shuffle_v4i16_v4i16__3_7_7_7() {
22386; GFX900-LABEL: s_shuffle_v4i16_v4i16__3_7_7_7:
22387; GFX900:       ; %bb.0:
22388; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22389; GFX900-NEXT:    ;;#ASMSTART
22390; GFX900-NEXT:    ; def s[4:5]
22391; GFX900-NEXT:    ;;#ASMEND
22392; GFX900-NEXT:    ;;#ASMSTART
22393; GFX900-NEXT:    ; def s[6:7]
22394; GFX900-NEXT:    ;;#ASMEND
22395; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s7
22396; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22397; GFX900-NEXT:    ;;#ASMSTART
22398; GFX900-NEXT:    ; use s[8:9]
22399; GFX900-NEXT:    ;;#ASMEND
22400; GFX900-NEXT:    s_setpc_b64 s[30:31]
22401;
22402; GFX90A-LABEL: s_shuffle_v4i16_v4i16__3_7_7_7:
22403; GFX90A:       ; %bb.0:
22404; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22405; GFX90A-NEXT:    ;;#ASMSTART
22406; GFX90A-NEXT:    ; def s[4:5]
22407; GFX90A-NEXT:    ;;#ASMEND
22408; GFX90A-NEXT:    ;;#ASMSTART
22409; GFX90A-NEXT:    ; def s[6:7]
22410; GFX90A-NEXT:    ;;#ASMEND
22411; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s7
22412; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22413; GFX90A-NEXT:    ;;#ASMSTART
22414; GFX90A-NEXT:    ; use s[8:9]
22415; GFX90A-NEXT:    ;;#ASMEND
22416; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22417;
22418; GFX940-LABEL: s_shuffle_v4i16_v4i16__3_7_7_7:
22419; GFX940:       ; %bb.0:
22420; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22421; GFX940-NEXT:    ;;#ASMSTART
22422; GFX940-NEXT:    ; def s[0:1]
22423; GFX940-NEXT:    ;;#ASMEND
22424; GFX940-NEXT:    ;;#ASMSTART
22425; GFX940-NEXT:    ; def s[2:3]
22426; GFX940-NEXT:    ;;#ASMEND
22427; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s3
22428; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s3
22429; GFX940-NEXT:    ;;#ASMSTART
22430; GFX940-NEXT:    ; use s[8:9]
22431; GFX940-NEXT:    ;;#ASMEND
22432; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <3,7,7,7>. Lane 0 takes element 3 of %vec0; lanes
  ; 1-3 all replicate element 3 of %vec1 (index 7). Both inputs are read, so
  ; the expected output above contains two "def" asm blocks.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
22433  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22434  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22435  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 3, i32 7, i32 7, i32 7>
22436  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22437  ret void
22438}
22439
22440define void @s_shuffle_v4i16_v4i16__4_7_7_7() {
22441; GFX900-LABEL: s_shuffle_v4i16_v4i16__4_7_7_7:
22442; GFX900:       ; %bb.0:
22443; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22444; GFX900-NEXT:    ;;#ASMSTART
22445; GFX900-NEXT:    ; def s[4:5]
22446; GFX900-NEXT:    ;;#ASMEND
22447; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
22448; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22449; GFX900-NEXT:    ;;#ASMSTART
22450; GFX900-NEXT:    ; use s[8:9]
22451; GFX900-NEXT:    ;;#ASMEND
22452; GFX900-NEXT:    s_setpc_b64 s[30:31]
22453;
22454; GFX90A-LABEL: s_shuffle_v4i16_v4i16__4_7_7_7:
22455; GFX90A:       ; %bb.0:
22456; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22457; GFX90A-NEXT:    ;;#ASMSTART
22458; GFX90A-NEXT:    ; def s[4:5]
22459; GFX90A-NEXT:    ;;#ASMEND
22460; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
22461; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22462; GFX90A-NEXT:    ;;#ASMSTART
22463; GFX90A-NEXT:    ; use s[8:9]
22464; GFX90A-NEXT:    ;;#ASMEND
22465; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22466;
22467; GFX940-LABEL: s_shuffle_v4i16_v4i16__4_7_7_7:
22468; GFX940:       ; %bb.0:
22469; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22470; GFX940-NEXT:    ;;#ASMSTART
22471; GFX940-NEXT:    ; def s[0:1]
22472; GFX940-NEXT:    ;;#ASMEND
22473; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s1
22474; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
22475; GFX940-NEXT:    ;;#ASMSTART
22476; GFX940-NEXT:    ; use s[8:9]
22477; GFX940-NEXT:    ;;#ASMEND
22478; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; SGPR shuffle with mask <4,7,7,7>. All four indices are in the 4-7 range,
  ; i.e. they select from %vec1 only: lane 0 is its element 0 and lanes 1-3
  ; replicate its element 3. %vec0 is never read and the expected output
  ; above contains a single "def" asm block.
  ; The "{s[8:9]}" constraint on the use pins the shuffled vector to s[8:9].
22479  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22480  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22481  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 4, i32 7, i32 7, i32 7>
22482  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22483  ret void
22484}
22485
; Scalar-register shuffle with mask <5, 7, 7, 7>. Mask indices 4-7 address the
; second operand, so the result is built from %vec1 elements 1, 3, 3, 3 only.
; Both inputs are produced by "=s" inline asm and the result is consumed by an
; inline asm use pinned to s[8:9]. The expected output shows a single "; def";
; presumably the def of the unreferenced %vec0 is dropped as dead.
22486define void @s_shuffle_v4i16_v4i16__5_7_7_7() {
22487; GFX900-LABEL: s_shuffle_v4i16_v4i16__5_7_7_7:
22488; GFX900:       ; %bb.0:
22489; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22490; GFX900-NEXT:    ;;#ASMSTART
22491; GFX900-NEXT:    ; def s[4:5]
22492; GFX900-NEXT:    ;;#ASMEND
22493; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
22494; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22495; GFX900-NEXT:    ;;#ASMSTART
22496; GFX900-NEXT:    ; use s[8:9]
22497; GFX900-NEXT:    ;;#ASMEND
22498; GFX900-NEXT:    s_setpc_b64 s[30:31]
22499;
22500; GFX90A-LABEL: s_shuffle_v4i16_v4i16__5_7_7_7:
22501; GFX90A:       ; %bb.0:
22502; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22503; GFX90A-NEXT:    ;;#ASMSTART
22504; GFX90A-NEXT:    ; def s[4:5]
22505; GFX90A-NEXT:    ;;#ASMEND
22506; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
22507; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22508; GFX90A-NEXT:    ;;#ASMSTART
22509; GFX90A-NEXT:    ; use s[8:9]
22510; GFX90A-NEXT:    ;;#ASMEND
22511; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22512;
22513; GFX940-LABEL: s_shuffle_v4i16_v4i16__5_7_7_7:
22514; GFX940:       ; %bb.0:
22515; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22516; GFX940-NEXT:    ;;#ASMSTART
22517; GFX940-NEXT:    ; def s[0:1]
22518; GFX940-NEXT:    ;;#ASMEND
22519; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s1
22520; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
22521; GFX940-NEXT:    ;;#ASMSTART
22522; GFX940-NEXT:    ; use s[8:9]
22523; GFX940-NEXT:    ;;#ASMEND
22524; GFX940-NEXT:    s_setpc_b64 s[30:31]
22525  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22526  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22527  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 5, i32 7, i32 7, i32 7>
22528  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22529  ret void
22530}
22531
; Scalar-register shuffle with mask <6, 7, 7, 7>. Mask indices 4-7 address the
; second operand, so the result is built from %vec1 elements 2, 3, 3, 3 only.
; Result lanes 0-1 are therefore %vec1 elements 2-3 in their original order,
; which lines up with the plain s_mov_b32 into s8 in the expected output.
; The result is consumed by an inline asm use pinned to s[8:9].
22532define void @s_shuffle_v4i16_v4i16__6_7_7_7() {
22533; GFX900-LABEL: s_shuffle_v4i16_v4i16__6_7_7_7:
22534; GFX900:       ; %bb.0:
22535; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22536; GFX900-NEXT:    ;;#ASMSTART
22537; GFX900-NEXT:    ; def s[4:5]
22538; GFX900-NEXT:    ;;#ASMEND
22539; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22540; GFX900-NEXT:    s_mov_b32 s8, s5
22541; GFX900-NEXT:    ;;#ASMSTART
22542; GFX900-NEXT:    ; use s[8:9]
22543; GFX900-NEXT:    ;;#ASMEND
22544; GFX900-NEXT:    s_setpc_b64 s[30:31]
22545;
22546; GFX90A-LABEL: s_shuffle_v4i16_v4i16__6_7_7_7:
22547; GFX90A:       ; %bb.0:
22548; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22549; GFX90A-NEXT:    ;;#ASMSTART
22550; GFX90A-NEXT:    ; def s[4:5]
22551; GFX90A-NEXT:    ;;#ASMEND
22552; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22553; GFX90A-NEXT:    s_mov_b32 s8, s5
22554; GFX90A-NEXT:    ;;#ASMSTART
22555; GFX90A-NEXT:    ; use s[8:9]
22556; GFX90A-NEXT:    ;;#ASMEND
22557; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22558;
22559; GFX940-LABEL: s_shuffle_v4i16_v4i16__6_7_7_7:
22560; GFX940:       ; %bb.0:
22561; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22562; GFX940-NEXT:    ;;#ASMSTART
22563; GFX940-NEXT:    ; def s[0:1]
22564; GFX940-NEXT:    ;;#ASMEND
22565; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
22566; GFX940-NEXT:    s_mov_b32 s8, s1
22567; GFX940-NEXT:    ;;#ASMSTART
22568; GFX940-NEXT:    ; use s[8:9]
22569; GFX940-NEXT:    ;;#ASMEND
22570; GFX940-NEXT:    s_setpc_b64 s[30:31]
22571  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22572  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22573  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 6, i32 7, i32 7, i32 7>
22574  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22575  ret void
22576}
22577
; Scalar-register shuffle with mask <7, poison, 7, 7>. Lanes 0, 2 and 3 all take
; %vec1 element 3; lane 1 is poison, so its contents are unconstrained.
; The expected output fills s8 with a single s_lshr_b32 by 16 instead of a pack;
; presumably the poison lane is what permits leaving the upper half unspecified.
; The result is consumed by an inline asm use pinned to s[8:9].
22578define void @s_shuffle_v4i16_v4i16__7_u_7_7() {
22579; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_u_7_7:
22580; GFX900:       ; %bb.0:
22581; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22582; GFX900-NEXT:    ;;#ASMSTART
22583; GFX900-NEXT:    ; def s[4:5]
22584; GFX900-NEXT:    ;;#ASMEND
22585; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
22586; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22587; GFX900-NEXT:    ;;#ASMSTART
22588; GFX900-NEXT:    ; use s[8:9]
22589; GFX900-NEXT:    ;;#ASMEND
22590; GFX900-NEXT:    s_setpc_b64 s[30:31]
22591;
22592; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_u_7_7:
22593; GFX90A:       ; %bb.0:
22594; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22595; GFX90A-NEXT:    ;;#ASMSTART
22596; GFX90A-NEXT:    ; def s[4:5]
22597; GFX90A-NEXT:    ;;#ASMEND
22598; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
22599; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22600; GFX90A-NEXT:    ;;#ASMSTART
22601; GFX90A-NEXT:    ; use s[8:9]
22602; GFX90A-NEXT:    ;;#ASMEND
22603; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22604;
22605; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_u_7_7:
22606; GFX940:       ; %bb.0:
22607; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22608; GFX940-NEXT:    ;;#ASMSTART
22609; GFX940-NEXT:    ; def s[0:1]
22610; GFX940-NEXT:    ;;#ASMEND
22611; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
22612; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
22613; GFX940-NEXT:    ;;#ASMSTART
22614; GFX940-NEXT:    ; use s[8:9]
22615; GFX940-NEXT:    ;;#ASMEND
22616; GFX940-NEXT:    s_setpc_b64 s[30:31]
22617  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22618  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22619  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 poison, i32 7, i32 7>
22620  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22621  ret void
22622}
22623
; Scalar-register shuffle with mask <7, 0, 7, 7>. Lane 1 takes %vec0 element 0
; (mask indices 0-3 address the first operand); lanes 0, 2 and 3 take %vec1
; element 3. Because both operands are referenced, the expected output contains
; two "; def" blocks. The result is consumed by an inline asm use pinned to
; s[8:9].
22624define void @s_shuffle_v4i16_v4i16__7_0_7_7() {
22625; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_0_7_7:
22626; GFX900:       ; %bb.0:
22627; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22628; GFX900-NEXT:    ;;#ASMSTART
22629; GFX900-NEXT:    ; def s[4:5]
22630; GFX900-NEXT:    ;;#ASMEND
22631; GFX900-NEXT:    ;;#ASMSTART
22632; GFX900-NEXT:    ; def s[6:7]
22633; GFX900-NEXT:    ;;#ASMEND
22634; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
22635; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
22636; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22637; GFX900-NEXT:    ;;#ASMSTART
22638; GFX900-NEXT:    ; use s[8:9]
22639; GFX900-NEXT:    ;;#ASMEND
22640; GFX900-NEXT:    s_setpc_b64 s[30:31]
22641;
22642; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_0_7_7:
22643; GFX90A:       ; %bb.0:
22644; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22645; GFX90A-NEXT:    ;;#ASMSTART
22646; GFX90A-NEXT:    ; def s[4:5]
22647; GFX90A-NEXT:    ;;#ASMEND
22648; GFX90A-NEXT:    ;;#ASMSTART
22649; GFX90A-NEXT:    ; def s[6:7]
22650; GFX90A-NEXT:    ;;#ASMEND
22651; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
22652; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
22653; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22654; GFX90A-NEXT:    ;;#ASMSTART
22655; GFX90A-NEXT:    ; use s[8:9]
22656; GFX90A-NEXT:    ;;#ASMEND
22657; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22658;
22659; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_0_7_7:
22660; GFX940:       ; %bb.0:
22661; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22662; GFX940-NEXT:    ;;#ASMSTART
22663; GFX940-NEXT:    ; def s[0:1]
22664; GFX940-NEXT:    ;;#ASMEND
22665; GFX940-NEXT:    ;;#ASMSTART
22666; GFX940-NEXT:    ; def s[2:3]
22667; GFX940-NEXT:    ;;#ASMEND
22668; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
22669; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
22670; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s3
22671; GFX940-NEXT:    ;;#ASMSTART
22672; GFX940-NEXT:    ; use s[8:9]
22673; GFX940-NEXT:    ;;#ASMEND
22674; GFX940-NEXT:    s_setpc_b64 s[30:31]
22675  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22676  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22677  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 0, i32 7, i32 7>
22678  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22679  ret void
22680}
22681
; Scalar-register shuffle with mask <7, 1, 7, 7>. Lane 1 takes %vec0 element 1
; (mask indices 0-3 address the first operand); lanes 0, 2 and 3 take %vec1
; element 3. Because both operands are referenced, the expected output contains
; two "; def" blocks. The result is consumed by an inline asm use pinned to
; s[8:9].
22682define void @s_shuffle_v4i16_v4i16__7_1_7_7() {
22683; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_1_7_7:
22684; GFX900:       ; %bb.0:
22685; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22686; GFX900-NEXT:    ;;#ASMSTART
22687; GFX900-NEXT:    ; def s[4:5]
22688; GFX900-NEXT:    ;;#ASMEND
22689; GFX900-NEXT:    ;;#ASMSTART
22690; GFX900-NEXT:    ; def s[6:7]
22691; GFX900-NEXT:    ;;#ASMEND
22692; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
22693; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22694; GFX900-NEXT:    ;;#ASMSTART
22695; GFX900-NEXT:    ; use s[8:9]
22696; GFX900-NEXT:    ;;#ASMEND
22697; GFX900-NEXT:    s_setpc_b64 s[30:31]
22698;
22699; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_1_7_7:
22700; GFX90A:       ; %bb.0:
22701; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22702; GFX90A-NEXT:    ;;#ASMSTART
22703; GFX90A-NEXT:    ; def s[4:5]
22704; GFX90A-NEXT:    ;;#ASMEND
22705; GFX90A-NEXT:    ;;#ASMSTART
22706; GFX90A-NEXT:    ; def s[6:7]
22707; GFX90A-NEXT:    ;;#ASMEND
22708; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
22709; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22710; GFX90A-NEXT:    ;;#ASMSTART
22711; GFX90A-NEXT:    ; use s[8:9]
22712; GFX90A-NEXT:    ;;#ASMEND
22713; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22714;
22715; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_1_7_7:
22716; GFX940:       ; %bb.0:
22717; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22718; GFX940-NEXT:    ;;#ASMSTART
22719; GFX940-NEXT:    ; def s[0:1]
22720; GFX940-NEXT:    ;;#ASMEND
22721; GFX940-NEXT:    ;;#ASMSTART
22722; GFX940-NEXT:    ; def s[2:3]
22723; GFX940-NEXT:    ;;#ASMEND
22724; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
22725; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s3
22726; GFX940-NEXT:    ;;#ASMSTART
22727; GFX940-NEXT:    ; use s[8:9]
22728; GFX940-NEXT:    ;;#ASMEND
22729; GFX940-NEXT:    s_setpc_b64 s[30:31]
22730  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22731  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22732  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 1, i32 7, i32 7>
22733  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22734  ret void
22735}
22736
; Scalar-register shuffle with mask <7, 2, 7, 7>. Lane 1 takes %vec0 element 2
; (mask indices 0-3 address the first operand); lanes 0, 2 and 3 take %vec1
; element 3. Because both operands are referenced, the expected output contains
; two "; def" blocks. The result is consumed by an inline asm use pinned to
; s[8:9].
22737define void @s_shuffle_v4i16_v4i16__7_2_7_7() {
22738; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_2_7_7:
22739; GFX900:       ; %bb.0:
22740; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22741; GFX900-NEXT:    ;;#ASMSTART
22742; GFX900-NEXT:    ; def s[4:5]
22743; GFX900-NEXT:    ;;#ASMEND
22744; GFX900-NEXT:    ;;#ASMSTART
22745; GFX900-NEXT:    ; def s[6:7]
22746; GFX900-NEXT:    ;;#ASMEND
22747; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
22748; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
22749; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22750; GFX900-NEXT:    ;;#ASMSTART
22751; GFX900-NEXT:    ; use s[8:9]
22752; GFX900-NEXT:    ;;#ASMEND
22753; GFX900-NEXT:    s_setpc_b64 s[30:31]
22754;
22755; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_2_7_7:
22756; GFX90A:       ; %bb.0:
22757; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22758; GFX90A-NEXT:    ;;#ASMSTART
22759; GFX90A-NEXT:    ; def s[4:5]
22760; GFX90A-NEXT:    ;;#ASMEND
22761; GFX90A-NEXT:    ;;#ASMSTART
22762; GFX90A-NEXT:    ; def s[6:7]
22763; GFX90A-NEXT:    ;;#ASMEND
22764; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
22765; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
22766; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22767; GFX90A-NEXT:    ;;#ASMSTART
22768; GFX90A-NEXT:    ; use s[8:9]
22769; GFX90A-NEXT:    ;;#ASMEND
22770; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22771;
22772; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_2_7_7:
22773; GFX940:       ; %bb.0:
22774; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22775; GFX940-NEXT:    ;;#ASMSTART
22776; GFX940-NEXT:    ; def s[0:1]
22777; GFX940-NEXT:    ;;#ASMEND
22778; GFX940-NEXT:    ;;#ASMSTART
22779; GFX940-NEXT:    ; def s[2:3]
22780; GFX940-NEXT:    ;;#ASMEND
22781; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
22782; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
22783; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s3
22784; GFX940-NEXT:    ;;#ASMSTART
22785; GFX940-NEXT:    ; use s[8:9]
22786; GFX940-NEXT:    ;;#ASMEND
22787; GFX940-NEXT:    s_setpc_b64 s[30:31]
22788  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22789  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22790  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 2, i32 7, i32 7>
22791  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22792  ret void
22793}
22794
; Scalar-register shuffle with mask <7, 3, 7, 7>. Lane 1 takes %vec0 element 3
; (mask indices 0-3 address the first operand); lanes 0, 2 and 3 take %vec1
; element 3. Because both operands are referenced, the expected output contains
; two "; def" blocks. The result is consumed by an inline asm use pinned to
; s[8:9].
22795define void @s_shuffle_v4i16_v4i16__7_3_7_7() {
22796; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_3_7_7:
22797; GFX900:       ; %bb.0:
22798; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22799; GFX900-NEXT:    ;;#ASMSTART
22800; GFX900-NEXT:    ; def s[4:5]
22801; GFX900-NEXT:    ;;#ASMEND
22802; GFX900-NEXT:    ;;#ASMSTART
22803; GFX900-NEXT:    ; def s[6:7]
22804; GFX900-NEXT:    ;;#ASMEND
22805; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
22806; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22807; GFX900-NEXT:    ;;#ASMSTART
22808; GFX900-NEXT:    ; use s[8:9]
22809; GFX900-NEXT:    ;;#ASMEND
22810; GFX900-NEXT:    s_setpc_b64 s[30:31]
22811;
22812; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_3_7_7:
22813; GFX90A:       ; %bb.0:
22814; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22815; GFX90A-NEXT:    ;;#ASMSTART
22816; GFX90A-NEXT:    ; def s[4:5]
22817; GFX90A-NEXT:    ;;#ASMEND
22818; GFX90A-NEXT:    ;;#ASMSTART
22819; GFX90A-NEXT:    ; def s[6:7]
22820; GFX90A-NEXT:    ;;#ASMEND
22821; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
22822; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s7, s7
22823; GFX90A-NEXT:    ;;#ASMSTART
22824; GFX90A-NEXT:    ; use s[8:9]
22825; GFX90A-NEXT:    ;;#ASMEND
22826; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22827;
22828; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_3_7_7:
22829; GFX940:       ; %bb.0:
22830; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22831; GFX940-NEXT:    ;;#ASMSTART
22832; GFX940-NEXT:    ; def s[0:1]
22833; GFX940-NEXT:    ;;#ASMEND
22834; GFX940-NEXT:    ;;#ASMSTART
22835; GFX940-NEXT:    ; def s[2:3]
22836; GFX940-NEXT:    ;;#ASMEND
22837; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
22838; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s3, s3
22839; GFX940-NEXT:    ;;#ASMSTART
22840; GFX940-NEXT:    ; use s[8:9]
22841; GFX940-NEXT:    ;;#ASMEND
22842; GFX940-NEXT:    s_setpc_b64 s[30:31]
22843  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22844  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22845  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 3, i32 7, i32 7>
22846  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22847  ret void
22848}
22849
; Scalar-register shuffle with mask <7, 4, 7, 7>. Mask indices 4-7 address the
; second operand, so the result is built from %vec1 elements 3, 0, 3, 3 only.
; The expected output shows a single "; def"; presumably the def of the
; unreferenced %vec0 is dropped as dead. The result is consumed by an inline asm
; use pinned to s[8:9].
22850define void @s_shuffle_v4i16_v4i16__7_4_7_7() {
22851; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_4_7_7:
22852; GFX900:       ; %bb.0:
22853; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22854; GFX900-NEXT:    ;;#ASMSTART
22855; GFX900-NEXT:    ; def s[4:5]
22856; GFX900-NEXT:    ;;#ASMEND
22857; GFX900-NEXT:    s_lshr_b32 s6, s5, 16
22858; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
22859; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22860; GFX900-NEXT:    ;;#ASMSTART
22861; GFX900-NEXT:    ; use s[8:9]
22862; GFX900-NEXT:    ;;#ASMEND
22863; GFX900-NEXT:    s_setpc_b64 s[30:31]
22864;
22865; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_4_7_7:
22866; GFX90A:       ; %bb.0:
22867; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22868; GFX90A-NEXT:    ;;#ASMSTART
22869; GFX90A-NEXT:    ; def s[4:5]
22870; GFX90A-NEXT:    ;;#ASMEND
22871; GFX90A-NEXT:    s_lshr_b32 s6, s5, 16
22872; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
22873; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22874; GFX90A-NEXT:    ;;#ASMSTART
22875; GFX90A-NEXT:    ; use s[8:9]
22876; GFX90A-NEXT:    ;;#ASMEND
22877; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22878;
22879; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_4_7_7:
22880; GFX940:       ; %bb.0:
22881; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22882; GFX940-NEXT:    ;;#ASMSTART
22883; GFX940-NEXT:    ; def s[0:1]
22884; GFX940-NEXT:    ;;#ASMEND
22885; GFX940-NEXT:    s_lshr_b32 s2, s1, 16
22886; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s0
22887; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
22888; GFX940-NEXT:    ;;#ASMSTART
22889; GFX940-NEXT:    ; use s[8:9]
22890; GFX940-NEXT:    ;;#ASMEND
22891; GFX940-NEXT:    s_setpc_b64 s[30:31]
22892  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22893  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22894  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 4, i32 7, i32 7>
22895  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22896  ret void
22897}
22898
; Scalar-register shuffle with mask <7, 5, 7, 7>. Mask indices 4-7 address the
; second operand, so the result is built from %vec1 elements 3, 1, 3, 3 only.
; The expected output shows a single "; def"; presumably the def of the
; unreferenced %vec0 is dropped as dead. The result is consumed by an inline asm
; use pinned to s[8:9].
22899define void @s_shuffle_v4i16_v4i16__7_5_7_7() {
22900; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_5_7_7:
22901; GFX900:       ; %bb.0:
22902; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22903; GFX900-NEXT:    ;;#ASMSTART
22904; GFX900-NEXT:    ; def s[4:5]
22905; GFX900-NEXT:    ;;#ASMEND
22906; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
22907; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22908; GFX900-NEXT:    ;;#ASMSTART
22909; GFX900-NEXT:    ; use s[8:9]
22910; GFX900-NEXT:    ;;#ASMEND
22911; GFX900-NEXT:    s_setpc_b64 s[30:31]
22912;
22913; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_5_7_7:
22914; GFX90A:       ; %bb.0:
22915; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22916; GFX90A-NEXT:    ;;#ASMSTART
22917; GFX90A-NEXT:    ; def s[4:5]
22918; GFX90A-NEXT:    ;;#ASMEND
22919; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
22920; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22921; GFX90A-NEXT:    ;;#ASMSTART
22922; GFX90A-NEXT:    ; use s[8:9]
22923; GFX90A-NEXT:    ;;#ASMEND
22924; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22925;
22926; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_5_7_7:
22927; GFX940:       ; %bb.0:
22928; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22929; GFX940-NEXT:    ;;#ASMSTART
22930; GFX940-NEXT:    ; def s[0:1]
22931; GFX940-NEXT:    ;;#ASMEND
22932; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
22933; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
22934; GFX940-NEXT:    ;;#ASMSTART
22935; GFX940-NEXT:    ; use s[8:9]
22936; GFX940-NEXT:    ;;#ASMEND
22937; GFX940-NEXT:    s_setpc_b64 s[30:31]
22938  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22939  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22940  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 5, i32 7, i32 7>
22941  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22942  ret void
22943}
22944
; Scalar-register shuffle with mask <7, 6, 7, 7>. Mask indices 4-7 address the
; second operand, so the result is built from %vec1 elements 3, 2, 3, 3 only;
; lanes 0-1 are %vec1 elements 2-3 swapped. The expected output shows a single
; "; def"; presumably the def of the unreferenced %vec0 is dropped as dead.
; The result is consumed by an inline asm use pinned to s[8:9].
22945define void @s_shuffle_v4i16_v4i16__7_6_7_7() {
22946; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_6_7_7:
22947; GFX900:       ; %bb.0:
22948; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22949; GFX900-NEXT:    ;;#ASMSTART
22950; GFX900-NEXT:    ; def s[4:5]
22951; GFX900-NEXT:    ;;#ASMEND
22952; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
22953; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
22954; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22955; GFX900-NEXT:    ;;#ASMSTART
22956; GFX900-NEXT:    ; use s[8:9]
22957; GFX900-NEXT:    ;;#ASMEND
22958; GFX900-NEXT:    s_setpc_b64 s[30:31]
22959;
22960; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_6_7_7:
22961; GFX90A:       ; %bb.0:
22962; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22963; GFX90A-NEXT:    ;;#ASMSTART
22964; GFX90A-NEXT:    ; def s[4:5]
22965; GFX90A-NEXT:    ;;#ASMEND
22966; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
22967; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
22968; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s5
22969; GFX90A-NEXT:    ;;#ASMSTART
22970; GFX90A-NEXT:    ; use s[8:9]
22971; GFX90A-NEXT:    ;;#ASMEND
22972; GFX90A-NEXT:    s_setpc_b64 s[30:31]
22973;
22974; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_6_7_7:
22975; GFX940:       ; %bb.0:
22976; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22977; GFX940-NEXT:    ;;#ASMSTART
22978; GFX940-NEXT:    ; def s[0:1]
22979; GFX940-NEXT:    ;;#ASMEND
22980; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
22981; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
22982; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s1
22983; GFX940-NEXT:    ;;#ASMSTART
22984; GFX940-NEXT:    ; use s[8:9]
22985; GFX940-NEXT:    ;;#ASMEND
22986; GFX940-NEXT:    s_setpc_b64 s[30:31]
22987  %vec0 = call <4 x i16> asm "; def $0", "=s"()
22988  %vec1 = call <4 x i16> asm "; def $0", "=s"()
22989  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 6, i32 7, i32 7>
22990  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
22991  ret void
22992}
22993
; Scalar-register shuffle with mask <7, 7, poison, 7>. Lanes 0, 1 and 3 take
; %vec1 element 3; lane 2 is poison, so its contents are unconstrained.
; All targets share one check block here (the common prefix), which shows the
; def landing directly in s[8:9] and only s8 being rewritten afterwards;
; presumably the poison lane lets s9 be reused as defined.
22994define void @s_shuffle_v4i16_v4i16__7_7_u_7() {
22995; GFX9-LABEL: s_shuffle_v4i16_v4i16__7_7_u_7:
22996; GFX9:       ; %bb.0:
22997; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22998; GFX9-NEXT:    ;;#ASMSTART
22999; GFX9-NEXT:    ; def s[8:9]
23000; GFX9-NEXT:    ;;#ASMEND
23001; GFX9-NEXT:    s_pack_hh_b32_b16 s8, s9, s9
23002; GFX9-NEXT:    ;;#ASMSTART
23003; GFX9-NEXT:    ; use s[8:9]
23004; GFX9-NEXT:    ;;#ASMEND
23005; GFX9-NEXT:    s_setpc_b64 s[30:31]
23006  %vec0 = call <4 x i16> asm "; def $0", "=s"()
23007  %vec1 = call <4 x i16> asm "; def $0", "=s"()
23008  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 poison, i32 7>
23009  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
23010  ret void
23011}
23012
; Scalar-register shuffle with mask <7, 7, 0, 7>. Lane 2 takes %vec0 element 0
; (mask indices 0-3 address the first operand); lanes 0, 1 and 3 take %vec1
; element 3. Because both operands are referenced, the expected output contains
; two "; def" blocks. The result is consumed by an inline asm use pinned to
; s[8:9].
23013define void @s_shuffle_v4i16_v4i16__7_7_0_7() {
23014; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_0_7:
23015; GFX900:       ; %bb.0:
23016; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23017; GFX900-NEXT:    ;;#ASMSTART
23018; GFX900-NEXT:    ; def s[4:5]
23019; GFX900-NEXT:    ;;#ASMEND
23020; GFX900-NEXT:    ;;#ASMSTART
23021; GFX900-NEXT:    ; def s[6:7]
23022; GFX900-NEXT:    ;;#ASMEND
23023; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s4, s7
23024; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
23025; GFX900-NEXT:    ;;#ASMSTART
23026; GFX900-NEXT:    ; use s[8:9]
23027; GFX900-NEXT:    ;;#ASMEND
23028; GFX900-NEXT:    s_setpc_b64 s[30:31]
23029;
23030; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_0_7:
23031; GFX90A:       ; %bb.0:
23032; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23033; GFX90A-NEXT:    ;;#ASMSTART
23034; GFX90A-NEXT:    ; def s[4:5]
23035; GFX90A-NEXT:    ;;#ASMEND
23036; GFX90A-NEXT:    ;;#ASMSTART
23037; GFX90A-NEXT:    ; def s[6:7]
23038; GFX90A-NEXT:    ;;#ASMEND
23039; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s4, s7
23040; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
23041; GFX90A-NEXT:    ;;#ASMSTART
23042; GFX90A-NEXT:    ; use s[8:9]
23043; GFX90A-NEXT:    ;;#ASMEND
23044; GFX90A-NEXT:    s_setpc_b64 s[30:31]
23045;
23046; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_0_7:
23047; GFX940:       ; %bb.0:
23048; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23049; GFX940-NEXT:    ;;#ASMSTART
23050; GFX940-NEXT:    ; def s[0:1]
23051; GFX940-NEXT:    ;;#ASMEND
23052; GFX940-NEXT:    ;;#ASMSTART
23053; GFX940-NEXT:    ; def s[2:3]
23054; GFX940-NEXT:    ;;#ASMEND
23055; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s0, s3
23056; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
23057; GFX940-NEXT:    ;;#ASMSTART
23058; GFX940-NEXT:    ; use s[8:9]
23059; GFX940-NEXT:    ;;#ASMEND
23060; GFX940-NEXT:    s_setpc_b64 s[30:31]
23061  %vec0 = call <4 x i16> asm "; def $0", "=s"()
23062  %vec1 = call <4 x i16> asm "; def $0", "=s"()
23063  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 0, i32 7>
23064  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
23065  ret void
23066}
23067
; Scalar-register shuffle with mask <7, 7, 1, 7>. Lane 2 takes %vec0 element 1
; (mask indices 0-3 address the first operand); lanes 0, 1 and 3 take %vec1
; element 3. Because both operands are referenced, the expected output contains
; two "; def" blocks. The result is consumed by an inline asm use pinned to
; s[8:9].
23068define void @s_shuffle_v4i16_v4i16__7_7_1_7() {
23069; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_1_7:
23070; GFX900:       ; %bb.0:
23071; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23072; GFX900-NEXT:    ;;#ASMSTART
23073; GFX900-NEXT:    ; def s[4:5]
23074; GFX900-NEXT:    ;;#ASMEND
23075; GFX900-NEXT:    ;;#ASMSTART
23076; GFX900-NEXT:    ; def s[6:7]
23077; GFX900-NEXT:    ;;#ASMEND
23078; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s7
23079; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
23080; GFX900-NEXT:    ;;#ASMSTART
23081; GFX900-NEXT:    ; use s[8:9]
23082; GFX900-NEXT:    ;;#ASMEND
23083; GFX900-NEXT:    s_setpc_b64 s[30:31]
23084;
23085; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_1_7:
23086; GFX90A:       ; %bb.0:
23087; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23088; GFX90A-NEXT:    ;;#ASMSTART
23089; GFX90A-NEXT:    ; def s[4:5]
23090; GFX90A-NEXT:    ;;#ASMEND
23091; GFX90A-NEXT:    ;;#ASMSTART
23092; GFX90A-NEXT:    ; def s[6:7]
23093; GFX90A-NEXT:    ;;#ASMEND
23094; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s7
23095; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
23096; GFX90A-NEXT:    ;;#ASMSTART
23097; GFX90A-NEXT:    ; use s[8:9]
23098; GFX90A-NEXT:    ;;#ASMEND
23099; GFX90A-NEXT:    s_setpc_b64 s[30:31]
23100;
23101; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_1_7:
23102; GFX940:       ; %bb.0:
23103; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23104; GFX940-NEXT:    ;;#ASMSTART
23105; GFX940-NEXT:    ; def s[0:1]
23106; GFX940-NEXT:    ;;#ASMEND
23107; GFX940-NEXT:    ;;#ASMSTART
23108; GFX940-NEXT:    ; def s[2:3]
23109; GFX940-NEXT:    ;;#ASMEND
23110; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s3
23111; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
23112; GFX940-NEXT:    ;;#ASMSTART
23113; GFX940-NEXT:    ; use s[8:9]
23114; GFX940-NEXT:    ;;#ASMEND
23115; GFX940-NEXT:    s_setpc_b64 s[30:31]
23116  %vec0 = call <4 x i16> asm "; def $0", "=s"()
23117  %vec1 = call <4 x i16> asm "; def $0", "=s"()
23118  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 1, i32 7>
23119  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
23120  ret void
23121}
23122
; Scalar-register shuffle with mask <7, 7, 2, 7>. Lane 2 takes %vec0 element 2
; (mask indices 0-3 address the first operand); lanes 0, 1 and 3 take %vec1
; element 3. Because both operands are referenced, the expected output contains
; two "; def" blocks. The result is consumed by an inline asm use pinned to
; s[8:9].
23123define void @s_shuffle_v4i16_v4i16__7_7_2_7() {
23124; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_2_7:
23125; GFX900:       ; %bb.0:
23126; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23127; GFX900-NEXT:    ;;#ASMSTART
23128; GFX900-NEXT:    ; def s[4:5]
23129; GFX900-NEXT:    ;;#ASMEND
23130; GFX900-NEXT:    ;;#ASMSTART
23131; GFX900-NEXT:    ; def s[6:7]
23132; GFX900-NEXT:    ;;#ASMEND
23133; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s5, s7
23134; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
23135; GFX900-NEXT:    ;;#ASMSTART
23136; GFX900-NEXT:    ; use s[8:9]
23137; GFX900-NEXT:    ;;#ASMEND
23138; GFX900-NEXT:    s_setpc_b64 s[30:31]
23139;
23140; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_2_7:
23141; GFX90A:       ; %bb.0:
23142; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23143; GFX90A-NEXT:    ;;#ASMSTART
23144; GFX90A-NEXT:    ; def s[4:5]
23145; GFX90A-NEXT:    ;;#ASMEND
23146; GFX90A-NEXT:    ;;#ASMSTART
23147; GFX90A-NEXT:    ; def s[6:7]
23148; GFX90A-NEXT:    ;;#ASMEND
23149; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s5, s7
23150; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
23151; GFX90A-NEXT:    ;;#ASMSTART
23152; GFX90A-NEXT:    ; use s[8:9]
23153; GFX90A-NEXT:    ;;#ASMEND
23154; GFX90A-NEXT:    s_setpc_b64 s[30:31]
23155;
23156; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_2_7:
23157; GFX940:       ; %bb.0:
23158; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23159; GFX940-NEXT:    ;;#ASMSTART
23160; GFX940-NEXT:    ; def s[0:1]
23161; GFX940-NEXT:    ;;#ASMEND
23162; GFX940-NEXT:    ;;#ASMSTART
23163; GFX940-NEXT:    ; def s[2:3]
23164; GFX940-NEXT:    ;;#ASMEND
23165; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s1, s3
23166; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
23167; GFX940-NEXT:    ;;#ASMSTART
23168; GFX940-NEXT:    ; use s[8:9]
23169; GFX940-NEXT:    ;;#ASMEND
23170; GFX940-NEXT:    s_setpc_b64 s[30:31]
23171  %vec0 = call <4 x i16> asm "; def $0", "=s"()
23172  %vec1 = call <4 x i16> asm "; def $0", "=s"()
23173  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 2, i32 7>
23174  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
23175  ret void
23176}
23177
; Scalar-register shuffle with mask <7, 7, 3, 7>. Lane 2 takes %vec0 element 3
; (mask indices 0-3 address the first operand); lanes 0, 1 and 3 take %vec1
; element 3. Because both operands are referenced, the expected output contains
; two "; def" blocks. The result is consumed by an inline asm use pinned to
; s[8:9].
23178define void @s_shuffle_v4i16_v4i16__7_7_3_7() {
23179; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_3_7:
23180; GFX900:       ; %bb.0:
23181; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23182; GFX900-NEXT:    ;;#ASMSTART
23183; GFX900-NEXT:    ; def s[4:5]
23184; GFX900-NEXT:    ;;#ASMEND
23185; GFX900-NEXT:    ;;#ASMSTART
23186; GFX900-NEXT:    ; def s[6:7]
23187; GFX900-NEXT:    ;;#ASMEND
23188; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s5, s7
23189; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
23190; GFX900-NEXT:    ;;#ASMSTART
23191; GFX900-NEXT:    ; use s[8:9]
23192; GFX900-NEXT:    ;;#ASMEND
23193; GFX900-NEXT:    s_setpc_b64 s[30:31]
23194;
23195; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_3_7:
23196; GFX90A:       ; %bb.0:
23197; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23198; GFX90A-NEXT:    ;;#ASMSTART
23199; GFX90A-NEXT:    ; def s[4:5]
23200; GFX90A-NEXT:    ;;#ASMEND
23201; GFX90A-NEXT:    ;;#ASMSTART
23202; GFX90A-NEXT:    ; def s[6:7]
23203; GFX90A-NEXT:    ;;#ASMEND
23204; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s5, s7
23205; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
23206; GFX90A-NEXT:    ;;#ASMSTART
23207; GFX90A-NEXT:    ; use s[8:9]
23208; GFX90A-NEXT:    ;;#ASMEND
23209; GFX90A-NEXT:    s_setpc_b64 s[30:31]
23210;
23211; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_3_7:
23212; GFX940:       ; %bb.0:
23213; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23214; GFX940-NEXT:    ;;#ASMSTART
23215; GFX940-NEXT:    ; def s[0:1]
23216; GFX940-NEXT:    ;;#ASMEND
23217; GFX940-NEXT:    ;;#ASMSTART
23218; GFX940-NEXT:    ; def s[2:3]
23219; GFX940-NEXT:    ;;#ASMEND
23220; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s1, s3
23221; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
23222; GFX940-NEXT:    ;;#ASMSTART
23223; GFX940-NEXT:    ; use s[8:9]
23224; GFX940-NEXT:    ;;#ASMEND
23225; GFX940-NEXT:    s_setpc_b64 s[30:31]
23226  %vec0 = call <4 x i16> asm "; def $0", "=s"()
23227  %vec1 = call <4 x i16> asm "; def $0", "=s"()
23228  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 3, i32 7>
23229  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
23230  ret void
23231}
23232
; Scalar-register shuffle with mask <7, 7, 4, 7>. Mask indices 4-7 address the
; second operand, so the result is built from %vec1 elements 3, 3, 0, 3 only.
; The expected output shows a single "; def"; presumably the def of the
; unreferenced %vec0 is dropped as dead. The result is consumed by an inline asm
; use pinned to s[8:9].
23233define void @s_shuffle_v4i16_v4i16__7_7_4_7() {
23234; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_4_7:
23235; GFX900:       ; %bb.0:
23236; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23237; GFX900-NEXT:    ;;#ASMSTART
23238; GFX900-NEXT:    ; def s[4:5]
23239; GFX900-NEXT:    ;;#ASMEND
23240; GFX900-NEXT:    s_pack_lh_b32_b16 s9, s4, s5
23241; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
23242; GFX900-NEXT:    ;;#ASMSTART
23243; GFX900-NEXT:    ; use s[8:9]
23244; GFX900-NEXT:    ;;#ASMEND
23245; GFX900-NEXT:    s_setpc_b64 s[30:31]
23246;
23247; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_4_7:
23248; GFX90A:       ; %bb.0:
23249; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23250; GFX90A-NEXT:    ;;#ASMSTART
23251; GFX90A-NEXT:    ; def s[4:5]
23252; GFX90A-NEXT:    ;;#ASMEND
23253; GFX90A-NEXT:    s_pack_lh_b32_b16 s9, s4, s5
23254; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
23255; GFX90A-NEXT:    ;;#ASMSTART
23256; GFX90A-NEXT:    ; use s[8:9]
23257; GFX90A-NEXT:    ;;#ASMEND
23258; GFX90A-NEXT:    s_setpc_b64 s[30:31]
23259;
23260; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_4_7:
23261; GFX940:       ; %bb.0:
23262; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23263; GFX940-NEXT:    ;;#ASMSTART
23264; GFX940-NEXT:    ; def s[0:1]
23265; GFX940-NEXT:    ;;#ASMEND
23266; GFX940-NEXT:    s_pack_lh_b32_b16 s9, s0, s1
23267; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
23268; GFX940-NEXT:    ;;#ASMSTART
23269; GFX940-NEXT:    ; use s[8:9]
23270; GFX940-NEXT:    ;;#ASMEND
23271; GFX940-NEXT:    s_setpc_b64 s[30:31]
23272  %vec0 = call <4 x i16> asm "; def $0", "=s"()
23273  %vec1 = call <4 x i16> asm "; def $0", "=s"()
23274  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 4, i32 7>
23275  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
23276  ret void
23277}
23278
; Scalar-register shuffle of two <4 x i16> values with mask <7, 7, 5, 7>.
; Both inputs are produced by inline asm with an "=s" constraint and the result
; is consumed by inline asm constrained to s[8:9].
; shufflevector indices 4-7 select elements 0-3 of %vec1, so the result is
; <vec1[3], vec1[3], vec1[1], vec1[3]>; the mask never selects from %vec0.
; The assertions below expect a single register-pair def followed by two
; s_pack_hh_b32_b16 instructions, one writing s9 (result elements 2-3) and one
; writing s8 (result elements 0-1). The gfx940 block differs from the other two
; only in the register pair given to the def (s[0:1] rather than s[4:5]).
23279define void @s_shuffle_v4i16_v4i16__7_7_5_7() {
23280; GFX900-LABEL: s_shuffle_v4i16_v4i16__7_7_5_7:
23281; GFX900:       ; %bb.0:
23282; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23283; GFX900-NEXT:    ;;#ASMSTART
23284; GFX900-NEXT:    ; def s[4:5]
23285; GFX900-NEXT:    ;;#ASMEND
23286; GFX900-NEXT:    s_pack_hh_b32_b16 s9, s4, s5
23287; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
23288; GFX900-NEXT:    ;;#ASMSTART
23289; GFX900-NEXT:    ; use s[8:9]
23290; GFX900-NEXT:    ;;#ASMEND
23291; GFX900-NEXT:    s_setpc_b64 s[30:31]
23292;
23293; GFX90A-LABEL: s_shuffle_v4i16_v4i16__7_7_5_7:
23294; GFX90A:       ; %bb.0:
23295; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23296; GFX90A-NEXT:    ;;#ASMSTART
23297; GFX90A-NEXT:    ; def s[4:5]
23298; GFX90A-NEXT:    ;;#ASMEND
23299; GFX90A-NEXT:    s_pack_hh_b32_b16 s9, s4, s5
23300; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
23301; GFX90A-NEXT:    ;;#ASMSTART
23302; GFX90A-NEXT:    ; use s[8:9]
23303; GFX90A-NEXT:    ;;#ASMEND
23304; GFX90A-NEXT:    s_setpc_b64 s[30:31]
23305;
23306; GFX940-LABEL: s_shuffle_v4i16_v4i16__7_7_5_7:
23307; GFX940:       ; %bb.0:
23308; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23309; GFX940-NEXT:    ;;#ASMSTART
23310; GFX940-NEXT:    ; def s[0:1]
23311; GFX940-NEXT:    ;;#ASMEND
23312; GFX940-NEXT:    s_pack_hh_b32_b16 s9, s0, s1
23313; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
23314; GFX940-NEXT:    ;;#ASMSTART
23315; GFX940-NEXT:    ; use s[8:9]
23316; GFX940-NEXT:    ;;#ASMEND
23317; GFX940-NEXT:    s_setpc_b64 s[30:31]
23318  %vec0 = call <4 x i16> asm "; def $0", "=s"()
23319  %vec1 = call <4 x i16> asm "; def $0", "=s"()
23320  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 5, i32 7>
23321  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
23322  ret void
23323}
23324
; Scalar-register shuffle of two <4 x i16> values with mask <7, 7, 6, 7>.
; Both inputs are produced by inline asm with an "=s" constraint and the result
; is consumed by inline asm constrained to s[8:9].
; shufflevector indices 4-7 select elements 0-3 of %vec1, so the result is
; <vec1[3], vec1[3], vec1[2], vec1[3]>; the mask never selects from %vec0.
; Result elements 2-3 are vec1[2], vec1[3] in their original positions, so the
; assertions below expect the def to land directly in s[8:9] and only s8 to be
; rewritten, by one s_pack_hh_b32_b16 reading s9. A single shared check prefix
; is used here, unlike the per-target prefixes on the neighbouring tests.
23325define void @s_shuffle_v4i16_v4i16__7_7_6_7() {
23326; GFX9-LABEL: s_shuffle_v4i16_v4i16__7_7_6_7:
23327; GFX9:       ; %bb.0:
23328; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23329; GFX9-NEXT:    ;;#ASMSTART
23330; GFX9-NEXT:    ; def s[8:9]
23331; GFX9-NEXT:    ;;#ASMEND
23332; GFX9-NEXT:    s_pack_hh_b32_b16 s8, s9, s9
23333; GFX9-NEXT:    ;;#ASMSTART
23334; GFX9-NEXT:    ; use s[8:9]
23335; GFX9-NEXT:    ;;#ASMEND
23336; GFX9-NEXT:    s_setpc_b64 s[30:31]
23337  %vec0 = call <4 x i16> asm "; def $0", "=s"()
23338  %vec1 = call <4 x i16> asm "; def $0", "=s"()
23339  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <4 x i32> <i32 7, i32 7, i32 6, i32 7>
23340  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %shuf)
23341  ret void
23342}
23343;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
23344; GFX90APLUS: {{.*}}
23345