xref: /llvm-project/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v4i16.ll (revision 585858aeb6247b3892218edb9d353c63f1c33186)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
5
6
; Shuffle of %vec0 with an all-poison mask: every lane of the stored <3 x i16>
; is poison. The shared GFX9 checks expect only the entry s_waitcnt and the
; return, with no "; def" marker and no store.
7define void @v_shuffle_v3i16_v4i16__u_u_u(ptr addrspace(1) inreg %ptr) {
8; GFX9-LABEL: v_shuffle_v3i16_v4i16__u_u_u:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    s_setpc_b64 s[30:31]
12  %vec0 = call <4 x i16> asm "; def $0", "=v"()
13  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> poison
14  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
15  ret void
16}
17
; Mask <0, poison, poison>: result lane 0 is %vec0[0]; lanes 1 and 2 are poison.
; The expected output stores v0 as a dword and v1 as a short at offset 4.
; The gfx940 checks use s[0:1] as the pointer and add sc0 sc1 to the stores.
18define void @v_shuffle_v3i16_v4i16__0_u_u(ptr addrspace(1) inreg %ptr) {
19; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_u_u:
20; GFX900:       ; %bb.0:
21; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX900-NEXT:    v_mov_b32_e32 v2, 0
23; GFX900-NEXT:    ;;#ASMSTART
24; GFX900-NEXT:    ; def v[0:1]
25; GFX900-NEXT:    ;;#ASMEND
26; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
27; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
28; GFX900-NEXT:    s_waitcnt vmcnt(0)
29; GFX900-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_u_u:
32; GFX90A:       ; %bb.0:
33; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
35; GFX90A-NEXT:    ;;#ASMSTART
36; GFX90A-NEXT:    ; def v[0:1]
37; GFX90A-NEXT:    ;;#ASMEND
38; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
39; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
40; GFX90A-NEXT:    s_waitcnt vmcnt(0)
41; GFX90A-NEXT:    s_setpc_b64 s[30:31]
42;
43; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_u_u:
44; GFX940:       ; %bb.0:
45; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GFX940-NEXT:    v_mov_b32_e32 v2, 0
47; GFX940-NEXT:    ;;#ASMSTART
48; GFX940-NEXT:    ; def v[0:1]
49; GFX940-NEXT:    ;;#ASMEND
50; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
51; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
52; GFX940-NEXT:    s_waitcnt vmcnt(0)
53; GFX940-NEXT:    s_setpc_b64 s[30:31]
54  %vec0 = call <4 x i16> asm "; def $0", "=v"()
55  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
56  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
57  ret void
58}
59
; Mask <1, poison, poison>: result lane 0 is %vec0[1]; lanes 1 and 2 are poison.
; The expected output applies v_alignbit_b32 with a shift of 16 to v0 and
; emits a single dword store (no separate short store for lane 2).
60define void @v_shuffle_v3i16_v4i16__1_u_u(ptr addrspace(1) inreg %ptr) {
61; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_u_u:
62; GFX900:       ; %bb.0:
63; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GFX900-NEXT:    ;;#ASMSTART
65; GFX900-NEXT:    ; def v[0:1]
66; GFX900-NEXT:    ;;#ASMEND
67; GFX900-NEXT:    v_mov_b32_e32 v2, 0
68; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
69; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
70; GFX900-NEXT:    s_waitcnt vmcnt(0)
71; GFX900-NEXT:    s_setpc_b64 s[30:31]
72;
73; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_u_u:
74; GFX90A:       ; %bb.0:
75; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76; GFX90A-NEXT:    ;;#ASMSTART
77; GFX90A-NEXT:    ; def v[0:1]
78; GFX90A-NEXT:    ;;#ASMEND
79; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
80; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
81; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
82; GFX90A-NEXT:    s_waitcnt vmcnt(0)
83; GFX90A-NEXT:    s_setpc_b64 s[30:31]
84;
85; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_u_u:
86; GFX940:       ; %bb.0:
87; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88; GFX940-NEXT:    ;;#ASMSTART
89; GFX940-NEXT:    ; def v[0:1]
90; GFX940-NEXT:    ;;#ASMEND
91; GFX940-NEXT:    v_mov_b32_e32 v2, 0
92; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
93; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
94; GFX940-NEXT:    s_waitcnt vmcnt(0)
95; GFX940-NEXT:    s_setpc_b64 s[30:31]
96  %vec0 = call <4 x i16> asm "; def $0", "=v"()
97  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
98  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
99  ret void
100}
101
; Mask <2, poison, poison>: result lane 0 is %vec0[2]; lanes 1 and 2 are poison.
; The expected output stores v1 (the second register of the "; def v[0:1]"
; pair) directly as one dword, with no extra ALU instruction.
102define void @v_shuffle_v3i16_v4i16__2_u_u(ptr addrspace(1) inreg %ptr) {
103; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_u_u:
104; GFX900:       ; %bb.0:
105; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX900-NEXT:    v_mov_b32_e32 v2, 0
107; GFX900-NEXT:    ;;#ASMSTART
108; GFX900-NEXT:    ; def v[0:1]
109; GFX900-NEXT:    ;;#ASMEND
110; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
111; GFX900-NEXT:    s_waitcnt vmcnt(0)
112; GFX900-NEXT:    s_setpc_b64 s[30:31]
113;
114; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_u_u:
115; GFX90A:       ; %bb.0:
116; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
118; GFX90A-NEXT:    ;;#ASMSTART
119; GFX90A-NEXT:    ; def v[0:1]
120; GFX90A-NEXT:    ;;#ASMEND
121; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
122; GFX90A-NEXT:    s_waitcnt vmcnt(0)
123; GFX90A-NEXT:    s_setpc_b64 s[30:31]
124;
125; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_u_u:
126; GFX940:       ; %bb.0:
127; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; GFX940-NEXT:    v_mov_b32_e32 v2, 0
129; GFX940-NEXT:    ;;#ASMSTART
130; GFX940-NEXT:    ; def v[0:1]
131; GFX940-NEXT:    ;;#ASMEND
132; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
133; GFX940-NEXT:    s_waitcnt vmcnt(0)
134; GFX940-NEXT:    s_setpc_b64 s[30:31]
135  %vec0 = call <4 x i16> asm "; def $0", "=v"()
136  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
137  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
138  ret void
139}
140
; Mask <3, poison, poison>: result lane 0 is %vec0[3]; lanes 1 and 2 are poison.
; The expected output applies v_alignbit_b32 with a shift of 16 to v1 and
; emits a single dword store.
141define void @v_shuffle_v3i16_v4i16__3_u_u(ptr addrspace(1) inreg %ptr) {
142; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_u_u:
143; GFX900:       ; %bb.0:
144; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; GFX900-NEXT:    ;;#ASMSTART
146; GFX900-NEXT:    ; def v[0:1]
147; GFX900-NEXT:    ;;#ASMEND
148; GFX900-NEXT:    v_mov_b32_e32 v2, 0
149; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
150; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
151; GFX900-NEXT:    s_waitcnt vmcnt(0)
152; GFX900-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_u_u:
155; GFX90A:       ; %bb.0:
156; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX90A-NEXT:    ;;#ASMSTART
158; GFX90A-NEXT:    ; def v[0:1]
159; GFX90A-NEXT:    ;;#ASMEND
160; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
161; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
162; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
163; GFX90A-NEXT:    s_waitcnt vmcnt(0)
164; GFX90A-NEXT:    s_setpc_b64 s[30:31]
165;
166; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_u_u:
167; GFX940:       ; %bb.0:
168; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169; GFX940-NEXT:    ;;#ASMSTART
170; GFX940-NEXT:    ; def v[0:1]
171; GFX940-NEXT:    ;;#ASMEND
172; GFX940-NEXT:    v_mov_b32_e32 v2, 0
173; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
174; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
175; GFX940-NEXT:    s_waitcnt vmcnt(0)
176; GFX940-NEXT:    s_setpc_b64 s[30:31]
177  %vec0 = call <4 x i16> asm "; def $0", "=v"()
178  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
179  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
180  ret void
181}
182
; Mask <4, poison, poison>: mask indices 4-7 select from the second
; shufflevector operand, which is poison here, so no lane of the result is
; defined. The shared GFX9 checks expect only the entry s_waitcnt and the
; return, with no "; def" marker and no store.
183define void @v_shuffle_v3i16_v4i16__4_u_u(ptr addrspace(1) inreg %ptr) {
184; GFX9-LABEL: v_shuffle_v3i16_v4i16__4_u_u:
185; GFX9:       ; %bb.0:
186; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; GFX9-NEXT:    s_setpc_b64 s[30:31]
188  %vec0 = call <4 x i16> asm "; def $0", "=v"()
189  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 poison, i32 poison>
190  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
191  ret void
192}
193
; Mask <5, poison, poison> over (%vec0, %vec1): result lane 0 is %vec1[1];
; lanes 1 and 2 are poison. %vec0 is not referenced by the mask, and the
; checks expect a single "; def v[0:1]" followed by v_alignbit_b32 with a
; shift of 16 on v0 and one dword store.
194define void @v_shuffle_v3i16_v4i16__5_u_u(ptr addrspace(1) inreg %ptr) {
195; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_u_u:
196; GFX900:       ; %bb.0:
197; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198; GFX900-NEXT:    ;;#ASMSTART
199; GFX900-NEXT:    ; def v[0:1]
200; GFX900-NEXT:    ;;#ASMEND
201; GFX900-NEXT:    v_mov_b32_e32 v2, 0
202; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
203; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
204; GFX900-NEXT:    s_waitcnt vmcnt(0)
205; GFX900-NEXT:    s_setpc_b64 s[30:31]
206;
207; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_u_u:
208; GFX90A:       ; %bb.0:
209; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210; GFX90A-NEXT:    ;;#ASMSTART
211; GFX90A-NEXT:    ; def v[0:1]
212; GFX90A-NEXT:    ;;#ASMEND
213; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
214; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
215; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
216; GFX90A-NEXT:    s_waitcnt vmcnt(0)
217; GFX90A-NEXT:    s_setpc_b64 s[30:31]
218;
219; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_u_u:
220; GFX940:       ; %bb.0:
221; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; GFX940-NEXT:    ;;#ASMSTART
223; GFX940-NEXT:    ; def v[0:1]
224; GFX940-NEXT:    ;;#ASMEND
225; GFX940-NEXT:    v_mov_b32_e32 v2, 0
226; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
227; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
228; GFX940-NEXT:    s_waitcnt vmcnt(0)
229; GFX940-NEXT:    s_setpc_b64 s[30:31]
230  %vec0 = call <4 x i16> asm "; def $0", "=v"()
231  %vec1 = call <4 x i16> asm "; def $0", "=v"()
232  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison>
233  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
234  ret void
235}
236
; Mask <6, poison, poison> over (%vec0, %vec1): result lane 0 is %vec1[2];
; lanes 1 and 2 are poison. %vec0 is not referenced by the mask, and the
; checks expect a single "; def v[0:1]" with v1 stored directly as a dword.
237define void @v_shuffle_v3i16_v4i16__6_u_u(ptr addrspace(1) inreg %ptr) {
238; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_u_u:
239; GFX900:       ; %bb.0:
240; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241; GFX900-NEXT:    v_mov_b32_e32 v2, 0
242; GFX900-NEXT:    ;;#ASMSTART
243; GFX900-NEXT:    ; def v[0:1]
244; GFX900-NEXT:    ;;#ASMEND
245; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
246; GFX900-NEXT:    s_waitcnt vmcnt(0)
247; GFX900-NEXT:    s_setpc_b64 s[30:31]
248;
249; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_u_u:
250; GFX90A:       ; %bb.0:
251; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
252; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
253; GFX90A-NEXT:    ;;#ASMSTART
254; GFX90A-NEXT:    ; def v[0:1]
255; GFX90A-NEXT:    ;;#ASMEND
256; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
257; GFX90A-NEXT:    s_waitcnt vmcnt(0)
258; GFX90A-NEXT:    s_setpc_b64 s[30:31]
259;
260; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_u_u:
261; GFX940:       ; %bb.0:
262; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263; GFX940-NEXT:    v_mov_b32_e32 v2, 0
264; GFX940-NEXT:    ;;#ASMSTART
265; GFX940-NEXT:    ; def v[0:1]
266; GFX940-NEXT:    ;;#ASMEND
267; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
268; GFX940-NEXT:    s_waitcnt vmcnt(0)
269; GFX940-NEXT:    s_setpc_b64 s[30:31]
270  %vec0 = call <4 x i16> asm "; def $0", "=v"()
271  %vec1 = call <4 x i16> asm "; def $0", "=v"()
272  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison>
273  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
274  ret void
275}
276
; Mask <7, poison, poison> over (%vec0, %vec1): result lane 0 is %vec1[3];
; lanes 1 and 2 are poison. %vec0 is not referenced by the mask, and the
; checks expect a single "; def v[0:1]" followed by v_alignbit_b32 with a
; shift of 16 on v1 and one dword store.
277define void @v_shuffle_v3i16_v4i16__7_u_u(ptr addrspace(1) inreg %ptr) {
278; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_u:
279; GFX900:       ; %bb.0:
280; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; GFX900-NEXT:    ;;#ASMSTART
282; GFX900-NEXT:    ; def v[0:1]
283; GFX900-NEXT:    ;;#ASMEND
284; GFX900-NEXT:    v_mov_b32_e32 v2, 0
285; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
286; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
287; GFX900-NEXT:    s_waitcnt vmcnt(0)
288; GFX900-NEXT:    s_setpc_b64 s[30:31]
289;
290; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_u:
291; GFX90A:       ; %bb.0:
292; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293; GFX90A-NEXT:    ;;#ASMSTART
294; GFX90A-NEXT:    ; def v[0:1]
295; GFX90A-NEXT:    ;;#ASMEND
296; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
297; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
298; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
299; GFX90A-NEXT:    s_waitcnt vmcnt(0)
300; GFX90A-NEXT:    s_setpc_b64 s[30:31]
301;
302; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_u:
303; GFX940:       ; %bb.0:
304; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305; GFX940-NEXT:    ;;#ASMSTART
306; GFX940-NEXT:    ; def v[0:1]
307; GFX940-NEXT:    ;;#ASMEND
308; GFX940-NEXT:    v_mov_b32_e32 v2, 0
309; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
310; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
311; GFX940-NEXT:    s_waitcnt vmcnt(0)
312; GFX940-NEXT:    s_setpc_b64 s[30:31]
313  %vec0 = call <4 x i16> asm "; def $0", "=v"()
314  %vec1 = call <4 x i16> asm "; def $0", "=v"()
315  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison>
316  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
317  ret void
318}
319
; Mask <7, 0, poison> over (%vec0, %vec1): lane 0 is %vec1[3], lane 1 is
; %vec0[0], lane 2 is poison. Both inline-asm defs appear in the expected
; output, which combines the two sources with v_alignbit_b32 (shift 16) and
; emits one dword store. The gfx940 checks additionally expect an s_nop 0
; before the v_alignbit_b32.
320define void @v_shuffle_v3i16_v4i16__7_0_u(ptr addrspace(1) inreg %ptr) {
321; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_u:
322; GFX900:       ; %bb.0:
323; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324; GFX900-NEXT:    ;;#ASMSTART
325; GFX900-NEXT:    ; def v[0:1]
326; GFX900-NEXT:    ;;#ASMEND
327; GFX900-NEXT:    v_mov_b32_e32 v3, 0
328; GFX900-NEXT:    ;;#ASMSTART
329; GFX900-NEXT:    ; def v[1:2]
330; GFX900-NEXT:    ;;#ASMEND
331; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
332; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
333; GFX900-NEXT:    s_waitcnt vmcnt(0)
334; GFX900-NEXT:    s_setpc_b64 s[30:31]
335;
336; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_u:
337; GFX90A:       ; %bb.0:
338; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339; GFX90A-NEXT:    ;;#ASMSTART
340; GFX90A-NEXT:    ; def v[0:1]
341; GFX90A-NEXT:    ;;#ASMEND
342; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
343; GFX90A-NEXT:    ;;#ASMSTART
344; GFX90A-NEXT:    ; def v[2:3]
345; GFX90A-NEXT:    ;;#ASMEND
346; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
347; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
348; GFX90A-NEXT:    s_waitcnt vmcnt(0)
349; GFX90A-NEXT:    s_setpc_b64 s[30:31]
350;
351; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_u:
352; GFX940:       ; %bb.0:
353; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354; GFX940-NEXT:    ;;#ASMSTART
355; GFX940-NEXT:    ; def v[0:1]
356; GFX940-NEXT:    ;;#ASMEND
357; GFX940-NEXT:    v_mov_b32_e32 v4, 0
358; GFX940-NEXT:    ;;#ASMSTART
359; GFX940-NEXT:    ; def v[2:3]
360; GFX940-NEXT:    ;;#ASMEND
361; GFX940-NEXT:    s_nop 0
362; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
363; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
364; GFX940-NEXT:    s_waitcnt vmcnt(0)
365; GFX940-NEXT:    s_setpc_b64 s[30:31]
366  %vec0 = call <4 x i16> asm "; def $0", "=v"()
367  %vec1 = call <4 x i16> asm "; def $0", "=v"()
368  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 poison>
369  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
370  ret void
371}
372
; Mask <7, 1, poison> over (%vec0, %vec1): lane 0 is %vec1[3], lane 1 is
; %vec0[1], lane 2 is poison. Both inline-asm defs appear in the expected
; output, which combines the two sources with v_perm_b32 using the selector
; constant 0x7060302 (materialized in an SGPR) and emits one dword store.
373define void @v_shuffle_v3i16_v4i16__7_1_u(ptr addrspace(1) inreg %ptr) {
374; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_u:
375; GFX900:       ; %bb.0:
376; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377; GFX900-NEXT:    ;;#ASMSTART
378; GFX900-NEXT:    ; def v[0:1]
379; GFX900-NEXT:    ;;#ASMEND
380; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
381; GFX900-NEXT:    v_mov_b32_e32 v3, 0
382; GFX900-NEXT:    ;;#ASMSTART
383; GFX900-NEXT:    ; def v[1:2]
384; GFX900-NEXT:    ;;#ASMEND
385; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
386; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
387; GFX900-NEXT:    s_waitcnt vmcnt(0)
388; GFX900-NEXT:    s_setpc_b64 s[30:31]
389;
390; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_u:
391; GFX90A:       ; %bb.0:
392; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393; GFX90A-NEXT:    ;;#ASMSTART
394; GFX90A-NEXT:    ; def v[0:1]
395; GFX90A-NEXT:    ;;#ASMEND
396; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
397; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
398; GFX90A-NEXT:    ;;#ASMSTART
399; GFX90A-NEXT:    ; def v[2:3]
400; GFX90A-NEXT:    ;;#ASMEND
401; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
402; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
403; GFX90A-NEXT:    s_waitcnt vmcnt(0)
404; GFX90A-NEXT:    s_setpc_b64 s[30:31]
405;
406; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_u:
407; GFX940:       ; %bb.0:
408; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
409; GFX940-NEXT:    ;;#ASMSTART
410; GFX940-NEXT:    ; def v[0:1]
411; GFX940-NEXT:    ;;#ASMEND
412; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
413; GFX940-NEXT:    v_mov_b32_e32 v4, 0
414; GFX940-NEXT:    ;;#ASMSTART
415; GFX940-NEXT:    ; def v[2:3]
416; GFX940-NEXT:    ;;#ASMEND
417; GFX940-NEXT:    s_nop 0
418; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
419; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
420; GFX940-NEXT:    s_waitcnt vmcnt(0)
421; GFX940-NEXT:    s_setpc_b64 s[30:31]
422  %vec0 = call <4 x i16> asm "; def $0", "=v"()
423  %vec1 = call <4 x i16> asm "; def $0", "=v"()
424  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 poison>
425  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
426  ret void
427}
428
; Mask <7, 2, poison> over (%vec0, %vec1): lane 0 is %vec1[3], lane 1 is
; %vec0[2], lane 2 is poison. Both inline-asm defs appear in the expected
; output, which combines v1 and v3 with v_alignbit_b32 (shift 16) and emits
; one dword store.
429define void @v_shuffle_v3i16_v4i16__7_2_u(ptr addrspace(1) inreg %ptr) {
430; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_u:
431; GFX900:       ; %bb.0:
432; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433; GFX900-NEXT:    ;;#ASMSTART
434; GFX900-NEXT:    ; def v[0:1]
435; GFX900-NEXT:    ;;#ASMEND
436; GFX900-NEXT:    v_mov_b32_e32 v4, 0
437; GFX900-NEXT:    ;;#ASMSTART
438; GFX900-NEXT:    ; def v[2:3]
439; GFX900-NEXT:    ;;#ASMEND
440; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
441; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
442; GFX900-NEXT:    s_waitcnt vmcnt(0)
443; GFX900-NEXT:    s_setpc_b64 s[30:31]
444;
445; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_u:
446; GFX90A:       ; %bb.0:
447; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448; GFX90A-NEXT:    ;;#ASMSTART
449; GFX90A-NEXT:    ; def v[0:1]
450; GFX90A-NEXT:    ;;#ASMEND
451; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
452; GFX90A-NEXT:    ;;#ASMSTART
453; GFX90A-NEXT:    ; def v[2:3]
454; GFX90A-NEXT:    ;;#ASMEND
455; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
456; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
457; GFX90A-NEXT:    s_waitcnt vmcnt(0)
458; GFX90A-NEXT:    s_setpc_b64 s[30:31]
459;
460; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_u:
461; GFX940:       ; %bb.0:
462; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463; GFX940-NEXT:    ;;#ASMSTART
464; GFX940-NEXT:    ; def v[0:1]
465; GFX940-NEXT:    ;;#ASMEND
466; GFX940-NEXT:    v_mov_b32_e32 v4, 0
467; GFX940-NEXT:    ;;#ASMSTART
468; GFX940-NEXT:    ; def v[2:3]
469; GFX940-NEXT:    ;;#ASMEND
470; GFX940-NEXT:    s_nop 0
471; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
472; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
473; GFX940-NEXT:    s_waitcnt vmcnt(0)
474; GFX940-NEXT:    s_setpc_b64 s[30:31]
475  %vec0 = call <4 x i16> asm "; def $0", "=v"()
476  %vec1 = call <4 x i16> asm "; def $0", "=v"()
477  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 poison>
478  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
479  ret void
480}
481
; Mask <7, 3, poison> over (%vec0, %vec1): lane 0 is %vec1[3], lane 1 is
; %vec0[3], lane 2 is poison. Both inline-asm defs appear in the expected
; output, which combines v1 and v3 with v_perm_b32 using the selector
; constant 0x7060302 and emits one dword store.
482define void @v_shuffle_v3i16_v4i16__7_3_u(ptr addrspace(1) inreg %ptr) {
483; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_u:
484; GFX900:       ; %bb.0:
485; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486; GFX900-NEXT:    ;;#ASMSTART
487; GFX900-NEXT:    ; def v[0:1]
488; GFX900-NEXT:    ;;#ASMEND
489; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
490; GFX900-NEXT:    v_mov_b32_e32 v4, 0
491; GFX900-NEXT:    ;;#ASMSTART
492; GFX900-NEXT:    ; def v[2:3]
493; GFX900-NEXT:    ;;#ASMEND
494; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
495; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
496; GFX900-NEXT:    s_waitcnt vmcnt(0)
497; GFX900-NEXT:    s_setpc_b64 s[30:31]
498;
499; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_u:
500; GFX90A:       ; %bb.0:
501; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502; GFX90A-NEXT:    ;;#ASMSTART
503; GFX90A-NEXT:    ; def v[0:1]
504; GFX90A-NEXT:    ;;#ASMEND
505; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
506; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
507; GFX90A-NEXT:    ;;#ASMSTART
508; GFX90A-NEXT:    ; def v[2:3]
509; GFX90A-NEXT:    ;;#ASMEND
510; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
511; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
512; GFX90A-NEXT:    s_waitcnt vmcnt(0)
513; GFX90A-NEXT:    s_setpc_b64 s[30:31]
514;
515; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_u:
516; GFX940:       ; %bb.0:
517; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518; GFX940-NEXT:    ;;#ASMSTART
519; GFX940-NEXT:    ; def v[0:1]
520; GFX940-NEXT:    ;;#ASMEND
521; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
522; GFX940-NEXT:    v_mov_b32_e32 v4, 0
523; GFX940-NEXT:    ;;#ASMSTART
524; GFX940-NEXT:    ; def v[2:3]
525; GFX940-NEXT:    ;;#ASMEND
526; GFX940-NEXT:    s_nop 0
527; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
528; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
529; GFX940-NEXT:    s_waitcnt vmcnt(0)
530; GFX940-NEXT:    s_setpc_b64 s[30:31]
531  %vec0 = call <4 x i16> asm "; def $0", "=v"()
532  %vec1 = call <4 x i16> asm "; def $0", "=v"()
533  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 poison>
534  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
535  ret void
536}
537
; Mask <7, 4, poison> over (%vec0, %vec1): lane 0 is %vec1[3], lane 1 is
; %vec1[0], lane 2 is poison. Only %vec1 is referenced by the mask; the
; checks expect a single "; def v[0:1]", a v_alignbit_b32 (shift 16) over
; v0 and v1, and one dword store.
538define void @v_shuffle_v3i16_v4i16__7_4_u(ptr addrspace(1) inreg %ptr) {
539; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_u:
540; GFX900:       ; %bb.0:
541; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542; GFX900-NEXT:    ;;#ASMSTART
543; GFX900-NEXT:    ; def v[0:1]
544; GFX900-NEXT:    ;;#ASMEND
545; GFX900-NEXT:    v_mov_b32_e32 v2, 0
546; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
547; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
548; GFX900-NEXT:    s_waitcnt vmcnt(0)
549; GFX900-NEXT:    s_setpc_b64 s[30:31]
550;
551; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_u:
552; GFX90A:       ; %bb.0:
553; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; GFX90A-NEXT:    ;;#ASMSTART
555; GFX90A-NEXT:    ; def v[0:1]
556; GFX90A-NEXT:    ;;#ASMEND
557; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
558; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v1, 16
559; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
560; GFX90A-NEXT:    s_waitcnt vmcnt(0)
561; GFX90A-NEXT:    s_setpc_b64 s[30:31]
562;
563; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_u:
564; GFX940:       ; %bb.0:
565; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566; GFX940-NEXT:    ;;#ASMSTART
567; GFX940-NEXT:    ; def v[0:1]
568; GFX940-NEXT:    ;;#ASMEND
569; GFX940-NEXT:    v_mov_b32_e32 v2, 0
570; GFX940-NEXT:    v_alignbit_b32 v0, v0, v1, 16
571; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
572; GFX940-NEXT:    s_waitcnt vmcnt(0)
573; GFX940-NEXT:    s_setpc_b64 s[30:31]
574  %vec0 = call <4 x i16> asm "; def $0", "=v"()
575  %vec1 = call <4 x i16> asm "; def $0", "=v"()
576  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 poison>
577  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
578  ret void
579}
580
; Mask <7, 5, poison> over (%vec0, %vec1): lane 0 is %vec1[3], lane 1 is
; %vec1[1], lane 2 is poison. Only %vec1 is referenced by the mask; the
; checks expect a single "; def v[0:1]", a v_perm_b32 over v0 and v1 with
; the selector constant 0x7060302, and one dword store.
581define void @v_shuffle_v3i16_v4i16__7_5_u(ptr addrspace(1) inreg %ptr) {
582; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_u:
583; GFX900:       ; %bb.0:
584; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585; GFX900-NEXT:    ;;#ASMSTART
586; GFX900-NEXT:    ; def v[0:1]
587; GFX900-NEXT:    ;;#ASMEND
588; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
589; GFX900-NEXT:    v_mov_b32_e32 v2, 0
590; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
591; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
592; GFX900-NEXT:    s_waitcnt vmcnt(0)
593; GFX900-NEXT:    s_setpc_b64 s[30:31]
594;
595; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_u:
596; GFX90A:       ; %bb.0:
597; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; GFX90A-NEXT:    ;;#ASMSTART
599; GFX90A-NEXT:    ; def v[0:1]
600; GFX90A-NEXT:    ;;#ASMEND
601; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
602; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
603; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
604; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
605; GFX90A-NEXT:    s_waitcnt vmcnt(0)
606; GFX90A-NEXT:    s_setpc_b64 s[30:31]
607;
608; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_u:
609; GFX940:       ; %bb.0:
610; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611; GFX940-NEXT:    ;;#ASMSTART
612; GFX940-NEXT:    ; def v[0:1]
613; GFX940-NEXT:    ;;#ASMEND
614; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
615; GFX940-NEXT:    v_mov_b32_e32 v2, 0
616; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
617; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
618; GFX940-NEXT:    s_waitcnt vmcnt(0)
619; GFX940-NEXT:    s_setpc_b64 s[30:31]
620  %vec0 = call <4 x i16> asm "; def $0", "=v"()
621  %vec1 = call <4 x i16> asm "; def $0", "=v"()
622  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
623  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
624  ret void
625}
626
; Mask <7, 6, poison> over (%vec0, %vec1): lane 0 is %vec1[3], lane 1 is
; %vec1[2], lane 2 is poison. Only %vec1 is referenced by the mask; the
; checks expect a single "; def v[0:1]", a v_alignbit_b32 (shift 16) with v1
; as both sources, and one dword store.
627define void @v_shuffle_v3i16_v4i16__7_6_u(ptr addrspace(1) inreg %ptr) {
628; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_u:
629; GFX900:       ; %bb.0:
630; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631; GFX900-NEXT:    ;;#ASMSTART
632; GFX900-NEXT:    ; def v[0:1]
633; GFX900-NEXT:    ;;#ASMEND
634; GFX900-NEXT:    v_mov_b32_e32 v2, 0
635; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
636; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
637; GFX900-NEXT:    s_waitcnt vmcnt(0)
638; GFX900-NEXT:    s_setpc_b64 s[30:31]
639;
640; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_u:
641; GFX90A:       ; %bb.0:
642; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643; GFX90A-NEXT:    ;;#ASMSTART
644; GFX90A-NEXT:    ; def v[0:1]
645; GFX90A-NEXT:    ;;#ASMEND
646; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
647; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
648; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
649; GFX90A-NEXT:    s_waitcnt vmcnt(0)
650; GFX90A-NEXT:    s_setpc_b64 s[30:31]
651;
652; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_u:
653; GFX940:       ; %bb.0:
654; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655; GFX940-NEXT:    ;;#ASMSTART
656; GFX940-NEXT:    ; def v[0:1]
657; GFX940-NEXT:    ;;#ASMEND
658; GFX940-NEXT:    v_mov_b32_e32 v2, 0
659; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
660; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
661; GFX940-NEXT:    s_waitcnt vmcnt(0)
662; GFX940-NEXT:    s_setpc_b64 s[30:31]
663  %vec0 = call <4 x i16> asm "; def $0", "=v"()
664  %vec1 = call <4 x i16> asm "; def $0", "=v"()
665  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 poison>
666  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
667  ret void
668}
669
; Mask <7, 7, poison> over (%vec0, %vec1): lanes 0 and 1 are both %vec1[3],
; lane 2 is poison. Only %vec1 is referenced by the mask; the checks expect a
; single "; def v[0:1]", a v_perm_b32 with v1 as both sources and the
; selector constant 0x7060302, and one dword store.
670define void @v_shuffle_v3i16_v4i16__7_7_u(ptr addrspace(1) inreg %ptr) {
671; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_u:
672; GFX900:       ; %bb.0:
673; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674; GFX900-NEXT:    ;;#ASMSTART
675; GFX900-NEXT:    ; def v[0:1]
676; GFX900-NEXT:    ;;#ASMEND
677; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
678; GFX900-NEXT:    v_mov_b32_e32 v2, 0
679; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
680; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
681; GFX900-NEXT:    s_waitcnt vmcnt(0)
682; GFX900-NEXT:    s_setpc_b64 s[30:31]
683;
684; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_u:
685; GFX90A:       ; %bb.0:
686; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
687; GFX90A-NEXT:    ;;#ASMSTART
688; GFX90A-NEXT:    ; def v[0:1]
689; GFX90A-NEXT:    ;;#ASMEND
690; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
691; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
692; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
693; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
694; GFX90A-NEXT:    s_waitcnt vmcnt(0)
695; GFX90A-NEXT:    s_setpc_b64 s[30:31]
696;
697; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_u:
698; GFX940:       ; %bb.0:
699; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700; GFX940-NEXT:    ;;#ASMSTART
701; GFX940-NEXT:    ; def v[0:1]
702; GFX940-NEXT:    ;;#ASMEND
703; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
704; GFX940-NEXT:    v_mov_b32_e32 v2, 0
705; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
706; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
707; GFX940-NEXT:    s_waitcnt vmcnt(0)
708; GFX940-NEXT:    s_setpc_b64 s[30:31]
709  %vec0 = call <4 x i16> asm "; def $0", "=v"()
710  %vec1 = call <4 x i16> asm "; def $0", "=v"()
711  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
712  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
713  ret void
714}
715
; Mask <7, 7, 0> over (%vec0, %vec1): lanes 0 and 1 are both %vec1[3] and
; lane 2 is %vec0[0], so all three lanes are defined. Both inline-asm defs
; appear in the expected output: a v_perm_b32 (selector 0x7060302) builds the
; dword for lanes 0-1, and v0 is stored as a short at offset 4 for lane 2.
716define void @v_shuffle_v3i16_v4i16__7_7_0(ptr addrspace(1) inreg %ptr) {
717; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_0:
718; GFX900:       ; %bb.0:
719; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
720; GFX900-NEXT:    ;;#ASMSTART
721; GFX900-NEXT:    ; def v[0:1]
722; GFX900-NEXT:    ;;#ASMEND
723; GFX900-NEXT:    v_mov_b32_e32 v3, 0
724; GFX900-NEXT:    ;;#ASMSTART
725; GFX900-NEXT:    ; def v[1:2]
726; GFX900-NEXT:    ;;#ASMEND
727; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
728; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
729; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
730; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
731; GFX900-NEXT:    s_waitcnt vmcnt(0)
732; GFX900-NEXT:    s_setpc_b64 s[30:31]
733;
734; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_0:
735; GFX90A:       ; %bb.0:
736; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
738; GFX90A-NEXT:    ;;#ASMSTART
739; GFX90A-NEXT:    ; def v[0:1]
740; GFX90A-NEXT:    ;;#ASMEND
741; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
742; GFX90A-NEXT:    ;;#ASMSTART
743; GFX90A-NEXT:    ; def v[2:3]
744; GFX90A-NEXT:    ;;#ASMEND
745; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
746; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
747; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
748; GFX90A-NEXT:    s_waitcnt vmcnt(0)
749; GFX90A-NEXT:    s_setpc_b64 s[30:31]
750;
751; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_0:
752; GFX940:       ; %bb.0:
753; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754; GFX940-NEXT:    v_mov_b32_e32 v4, 0
755; GFX940-NEXT:    ;;#ASMSTART
756; GFX940-NEXT:    ; def v[0:1]
757; GFX940-NEXT:    ;;#ASMEND
758; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
759; GFX940-NEXT:    ;;#ASMSTART
760; GFX940-NEXT:    ; def v[2:3]
761; GFX940-NEXT:    ;;#ASMEND
762; GFX940-NEXT:    s_nop 0
763; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
764; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
765; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
766; GFX940-NEXT:    s_waitcnt vmcnt(0)
767; GFX940-NEXT:    s_setpc_b64 s[30:31]
768  %vec0 = call <4 x i16> asm "; def $0", "=v"()
769  %vec1 = call <4 x i16> asm "; def $0", "=v"()
770  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
771  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
772  ret void
773}
774
; Shuffle mask <7, 7, 1>: lanes 0 and 1 take element 3 of %vec1 (mask indices
; 4-7 address the second operand) and lane 2 takes element 1 of %vec0.
; Both source vectors are produced by inline asm with a "=v" output constraint.
; The checks expect a v_perm_b32 feeding the dword store at offset 0 and a
; d16_hi short store at offset 4.
775define void @v_shuffle_v3i16_v4i16__7_7_1(ptr addrspace(1) inreg %ptr) {
776; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_1:
777; GFX900:       ; %bb.0:
778; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779; GFX900-NEXT:    ;;#ASMSTART
780; GFX900-NEXT:    ; def v[0:1]
781; GFX900-NEXT:    ;;#ASMEND
782; GFX900-NEXT:    v_mov_b32_e32 v3, 0
783; GFX900-NEXT:    ;;#ASMSTART
784; GFX900-NEXT:    ; def v[1:2]
785; GFX900-NEXT:    ;;#ASMEND
786; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
787; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
788; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
789; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
790; GFX900-NEXT:    s_waitcnt vmcnt(0)
791; GFX900-NEXT:    s_setpc_b64 s[30:31]
792;
793; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_1:
794; GFX90A:       ; %bb.0:
795; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
796; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
797; GFX90A-NEXT:    ;;#ASMSTART
798; GFX90A-NEXT:    ; def v[0:1]
799; GFX90A-NEXT:    ;;#ASMEND
800; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
801; GFX90A-NEXT:    ;;#ASMSTART
802; GFX90A-NEXT:    ; def v[2:3]
803; GFX90A-NEXT:    ;;#ASMEND
804; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
805; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
806; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
807; GFX90A-NEXT:    s_waitcnt vmcnt(0)
808; GFX90A-NEXT:    s_setpc_b64 s[30:31]
809;
810; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_1:
811; GFX940:       ; %bb.0:
812; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
813; GFX940-NEXT:    v_mov_b32_e32 v4, 0
814; GFX940-NEXT:    ;;#ASMSTART
815; GFX940-NEXT:    ; def v[0:1]
816; GFX940-NEXT:    ;;#ASMEND
817; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
818; GFX940-NEXT:    ;;#ASMSTART
819; GFX940-NEXT:    ; def v[2:3]
820; GFX940-NEXT:    ;;#ASMEND
821; GFX940-NEXT:    s_nop 0
822; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
823; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
824; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
825; GFX940-NEXT:    s_waitcnt vmcnt(0)
826; GFX940-NEXT:    s_setpc_b64 s[30:31]
827  %vec0 = call <4 x i16> asm "; def $0", "=v"()
828  %vec1 = call <4 x i16> asm "; def $0", "=v"()
829  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 1>
830  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
831  ret void
832}
833
; Shuffle mask <7, 7, 2>: lanes 0 and 1 take element 3 of %vec1 (mask indices
; 4-7 address the second operand) and lane 2 takes element 2 of %vec0.
; The checks expect a v_perm_b32 on the second asm result's upper register for
; the dword store, and a plain short store of v1 at offset 4.
834define void @v_shuffle_v3i16_v4i16__7_7_2(ptr addrspace(1) inreg %ptr) {
835; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_2:
836; GFX900:       ; %bb.0:
837; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
838; GFX900-NEXT:    v_mov_b32_e32 v4, 0
839; GFX900-NEXT:    ;;#ASMSTART
840; GFX900-NEXT:    ; def v[0:1]
841; GFX900-NEXT:    ;;#ASMEND
842; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
843; GFX900-NEXT:    ;;#ASMSTART
844; GFX900-NEXT:    ; def v[2:3]
845; GFX900-NEXT:    ;;#ASMEND
846; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
847; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
848; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
849; GFX900-NEXT:    s_waitcnt vmcnt(0)
850; GFX900-NEXT:    s_setpc_b64 s[30:31]
851;
852; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_2:
853; GFX90A:       ; %bb.0:
854; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
856; GFX90A-NEXT:    ;;#ASMSTART
857; GFX90A-NEXT:    ; def v[0:1]
858; GFX90A-NEXT:    ;;#ASMEND
859; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
860; GFX90A-NEXT:    ;;#ASMSTART
861; GFX90A-NEXT:    ; def v[2:3]
862; GFX90A-NEXT:    ;;#ASMEND
863; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
864; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
865; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
866; GFX90A-NEXT:    s_waitcnt vmcnt(0)
867; GFX90A-NEXT:    s_setpc_b64 s[30:31]
868;
869; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_2:
870; GFX940:       ; %bb.0:
871; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872; GFX940-NEXT:    v_mov_b32_e32 v4, 0
873; GFX940-NEXT:    ;;#ASMSTART
874; GFX940-NEXT:    ; def v[0:1]
875; GFX940-NEXT:    ;;#ASMEND
876; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
877; GFX940-NEXT:    ;;#ASMSTART
878; GFX940-NEXT:    ; def v[2:3]
879; GFX940-NEXT:    ;;#ASMEND
880; GFX940-NEXT:    s_nop 0
881; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
882; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
883; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
884; GFX940-NEXT:    s_waitcnt vmcnt(0)
885; GFX940-NEXT:    s_setpc_b64 s[30:31]
886  %vec0 = call <4 x i16> asm "; def $0", "=v"()
887  %vec1 = call <4 x i16> asm "; def $0", "=v"()
888  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 2>
889  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
890  ret void
891}
892
; Shuffle mask <7, 7, 3>: lanes 0 and 1 take element 3 of %vec1 (mask indices
; 4-7 address the second operand) and lane 2 takes element 3 of %vec0.
; The checks expect a v_perm_b32 for the dword store and a d16_hi short store
; of v1 at offset 4.
893define void @v_shuffle_v3i16_v4i16__7_7_3(ptr addrspace(1) inreg %ptr) {
894; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_3:
895; GFX900:       ; %bb.0:
896; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
897; GFX900-NEXT:    v_mov_b32_e32 v4, 0
898; GFX900-NEXT:    ;;#ASMSTART
899; GFX900-NEXT:    ; def v[0:1]
900; GFX900-NEXT:    ;;#ASMEND
901; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
902; GFX900-NEXT:    ;;#ASMSTART
903; GFX900-NEXT:    ; def v[2:3]
904; GFX900-NEXT:    ;;#ASMEND
905; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
906; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
907; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
908; GFX900-NEXT:    s_waitcnt vmcnt(0)
909; GFX900-NEXT:    s_setpc_b64 s[30:31]
910;
911; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_3:
912; GFX90A:       ; %bb.0:
913; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
914; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
915; GFX90A-NEXT:    ;;#ASMSTART
916; GFX90A-NEXT:    ; def v[0:1]
917; GFX90A-NEXT:    ;;#ASMEND
918; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
919; GFX90A-NEXT:    ;;#ASMSTART
920; GFX90A-NEXT:    ; def v[2:3]
921; GFX90A-NEXT:    ;;#ASMEND
922; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
923; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
924; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
925; GFX90A-NEXT:    s_waitcnt vmcnt(0)
926; GFX90A-NEXT:    s_setpc_b64 s[30:31]
927;
928; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_3:
929; GFX940:       ; %bb.0:
930; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
931; GFX940-NEXT:    v_mov_b32_e32 v4, 0
932; GFX940-NEXT:    ;;#ASMSTART
933; GFX940-NEXT:    ; def v[0:1]
934; GFX940-NEXT:    ;;#ASMEND
935; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
936; GFX940-NEXT:    ;;#ASMSTART
937; GFX940-NEXT:    ; def v[2:3]
938; GFX940-NEXT:    ;;#ASMEND
939; GFX940-NEXT:    s_nop 0
940; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
941; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
942; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
943; GFX940-NEXT:    s_waitcnt vmcnt(0)
944; GFX940-NEXT:    s_setpc_b64 s[30:31]
945  %vec0 = call <4 x i16> asm "; def $0", "=v"()
946  %vec1 = call <4 x i16> asm "; def $0", "=v"()
947  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 3>
948  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
949  ret void
950}
951
; Shuffle mask <7, 7, 4>: every lane reads the second operand (%vec1) --
; element 3 for lanes 0 and 1, element 0 for lane 2. %vec0 is never selected,
; and the checks accordingly contain only one asm "def" block per target.
; Expected code: v_perm_b32 for the dword store, plain short store of v0.
952define void @v_shuffle_v3i16_v4i16__7_7_4(ptr addrspace(1) inreg %ptr) {
953; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_4:
954; GFX900:       ; %bb.0:
955; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
956; GFX900-NEXT:    v_mov_b32_e32 v2, 0
957; GFX900-NEXT:    ;;#ASMSTART
958; GFX900-NEXT:    ; def v[0:1]
959; GFX900-NEXT:    ;;#ASMEND
960; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
961; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
962; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
963; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
964; GFX900-NEXT:    s_waitcnt vmcnt(0)
965; GFX900-NEXT:    s_setpc_b64 s[30:31]
966;
967; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_4:
968; GFX90A:       ; %bb.0:
969; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
971; GFX90A-NEXT:    ;;#ASMSTART
972; GFX90A-NEXT:    ; def v[0:1]
973; GFX90A-NEXT:    ;;#ASMEND
974; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
975; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
976; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
977; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
978; GFX90A-NEXT:    s_waitcnt vmcnt(0)
979; GFX90A-NEXT:    s_setpc_b64 s[30:31]
980;
981; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_4:
982; GFX940:       ; %bb.0:
983; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984; GFX940-NEXT:    v_mov_b32_e32 v2, 0
985; GFX940-NEXT:    ;;#ASMSTART
986; GFX940-NEXT:    ; def v[0:1]
987; GFX940-NEXT:    ;;#ASMEND
988; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
989; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
990; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
991; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
992; GFX940-NEXT:    s_waitcnt vmcnt(0)
993; GFX940-NEXT:    s_setpc_b64 s[30:31]
994  %vec0 = call <4 x i16> asm "; def $0", "=v"()
995  %vec1 = call <4 x i16> asm "; def $0", "=v"()
996  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 4>
997  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
998  ret void
999}
1000
; Shuffle mask <7, 7, 5>: every lane reads the second operand (%vec1) --
; element 3 for lanes 0 and 1, element 1 for lane 2. %vec0 is never selected,
; and the checks accordingly contain only one asm "def" block per target.
; Expected code: v_perm_b32 for the dword store, d16_hi short store of v0.
1001define void @v_shuffle_v3i16_v4i16__7_7_5(ptr addrspace(1) inreg %ptr) {
1002; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_5:
1003; GFX900:       ; %bb.0:
1004; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1005; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1006; GFX900-NEXT:    ;;#ASMSTART
1007; GFX900-NEXT:    ; def v[0:1]
1008; GFX900-NEXT:    ;;#ASMEND
1009; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1010; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1011; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1012; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1013; GFX900-NEXT:    s_waitcnt vmcnt(0)
1014; GFX900-NEXT:    s_setpc_b64 s[30:31]
1015;
1016; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_5:
1017; GFX90A:       ; %bb.0:
1018; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1019; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1020; GFX90A-NEXT:    ;;#ASMSTART
1021; GFX90A-NEXT:    ; def v[0:1]
1022; GFX90A-NEXT:    ;;#ASMEND
1023; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1024; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
1025; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1026; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1027; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1028; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1029;
1030; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_5:
1031; GFX940:       ; %bb.0:
1032; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1033; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1034; GFX940-NEXT:    ;;#ASMSTART
1035; GFX940-NEXT:    ; def v[0:1]
1036; GFX940-NEXT:    ;;#ASMEND
1037; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1038; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
1039; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
1040; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1041; GFX940-NEXT:    s_waitcnt vmcnt(0)
1042; GFX940-NEXT:    s_setpc_b64 s[30:31]
1043  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1044  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1045  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
1046  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1047  ret void
1048}
1049
; Shuffle mask <7, 7, 6>: every lane reads the second operand (%vec1) --
; element 3 for lanes 0 and 1, element 2 for lane 2. %vec0 is never selected,
; and the checks accordingly contain only one asm "def" block per target.
; Expected code: v_perm_b32 of v1 into v0 for the dword store, plain short
; store of v1 at offset 4.
1050define void @v_shuffle_v3i16_v4i16__7_7_6(ptr addrspace(1) inreg %ptr) {
1051; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_6:
1052; GFX900:       ; %bb.0:
1053; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1054; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1055; GFX900-NEXT:    ;;#ASMSTART
1056; GFX900-NEXT:    ; def v[0:1]
1057; GFX900-NEXT:    ;;#ASMEND
1058; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1059; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
1060; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
1061; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1062; GFX900-NEXT:    s_waitcnt vmcnt(0)
1063; GFX900-NEXT:    s_setpc_b64 s[30:31]
1064;
1065; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_6:
1066; GFX90A:       ; %bb.0:
1067; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1068; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1069; GFX90A-NEXT:    ;;#ASMSTART
1070; GFX90A-NEXT:    ; def v[0:1]
1071; GFX90A-NEXT:    ;;#ASMEND
1072; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1073; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
1074; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
1075; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1076; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1077; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1078;
1079; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_6:
1080; GFX940:       ; %bb.0:
1081; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1082; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1083; GFX940-NEXT:    ;;#ASMSTART
1084; GFX940-NEXT:    ; def v[0:1]
1085; GFX940-NEXT:    ;;#ASMEND
1086; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1087; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
1088; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
1089; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1090; GFX940-NEXT:    s_waitcnt vmcnt(0)
1091; GFX940-NEXT:    s_setpc_b64 s[30:31]
1092  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1093  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1094  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
1095  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1096  ret void
1097}
1098
; Shuffle mask <7, 7, 7>: all three lanes take element 3 of the second operand
; (%vec1); %vec0 is never selected and only one asm "def" block is checked.
; Expected code: v_lshrrev_b32 by 16 produces the value for the short store and
; v_perm_b32 produces the dword. Note that in this test the dword store is
; checked before the short store.
1099define void @v_shuffle_v3i16_v4i16__7_7_7(ptr addrspace(1) inreg %ptr) {
1100; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_7_7:
1101; GFX900:       ; %bb.0:
1102; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103; GFX900-NEXT:    ;;#ASMSTART
1104; GFX900-NEXT:    ; def v[0:1]
1105; GFX900-NEXT:    ;;#ASMEND
1106; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1107; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1108; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
1109; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1110; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1111; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1112; GFX900-NEXT:    s_waitcnt vmcnt(0)
1113; GFX900-NEXT:    s_setpc_b64 s[30:31]
1114;
1115; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_7_7:
1116; GFX90A:       ; %bb.0:
1117; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118; GFX90A-NEXT:    ;;#ASMSTART
1119; GFX90A-NEXT:    ; def v[0:1]
1120; GFX90A-NEXT:    ;;#ASMEND
1121; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1122; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1123; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
1124; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
1125; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1126; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1127; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1128; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1129;
1130; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_7_7:
1131; GFX940:       ; %bb.0:
1132; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1133; GFX940-NEXT:    ;;#ASMSTART
1134; GFX940-NEXT:    ; def v[0:1]
1135; GFX940-NEXT:    ;;#ASMEND
1136; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1137; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1138; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
1139; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
1140; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1141; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1142; GFX940-NEXT:    s_waitcnt vmcnt(0)
1143; GFX940-NEXT:    s_setpc_b64 s[30:31]
1144  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1145  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1146  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
1147  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1148  ret void
1149}
1150
; Shuffle mask <poison, 0, 0>: lane 0 is left unspecified; lanes 1 and 2 take
; element 0 of %vec0. The second shuffle operand is poison.
; Expected code: a 16-bit left shift (v_lshlrev_b32) of v0 produces the dword
; to store, and v0 itself is stored as the short at offset 4.
1151define void @v_shuffle_v3i16_v4i16__u_0_0(ptr addrspace(1) inreg %ptr) {
1152; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_0_0:
1153; GFX900:       ; %bb.0:
1154; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1155; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1156; GFX900-NEXT:    ;;#ASMSTART
1157; GFX900-NEXT:    ; def v[0:1]
1158; GFX900-NEXT:    ;;#ASMEND
1159; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1160; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1161; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1162; GFX900-NEXT:    s_waitcnt vmcnt(0)
1163; GFX900-NEXT:    s_setpc_b64 s[30:31]
1164;
1165; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_0_0:
1166; GFX90A:       ; %bb.0:
1167; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1169; GFX90A-NEXT:    ;;#ASMSTART
1170; GFX90A-NEXT:    ; def v[0:1]
1171; GFX90A-NEXT:    ;;#ASMEND
1172; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1173; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1174; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1175; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1176; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1177;
1178; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_0_0:
1179; GFX940:       ; %bb.0:
1180; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1181; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1182; GFX940-NEXT:    ;;#ASMSTART
1183; GFX940-NEXT:    ; def v[0:1]
1184; GFX940-NEXT:    ;;#ASMEND
1185; GFX940-NEXT:    s_nop 0
1186; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1187; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1188; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1189; GFX940-NEXT:    s_waitcnt vmcnt(0)
1190; GFX940-NEXT:    s_setpc_b64 s[30:31]
1191  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1192  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 0, i32 0>
1193  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1194  ret void
1195}
1196
; Shuffle mask zeroinitializer (<0, 0, 0>): all three lanes take element 0 of
; %vec0, i.e. a splat. The second shuffle operand is poison.
; Expected code: v_perm_b32 with selector 0x5040100 on v0 produces the dword to
; store, and v0 itself is stored as the short at offset 4.
1197define void @v_shuffle_v3i16_v4i16__0_0_0(ptr addrspace(1) inreg %ptr) {
1198; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_0_0:
1199; GFX900:       ; %bb.0:
1200; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1201; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1202; GFX900-NEXT:    ;;#ASMSTART
1203; GFX900-NEXT:    ; def v[0:1]
1204; GFX900-NEXT:    ;;#ASMEND
1205; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1206; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1207; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1208; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1209; GFX900-NEXT:    s_waitcnt vmcnt(0)
1210; GFX900-NEXT:    s_setpc_b64 s[30:31]
1211;
1212; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_0_0:
1213; GFX90A:       ; %bb.0:
1214; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1216; GFX90A-NEXT:    ;;#ASMSTART
1217; GFX90A-NEXT:    ; def v[0:1]
1218; GFX90A-NEXT:    ;;#ASMEND
1219; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1220; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1221; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1222; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1223; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1224; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1225;
1226; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_0_0:
1227; GFX940:       ; %bb.0:
1228; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1229; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1230; GFX940-NEXT:    ;;#ASMSTART
1231; GFX940-NEXT:    ; def v[0:1]
1232; GFX940-NEXT:    ;;#ASMEND
1233; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1234; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1235; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1236; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1237; GFX940-NEXT:    s_waitcnt vmcnt(0)
1238; GFX940-NEXT:    s_setpc_b64 s[30:31]
1239  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1240  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> zeroinitializer
1241  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1242  ret void
1243}
1244
; Shuffle mask <1, 0, 0>: lane 0 takes element 1 of %vec0, lanes 1 and 2 take
; element 0 of %vec0. The second shuffle operand is poison.
; Expected code: v_alignbit_b32 of v0 with itself and a shift of 16 produces
; the dword to store, and v0 itself is stored as the short at offset 4.
1245define void @v_shuffle_v3i16_v4i16__1_0_0(ptr addrspace(1) inreg %ptr) {
1246; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_0_0:
1247; GFX900:       ; %bb.0:
1248; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1250; GFX900-NEXT:    ;;#ASMSTART
1251; GFX900-NEXT:    ; def v[0:1]
1252; GFX900-NEXT:    ;;#ASMEND
1253; GFX900-NEXT:    v_alignbit_b32 v1, v0, v0, 16
1254; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1255; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1256; GFX900-NEXT:    s_waitcnt vmcnt(0)
1257; GFX900-NEXT:    s_setpc_b64 s[30:31]
1258;
1259; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_0_0:
1260; GFX90A:       ; %bb.0:
1261; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1263; GFX90A-NEXT:    ;;#ASMSTART
1264; GFX90A-NEXT:    ; def v[0:1]
1265; GFX90A-NEXT:    ;;#ASMEND
1266; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v0, 16
1267; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1268; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1269; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1270; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1271;
1272; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_0_0:
1273; GFX940:       ; %bb.0:
1274; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1276; GFX940-NEXT:    ;;#ASMSTART
1277; GFX940-NEXT:    ; def v[0:1]
1278; GFX940-NEXT:    ;;#ASMEND
1279; GFX940-NEXT:    s_nop 0
1280; GFX940-NEXT:    v_alignbit_b32 v1, v0, v0, 16
1281; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1282; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1283; GFX940-NEXT:    s_waitcnt vmcnt(0)
1284; GFX940-NEXT:    s_setpc_b64 s[30:31]
1285  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1286  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 0, i32 0>
1287  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1288  ret void
1289}
1290
; Shuffle mask <2, 0, 0>: lane 0 takes element 2 of %vec0, lanes 1 and 2 take
; element 0 of %vec0. The second shuffle operand is poison.
; Expected code: v_perm_b32 with selector 0x5040100 combines v0 and v1 into the
; dword to store, and v0 itself is stored as the short at offset 4.
1291define void @v_shuffle_v3i16_v4i16__2_0_0(ptr addrspace(1) inreg %ptr) {
1292; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_0_0:
1293; GFX900:       ; %bb.0:
1294; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1296; GFX900-NEXT:    ;;#ASMSTART
1297; GFX900-NEXT:    ; def v[0:1]
1298; GFX900-NEXT:    ;;#ASMEND
1299; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1300; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
1301; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1302; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1303; GFX900-NEXT:    s_waitcnt vmcnt(0)
1304; GFX900-NEXT:    s_setpc_b64 s[30:31]
1305;
1306; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_0_0:
1307; GFX90A:       ; %bb.0:
1308; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1309; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1310; GFX90A-NEXT:    ;;#ASMSTART
1311; GFX90A-NEXT:    ; def v[0:1]
1312; GFX90A-NEXT:    ;;#ASMEND
1313; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1314; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
1315; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1316; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1317; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1318; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1319;
1320; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_0_0:
1321; GFX940:       ; %bb.0:
1322; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1323; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1324; GFX940-NEXT:    ;;#ASMSTART
1325; GFX940-NEXT:    ; def v[0:1]
1326; GFX940-NEXT:    ;;#ASMEND
1327; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1328; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
1329; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1330; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1331; GFX940-NEXT:    s_waitcnt vmcnt(0)
1332; GFX940-NEXT:    s_setpc_b64 s[30:31]
1333  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1334  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 0, i32 0>
1335  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1336  ret void
1337}
1338
; Shuffle mask <3, 0, 0>: lane 0 takes element 3 of %vec0, lanes 1 and 2 take
; element 0 of %vec0. The second shuffle operand is poison.
; Expected code: v_alignbit_b32 of v0 and v1 with a shift of 16 produces the
; dword to store, and v0 itself is stored as the short at offset 4.
1339define void @v_shuffle_v3i16_v4i16__3_0_0(ptr addrspace(1) inreg %ptr) {
1340; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_0_0:
1341; GFX900:       ; %bb.0:
1342; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1344; GFX900-NEXT:    ;;#ASMSTART
1345; GFX900-NEXT:    ; def v[0:1]
1346; GFX900-NEXT:    ;;#ASMEND
1347; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1348; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1349; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1350; GFX900-NEXT:    s_waitcnt vmcnt(0)
1351; GFX900-NEXT:    s_setpc_b64 s[30:31]
1352;
1353; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_0_0:
1354; GFX90A:       ; %bb.0:
1355; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1357; GFX90A-NEXT:    ;;#ASMSTART
1358; GFX90A-NEXT:    ; def v[0:1]
1359; GFX90A-NEXT:    ;;#ASMEND
1360; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1361; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1362; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1363; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1364; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1365;
1366; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_0_0:
1367; GFX940:       ; %bb.0:
1368; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1369; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1370; GFX940-NEXT:    ;;#ASMSTART
1371; GFX940-NEXT:    ; def v[0:1]
1372; GFX940-NEXT:    ;;#ASMEND
1373; GFX940-NEXT:    s_nop 0
1374; GFX940-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1375; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1376; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1377; GFX940-NEXT:    s_waitcnt vmcnt(0)
1378; GFX940-NEXT:    s_setpc_b64 s[30:31]
1379  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1380  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 0, i32 0>
1381  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1382  ret void
1383}
1384
; Shuffle mask <4, 0, 0>: index 4 addresses element 0 of the second operand,
; which is poison in this test, so lane 0 reads a poison element; lanes 1 and 2
; take element 0 of %vec0.
; Expected code: a single v_lshlrev_b32 by 16 of v0 produces the dword to
; store, and v0 itself is stored as the short at offset 4.
1385define void @v_shuffle_v3i16_v4i16__4_0_0(ptr addrspace(1) inreg %ptr) {
1386; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_0_0:
1387; GFX900:       ; %bb.0:
1388; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1390; GFX900-NEXT:    ;;#ASMSTART
1391; GFX900-NEXT:    ; def v[0:1]
1392; GFX900-NEXT:    ;;#ASMEND
1393; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1394; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1395; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1396; GFX900-NEXT:    s_waitcnt vmcnt(0)
1397; GFX900-NEXT:    s_setpc_b64 s[30:31]
1398;
1399; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_0_0:
1400; GFX90A:       ; %bb.0:
1401; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1403; GFX90A-NEXT:    ;;#ASMSTART
1404; GFX90A-NEXT:    ; def v[0:1]
1405; GFX90A-NEXT:    ;;#ASMEND
1406; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1407; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1408; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1409; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1410; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1411;
1412; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_0_0:
1413; GFX940:       ; %bb.0:
1414; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1415; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1416; GFX940-NEXT:    ;;#ASMSTART
1417; GFX940-NEXT:    ; def v[0:1]
1418; GFX940-NEXT:    ;;#ASMEND
1419; GFX940-NEXT:    s_nop 0
1420; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1421; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1422; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1423; GFX940-NEXT:    s_waitcnt vmcnt(0)
1424; GFX940-NEXT:    s_setpc_b64 s[30:31]
1425  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1426  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 0, i32 0>
1427  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1428  ret void
1429}
1430
; Shuffle mask <5, 0, 0>: lane 0 takes element 1 of %vec1 (mask indices 4-7
; address the second operand); lanes 1 and 2 take element 0 of %vec0.
; Expected code: v_alignbit_b32 with a shift of 16 combines v0 with the low
; register of the second asm result, and v0 is stored as the short at offset 4.
; The gfx900 checks place the second asm result in v[1:2], overlapping v1 of
; the first, while the other two targets use v[2:3].
1431define void @v_shuffle_v3i16_v4i16__5_0_0(ptr addrspace(1) inreg %ptr) {
1432; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_0_0:
1433; GFX900:       ; %bb.0:
1434; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1435; GFX900-NEXT:    ;;#ASMSTART
1436; GFX900-NEXT:    ; def v[0:1]
1437; GFX900-NEXT:    ;;#ASMEND
1438; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1439; GFX900-NEXT:    ;;#ASMSTART
1440; GFX900-NEXT:    ; def v[1:2]
1441; GFX900-NEXT:    ;;#ASMEND
1442; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1443; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1444; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1445; GFX900-NEXT:    s_waitcnt vmcnt(0)
1446; GFX900-NEXT:    s_setpc_b64 s[30:31]
1447;
1448; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_0_0:
1449; GFX90A:       ; %bb.0:
1450; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1451; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1452; GFX90A-NEXT:    ;;#ASMSTART
1453; GFX90A-NEXT:    ; def v[0:1]
1454; GFX90A-NEXT:    ;;#ASMEND
1455; GFX90A-NEXT:    ;;#ASMSTART
1456; GFX90A-NEXT:    ; def v[2:3]
1457; GFX90A-NEXT:    ;;#ASMEND
1458; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v2, 16
1459; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1460; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1461; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1462; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1463;
1464; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_0_0:
1465; GFX940:       ; %bb.0:
1466; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1467; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1468; GFX940-NEXT:    ;;#ASMSTART
1469; GFX940-NEXT:    ; def v[0:1]
1470; GFX940-NEXT:    ;;#ASMEND
1471; GFX940-NEXT:    ;;#ASMSTART
1472; GFX940-NEXT:    ; def v[2:3]
1473; GFX940-NEXT:    ;;#ASMEND
1474; GFX940-NEXT:    s_nop 0
1475; GFX940-NEXT:    v_alignbit_b32 v1, v0, v2, 16
1476; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1477; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1478; GFX940-NEXT:    s_waitcnt vmcnt(0)
1479; GFX940-NEXT:    s_setpc_b64 s[30:31]
1480  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1481  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1482  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
1483  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1484  ret void
1485}
1486
; Shuffle mask <6, 0, 0>: lane 0 takes element 2 of %vec1 (mask indices 4-7
; address the second operand); lanes 1 and 2 take element 0 of %vec0.
; Expected code: v_perm_b32 with selector 0x5040100 combines v0 with the high
; register of the second asm result, and v0 is stored as the short at offset 4.
; The gfx900 checks place the second asm result in v[1:2], overlapping v1 of
; the first, while the other two targets use v[2:3].
1487define void @v_shuffle_v3i16_v4i16__6_0_0(ptr addrspace(1) inreg %ptr) {
1488; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_0_0:
1489; GFX900:       ; %bb.0:
1490; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491; GFX900-NEXT:    ;;#ASMSTART
1492; GFX900-NEXT:    ; def v[0:1]
1493; GFX900-NEXT:    ;;#ASMEND
1494; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1495; GFX900-NEXT:    ;;#ASMSTART
1496; GFX900-NEXT:    ; def v[1:2]
1497; GFX900-NEXT:    ;;#ASMEND
1498; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1499; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
1500; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1501; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1502; GFX900-NEXT:    s_waitcnt vmcnt(0)
1503; GFX900-NEXT:    s_setpc_b64 s[30:31]
1504;
1505; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_0_0:
1506; GFX90A:       ; %bb.0:
1507; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1508; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1509; GFX90A-NEXT:    ;;#ASMSTART
1510; GFX90A-NEXT:    ; def v[0:1]
1511; GFX90A-NEXT:    ;;#ASMEND
1512; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1513; GFX90A-NEXT:    ;;#ASMSTART
1514; GFX90A-NEXT:    ; def v[2:3]
1515; GFX90A-NEXT:    ;;#ASMEND
1516; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
1517; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1518; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1519; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1520; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1521;
1522; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_0_0:
1523; GFX940:       ; %bb.0:
1524; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1525; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1526; GFX940-NEXT:    ;;#ASMSTART
1527; GFX940-NEXT:    ; def v[0:1]
1528; GFX940-NEXT:    ;;#ASMEND
1529; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1530; GFX940-NEXT:    ;;#ASMSTART
1531; GFX940-NEXT:    ; def v[2:3]
1532; GFX940-NEXT:    ;;#ASMEND
1533; GFX940-NEXT:    s_nop 0
1534; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
1535; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1536; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1537; GFX940-NEXT:    s_waitcnt vmcnt(0)
1538; GFX940-NEXT:    s_setpc_b64 s[30:31]
1539  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1540  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1541  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
1542  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1543  ret void
1544}
1545
; Shuffle of (%vec0, %vec1) with mask <7, 0, 0>: lane 0 is element 3 of %vec1
; (mask index 7 = 4 + 3); lanes 1 and 2 both repeat element 0 of %vec0.
; Each input comes from inline asm whose body is only a comment and whose
; output constraint is "=v", so it is an opaque <4 x i16> value that the
; expected output shows as a v[N:N+1] register pair. The <3 x i16> result is
; stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
1546define void @v_shuffle_v3i16_v4i16__7_0_0(ptr addrspace(1) inreg %ptr) {
1547; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_0:
1548; GFX900:       ; %bb.0:
1549; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1550; GFX900-NEXT:    ;;#ASMSTART
1551; GFX900-NEXT:    ; def v[0:1]
1552; GFX900-NEXT:    ;;#ASMEND
1553; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1554; GFX900-NEXT:    ;;#ASMSTART
1555; GFX900-NEXT:    ; def v[1:2]
1556; GFX900-NEXT:    ;;#ASMEND
1557; GFX900-NEXT:    v_alignbit_b32 v1, v0, v2, 16
1558; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1559; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1560; GFX900-NEXT:    s_waitcnt vmcnt(0)
1561; GFX900-NEXT:    s_setpc_b64 s[30:31]
1562;
1563; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_0:
1564; GFX90A:       ; %bb.0:
1565; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1567; GFX90A-NEXT:    ;;#ASMSTART
1568; GFX90A-NEXT:    ; def v[0:1]
1569; GFX90A-NEXT:    ;;#ASMEND
1570; GFX90A-NEXT:    ;;#ASMSTART
1571; GFX90A-NEXT:    ; def v[2:3]
1572; GFX90A-NEXT:    ;;#ASMEND
1573; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v3, 16
1574; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1575; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1576; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1577; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1578;
1579; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_0:
1580; GFX940:       ; %bb.0:
1581; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1582; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1583; GFX940-NEXT:    ;;#ASMSTART
1584; GFX940-NEXT:    ; def v[0:1]
1585; GFX940-NEXT:    ;;#ASMEND
1586; GFX940-NEXT:    ;;#ASMSTART
1587; GFX940-NEXT:    ; def v[2:3]
1588; GFX940-NEXT:    ;;#ASMEND
1589; GFX940-NEXT:    s_nop 0
1590; GFX940-NEXT:    v_alignbit_b32 v1, v0, v3, 16
1591; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1592; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1593; GFX940-NEXT:    s_waitcnt vmcnt(0)
1594; GFX940-NEXT:    s_setpc_b64 s[30:31]
1595  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1596  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1597  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
1598  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1599  ret void
1600}
1601
; Shuffle of (%vec0, %vec1) with mask <7, poison, 0>: lane 0 is element 3 of
; %vec1 (mask index 7 = 4 + 3), lane 1 is unconstrained, lane 2 is element 0
; of %vec0.
; In the expected output the first source of v_alignbit_b32 is a scalar
; register (s4 or s0); with a shift of 16 its low half lands in lane 1, the
; don't-care lane, so its contents presumably do not matter.
; Each input comes from inline asm whose body is only a comment and whose
; output constraint is "=v", so it is an opaque <4 x i16> value. The
; <3 x i16> result is stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
1602define void @v_shuffle_v3i16_v4i16__7_u_0(ptr addrspace(1) inreg %ptr) {
1603; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_0:
1604; GFX900:       ; %bb.0:
1605; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1606; GFX900-NEXT:    ;;#ASMSTART
1607; GFX900-NEXT:    ; def v[0:1]
1608; GFX900-NEXT:    ;;#ASMEND
1609; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1610; GFX900-NEXT:    ;;#ASMSTART
1611; GFX900-NEXT:    ; def v[1:2]
1612; GFX900-NEXT:    ;;#ASMEND
1613; GFX900-NEXT:    v_alignbit_b32 v1, s4, v2, 16
1614; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1615; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1616; GFX900-NEXT:    s_waitcnt vmcnt(0)
1617; GFX900-NEXT:    s_setpc_b64 s[30:31]
1618;
1619; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_0:
1620; GFX90A:       ; %bb.0:
1621; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1622; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1623; GFX90A-NEXT:    ;;#ASMSTART
1624; GFX90A-NEXT:    ; def v[0:1]
1625; GFX90A-NEXT:    ;;#ASMEND
1626; GFX90A-NEXT:    ;;#ASMSTART
1627; GFX90A-NEXT:    ; def v[2:3]
1628; GFX90A-NEXT:    ;;#ASMEND
1629; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v3, 16
1630; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1631; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1632; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1633; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1634;
1635; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_0:
1636; GFX940:       ; %bb.0:
1637; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1639; GFX940-NEXT:    ;;#ASMSTART
1640; GFX940-NEXT:    ; def v[0:1]
1641; GFX940-NEXT:    ;;#ASMEND
1642; GFX940-NEXT:    ;;#ASMSTART
1643; GFX940-NEXT:    ; def v[2:3]
1644; GFX940-NEXT:    ;;#ASMEND
1645; GFX940-NEXT:    s_nop 0
1646; GFX940-NEXT:    v_alignbit_b32 v1, s0, v3, 16
1647; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1648; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1649; GFX940-NEXT:    s_waitcnt vmcnt(0)
1650; GFX940-NEXT:    s_setpc_b64 s[30:31]
1651  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1652  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1653  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
1654  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1655  ret void
1656}
1657
; Shuffle of (%vec0, %vec1) with mask <7, 1, 0>: lane 0 is element 3 of %vec1
; (mask index 7 = 4 + 3), lane 1 is element 1 of %vec0, lane 2 is element 0
; of %vec0.
; Each input comes from inline asm whose body is only a comment and whose
; output constraint is "=v", so it is an opaque <4 x i16> value that the
; expected output shows as a v[N:N+1] register pair. The <3 x i16> result is
; stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
1658define void @v_shuffle_v3i16_v4i16__7_1_0(ptr addrspace(1) inreg %ptr) {
1659; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_0:
1660; GFX900:       ; %bb.0:
1661; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1662; GFX900-NEXT:    ;;#ASMSTART
1663; GFX900-NEXT:    ; def v[0:1]
1664; GFX900-NEXT:    ;;#ASMEND
1665; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1666; GFX900-NEXT:    ;;#ASMSTART
1667; GFX900-NEXT:    ; def v[1:2]
1668; GFX900-NEXT:    ;;#ASMEND
1669; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1670; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
1671; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1672; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1673; GFX900-NEXT:    s_waitcnt vmcnt(0)
1674; GFX900-NEXT:    s_setpc_b64 s[30:31]
1675;
1676; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_0:
1677; GFX90A:       ; %bb.0:
1678; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1679; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1680; GFX90A-NEXT:    ;;#ASMSTART
1681; GFX90A-NEXT:    ; def v[0:1]
1682; GFX90A-NEXT:    ;;#ASMEND
1683; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1684; GFX90A-NEXT:    ;;#ASMSTART
1685; GFX90A-NEXT:    ; def v[2:3]
1686; GFX90A-NEXT:    ;;#ASMEND
1687; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
1688; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1689; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1690; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1691; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1692;
1693; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_0:
1694; GFX940:       ; %bb.0:
1695; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1696; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1697; GFX940-NEXT:    ;;#ASMSTART
1698; GFX940-NEXT:    ; def v[0:1]
1699; GFX940-NEXT:    ;;#ASMEND
1700; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1701; GFX940-NEXT:    ;;#ASMSTART
1702; GFX940-NEXT:    ; def v[2:3]
1703; GFX940-NEXT:    ;;#ASMEND
1704; GFX940-NEXT:    s_nop 0
1705; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
1706; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1707; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1708; GFX940-NEXT:    s_waitcnt vmcnt(0)
1709; GFX940-NEXT:    s_setpc_b64 s[30:31]
1710  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1711  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1712  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
1713  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1714  ret void
1715}
1716
; Shuffle of (%vec0, %vec1) with mask <7, 2, 0>: lane 0 is element 3 of %vec1
; (mask index 7 = 4 + 3), lane 1 is element 2 of %vec0, lane 2 is element 0
; of %vec0.
; Each input comes from inline asm whose body is only a comment and whose
; output constraint is "=v", so it is an opaque <4 x i16> value that the
; expected output shows as a v[N:N+1] register pair. The <3 x i16> result is
; stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
1717define void @v_shuffle_v3i16_v4i16__7_2_0(ptr addrspace(1) inreg %ptr) {
1718; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_0:
1719; GFX900:       ; %bb.0:
1720; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1721; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1722; GFX900-NEXT:    ;;#ASMSTART
1723; GFX900-NEXT:    ; def v[0:1]
1724; GFX900-NEXT:    ;;#ASMEND
1725; GFX900-NEXT:    ;;#ASMSTART
1726; GFX900-NEXT:    ; def v[2:3]
1727; GFX900-NEXT:    ;;#ASMEND
1728; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
1729; GFX900-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1730; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
1731; GFX900-NEXT:    s_waitcnt vmcnt(0)
1732; GFX900-NEXT:    s_setpc_b64 s[30:31]
1733;
1734; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_0:
1735; GFX90A:       ; %bb.0:
1736; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1737; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1738; GFX90A-NEXT:    ;;#ASMSTART
1739; GFX90A-NEXT:    ; def v[0:1]
1740; GFX90A-NEXT:    ;;#ASMEND
1741; GFX90A-NEXT:    ;;#ASMSTART
1742; GFX90A-NEXT:    ; def v[2:3]
1743; GFX90A-NEXT:    ;;#ASMEND
1744; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v3, 16
1745; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1746; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1747; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1748; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1749;
1750; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_0:
1751; GFX940:       ; %bb.0:
1752; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1753; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1754; GFX940-NEXT:    ;;#ASMSTART
1755; GFX940-NEXT:    ; def v[0:1]
1756; GFX940-NEXT:    ;;#ASMEND
1757; GFX940-NEXT:    ;;#ASMSTART
1758; GFX940-NEXT:    ; def v[2:3]
1759; GFX940-NEXT:    ;;#ASMEND
1760; GFX940-NEXT:    s_nop 0
1761; GFX940-NEXT:    v_alignbit_b32 v1, v1, v3, 16
1762; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1763; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1764; GFX940-NEXT:    s_waitcnt vmcnt(0)
1765; GFX940-NEXT:    s_setpc_b64 s[30:31]
1766  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1767  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1768  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
1769  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1770  ret void
1771}
1772
; Shuffle of (%vec0, %vec1) with mask <7, 3, 0>: lane 0 is element 3 of %vec1
; (mask index 7 = 4 + 3), lane 1 is element 3 of %vec0, lane 2 is element 0
; of %vec0.
; Each input comes from inline asm whose body is only a comment and whose
; output constraint is "=v", so it is an opaque <4 x i16> value that the
; expected output shows as a v[N:N+1] register pair. The <3 x i16> result is
; stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
1773define void @v_shuffle_v3i16_v4i16__7_3_0(ptr addrspace(1) inreg %ptr) {
1774; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_0:
1775; GFX900:       ; %bb.0:
1776; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1777; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1778; GFX900-NEXT:    ;;#ASMSTART
1779; GFX900-NEXT:    ; def v[0:1]
1780; GFX900-NEXT:    ;;#ASMEND
1781; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1782; GFX900-NEXT:    ;;#ASMSTART
1783; GFX900-NEXT:    ; def v[2:3]
1784; GFX900-NEXT:    ;;#ASMEND
1785; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
1786; GFX900-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1787; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
1788; GFX900-NEXT:    s_waitcnt vmcnt(0)
1789; GFX900-NEXT:    s_setpc_b64 s[30:31]
1790;
1791; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_0:
1792; GFX90A:       ; %bb.0:
1793; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1794; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1795; GFX90A-NEXT:    ;;#ASMSTART
1796; GFX90A-NEXT:    ; def v[0:1]
1797; GFX90A-NEXT:    ;;#ASMEND
1798; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1799; GFX90A-NEXT:    ;;#ASMSTART
1800; GFX90A-NEXT:    ; def v[2:3]
1801; GFX90A-NEXT:    ;;#ASMEND
1802; GFX90A-NEXT:    v_perm_b32 v1, v1, v3, s4
1803; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1804; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1805; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1806; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1807;
1808; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_0:
1809; GFX940:       ; %bb.0:
1810; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1811; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1812; GFX940-NEXT:    ;;#ASMSTART
1813; GFX940-NEXT:    ; def v[0:1]
1814; GFX940-NEXT:    ;;#ASMEND
1815; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1816; GFX940-NEXT:    ;;#ASMSTART
1817; GFX940-NEXT:    ; def v[2:3]
1818; GFX940-NEXT:    ;;#ASMEND
1819; GFX940-NEXT:    s_nop 0
1820; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s2
1821; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1822; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1823; GFX940-NEXT:    s_waitcnt vmcnt(0)
1824; GFX940-NEXT:    s_setpc_b64 s[30:31]
1825  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1826  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1827  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
1828  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1829  ret void
1830}
1831
; Shuffle of (%vec0, %vec1) with mask <7, 4, 0>: lane 0 is element 3 of %vec1
; (mask index 7 = 4 + 3), lane 1 is element 0 of %vec1 (mask index 4), lane 2
; is element 0 of %vec0.
; Each input comes from inline asm whose body is only a comment and whose
; output constraint is "=v", so it is an opaque <4 x i16> value that the
; expected output shows as a v[N:N+1] register pair. The <3 x i16> result is
; stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
1832define void @v_shuffle_v3i16_v4i16__7_4_0(ptr addrspace(1) inreg %ptr) {
1833; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_0:
1834; GFX900:       ; %bb.0:
1835; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1836; GFX900-NEXT:    ;;#ASMSTART
1837; GFX900-NEXT:    ; def v[0:1]
1838; GFX900-NEXT:    ;;#ASMEND
1839; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1840; GFX900-NEXT:    ;;#ASMSTART
1841; GFX900-NEXT:    ; def v[1:2]
1842; GFX900-NEXT:    ;;#ASMEND
1843; GFX900-NEXT:    v_alignbit_b32 v1, v1, v2, 16
1844; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1845; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1846; GFX900-NEXT:    s_waitcnt vmcnt(0)
1847; GFX900-NEXT:    s_setpc_b64 s[30:31]
1848;
1849; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_0:
1850; GFX90A:       ; %bb.0:
1851; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1852; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1853; GFX90A-NEXT:    ;;#ASMSTART
1854; GFX90A-NEXT:    ; def v[0:1]
1855; GFX90A-NEXT:    ;;#ASMEND
1856; GFX90A-NEXT:    ;;#ASMSTART
1857; GFX90A-NEXT:    ; def v[2:3]
1858; GFX90A-NEXT:    ;;#ASMEND
1859; GFX90A-NEXT:    v_alignbit_b32 v1, v2, v3, 16
1860; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1861; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1862; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1863; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1864;
1865; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_0:
1866; GFX940:       ; %bb.0:
1867; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1868; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1869; GFX940-NEXT:    ;;#ASMSTART
1870; GFX940-NEXT:    ; def v[0:1]
1871; GFX940-NEXT:    ;;#ASMEND
1872; GFX940-NEXT:    ;;#ASMSTART
1873; GFX940-NEXT:    ; def v[2:3]
1874; GFX940-NEXT:    ;;#ASMEND
1875; GFX940-NEXT:    s_nop 0
1876; GFX940-NEXT:    v_alignbit_b32 v1, v2, v3, 16
1877; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1878; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1879; GFX940-NEXT:    s_waitcnt vmcnt(0)
1880; GFX940-NEXT:    s_setpc_b64 s[30:31]
1881  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1882  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1883  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
1884  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1885  ret void
1886}
1887
; Shuffle of (%vec0, %vec1) with mask <7, 5, 0>: lane 0 is element 3 of %vec1
; (mask index 7 = 4 + 3), lane 1 is element 1 of %vec1 (mask index 5 = 4 + 1),
; lane 2 is element 0 of %vec0.
; Each input comes from inline asm whose body is only a comment and whose
; output constraint is "=v", so it is an opaque <4 x i16> value that the
; expected output shows as a v[N:N+1] register pair. The <3 x i16> result is
; stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
1888define void @v_shuffle_v3i16_v4i16__7_5_0(ptr addrspace(1) inreg %ptr) {
1889; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_0:
1890; GFX900:       ; %bb.0:
1891; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1892; GFX900-NEXT:    ;;#ASMSTART
1893; GFX900-NEXT:    ; def v[0:1]
1894; GFX900-NEXT:    ;;#ASMEND
1895; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1896; GFX900-NEXT:    ;;#ASMSTART
1897; GFX900-NEXT:    ; def v[1:2]
1898; GFX900-NEXT:    ;;#ASMEND
1899; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1900; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
1901; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1902; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1903; GFX900-NEXT:    s_waitcnt vmcnt(0)
1904; GFX900-NEXT:    s_setpc_b64 s[30:31]
1905;
1906; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_0:
1907; GFX90A:       ; %bb.0:
1908; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1909; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1910; GFX90A-NEXT:    ;;#ASMSTART
1911; GFX90A-NEXT:    ; def v[0:1]
1912; GFX90A-NEXT:    ;;#ASMEND
1913; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1914; GFX90A-NEXT:    ;;#ASMSTART
1915; GFX90A-NEXT:    ; def v[2:3]
1916; GFX90A-NEXT:    ;;#ASMEND
1917; GFX90A-NEXT:    v_perm_b32 v1, v2, v3, s4
1918; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1919; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1920; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1921; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1922;
1923; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_0:
1924; GFX940:       ; %bb.0:
1925; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1927; GFX940-NEXT:    ;;#ASMSTART
1928; GFX940-NEXT:    ; def v[0:1]
1929; GFX940-NEXT:    ;;#ASMEND
1930; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1931; GFX940-NEXT:    ;;#ASMSTART
1932; GFX940-NEXT:    ; def v[2:3]
1933; GFX940-NEXT:    ;;#ASMEND
1934; GFX940-NEXT:    s_nop 0
1935; GFX940-NEXT:    v_perm_b32 v1, v2, v3, s2
1936; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1937; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1938; GFX940-NEXT:    s_waitcnt vmcnt(0)
1939; GFX940-NEXT:    s_setpc_b64 s[30:31]
1940  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1941  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1942  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
1943  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1944  ret void
1945}
1946
; Shuffle of (%vec0, %vec1) with mask <7, 6, 0>: lane 0 is element 3 of %vec1
; (mask index 7 = 4 + 3), lane 1 is element 2 of %vec1 (mask index 6 = 4 + 2),
; lane 2 is element 0 of %vec0.
; Each input comes from inline asm whose body is only a comment and whose
; output constraint is "=v", so it is an opaque <4 x i16> value that the
; expected output shows as a v[N:N+1] register pair. The <3 x i16> result is
; stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
1947define void @v_shuffle_v3i16_v4i16__7_6_0(ptr addrspace(1) inreg %ptr) {
1948; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_0:
1949; GFX900:       ; %bb.0:
1950; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951; GFX900-NEXT:    ;;#ASMSTART
1952; GFX900-NEXT:    ; def v[0:1]
1953; GFX900-NEXT:    ;;#ASMEND
1954; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1955; GFX900-NEXT:    ;;#ASMSTART
1956; GFX900-NEXT:    ; def v[1:2]
1957; GFX900-NEXT:    ;;#ASMEND
1958; GFX900-NEXT:    v_alignbit_b32 v1, v2, v2, 16
1959; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1960; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1961; GFX900-NEXT:    s_waitcnt vmcnt(0)
1962; GFX900-NEXT:    s_setpc_b64 s[30:31]
1963;
1964; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_0:
1965; GFX90A:       ; %bb.0:
1966; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1967; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1968; GFX90A-NEXT:    ;;#ASMSTART
1969; GFX90A-NEXT:    ; def v[0:1]
1970; GFX90A-NEXT:    ;;#ASMEND
1971; GFX90A-NEXT:    ;;#ASMSTART
1972; GFX90A-NEXT:    ; def v[2:3]
1973; GFX90A-NEXT:    ;;#ASMEND
1974; GFX90A-NEXT:    v_alignbit_b32 v1, v3, v3, 16
1975; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1976; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1977; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1978; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1979;
1980; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_0:
1981; GFX940:       ; %bb.0:
1982; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1983; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1984; GFX940-NEXT:    ;;#ASMSTART
1985; GFX940-NEXT:    ; def v[0:1]
1986; GFX940-NEXT:    ;;#ASMEND
1987; GFX940-NEXT:    ;;#ASMSTART
1988; GFX940-NEXT:    ; def v[2:3]
1989; GFX940-NEXT:    ;;#ASMEND
1990; GFX940-NEXT:    s_nop 0
1991; GFX940-NEXT:    v_alignbit_b32 v1, v3, v3, 16
1992; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1993; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1994; GFX940-NEXT:    s_waitcnt vmcnt(0)
1995; GFX940-NEXT:    s_setpc_b64 s[30:31]
1996  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1997  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1998  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
1999  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2000  ret void
2001}
2002
; Single-input shuffle of (%vec0, poison) with mask <poison, 1, 1>: lane 0 is
; unconstrained; lanes 1 and 2 both repeat element 1 of %vec0.
; %vec0 comes from inline asm whose body is only a comment and whose output
; constraint is "=v", so it is an opaque <4 x i16> value that the expected
; output shows as v[0:1]. The <3 x i16> result is stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
2003define void @v_shuffle_v3i16_v4i16__u_1_1(ptr addrspace(1) inreg %ptr) {
2004; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_1_1:
2005; GFX900:       ; %bb.0:
2006; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2008; GFX900-NEXT:    ;;#ASMSTART
2009; GFX900-NEXT:    ; def v[0:1]
2010; GFX900-NEXT:    ;;#ASMEND
2011; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
2012; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2013; GFX900-NEXT:    s_waitcnt vmcnt(0)
2014; GFX900-NEXT:    s_setpc_b64 s[30:31]
2015;
2016; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_1_1:
2017; GFX90A:       ; %bb.0:
2018; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2019; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2020; GFX90A-NEXT:    ;;#ASMSTART
2021; GFX90A-NEXT:    ; def v[0:1]
2022; GFX90A-NEXT:    ;;#ASMEND
2023; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
2024; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2025; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2026; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2027;
2028; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_1_1:
2029; GFX940:       ; %bb.0:
2030; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2031; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2032; GFX940-NEXT:    ;;#ASMSTART
2033; GFX940-NEXT:    ; def v[0:1]
2034; GFX940-NEXT:    ;;#ASMEND
2035; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
2036; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2037; GFX940-NEXT:    s_waitcnt vmcnt(0)
2038; GFX940-NEXT:    s_setpc_b64 s[30:31]
2039  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2040  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 1, i32 1>
2041  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2042  ret void
2043}
2044
; Single-input shuffle of (%vec0, poison) with mask <0, 1, 1>: lane 0 is
; element 0 of %vec0; lanes 1 and 2 both repeat element 1 of %vec0.
; %vec0 comes from inline asm whose body is only a comment and whose output
; constraint is "=v", so it is an opaque <4 x i16> value that the expected
; output shows as v[0:1]. The <3 x i16> result is stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
2045define void @v_shuffle_v3i16_v4i16__0_1_1(ptr addrspace(1) inreg %ptr) {
2046; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_1_1:
2047; GFX900:       ; %bb.0:
2048; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2049; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2050; GFX900-NEXT:    ;;#ASMSTART
2051; GFX900-NEXT:    ; def v[0:1]
2052; GFX900-NEXT:    ;;#ASMEND
2053; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
2054; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2055; GFX900-NEXT:    s_waitcnt vmcnt(0)
2056; GFX900-NEXT:    s_setpc_b64 s[30:31]
2057;
2058; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_1_1:
2059; GFX90A:       ; %bb.0:
2060; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2061; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2062; GFX90A-NEXT:    ;;#ASMSTART
2063; GFX90A-NEXT:    ; def v[0:1]
2064; GFX90A-NEXT:    ;;#ASMEND
2065; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
2066; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2067; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2068; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2069;
2070; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_1_1:
2071; GFX940:       ; %bb.0:
2072; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2073; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2074; GFX940-NEXT:    ;;#ASMSTART
2075; GFX940-NEXT:    ; def v[0:1]
2076; GFX940-NEXT:    ;;#ASMEND
2077; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
2078; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2079; GFX940-NEXT:    s_waitcnt vmcnt(0)
2080; GFX940-NEXT:    s_setpc_b64 s[30:31]
2081  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2082  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 1>
2083  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2084  ret void
2085}
2086
; Single-input shuffle of (%vec0, poison) with mask <1, 1, 1>: all three
; result lanes repeat element 1 of %vec0 (a splat of that element).
; %vec0 comes from inline asm whose body is only a comment and whose output
; constraint is "=v", so it is an opaque <4 x i16> value that the expected
; output shows as v[0:1]. The <3 x i16> result is stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
2087define void @v_shuffle_v3i16_v4i16__1_1_1(ptr addrspace(1) inreg %ptr) {
2088; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_1_1:
2089; GFX900:       ; %bb.0:
2090; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2091; GFX900-NEXT:    ;;#ASMSTART
2092; GFX900-NEXT:    ; def v[0:1]
2093; GFX900-NEXT:    ;;#ASMEND
2094; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2095; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2096; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2097; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2098; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
2099; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2100; GFX900-NEXT:    s_waitcnt vmcnt(0)
2101; GFX900-NEXT:    s_setpc_b64 s[30:31]
2102;
2103; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_1_1:
2104; GFX90A:       ; %bb.0:
2105; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2106; GFX90A-NEXT:    ;;#ASMSTART
2107; GFX90A-NEXT:    ; def v[0:1]
2108; GFX90A-NEXT:    ;;#ASMEND
2109; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2110; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2111; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2112; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2113; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
2114; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2115; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2116; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2117;
2118; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_1_1:
2119; GFX940:       ; %bb.0:
2120; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2121; GFX940-NEXT:    ;;#ASMSTART
2122; GFX940-NEXT:    ; def v[0:1]
2123; GFX940-NEXT:    ;;#ASMEND
2124; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2125; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2126; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2127; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2128; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
2129; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2130; GFX940-NEXT:    s_waitcnt vmcnt(0)
2131; GFX940-NEXT:    s_setpc_b64 s[30:31]
2132  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2133  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 1, i32 1>
2134  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2135  ret void
2136}
2137
; Single-input shuffle of (%vec0, poison) with mask <2, 1, 1>: lane 0 is
; element 2 of %vec0; lanes 1 and 2 both repeat element 1 of %vec0.
; %vec0 comes from inline asm whose body is only a comment and whose output
; constraint is "=v", so it is an opaque <4 x i16> value that the expected
; output shows as v[0:1]. The <3 x i16> result is stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
2138define void @v_shuffle_v3i16_v4i16__2_1_1(ptr addrspace(1) inreg %ptr) {
2139; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_1_1:
2140; GFX900:       ; %bb.0:
2141; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2142; GFX900-NEXT:    ;;#ASMSTART
2143; GFX900-NEXT:    ; def v[0:1]
2144; GFX900-NEXT:    ;;#ASMEND
2145; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2146; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2147; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
2148; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2149; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2150; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
2151; GFX900-NEXT:    s_waitcnt vmcnt(0)
2152; GFX900-NEXT:    s_setpc_b64 s[30:31]
2153;
2154; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_1_1:
2155; GFX90A:       ; %bb.0:
2156; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2157; GFX90A-NEXT:    ;;#ASMSTART
2158; GFX90A-NEXT:    ; def v[0:1]
2159; GFX90A-NEXT:    ;;#ASMEND
2160; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2161; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2162; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v0
2163; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2164; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2165; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
2166; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2167; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2168;
2169; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_1_1:
2170; GFX940:       ; %bb.0:
2171; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2172; GFX940-NEXT:    ;;#ASMSTART
2173; GFX940-NEXT:    ; def v[0:1]
2174; GFX940-NEXT:    ;;#ASMEND
2175; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2176; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2177; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v0
2178; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2179; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2180; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
2181; GFX940-NEXT:    s_waitcnt vmcnt(0)
2182; GFX940-NEXT:    s_setpc_b64 s[30:31]
2183  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2184  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 1, i32 1>
2185  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2186  ret void
2187}
2188
; Single-input shuffle of (%vec0, poison) with mask <3, 1, 1>: lane 0 is
; element 3 of %vec0; lanes 1 and 2 both repeat element 1 of %vec0.
; %vec0 comes from inline asm whose body is only a comment and whose output
; constraint is "=v", so it is an opaque <4 x i16> value that the expected
; output shows as v[0:1]. The <3 x i16> result is stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
2189define void @v_shuffle_v3i16_v4i16__3_1_1(ptr addrspace(1) inreg %ptr) {
2190; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_1_1:
2191; GFX900:       ; %bb.0:
2192; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2193; GFX900-NEXT:    ;;#ASMSTART
2194; GFX900-NEXT:    ; def v[0:1]
2195; GFX900-NEXT:    ;;#ASMEND
2196; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2197; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2198; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
2199; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2200; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
2201; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2202; GFX900-NEXT:    s_waitcnt vmcnt(0)
2203; GFX900-NEXT:    s_setpc_b64 s[30:31]
2204;
2205; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_1_1:
2206; GFX90A:       ; %bb.0:
2207; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2208; GFX90A-NEXT:    ;;#ASMSTART
2209; GFX90A-NEXT:    ; def v[0:1]
2210; GFX90A-NEXT:    ;;#ASMEND
2211; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2212; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2213; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
2214; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2215; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
2216; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2217; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2218; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2219;
2220; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_1_1:
2221; GFX940:       ; %bb.0:
2222; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2223; GFX940-NEXT:    ;;#ASMSTART
2224; GFX940-NEXT:    ; def v[0:1]
2225; GFX940-NEXT:    ;;#ASMEND
2226; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2227; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2228; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
2229; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2230; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
2231; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2232; GFX940-NEXT:    s_waitcnt vmcnt(0)
2233; GFX940-NEXT:    s_setpc_b64 s[30:31]
2234  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2235  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 1, i32 1>
2236  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2237  ret void
2238}
2239
; Shuffle of (%vec0, poison) with mask <4, 1, 1>: mask index 4 selects element
; 0 of the second operand, which is poison here, so lane 0 is unconstrained;
; lanes 1 and 2 both repeat element 1 of %vec0. The expected output is the
; same instruction sequence as the __u_1_1 test.
; NOTE(review): the __7_*_0 tests in this file feed a second asm-defined
; %vec1 as the second operand; confirm that poison is intended for this case.
; %vec0 comes from inline asm whose body is only a comment and whose output
; constraint is "=v", so it is an opaque <4 x i16> value that the expected
; output shows as v[0:1]. The <3 x i16> result is stored through %ptr.
; The assertion lines are autogenerated: regenerate them, do not hand-edit.
2240define void @v_shuffle_v3i16_v4i16__4_1_1(ptr addrspace(1) inreg %ptr) {
2241; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_1_1:
2242; GFX900:       ; %bb.0:
2243; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2244; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2245; GFX900-NEXT:    ;;#ASMSTART
2246; GFX900-NEXT:    ; def v[0:1]
2247; GFX900-NEXT:    ;;#ASMEND
2248; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
2249; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2250; GFX900-NEXT:    s_waitcnt vmcnt(0)
2251; GFX900-NEXT:    s_setpc_b64 s[30:31]
2252;
2253; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_1_1:
2254; GFX90A:       ; %bb.0:
2255; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2256; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2257; GFX90A-NEXT:    ;;#ASMSTART
2258; GFX90A-NEXT:    ; def v[0:1]
2259; GFX90A-NEXT:    ;;#ASMEND
2260; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
2261; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2262; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2263; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2264;
2265; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_1_1:
2266; GFX940:       ; %bb.0:
2267; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2268; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2269; GFX940-NEXT:    ;;#ASMSTART
2270; GFX940-NEXT:    ; def v[0:1]
2271; GFX940-NEXT:    ;;#ASMEND
2272; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
2273; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2274; GFX940-NEXT:    s_waitcnt vmcnt(0)
2275; GFX940-NEXT:    s_setpc_b64 s[30:31]
2276  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2277  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 1, i32 1>
2278  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2279  ret void
2280}
2281
; Two-input shuffle with mask <5, 1, 1>: result[0] = %vec1[1] and
; result[1] = result[2] = %vec0[1]; the <3 x i16> result is stored to %ptr.
; Both inputs come from inline asm with a "=v" output constraint. Every target
; is expected to use v_perm_b32 with selector 0x7060302 plus a 16-bit
; v_lshrrev_b32, followed by a dword store and a short store at offset 4;
; gfx940 additionally expects an s_nop 0 ahead of the v_perm_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2282define void @v_shuffle_v3i16_v4i16__5_1_1(ptr addrspace(1) inreg %ptr) {
2283; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_1_1:
2284; GFX900:       ; %bb.0:
2285; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2286; GFX900-NEXT:    ;;#ASMSTART
2287; GFX900-NEXT:    ; def v[0:1]
2288; GFX900-NEXT:    ;;#ASMEND
2289; GFX900-NEXT:    ;;#ASMSTART
2290; GFX900-NEXT:    ; def v[1:2]
2291; GFX900-NEXT:    ;;#ASMEND
2292; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2293; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2294; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
2295; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2296; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2297; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
2298; GFX900-NEXT:    s_waitcnt vmcnt(0)
2299; GFX900-NEXT:    s_setpc_b64 s[30:31]
2300;
2301; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_1_1:
2302; GFX90A:       ; %bb.0:
2303; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2304; GFX90A-NEXT:    ;;#ASMSTART
2305; GFX90A-NEXT:    ; def v[0:1]
2306; GFX90A-NEXT:    ;;#ASMEND
2307; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2308; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2309; GFX90A-NEXT:    ;;#ASMSTART
2310; GFX90A-NEXT:    ; def v[2:3]
2311; GFX90A-NEXT:    ;;#ASMEND
2312; GFX90A-NEXT:    v_perm_b32 v1, v0, v2, s4
2313; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2314; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2315; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
2316; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2317; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2318;
2319; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_1_1:
2320; GFX940:       ; %bb.0:
2321; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2322; GFX940-NEXT:    ;;#ASMSTART
2323; GFX940-NEXT:    ; def v[0:1]
2324; GFX940-NEXT:    ;;#ASMEND
2325; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2326; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2327; GFX940-NEXT:    ;;#ASMSTART
2328; GFX940-NEXT:    ; def v[2:3]
2329; GFX940-NEXT:    ;;#ASMEND
2330; GFX940-NEXT:    s_nop 0
2331; GFX940-NEXT:    v_perm_b32 v1, v0, v2, s2
2332; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2333; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2334; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2335; GFX940-NEXT:    s_waitcnt vmcnt(0)
2336; GFX940-NEXT:    s_setpc_b64 s[30:31]
2337  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2338  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2339  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
2340  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2341  ret void
2342}
2343
; Two-input shuffle with mask <6, 1, 1>: result[0] = %vec1[2] and
; result[1] = result[2] = %vec0[1]; the <3 x i16> result is stored to %ptr.
; Every target is expected to merge the two sources with v_bfi_b32 using the
; constant 0xffff held in an SGPR, and to produce the third element with a
; 16-bit v_lshrrev_b32 of v0. gfx940 additionally expects an s_nop 0 ahead of
; the v_bfi_b32. Check lines are autogenerated (see the NOTE at the top of
; the file).
2344define void @v_shuffle_v3i16_v4i16__6_1_1(ptr addrspace(1) inreg %ptr) {
2345; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_1_1:
2346; GFX900:       ; %bb.0:
2347; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2348; GFX900-NEXT:    ;;#ASMSTART
2349; GFX900-NEXT:    ; def v[0:1]
2350; GFX900-NEXT:    ;;#ASMEND
2351; GFX900-NEXT:    ;;#ASMSTART
2352; GFX900-NEXT:    ; def v[1:2]
2353; GFX900-NEXT:    ;;#ASMEND
2354; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2355; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2356; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v0
2357; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2358; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
2359; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2360; GFX900-NEXT:    s_waitcnt vmcnt(0)
2361; GFX900-NEXT:    s_setpc_b64 s[30:31]
2362;
2363; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_1_1:
2364; GFX90A:       ; %bb.0:
2365; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2366; GFX90A-NEXT:    ;;#ASMSTART
2367; GFX90A-NEXT:    ; def v[0:1]
2368; GFX90A-NEXT:    ;;#ASMEND
2369; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2370; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2371; GFX90A-NEXT:    ;;#ASMSTART
2372; GFX90A-NEXT:    ; def v[2:3]
2373; GFX90A-NEXT:    ;;#ASMEND
2374; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v0
2375; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2376; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
2377; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2378; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2379; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2380;
2381; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_1_1:
2382; GFX940:       ; %bb.0:
2383; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2384; GFX940-NEXT:    ;;#ASMSTART
2385; GFX940-NEXT:    ; def v[0:1]
2386; GFX940-NEXT:    ;;#ASMEND
2387; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2388; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2389; GFX940-NEXT:    ;;#ASMSTART
2390; GFX940-NEXT:    ; def v[2:3]
2391; GFX940-NEXT:    ;;#ASMEND
2392; GFX940-NEXT:    s_nop 0
2393; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v0
2394; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2395; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2396; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2397; GFX940-NEXT:    s_waitcnt vmcnt(0)
2398; GFX940-NEXT:    s_setpc_b64 s[30:31]
2399  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2400  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2401  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
2402  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2403  ret void
2404}
2405
; Two-input shuffle with mask <7, 1, 1>: result[0] = %vec1[3] and
; result[1] = result[2] = %vec0[1]; the <3 x i16> result is stored to %ptr.
; Every target is expected to use v_perm_b32 with selector 0x7060302 on v0 and
; the upper register of the second asm def, plus a 16-bit v_lshrrev_b32 of v0
; for the third element. gfx940 additionally expects an s_nop 0 ahead of the
; v_perm_b32. Check lines are autogenerated (see the NOTE at the top of the
; file).
2406define void @v_shuffle_v3i16_v4i16__7_1_1(ptr addrspace(1) inreg %ptr) {
2407; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_1:
2408; GFX900:       ; %bb.0:
2409; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2410; GFX900-NEXT:    ;;#ASMSTART
2411; GFX900-NEXT:    ; def v[0:1]
2412; GFX900-NEXT:    ;;#ASMEND
2413; GFX900-NEXT:    ;;#ASMSTART
2414; GFX900-NEXT:    ; def v[1:2]
2415; GFX900-NEXT:    ;;#ASMEND
2416; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2417; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2418; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
2419; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2420; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2421; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
2422; GFX900-NEXT:    s_waitcnt vmcnt(0)
2423; GFX900-NEXT:    s_setpc_b64 s[30:31]
2424;
2425; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_1:
2426; GFX90A:       ; %bb.0:
2427; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2428; GFX90A-NEXT:    ;;#ASMSTART
2429; GFX90A-NEXT:    ; def v[0:1]
2430; GFX90A-NEXT:    ;;#ASMEND
2431; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2432; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2433; GFX90A-NEXT:    ;;#ASMSTART
2434; GFX90A-NEXT:    ; def v[2:3]
2435; GFX90A-NEXT:    ;;#ASMEND
2436; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
2437; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2438; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2439; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
2440; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2441; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2442;
2443; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_1:
2444; GFX940:       ; %bb.0:
2445; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2446; GFX940-NEXT:    ;;#ASMSTART
2447; GFX940-NEXT:    ; def v[0:1]
2448; GFX940-NEXT:    ;;#ASMEND
2449; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2450; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2451; GFX940-NEXT:    ;;#ASMSTART
2452; GFX940-NEXT:    ; def v[2:3]
2453; GFX940-NEXT:    ;;#ASMEND
2454; GFX940-NEXT:    s_nop 0
2455; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
2456; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2457; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2458; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2459; GFX940-NEXT:    s_waitcnt vmcnt(0)
2460; GFX940-NEXT:    s_setpc_b64 s[30:31]
2461  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2462  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2463  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
2464  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2465  ret void
2466}
2467
; Two-input shuffle with mask <7, poison, 1>: result[0] = %vec1[3],
; result[1] is unspecified, result[2] = %vec0[1]; the <3 x i16> result is
; stored to %ptr. The expected v_alignbit_b32 (shift 16) takes an SGPR as its
; first source (s4, or s0 on gfx940) rather than an asm-defined VGPR --
; presumably because that half of the dword corresponds to the poison lane;
; confirm against the backend before relying on it. The third element is
; written with global_store_short_d16_hi from v0.
; Check lines are autogenerated (see the NOTE at the top of the file).
2468define void @v_shuffle_v3i16_v4i16__7_u_1(ptr addrspace(1) inreg %ptr) {
2469; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_1:
2470; GFX900:       ; %bb.0:
2471; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2472; GFX900-NEXT:    ;;#ASMSTART
2473; GFX900-NEXT:    ; def v[0:1]
2474; GFX900-NEXT:    ;;#ASMEND
2475; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2476; GFX900-NEXT:    ;;#ASMSTART
2477; GFX900-NEXT:    ; def v[1:2]
2478; GFX900-NEXT:    ;;#ASMEND
2479; GFX900-NEXT:    v_alignbit_b32 v1, s4, v2, 16
2480; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2481; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2482; GFX900-NEXT:    s_waitcnt vmcnt(0)
2483; GFX900-NEXT:    s_setpc_b64 s[30:31]
2484;
2485; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_1:
2486; GFX90A:       ; %bb.0:
2487; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2488; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2489; GFX90A-NEXT:    ;;#ASMSTART
2490; GFX90A-NEXT:    ; def v[0:1]
2491; GFX90A-NEXT:    ;;#ASMEND
2492; GFX90A-NEXT:    ;;#ASMSTART
2493; GFX90A-NEXT:    ; def v[2:3]
2494; GFX90A-NEXT:    ;;#ASMEND
2495; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v3, 16
2496; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2497; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2498; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2499; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2500;
2501; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_1:
2502; GFX940:       ; %bb.0:
2503; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2504; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2505; GFX940-NEXT:    ;;#ASMSTART
2506; GFX940-NEXT:    ; def v[0:1]
2507; GFX940-NEXT:    ;;#ASMEND
2508; GFX940-NEXT:    ;;#ASMSTART
2509; GFX940-NEXT:    ; def v[2:3]
2510; GFX940-NEXT:    ;;#ASMEND
2511; GFX940-NEXT:    s_nop 0
2512; GFX940-NEXT:    v_alignbit_b32 v1, s0, v3, 16
2513; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2514; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2515; GFX940-NEXT:    s_waitcnt vmcnt(0)
2516; GFX940-NEXT:    s_setpc_b64 s[30:31]
2517  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2518  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2519  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
2520  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2521  ret void
2522}
2523
; Two-input shuffle with mask <7, 0, 1>: result[0] = %vec1[3],
; result[1] = %vec0[0], result[2] = %vec0[1]; the <3 x i16> result is stored
; to %ptr. Every target is expected to form the first dword with
; v_alignbit_b32 (shift 16) of v0 and the upper register of the second asm
; def, and to write the third element with global_store_short_d16_hi from v0.
; gfx940 additionally expects an s_nop 0 ahead of the v_alignbit_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2524define void @v_shuffle_v3i16_v4i16__7_0_1(ptr addrspace(1) inreg %ptr) {
2525; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_1:
2526; GFX900:       ; %bb.0:
2527; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2528; GFX900-NEXT:    ;;#ASMSTART
2529; GFX900-NEXT:    ; def v[0:1]
2530; GFX900-NEXT:    ;;#ASMEND
2531; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2532; GFX900-NEXT:    ;;#ASMSTART
2533; GFX900-NEXT:    ; def v[1:2]
2534; GFX900-NEXT:    ;;#ASMEND
2535; GFX900-NEXT:    v_alignbit_b32 v1, v0, v2, 16
2536; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2537; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2538; GFX900-NEXT:    s_waitcnt vmcnt(0)
2539; GFX900-NEXT:    s_setpc_b64 s[30:31]
2540;
2541; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_1:
2542; GFX90A:       ; %bb.0:
2543; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2544; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2545; GFX90A-NEXT:    ;;#ASMSTART
2546; GFX90A-NEXT:    ; def v[0:1]
2547; GFX90A-NEXT:    ;;#ASMEND
2548; GFX90A-NEXT:    ;;#ASMSTART
2549; GFX90A-NEXT:    ; def v[2:3]
2550; GFX90A-NEXT:    ;;#ASMEND
2551; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v3, 16
2552; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2553; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2554; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2555; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2556;
2557; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_1:
2558; GFX940:       ; %bb.0:
2559; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2560; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2561; GFX940-NEXT:    ;;#ASMSTART
2562; GFX940-NEXT:    ; def v[0:1]
2563; GFX940-NEXT:    ;;#ASMEND
2564; GFX940-NEXT:    ;;#ASMSTART
2565; GFX940-NEXT:    ; def v[2:3]
2566; GFX940-NEXT:    ;;#ASMEND
2567; GFX940-NEXT:    s_nop 0
2568; GFX940-NEXT:    v_alignbit_b32 v1, v0, v3, 16
2569; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2570; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2571; GFX940-NEXT:    s_waitcnt vmcnt(0)
2572; GFX940-NEXT:    s_setpc_b64 s[30:31]
2573  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2574  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2575  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
2576  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2577  ret void
2578}
2579
; Two-input shuffle with mask <7, 2, 1>: result[0] = %vec1[3],
; result[1] = %vec0[2], result[2] = %vec0[1]; the <3 x i16> result is stored
; to %ptr. All three targets expect the two asm defs in v[0:1] and v[2:3],
; a v_alignbit_b32 (shift 16) of v1 and v3 for the first dword, and
; global_store_short_d16_hi from v0 for the third element. gfx940
; additionally expects an s_nop 0 ahead of the v_alignbit_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2580define void @v_shuffle_v3i16_v4i16__7_2_1(ptr addrspace(1) inreg %ptr) {
2581; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_1:
2582; GFX900:       ; %bb.0:
2583; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2584; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2585; GFX900-NEXT:    ;;#ASMSTART
2586; GFX900-NEXT:    ; def v[0:1]
2587; GFX900-NEXT:    ;;#ASMEND
2588; GFX900-NEXT:    ;;#ASMSTART
2589; GFX900-NEXT:    ; def v[2:3]
2590; GFX900-NEXT:    ;;#ASMEND
2591; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
2592; GFX900-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2593; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
2594; GFX900-NEXT:    s_waitcnt vmcnt(0)
2595; GFX900-NEXT:    s_setpc_b64 s[30:31]
2596;
2597; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_1:
2598; GFX90A:       ; %bb.0:
2599; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2600; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2601; GFX90A-NEXT:    ;;#ASMSTART
2602; GFX90A-NEXT:    ; def v[0:1]
2603; GFX90A-NEXT:    ;;#ASMEND
2604; GFX90A-NEXT:    ;;#ASMSTART
2605; GFX90A-NEXT:    ; def v[2:3]
2606; GFX90A-NEXT:    ;;#ASMEND
2607; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v3, 16
2608; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2609; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2610; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2611; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2612;
2613; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_1:
2614; GFX940:       ; %bb.0:
2615; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2616; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2617; GFX940-NEXT:    ;;#ASMSTART
2618; GFX940-NEXT:    ; def v[0:1]
2619; GFX940-NEXT:    ;;#ASMEND
2620; GFX940-NEXT:    ;;#ASMSTART
2621; GFX940-NEXT:    ; def v[2:3]
2622; GFX940-NEXT:    ;;#ASMEND
2623; GFX940-NEXT:    s_nop 0
2624; GFX940-NEXT:    v_alignbit_b32 v1, v1, v3, 16
2625; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2626; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2627; GFX940-NEXT:    s_waitcnt vmcnt(0)
2628; GFX940-NEXT:    s_setpc_b64 s[30:31]
2629  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2630  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2631  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
2632  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2633  ret void
2634}
2635
; Two-input shuffle with mask <7, 3, 1>: result[0] = %vec1[3],
; result[1] = %vec0[3], result[2] = %vec0[1]; the <3 x i16> result is stored
; to %ptr. All three targets expect the two asm defs in v[0:1] and v[2:3],
; a v_perm_b32 of v1 and v3 with selector 0x7060302 for the first dword, and
; global_store_short_d16_hi from v0 for the third element. gfx940
; additionally expects an s_nop 0 ahead of the v_perm_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2636define void @v_shuffle_v3i16_v4i16__7_3_1(ptr addrspace(1) inreg %ptr) {
2637; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_1:
2638; GFX900:       ; %bb.0:
2639; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2640; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2641; GFX900-NEXT:    ;;#ASMSTART
2642; GFX900-NEXT:    ; def v[0:1]
2643; GFX900-NEXT:    ;;#ASMEND
2644; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2645; GFX900-NEXT:    ;;#ASMSTART
2646; GFX900-NEXT:    ; def v[2:3]
2647; GFX900-NEXT:    ;;#ASMEND
2648; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
2649; GFX900-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2650; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
2651; GFX900-NEXT:    s_waitcnt vmcnt(0)
2652; GFX900-NEXT:    s_setpc_b64 s[30:31]
2653;
2654; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_1:
2655; GFX90A:       ; %bb.0:
2656; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2657; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2658; GFX90A-NEXT:    ;;#ASMSTART
2659; GFX90A-NEXT:    ; def v[0:1]
2660; GFX90A-NEXT:    ;;#ASMEND
2661; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2662; GFX90A-NEXT:    ;;#ASMSTART
2663; GFX90A-NEXT:    ; def v[2:3]
2664; GFX90A-NEXT:    ;;#ASMEND
2665; GFX90A-NEXT:    v_perm_b32 v1, v1, v3, s4
2666; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2667; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2668; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2669; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2670;
2671; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_1:
2672; GFX940:       ; %bb.0:
2673; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2674; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2675; GFX940-NEXT:    ;;#ASMSTART
2676; GFX940-NEXT:    ; def v[0:1]
2677; GFX940-NEXT:    ;;#ASMEND
2678; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2679; GFX940-NEXT:    ;;#ASMSTART
2680; GFX940-NEXT:    ; def v[2:3]
2681; GFX940-NEXT:    ;;#ASMEND
2682; GFX940-NEXT:    s_nop 0
2683; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s2
2684; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2685; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2686; GFX940-NEXT:    s_waitcnt vmcnt(0)
2687; GFX940-NEXT:    s_setpc_b64 s[30:31]
2688  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2689  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2690  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
2691  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2692  ret void
2693}
2694
; Two-input shuffle with mask <7, 4, 1>: result[0] = %vec1[3],
; result[1] = %vec1[0], result[2] = %vec0[1]; the <3 x i16> result is stored
; to %ptr. Every target is expected to form the first dword with
; v_alignbit_b32 (shift 16) whose two sources are both registers of the second
; asm def (v1/v2 on gfx900, v2/v3 on gfx90a and gfx940), and to write the
; third element with global_store_short_d16_hi from v0. gfx940 additionally
; expects an s_nop 0 ahead of the v_alignbit_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2695define void @v_shuffle_v3i16_v4i16__7_4_1(ptr addrspace(1) inreg %ptr) {
2696; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_1:
2697; GFX900:       ; %bb.0:
2698; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2699; GFX900-NEXT:    ;;#ASMSTART
2700; GFX900-NEXT:    ; def v[0:1]
2701; GFX900-NEXT:    ;;#ASMEND
2702; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2703; GFX900-NEXT:    ;;#ASMSTART
2704; GFX900-NEXT:    ; def v[1:2]
2705; GFX900-NEXT:    ;;#ASMEND
2706; GFX900-NEXT:    v_alignbit_b32 v1, v1, v2, 16
2707; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2708; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2709; GFX900-NEXT:    s_waitcnt vmcnt(0)
2710; GFX900-NEXT:    s_setpc_b64 s[30:31]
2711;
2712; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_1:
2713; GFX90A:       ; %bb.0:
2714; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2715; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2716; GFX90A-NEXT:    ;;#ASMSTART
2717; GFX90A-NEXT:    ; def v[0:1]
2718; GFX90A-NEXT:    ;;#ASMEND
2719; GFX90A-NEXT:    ;;#ASMSTART
2720; GFX90A-NEXT:    ; def v[2:3]
2721; GFX90A-NEXT:    ;;#ASMEND
2722; GFX90A-NEXT:    v_alignbit_b32 v1, v2, v3, 16
2723; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2724; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2725; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2726; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2727;
2728; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_1:
2729; GFX940:       ; %bb.0:
2730; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2731; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2732; GFX940-NEXT:    ;;#ASMSTART
2733; GFX940-NEXT:    ; def v[0:1]
2734; GFX940-NEXT:    ;;#ASMEND
2735; GFX940-NEXT:    ;;#ASMSTART
2736; GFX940-NEXT:    ; def v[2:3]
2737; GFX940-NEXT:    ;;#ASMEND
2738; GFX940-NEXT:    s_nop 0
2739; GFX940-NEXT:    v_alignbit_b32 v1, v2, v3, 16
2740; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2741; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2742; GFX940-NEXT:    s_waitcnt vmcnt(0)
2743; GFX940-NEXT:    s_setpc_b64 s[30:31]
2744  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2745  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2746  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
2747  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2748  ret void
2749}
2750
; Two-input shuffle with mask <7, 5, 1>: result[0] = %vec1[3],
; result[1] = %vec1[1], result[2] = %vec0[1]; the <3 x i16> result is stored
; to %ptr. Every target is expected to form the first dword with v_perm_b32
; (selector 0x7060302) whose two sources are both registers of the second asm
; def (v1/v2 on gfx900, v2/v3 on gfx90a and gfx940), and to write the third
; element with global_store_short_d16_hi from v0. gfx940 additionally expects
; an s_nop 0 ahead of the v_perm_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2751define void @v_shuffle_v3i16_v4i16__7_5_1(ptr addrspace(1) inreg %ptr) {
2752; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_1:
2753; GFX900:       ; %bb.0:
2754; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2755; GFX900-NEXT:    ;;#ASMSTART
2756; GFX900-NEXT:    ; def v[0:1]
2757; GFX900-NEXT:    ;;#ASMEND
2758; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2759; GFX900-NEXT:    ;;#ASMSTART
2760; GFX900-NEXT:    ; def v[1:2]
2761; GFX900-NEXT:    ;;#ASMEND
2762; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2763; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
2764; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2765; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2766; GFX900-NEXT:    s_waitcnt vmcnt(0)
2767; GFX900-NEXT:    s_setpc_b64 s[30:31]
2768;
2769; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_1:
2770; GFX90A:       ; %bb.0:
2771; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2772; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2773; GFX90A-NEXT:    ;;#ASMSTART
2774; GFX90A-NEXT:    ; def v[0:1]
2775; GFX90A-NEXT:    ;;#ASMEND
2776; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2777; GFX90A-NEXT:    ;;#ASMSTART
2778; GFX90A-NEXT:    ; def v[2:3]
2779; GFX90A-NEXT:    ;;#ASMEND
2780; GFX90A-NEXT:    v_perm_b32 v1, v2, v3, s4
2781; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2782; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2783; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2784; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2785;
2786; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_1:
2787; GFX940:       ; %bb.0:
2788; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2789; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2790; GFX940-NEXT:    ;;#ASMSTART
2791; GFX940-NEXT:    ; def v[0:1]
2792; GFX940-NEXT:    ;;#ASMEND
2793; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2794; GFX940-NEXT:    ;;#ASMSTART
2795; GFX940-NEXT:    ; def v[2:3]
2796; GFX940-NEXT:    ;;#ASMEND
2797; GFX940-NEXT:    s_nop 0
2798; GFX940-NEXT:    v_perm_b32 v1, v2, v3, s2
2799; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2800; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2801; GFX940-NEXT:    s_waitcnt vmcnt(0)
2802; GFX940-NEXT:    s_setpc_b64 s[30:31]
2803  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2804  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2805  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
2806  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2807  ret void
2808}
2809
; Two-input shuffle with mask <7, 6, 1>: result[0] = %vec1[3],
; result[1] = %vec1[2], result[2] = %vec0[1]; the <3 x i16> result is stored
; to %ptr. Every target is expected to form the first dword with
; v_alignbit_b32 (shift 16) that names the same register for both sources
; (v2 on gfx900, v3 on gfx90a and gfx940), and to write the third element
; with global_store_short_d16_hi from v0. gfx940 additionally expects an
; s_nop 0 ahead of the v_alignbit_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2810define void @v_shuffle_v3i16_v4i16__7_6_1(ptr addrspace(1) inreg %ptr) {
2811; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_1:
2812; GFX900:       ; %bb.0:
2813; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2814; GFX900-NEXT:    ;;#ASMSTART
2815; GFX900-NEXT:    ; def v[0:1]
2816; GFX900-NEXT:    ;;#ASMEND
2817; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2818; GFX900-NEXT:    ;;#ASMSTART
2819; GFX900-NEXT:    ; def v[1:2]
2820; GFX900-NEXT:    ;;#ASMEND
2821; GFX900-NEXT:    v_alignbit_b32 v1, v2, v2, 16
2822; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2823; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2824; GFX900-NEXT:    s_waitcnt vmcnt(0)
2825; GFX900-NEXT:    s_setpc_b64 s[30:31]
2826;
2827; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_1:
2828; GFX90A:       ; %bb.0:
2829; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2830; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2831; GFX90A-NEXT:    ;;#ASMSTART
2832; GFX90A-NEXT:    ; def v[0:1]
2833; GFX90A-NEXT:    ;;#ASMEND
2834; GFX90A-NEXT:    ;;#ASMSTART
2835; GFX90A-NEXT:    ; def v[2:3]
2836; GFX90A-NEXT:    ;;#ASMEND
2837; GFX90A-NEXT:    v_alignbit_b32 v1, v3, v3, 16
2838; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2839; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2840; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2841; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2842;
2843; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_1:
2844; GFX940:       ; %bb.0:
2845; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2846; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2847; GFX940-NEXT:    ;;#ASMSTART
2848; GFX940-NEXT:    ; def v[0:1]
2849; GFX940-NEXT:    ;;#ASMEND
2850; GFX940-NEXT:    ;;#ASMSTART
2851; GFX940-NEXT:    ; def v[2:3]
2852; GFX940-NEXT:    ;;#ASMEND
2853; GFX940-NEXT:    s_nop 0
2854; GFX940-NEXT:    v_alignbit_b32 v1, v3, v3, 16
2855; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2856; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2857; GFX940-NEXT:    s_waitcnt vmcnt(0)
2858; GFX940-NEXT:    s_setpc_b64 s[30:31]
2859  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2860  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2861  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
2862  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2863  ret void
2864}
2865
; Single-input shuffle (second operand is poison) with mask <poison, 2, 2>:
; result[0] is unspecified and result[1] = result[2] = %vec0[2]; the
; <3 x i16> result is stored to %ptr. Every target is expected to build the
; first dword with a 16-bit v_lshlrev_b32 of v1 and to write the third element
; with a global_store_short of v1 at offset 4. gfx940 additionally expects an
; s_nop 0 ahead of the v_lshlrev_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2866define void @v_shuffle_v3i16_v4i16__u_2_2(ptr addrspace(1) inreg %ptr) {
2867; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_2_2:
2868; GFX900:       ; %bb.0:
2869; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2870; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2871; GFX900-NEXT:    ;;#ASMSTART
2872; GFX900-NEXT:    ; def v[0:1]
2873; GFX900-NEXT:    ;;#ASMEND
2874; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2875; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2876; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2877; GFX900-NEXT:    s_waitcnt vmcnt(0)
2878; GFX900-NEXT:    s_setpc_b64 s[30:31]
2879;
2880; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_2_2:
2881; GFX90A:       ; %bb.0:
2882; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2883; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2884; GFX90A-NEXT:    ;;#ASMSTART
2885; GFX90A-NEXT:    ; def v[0:1]
2886; GFX90A-NEXT:    ;;#ASMEND
2887; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2888; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2889; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2890; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2891; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2892;
2893; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_2_2:
2894; GFX940:       ; %bb.0:
2895; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2896; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2897; GFX940-NEXT:    ;;#ASMSTART
2898; GFX940-NEXT:    ; def v[0:1]
2899; GFX940-NEXT:    ;;#ASMEND
2900; GFX940-NEXT:    s_nop 0
2901; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2902; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2903; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2904; GFX940-NEXT:    s_waitcnt vmcnt(0)
2905; GFX940-NEXT:    s_setpc_b64 s[30:31]
2906  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2907  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 2, i32 2>
2908  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2909  ret void
2910}
2911
; Single-input shuffle (second operand is poison) with mask <0, 2, 2>:
; result[0] = %vec0[0] and result[1] = result[2] = %vec0[2]; the <3 x i16>
; result is stored to %ptr. Every target is expected to build the first dword
; with v_perm_b32 of v1 and v0 using selector 0x5040100, and to write the
; third element with a global_store_short of v1 at offset 4. Unlike many
; neighbouring cases, the gfx940 sequence here has no s_nop 0; the s_mov_b32
; sits directly between the asm def and the v_perm_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2912define void @v_shuffle_v3i16_v4i16__0_2_2(ptr addrspace(1) inreg %ptr) {
2913; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_2_2:
2914; GFX900:       ; %bb.0:
2915; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2916; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2917; GFX900-NEXT:    ;;#ASMSTART
2918; GFX900-NEXT:    ; def v[0:1]
2919; GFX900-NEXT:    ;;#ASMEND
2920; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2921; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
2922; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2923; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2924; GFX900-NEXT:    s_waitcnt vmcnt(0)
2925; GFX900-NEXT:    s_setpc_b64 s[30:31]
2926;
2927; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_2_2:
2928; GFX90A:       ; %bb.0:
2929; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2930; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2931; GFX90A-NEXT:    ;;#ASMSTART
2932; GFX90A-NEXT:    ; def v[0:1]
2933; GFX90A-NEXT:    ;;#ASMEND
2934; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2935; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
2936; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2937; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2938; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2939; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2940;
2941; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_2_2:
2942; GFX940:       ; %bb.0:
2943; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2944; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2945; GFX940-NEXT:    ;;#ASMSTART
2946; GFX940-NEXT:    ; def v[0:1]
2947; GFX940-NEXT:    ;;#ASMEND
2948; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2949; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
2950; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2951; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2952; GFX940-NEXT:    s_waitcnt vmcnt(0)
2953; GFX940-NEXT:    s_setpc_b64 s[30:31]
2954  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2955  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 2, i32 2>
2956  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2957  ret void
2958}
2959
; Single-input shuffle (second operand is poison) with mask <1, 2, 2>:
; result[0] = %vec0[1] and result[1] = result[2] = %vec0[2]; the <3 x i16>
; result is stored to %ptr. Every target is expected to build the first dword
; with v_alignbit_b32 (shift 16) of v1 and v0, and to write the third element
; with a global_store_short of v1 at offset 4. gfx940 additionally expects an
; s_nop 0 ahead of the v_alignbit_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
2960define void @v_shuffle_v3i16_v4i16__1_2_2(ptr addrspace(1) inreg %ptr) {
2961; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_2_2:
2962; GFX900:       ; %bb.0:
2963; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2964; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2965; GFX900-NEXT:    ;;#ASMSTART
2966; GFX900-NEXT:    ; def v[0:1]
2967; GFX900-NEXT:    ;;#ASMEND
2968; GFX900-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2969; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2970; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2971; GFX900-NEXT:    s_waitcnt vmcnt(0)
2972; GFX900-NEXT:    s_setpc_b64 s[30:31]
2973;
2974; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_2_2:
2975; GFX90A:       ; %bb.0:
2976; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2977; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2978; GFX90A-NEXT:    ;;#ASMSTART
2979; GFX90A-NEXT:    ; def v[0:1]
2980; GFX90A-NEXT:    ;;#ASMEND
2981; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2982; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2983; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2984; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2985; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2986;
2987; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_2_2:
2988; GFX940:       ; %bb.0:
2989; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2990; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2991; GFX940-NEXT:    ;;#ASMSTART
2992; GFX940-NEXT:    ; def v[0:1]
2993; GFX940-NEXT:    ;;#ASMEND
2994; GFX940-NEXT:    s_nop 0
2995; GFX940-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2996; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2997; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2998; GFX940-NEXT:    s_waitcnt vmcnt(0)
2999; GFX940-NEXT:    s_setpc_b64 s[30:31]
3000  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3001  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 2>
3002  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3003  ret void
3004}
3005
; Single-input shuffle (second operand is poison) with mask <2, 2, 2>: all
; three result elements are %vec0[2]; the <3 x i16> result is stored to %ptr.
; Every target is expected to build the first dword with v_perm_b32 that uses
; v1 for both sources and selector 0x5040100, and to write the third element
; with a global_store_short of v1 at offset 4. The gfx940 sequence has no
; s_nop 0 here; the s_mov_b32 sits directly between the asm def and the
; v_perm_b32.
; Check lines are autogenerated (see the NOTE at the top of the file).
3006define void @v_shuffle_v3i16_v4i16__2_2_2(ptr addrspace(1) inreg %ptr) {
3007; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_2_2:
3008; GFX900:       ; %bb.0:
3009; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3010; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3011; GFX900-NEXT:    ;;#ASMSTART
3012; GFX900-NEXT:    ; def v[0:1]
3013; GFX900-NEXT:    ;;#ASMEND
3014; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3015; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
3016; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3017; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3018; GFX900-NEXT:    s_waitcnt vmcnt(0)
3019; GFX900-NEXT:    s_setpc_b64 s[30:31]
3020;
3021; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_2_2:
3022; GFX90A:       ; %bb.0:
3023; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3024; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3025; GFX90A-NEXT:    ;;#ASMSTART
3026; GFX90A-NEXT:    ; def v[0:1]
3027; GFX90A-NEXT:    ;;#ASMEND
3028; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3029; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
3030; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3031; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3032; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3033; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3034;
3035; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_2_2:
3036; GFX940:       ; %bb.0:
3037; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3038; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3039; GFX940-NEXT:    ;;#ASMSTART
3040; GFX940-NEXT:    ; def v[0:1]
3041; GFX940-NEXT:    ;;#ASMEND
3042; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3043; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
3044; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3045; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3046; GFX940-NEXT:    s_waitcnt vmcnt(0)
3047; GFX940-NEXT:    s_setpc_b64 s[30:31]
3048  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3049  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 2, i32 2>
3050  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3051  ret void
3052}
3053
; Shuffle mask <3, 2, 2>: lane 0 selects element 3 of %vec0, lanes 1 and 2
; select element 2. %vec0 comes from inline asm ("=v"); the result is stored
; through the inreg addrspace(1) pointer (align 8).
; Expected code: v_alignbit_b32 v0, v1, v1, 16 builds the dword stored at
; offset 0, and v1 is stored as a short at offset 4. The gfx940 checks also
; expect an s_nop 0 between the asm block and the v_alignbit_b32.
3054define void @v_shuffle_v3i16_v4i16__3_2_2(ptr addrspace(1) inreg %ptr) {
3055; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_2_2:
3056; GFX900:       ; %bb.0:
3057; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3058; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3059; GFX900-NEXT:    ;;#ASMSTART
3060; GFX900-NEXT:    ; def v[0:1]
3061; GFX900-NEXT:    ;;#ASMEND
3062; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
3063; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3064; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3065; GFX900-NEXT:    s_waitcnt vmcnt(0)
3066; GFX900-NEXT:    s_setpc_b64 s[30:31]
3067;
3068; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_2_2:
3069; GFX90A:       ; %bb.0:
3070; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3071; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3072; GFX90A-NEXT:    ;;#ASMSTART
3073; GFX90A-NEXT:    ; def v[0:1]
3074; GFX90A-NEXT:    ;;#ASMEND
3075; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
3076; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3077; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3078; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3079; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3080;
3081; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_2_2:
3082; GFX940:       ; %bb.0:
3083; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3084; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3085; GFX940-NEXT:    ;;#ASMSTART
3086; GFX940-NEXT:    ; def v[0:1]
3087; GFX940-NEXT:    ;;#ASMEND
3088; GFX940-NEXT:    s_nop 0
3089; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
3090; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3091; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3092; GFX940-NEXT:    s_waitcnt vmcnt(0)
3093; GFX940-NEXT:    s_setpc_b64 s[30:31]
3094  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3095  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 2, i32 2>
3096  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3097  ret void
3098}
3099
; Shuffle mask <4, 2, 2>: lanes 1 and 2 select element 2 of %vec0; lane 0
; uses index 4, which addresses element 0 of the second shuffle operand.
; NOTE(review): the second operand here is poison (only %vec0 is defined),
; whereas the sibling 5_2_2 / 6_2_2 / 7_2_2 tests pass a second asm-defined
; %vec1. Lane 0 is therefore poison and the checks only expect a shift of v1.
; Confirm this is intended by the test generator and not a missing %vec1.
; Expected code: v_lshlrev_b32 of v1 by 16 builds the dword stored at
; offset 0, and v1 is stored as a short at offset 4.
3100define void @v_shuffle_v3i16_v4i16__4_2_2(ptr addrspace(1) inreg %ptr) {
3101; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_2_2:
3102; GFX900:       ; %bb.0:
3103; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3104; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3105; GFX900-NEXT:    ;;#ASMSTART
3106; GFX900-NEXT:    ; def v[0:1]
3107; GFX900-NEXT:    ;;#ASMEND
3108; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
3109; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3110; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3111; GFX900-NEXT:    s_waitcnt vmcnt(0)
3112; GFX900-NEXT:    s_setpc_b64 s[30:31]
3113;
3114; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_2_2:
3115; GFX90A:       ; %bb.0:
3116; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3117; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3118; GFX90A-NEXT:    ;;#ASMSTART
3119; GFX90A-NEXT:    ; def v[0:1]
3120; GFX90A-NEXT:    ;;#ASMEND
3121; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
3122; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3123; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3124; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3125; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3126;
3127; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_2_2:
3128; GFX940:       ; %bb.0:
3129; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3130; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3131; GFX940-NEXT:    ;;#ASMSTART
3132; GFX940-NEXT:    ; def v[0:1]
3133; GFX940-NEXT:    ;;#ASMEND
3134; GFX940-NEXT:    s_nop 0
3135; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
3136; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3137; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3138; GFX940-NEXT:    s_waitcnt vmcnt(0)
3139; GFX940-NEXT:    s_setpc_b64 s[30:31]
3140  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3141  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 2, i32 2>
3142  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3143  ret void
3144}
3145
; Shuffle mask <5, 2, 2> over two asm-defined vectors: lane 0 uses index 5
; (element 1 of %vec1), lanes 1 and 2 select element 2 of %vec0. The result
; is stored through the inreg addrspace(1) pointer (align 8).
; The checks show %vec0 in v[0:1] and %vec1 in v[2:3]. Expected code:
; v_alignbit_b32 v0, v1, v2, 16 builds the dword stored at offset 0, and v1
; is stored as a short at offset 4.
3146define void @v_shuffle_v3i16_v4i16__5_2_2(ptr addrspace(1) inreg %ptr) {
3147; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_2_2:
3148; GFX900:       ; %bb.0:
3149; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3150; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3151; GFX900-NEXT:    ;;#ASMSTART
3152; GFX900-NEXT:    ; def v[0:1]
3153; GFX900-NEXT:    ;;#ASMEND
3154; GFX900-NEXT:    ;;#ASMSTART
3155; GFX900-NEXT:    ; def v[2:3]
3156; GFX900-NEXT:    ;;#ASMEND
3157; GFX900-NEXT:    v_alignbit_b32 v0, v1, v2, 16
3158; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3159; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3160; GFX900-NEXT:    s_waitcnt vmcnt(0)
3161; GFX900-NEXT:    s_setpc_b64 s[30:31]
3162;
3163; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_2_2:
3164; GFX90A:       ; %bb.0:
3165; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3166; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3167; GFX90A-NEXT:    ;;#ASMSTART
3168; GFX90A-NEXT:    ; def v[0:1]
3169; GFX90A-NEXT:    ;;#ASMEND
3170; GFX90A-NEXT:    ;;#ASMSTART
3171; GFX90A-NEXT:    ; def v[2:3]
3172; GFX90A-NEXT:    ;;#ASMEND
3173; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v2, 16
3174; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3175; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3176; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3177; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3178;
3179; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_2_2:
3180; GFX940:       ; %bb.0:
3181; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3182; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3183; GFX940-NEXT:    ;;#ASMSTART
3184; GFX940-NEXT:    ; def v[0:1]
3185; GFX940-NEXT:    ;;#ASMEND
3186; GFX940-NEXT:    ;;#ASMSTART
3187; GFX940-NEXT:    ; def v[2:3]
3188; GFX940-NEXT:    ;;#ASMEND
3189; GFX940-NEXT:    s_nop 0
3190; GFX940-NEXT:    v_alignbit_b32 v0, v1, v2, 16
3191; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3192; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3193; GFX940-NEXT:    s_waitcnt vmcnt(0)
3194; GFX940-NEXT:    s_setpc_b64 s[30:31]
3195  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3196  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3197  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
3198  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3199  ret void
3200}
3201
; Shuffle mask <6, 2, 2> over two asm-defined vectors: lane 0 uses index 6
; (element 2 of %vec1), lanes 1 and 2 select element 2 of %vec0. The result
; is stored through the inreg addrspace(1) pointer (align 8).
; The checks show %vec0 in v[0:1] and %vec1 in v[2:3]. Expected code:
; v_perm_b32 v0, v1, v3 with selector 0x5040100 builds the dword stored at
; offset 0, and v1 is stored as a short at offset 4. The selector is
; materialised into an SGPR between the two asm blocks.
3202define void @v_shuffle_v3i16_v4i16__6_2_2(ptr addrspace(1) inreg %ptr) {
3203; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_2_2:
3204; GFX900:       ; %bb.0:
3205; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3206; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3207; GFX900-NEXT:    ;;#ASMSTART
3208; GFX900-NEXT:    ; def v[0:1]
3209; GFX900-NEXT:    ;;#ASMEND
3210; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3211; GFX900-NEXT:    ;;#ASMSTART
3212; GFX900-NEXT:    ; def v[2:3]
3213; GFX900-NEXT:    ;;#ASMEND
3214; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
3215; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3216; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3217; GFX900-NEXT:    s_waitcnt vmcnt(0)
3218; GFX900-NEXT:    s_setpc_b64 s[30:31]
3219;
3220; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_2_2:
3221; GFX90A:       ; %bb.0:
3222; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3223; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3224; GFX90A-NEXT:    ;;#ASMSTART
3225; GFX90A-NEXT:    ; def v[0:1]
3226; GFX90A-NEXT:    ;;#ASMEND
3227; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3228; GFX90A-NEXT:    ;;#ASMSTART
3229; GFX90A-NEXT:    ; def v[2:3]
3230; GFX90A-NEXT:    ;;#ASMEND
3231; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
3232; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3233; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3234; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3235; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3236;
3237; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_2_2:
3238; GFX940:       ; %bb.0:
3239; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3240; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3241; GFX940-NEXT:    ;;#ASMSTART
3242; GFX940-NEXT:    ; def v[0:1]
3243; GFX940-NEXT:    ;;#ASMEND
3244; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3245; GFX940-NEXT:    ;;#ASMSTART
3246; GFX940-NEXT:    ; def v[2:3]
3247; GFX940-NEXT:    ;;#ASMEND
3248; GFX940-NEXT:    s_nop 0
3249; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
3250; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3251; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3252; GFX940-NEXT:    s_waitcnt vmcnt(0)
3253; GFX940-NEXT:    s_setpc_b64 s[30:31]
3254  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3255  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3256  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
3257  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3258  ret void
3259}
3260
; Shuffle mask <7, 2, 2> over two asm-defined vectors: lane 0 uses index 7
; (element 3 of %vec1), lanes 1 and 2 select element 2 of %vec0. The result
; is stored through the inreg addrspace(1) pointer (align 8).
; The checks show %vec0 in v[0:1] and %vec1 in v[2:3]. Expected code:
; v_alignbit_b32 v0, v1, v3, 16 builds the dword stored at offset 0, and v1
; is stored as a short at offset 4.
3261define void @v_shuffle_v3i16_v4i16__7_2_2(ptr addrspace(1) inreg %ptr) {
3262; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_2:
3263; GFX900:       ; %bb.0:
3264; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3265; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3266; GFX900-NEXT:    ;;#ASMSTART
3267; GFX900-NEXT:    ; def v[0:1]
3268; GFX900-NEXT:    ;;#ASMEND
3269; GFX900-NEXT:    ;;#ASMSTART
3270; GFX900-NEXT:    ; def v[2:3]
3271; GFX900-NEXT:    ;;#ASMEND
3272; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
3273; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3274; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3275; GFX900-NEXT:    s_waitcnt vmcnt(0)
3276; GFX900-NEXT:    s_setpc_b64 s[30:31]
3277;
3278; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_2:
3279; GFX90A:       ; %bb.0:
3280; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3281; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3282; GFX90A-NEXT:    ;;#ASMSTART
3283; GFX90A-NEXT:    ; def v[0:1]
3284; GFX90A-NEXT:    ;;#ASMEND
3285; GFX90A-NEXT:    ;;#ASMSTART
3286; GFX90A-NEXT:    ; def v[2:3]
3287; GFX90A-NEXT:    ;;#ASMEND
3288; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
3289; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3290; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3291; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3292; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3293;
3294; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_2:
3295; GFX940:       ; %bb.0:
3296; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3297; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3298; GFX940-NEXT:    ;;#ASMSTART
3299; GFX940-NEXT:    ; def v[0:1]
3300; GFX940-NEXT:    ;;#ASMEND
3301; GFX940-NEXT:    ;;#ASMSTART
3302; GFX940-NEXT:    ; def v[2:3]
3303; GFX940-NEXT:    ;;#ASMEND
3304; GFX940-NEXT:    s_nop 0
3305; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
3306; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3307; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3308; GFX940-NEXT:    s_waitcnt vmcnt(0)
3309; GFX940-NEXT:    s_setpc_b64 s[30:31]
3310  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3311  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3312  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
3313  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3314  ret void
3315}
3316
; Shuffle mask <7, poison, 2> over two asm-defined vectors: lane 0 uses
; index 7 (element 3 of %vec1), lane 1 is poison, lane 2 selects element 2 of
; %vec0. The result is stored through the inreg addrspace(1) pointer.
; Expected code: v_alignbit_b32 with v3 as the second source builds the dword
; stored at offset 0, and v1 is stored as a short at offset 4. The first
; source of the v_alignbit_b32 is an SGPR (s4 on gfx900/gfx90a, s0 on gfx940)
; that the checked sequence never writes; presumably this is how the poison
; lane shows up in the output.
3317define void @v_shuffle_v3i16_v4i16__7_u_2(ptr addrspace(1) inreg %ptr) {
3318; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_2:
3319; GFX900:       ; %bb.0:
3320; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3321; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3322; GFX900-NEXT:    ;;#ASMSTART
3323; GFX900-NEXT:    ; def v[0:1]
3324; GFX900-NEXT:    ;;#ASMEND
3325; GFX900-NEXT:    ;;#ASMSTART
3326; GFX900-NEXT:    ; def v[2:3]
3327; GFX900-NEXT:    ;;#ASMEND
3328; GFX900-NEXT:    v_alignbit_b32 v0, s4, v3, 16
3329; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3330; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3331; GFX900-NEXT:    s_waitcnt vmcnt(0)
3332; GFX900-NEXT:    s_setpc_b64 s[30:31]
3333;
3334; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_2:
3335; GFX90A:       ; %bb.0:
3336; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3337; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3338; GFX90A-NEXT:    ;;#ASMSTART
3339; GFX90A-NEXT:    ; def v[0:1]
3340; GFX90A-NEXT:    ;;#ASMEND
3341; GFX90A-NEXT:    ;;#ASMSTART
3342; GFX90A-NEXT:    ; def v[2:3]
3343; GFX90A-NEXT:    ;;#ASMEND
3344; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
3345; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3346; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3347; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3348; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3349;
3350; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_2:
3351; GFX940:       ; %bb.0:
3352; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3353; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3354; GFX940-NEXT:    ;;#ASMSTART
3355; GFX940-NEXT:    ; def v[0:1]
3356; GFX940-NEXT:    ;;#ASMEND
3357; GFX940-NEXT:    ;;#ASMSTART
3358; GFX940-NEXT:    ; def v[2:3]
3359; GFX940-NEXT:    ;;#ASMEND
3360; GFX940-NEXT:    s_nop 0
3361; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
3362; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3363; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3364; GFX940-NEXT:    s_waitcnt vmcnt(0)
3365; GFX940-NEXT:    s_setpc_b64 s[30:31]
3366  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3367  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3368  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
3369  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3370  ret void
3371}
3372
; Shuffle mask <7, 0, 2> over two asm-defined vectors: lane 0 uses index 7
; (element 3 of %vec1), lane 1 selects element 0 of %vec0, lane 2 selects
; element 2 of %vec0. The result is stored through the inreg addrspace(1)
; pointer (align 8).
; The checks show %vec0 in v[0:1] and %vec1 in v[2:3]. Expected code:
; v_alignbit_b32 v0, v0, v3, 16 builds the dword stored at offset 0, and v1
; is stored as a short at offset 4.
3373define void @v_shuffle_v3i16_v4i16__7_0_2(ptr addrspace(1) inreg %ptr) {
3374; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_2:
3375; GFX900:       ; %bb.0:
3376; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3377; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3378; GFX900-NEXT:    ;;#ASMSTART
3379; GFX900-NEXT:    ; def v[0:1]
3380; GFX900-NEXT:    ;;#ASMEND
3381; GFX900-NEXT:    ;;#ASMSTART
3382; GFX900-NEXT:    ; def v[2:3]
3383; GFX900-NEXT:    ;;#ASMEND
3384; GFX900-NEXT:    v_alignbit_b32 v0, v0, v3, 16
3385; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3386; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3387; GFX900-NEXT:    s_waitcnt vmcnt(0)
3388; GFX900-NEXT:    s_setpc_b64 s[30:31]
3389;
3390; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_2:
3391; GFX90A:       ; %bb.0:
3392; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3393; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3394; GFX90A-NEXT:    ;;#ASMSTART
3395; GFX90A-NEXT:    ; def v[0:1]
3396; GFX90A-NEXT:    ;;#ASMEND
3397; GFX90A-NEXT:    ;;#ASMSTART
3398; GFX90A-NEXT:    ; def v[2:3]
3399; GFX90A-NEXT:    ;;#ASMEND
3400; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
3401; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3402; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3403; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3404; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3405;
3406; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_2:
3407; GFX940:       ; %bb.0:
3408; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3409; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3410; GFX940-NEXT:    ;;#ASMSTART
3411; GFX940-NEXT:    ; def v[0:1]
3412; GFX940-NEXT:    ;;#ASMEND
3413; GFX940-NEXT:    ;;#ASMSTART
3414; GFX940-NEXT:    ; def v[2:3]
3415; GFX940-NEXT:    ;;#ASMEND
3416; GFX940-NEXT:    s_nop 0
3417; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
3418; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3419; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3420; GFX940-NEXT:    s_waitcnt vmcnt(0)
3421; GFX940-NEXT:    s_setpc_b64 s[30:31]
3422  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3423  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3424  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
3425  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3426  ret void
3427}
3428
; Shuffle mask <7, 1, 2> over two asm-defined vectors: lane 0 uses index 7
; (element 3 of %vec1), lane 1 selects element 1 of %vec0, lane 2 selects
; element 2 of %vec0. The result is stored through the inreg addrspace(1)
; pointer (align 8).
; The checks show %vec0 in v[0:1] and %vec1 in v[2:3]. Expected code:
; v_perm_b32 v0, v0, v3 with selector 0x7060302 builds the dword stored at
; offset 0, and v1 is stored as a short at offset 4.
3429define void @v_shuffle_v3i16_v4i16__7_1_2(ptr addrspace(1) inreg %ptr) {
3430; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_2:
3431; GFX900:       ; %bb.0:
3432; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3433; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3434; GFX900-NEXT:    ;;#ASMSTART
3435; GFX900-NEXT:    ; def v[0:1]
3436; GFX900-NEXT:    ;;#ASMEND
3437; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3438; GFX900-NEXT:    ;;#ASMSTART
3439; GFX900-NEXT:    ; def v[2:3]
3440; GFX900-NEXT:    ;;#ASMEND
3441; GFX900-NEXT:    v_perm_b32 v0, v0, v3, s4
3442; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3443; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3444; GFX900-NEXT:    s_waitcnt vmcnt(0)
3445; GFX900-NEXT:    s_setpc_b64 s[30:31]
3446;
3447; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_2:
3448; GFX90A:       ; %bb.0:
3449; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3450; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3451; GFX90A-NEXT:    ;;#ASMSTART
3452; GFX90A-NEXT:    ; def v[0:1]
3453; GFX90A-NEXT:    ;;#ASMEND
3454; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3455; GFX90A-NEXT:    ;;#ASMSTART
3456; GFX90A-NEXT:    ; def v[2:3]
3457; GFX90A-NEXT:    ;;#ASMEND
3458; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
3459; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3460; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3461; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3462; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3463;
3464; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_2:
3465; GFX940:       ; %bb.0:
3466; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3467; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3468; GFX940-NEXT:    ;;#ASMSTART
3469; GFX940-NEXT:    ; def v[0:1]
3470; GFX940-NEXT:    ;;#ASMEND
3471; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3472; GFX940-NEXT:    ;;#ASMSTART
3473; GFX940-NEXT:    ; def v[2:3]
3474; GFX940-NEXT:    ;;#ASMEND
3475; GFX940-NEXT:    s_nop 0
3476; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
3477; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3478; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3479; GFX940-NEXT:    s_waitcnt vmcnt(0)
3480; GFX940-NEXT:    s_setpc_b64 s[30:31]
3481  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3482  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3483  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
3484  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3485  ret void
3486}
3487
; Shuffle mask <7, 3, 2> over two asm-defined vectors: lane 0 uses index 7
; (element 3 of %vec1), lane 1 selects element 3 of %vec0, lane 2 selects
; element 2 of %vec0. The result is stored through the inreg addrspace(1)
; pointer (align 8).
; The checks show %vec0 in v[0:1] and %vec1 in v[2:3]. Expected code:
; v_perm_b32 v0, v1, v3 with selector 0x7060302 builds the dword stored at
; offset 0, and v1 is stored as a short at offset 4.
3488define void @v_shuffle_v3i16_v4i16__7_3_2(ptr addrspace(1) inreg %ptr) {
3489; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_2:
3490; GFX900:       ; %bb.0:
3491; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3492; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3493; GFX900-NEXT:    ;;#ASMSTART
3494; GFX900-NEXT:    ; def v[0:1]
3495; GFX900-NEXT:    ;;#ASMEND
3496; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3497; GFX900-NEXT:    ;;#ASMSTART
3498; GFX900-NEXT:    ; def v[2:3]
3499; GFX900-NEXT:    ;;#ASMEND
3500; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
3501; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3502; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3503; GFX900-NEXT:    s_waitcnt vmcnt(0)
3504; GFX900-NEXT:    s_setpc_b64 s[30:31]
3505;
3506; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_2:
3507; GFX90A:       ; %bb.0:
3508; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3509; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3510; GFX90A-NEXT:    ;;#ASMSTART
3511; GFX90A-NEXT:    ; def v[0:1]
3512; GFX90A-NEXT:    ;;#ASMEND
3513; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3514; GFX90A-NEXT:    ;;#ASMSTART
3515; GFX90A-NEXT:    ; def v[2:3]
3516; GFX90A-NEXT:    ;;#ASMEND
3517; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
3518; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3519; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3520; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3521; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3522;
3523; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_2:
3524; GFX940:       ; %bb.0:
3525; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3526; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3527; GFX940-NEXT:    ;;#ASMSTART
3528; GFX940-NEXT:    ; def v[0:1]
3529; GFX940-NEXT:    ;;#ASMEND
3530; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3531; GFX940-NEXT:    ;;#ASMSTART
3532; GFX940-NEXT:    ; def v[2:3]
3533; GFX940-NEXT:    ;;#ASMEND
3534; GFX940-NEXT:    s_nop 0
3535; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
3536; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3537; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3538; GFX940-NEXT:    s_waitcnt vmcnt(0)
3539; GFX940-NEXT:    s_setpc_b64 s[30:31]
3540  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3541  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3542  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
3543  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3544  ret void
3545}
3546
; Shuffle mask <7, 4, 2> over two asm-defined vectors: lanes 0 and 1 use
; indices 7 and 4 (elements 3 and 0 of %vec1), lane 2 selects element 2 of
; %vec0. The result is stored through the inreg addrspace(1) pointer.
; The checks show %vec0 in v[0:1] and %vec1 in v[2:3]. Expected code:
; v_alignbit_b32 v0, v2, v3, 16 builds the dword stored at offset 0 from the
; two %vec1 registers, and v1 is stored as a short at offset 4.
3547define void @v_shuffle_v3i16_v4i16__7_4_2(ptr addrspace(1) inreg %ptr) {
3548; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_2:
3549; GFX900:       ; %bb.0:
3550; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3551; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3552; GFX900-NEXT:    ;;#ASMSTART
3553; GFX900-NEXT:    ; def v[0:1]
3554; GFX900-NEXT:    ;;#ASMEND
3555; GFX900-NEXT:    ;;#ASMSTART
3556; GFX900-NEXT:    ; def v[2:3]
3557; GFX900-NEXT:    ;;#ASMEND
3558; GFX900-NEXT:    v_alignbit_b32 v0, v2, v3, 16
3559; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3560; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3561; GFX900-NEXT:    s_waitcnt vmcnt(0)
3562; GFX900-NEXT:    s_setpc_b64 s[30:31]
3563;
3564; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_2:
3565; GFX90A:       ; %bb.0:
3566; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3567; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3568; GFX90A-NEXT:    ;;#ASMSTART
3569; GFX90A-NEXT:    ; def v[0:1]
3570; GFX90A-NEXT:    ;;#ASMEND
3571; GFX90A-NEXT:    ;;#ASMSTART
3572; GFX90A-NEXT:    ; def v[2:3]
3573; GFX90A-NEXT:    ;;#ASMEND
3574; GFX90A-NEXT:    v_alignbit_b32 v0, v2, v3, 16
3575; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3576; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3577; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3578; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3579;
3580; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_2:
3581; GFX940:       ; %bb.0:
3582; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3583; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3584; GFX940-NEXT:    ;;#ASMSTART
3585; GFX940-NEXT:    ; def v[0:1]
3586; GFX940-NEXT:    ;;#ASMEND
3587; GFX940-NEXT:    ;;#ASMSTART
3588; GFX940-NEXT:    ; def v[2:3]
3589; GFX940-NEXT:    ;;#ASMEND
3590; GFX940-NEXT:    s_nop 0
3591; GFX940-NEXT:    v_alignbit_b32 v0, v2, v3, 16
3592; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3593; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3594; GFX940-NEXT:    s_waitcnt vmcnt(0)
3595; GFX940-NEXT:    s_setpc_b64 s[30:31]
3596  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3597  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3598  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
3599  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3600  ret void
3601}
3602
; Shuffle mask <7, 5, 2> over two asm-defined vectors: lanes 0 and 1 use
; indices 7 and 5 (elements 3 and 1 of %vec1), lane 2 selects element 2 of
; %vec0. The result is stored through the inreg addrspace(1) pointer.
; The checks show %vec0 in v[0:1] and %vec1 in v[2:3]. Expected code:
; v_perm_b32 v0, v2, v3 with selector 0x7060302 builds the dword stored at
; offset 0, and v1 is stored as a short at offset 4.
3603define void @v_shuffle_v3i16_v4i16__7_5_2(ptr addrspace(1) inreg %ptr) {
3604; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_2:
3605; GFX900:       ; %bb.0:
3606; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3607; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3608; GFX900-NEXT:    ;;#ASMSTART
3609; GFX900-NEXT:    ; def v[0:1]
3610; GFX900-NEXT:    ;;#ASMEND
3611; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3612; GFX900-NEXT:    ;;#ASMSTART
3613; GFX900-NEXT:    ; def v[2:3]
3614; GFX900-NEXT:    ;;#ASMEND
3615; GFX900-NEXT:    v_perm_b32 v0, v2, v3, s4
3616; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3617; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3618; GFX900-NEXT:    s_waitcnt vmcnt(0)
3619; GFX900-NEXT:    s_setpc_b64 s[30:31]
3620;
3621; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_2:
3622; GFX90A:       ; %bb.0:
3623; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3624; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3625; GFX90A-NEXT:    ;;#ASMSTART
3626; GFX90A-NEXT:    ; def v[0:1]
3627; GFX90A-NEXT:    ;;#ASMEND
3628; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3629; GFX90A-NEXT:    ;;#ASMSTART
3630; GFX90A-NEXT:    ; def v[2:3]
3631; GFX90A-NEXT:    ;;#ASMEND
3632; GFX90A-NEXT:    v_perm_b32 v0, v2, v3, s4
3633; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3634; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3635; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3636; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3637;
3638; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_2:
3639; GFX940:       ; %bb.0:
3640; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3641; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3642; GFX940-NEXT:    ;;#ASMSTART
3643; GFX940-NEXT:    ; def v[0:1]
3644; GFX940-NEXT:    ;;#ASMEND
3645; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3646; GFX940-NEXT:    ;;#ASMSTART
3647; GFX940-NEXT:    ; def v[2:3]
3648; GFX940-NEXT:    ;;#ASMEND
3649; GFX940-NEXT:    s_nop 0
3650; GFX940-NEXT:    v_perm_b32 v0, v2, v3, s2
3651; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3652; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3653; GFX940-NEXT:    s_waitcnt vmcnt(0)
3654; GFX940-NEXT:    s_setpc_b64 s[30:31]
3655  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3656  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3657  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
3658  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3659  ret void
3660}
3661
; Shuffle mask <7, 6, 2> over two asm-defined vectors: lanes 0 and 1 use
; indices 7 and 6 (elements 3 and 2 of %vec1), lane 2 selects element 2 of
; %vec0. The result is stored through the inreg addrspace(1) pointer.
; The checks show %vec0 in v[0:1] and %vec1 in v[2:3]. Expected code:
; v_alignbit_b32 v0, v3, v3, 16 builds the dword stored at offset 0, and v1
; is stored as a short at offset 4.
3662define void @v_shuffle_v3i16_v4i16__7_6_2(ptr addrspace(1) inreg %ptr) {
3663; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_2:
3664; GFX900:       ; %bb.0:
3665; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3666; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3667; GFX900-NEXT:    ;;#ASMSTART
3668; GFX900-NEXT:    ; def v[0:1]
3669; GFX900-NEXT:    ;;#ASMEND
3670; GFX900-NEXT:    ;;#ASMSTART
3671; GFX900-NEXT:    ; def v[2:3]
3672; GFX900-NEXT:    ;;#ASMEND
3673; GFX900-NEXT:    v_alignbit_b32 v0, v3, v3, 16
3674; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3675; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3676; GFX900-NEXT:    s_waitcnt vmcnt(0)
3677; GFX900-NEXT:    s_setpc_b64 s[30:31]
3678;
3679; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_2:
3680; GFX90A:       ; %bb.0:
3681; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3682; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3683; GFX90A-NEXT:    ;;#ASMSTART
3684; GFX90A-NEXT:    ; def v[0:1]
3685; GFX90A-NEXT:    ;;#ASMEND
3686; GFX90A-NEXT:    ;;#ASMSTART
3687; GFX90A-NEXT:    ; def v[2:3]
3688; GFX90A-NEXT:    ;;#ASMEND
3689; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v3, 16
3690; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3691; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3692; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3693; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3694;
3695; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_2:
3696; GFX940:       ; %bb.0:
3697; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3698; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3699; GFX940-NEXT:    ;;#ASMSTART
3700; GFX940-NEXT:    ; def v[0:1]
3701; GFX940-NEXT:    ;;#ASMEND
3702; GFX940-NEXT:    ;;#ASMSTART
3703; GFX940-NEXT:    ; def v[2:3]
3704; GFX940-NEXT:    ;;#ASMEND
3705; GFX940-NEXT:    s_nop 0
3706; GFX940-NEXT:    v_alignbit_b32 v0, v3, v3, 16
3707; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3708; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3709; GFX940-NEXT:    s_waitcnt vmcnt(0)
3710; GFX940-NEXT:    s_setpc_b64 s[30:31]
3711  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3712  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3713  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
3714  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3715  ret void
3716}
3717
; Shuffle mask <poison, 3, 3>: lane 0 is poison, lanes 1 and 2 both select
; element 3 of %vec0. %vec0 comes from inline asm ("=v"); the result is
; stored through the inreg addrspace(1) pointer (align 8).
; Expected code needs no ALU instruction to build the result: v1 is stored
; directly as the dword at offset 0, and global_store_short_d16_hi stores
; from v1 at offset 4.
3718define void @v_shuffle_v3i16_v4i16__u_3_3(ptr addrspace(1) inreg %ptr) {
3719; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_3_3:
3720; GFX900:       ; %bb.0:
3721; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3722; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3723; GFX900-NEXT:    ;;#ASMSTART
3724; GFX900-NEXT:    ; def v[0:1]
3725; GFX900-NEXT:    ;;#ASMEND
3726; GFX900-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
3727; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3728; GFX900-NEXT:    s_waitcnt vmcnt(0)
3729; GFX900-NEXT:    s_setpc_b64 s[30:31]
3730;
3731; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_3_3:
3732; GFX90A:       ; %bb.0:
3733; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3734; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3735; GFX90A-NEXT:    ;;#ASMSTART
3736; GFX90A-NEXT:    ; def v[0:1]
3737; GFX90A-NEXT:    ;;#ASMEND
3738; GFX90A-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
3739; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3740; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3741; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3742;
3743; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_3_3:
3744; GFX940:       ; %bb.0:
3745; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3746; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3747; GFX940-NEXT:    ;;#ASMSTART
3748; GFX940-NEXT:    ; def v[0:1]
3749; GFX940-NEXT:    ;;#ASMEND
3750; GFX940-NEXT:    global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1
3751; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3752; GFX940-NEXT:    s_waitcnt vmcnt(0)
3753; GFX940-NEXT:    s_setpc_b64 s[30:31]
3754  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3755  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 3, i32 3>
3756  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3757  ret void
3758}
3759
; Shuffle mask <0, 3, 3>: lane 0 selects element 0 of %vec0, lanes 1 and 2
; both select element 3. %vec0 comes from inline asm ("=v"); the result is
; stored through the inreg addrspace(1) pointer (align 8).
; Expected code: v_bfi_b32 with mask 0xffff combines v0 and v1 into the dword
; stored at offset 0, and v_lshrrev_b32 shifts v1 right by 16 to produce the
; short stored at offset 4. In these checks the dword store comes before the
; short store.
3760define void @v_shuffle_v3i16_v4i16__0_3_3(ptr addrspace(1) inreg %ptr) {
3761; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_3_3:
3762; GFX900:       ; %bb.0:
3763; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3764; GFX900-NEXT:    ;;#ASMSTART
3765; GFX900-NEXT:    ; def v[0:1]
3766; GFX900-NEXT:    ;;#ASMEND
3767; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3768; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3769; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
3770; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3771; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3772; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3773; GFX900-NEXT:    s_waitcnt vmcnt(0)
3774; GFX900-NEXT:    s_setpc_b64 s[30:31]
3775;
3776; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_3_3:
3777; GFX90A:       ; %bb.0:
3778; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3779; GFX90A-NEXT:    ;;#ASMSTART
3780; GFX90A-NEXT:    ; def v[0:1]
3781; GFX90A-NEXT:    ;;#ASMEND
3782; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3783; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3784; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
3785; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3786; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3787; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3788; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3789; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3790;
3791; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_3_3:
3792; GFX940:       ; %bb.0:
3793; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3794; GFX940-NEXT:    ;;#ASMSTART
3795; GFX940-NEXT:    ; def v[0:1]
3796; GFX940-NEXT:    ;;#ASMEND
3797; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3798; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3799; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
3800; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3801; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3802; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3803; GFX940-NEXT:    s_waitcnt vmcnt(0)
3804; GFX940-NEXT:    s_setpc_b64 s[30:31]
3805  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3806  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 3, i32 3>
3807  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3808  ret void
3809}
3810
; Shuffle mask <1, 3, 3> with a poison second operand: the stored <3 x i16>
; is { %vec0[1], %vec0[3], %vec0[3] }. %vec0 is produced by an inline-asm def
; with a "=v" constraint, which the assertions show as v[0:1].
; Expected code on every target: a v_perm_b32 of v1 and v0 with selector
; 0x7060302 feeding the dword store, and v1 shifted right by 16 feeding the
; short store at offset 4. The gfx940 assertions differ in the scalar
; registers used and in the "sc0 sc1" modifiers on the stores.
3811define void @v_shuffle_v3i16_v4i16__1_3_3(ptr addrspace(1) inreg %ptr) {
3812; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_3_3:
3813; GFX900:       ; %bb.0:
3814; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3815; GFX900-NEXT:    ;;#ASMSTART
3816; GFX900-NEXT:    ; def v[0:1]
3817; GFX900-NEXT:    ;;#ASMEND
3818; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3819; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3820; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
3821; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3822; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3823; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3824; GFX900-NEXT:    s_waitcnt vmcnt(0)
3825; GFX900-NEXT:    s_setpc_b64 s[30:31]
3826;
3827; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_3_3:
3828; GFX90A:       ; %bb.0:
3829; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3830; GFX90A-NEXT:    ;;#ASMSTART
3831; GFX90A-NEXT:    ; def v[0:1]
3832; GFX90A-NEXT:    ;;#ASMEND
3833; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3834; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3835; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
3836; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3837; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3838; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3839; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3840; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3841;
3842; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_3_3:
3843; GFX940:       ; %bb.0:
3844; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3845; GFX940-NEXT:    ;;#ASMSTART
3846; GFX940-NEXT:    ; def v[0:1]
3847; GFX940-NEXT:    ;;#ASMEND
3848; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3849; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3850; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
3851; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3852; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3853; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3854; GFX940-NEXT:    s_waitcnt vmcnt(0)
3855; GFX940-NEXT:    s_setpc_b64 s[30:31]
3856  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3857  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 3, i32 3>
3858  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3859  ret void
3860}
3861
; Shuffle mask <2, 3, 3> with a poison second operand: the stored <3 x i16>
; is { %vec0[2], %vec0[3], %vec0[3] }. %vec0 is produced by an inline-asm def
; with a "=v" constraint, which the assertions show as v[0:1].
; Expected code on every target: no separate shuffle instruction. v1 is
; passed unchanged to the dword store and again to a
; global_store_short_d16_hi at offset 4. The gfx940 assertions differ in the
; scalar base registers and in the "sc0 sc1" modifiers on the stores.
3862define void @v_shuffle_v3i16_v4i16__2_3_3(ptr addrspace(1) inreg %ptr) {
3863; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_3_3:
3864; GFX900:       ; %bb.0:
3865; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3866; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3867; GFX900-NEXT:    ;;#ASMSTART
3868; GFX900-NEXT:    ; def v[0:1]
3869; GFX900-NEXT:    ;;#ASMEND
3870; GFX900-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
3871; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3872; GFX900-NEXT:    s_waitcnt vmcnt(0)
3873; GFX900-NEXT:    s_setpc_b64 s[30:31]
3874;
3875; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_3_3:
3876; GFX90A:       ; %bb.0:
3877; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3878; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3879; GFX90A-NEXT:    ;;#ASMSTART
3880; GFX90A-NEXT:    ; def v[0:1]
3881; GFX90A-NEXT:    ;;#ASMEND
3882; GFX90A-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
3883; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3884; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3885; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3886;
3887; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_3_3:
3888; GFX940:       ; %bb.0:
3889; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3890; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3891; GFX940-NEXT:    ;;#ASMSTART
3892; GFX940-NEXT:    ; def v[0:1]
3893; GFX940-NEXT:    ;;#ASMEND
3894; GFX940-NEXT:    global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1
3895; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3896; GFX940-NEXT:    s_waitcnt vmcnt(0)
3897; GFX940-NEXT:    s_setpc_b64 s[30:31]
3898  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3899  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 3, i32 3>
3900  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3901  ret void
3902}
3903
; Shuffle mask <3, 3, 3> with a poison second operand: all three stored
; lanes are %vec0[3]. %vec0 is produced by an inline-asm def with a "=v"
; constraint, which the assertions show as v[0:1].
; Expected code on every target: v1 shifted right by 16 into v0 for the
; short store at offset 4, and a v_perm_b32 of v1 with itself (selector
; 0x7060302) for the dword store. The gfx940 assertions differ in the scalar
; registers used and in the "sc0 sc1" modifiers on the stores.
3904define void @v_shuffle_v3i16_v4i16__3_3_3(ptr addrspace(1) inreg %ptr) {
3905; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_3_3:
3906; GFX900:       ; %bb.0:
3907; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3908; GFX900-NEXT:    ;;#ASMSTART
3909; GFX900-NEXT:    ; def v[0:1]
3910; GFX900-NEXT:    ;;#ASMEND
3911; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3912; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3913; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
3914; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
3915; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3916; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3917; GFX900-NEXT:    s_waitcnt vmcnt(0)
3918; GFX900-NEXT:    s_setpc_b64 s[30:31]
3919;
3920; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_3_3:
3921; GFX90A:       ; %bb.0:
3922; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3923; GFX90A-NEXT:    ;;#ASMSTART
3924; GFX90A-NEXT:    ; def v[0:1]
3925; GFX90A-NEXT:    ;;#ASMEND
3926; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3927; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3928; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
3929; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
3930; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3931; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3932; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3933; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3934;
3935; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_3_3:
3936; GFX940:       ; %bb.0:
3937; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3938; GFX940-NEXT:    ;;#ASMSTART
3939; GFX940-NEXT:    ; def v[0:1]
3940; GFX940-NEXT:    ;;#ASMEND
3941; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3942; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3943; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
3944; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
3945; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3946; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3947; GFX940-NEXT:    s_waitcnt vmcnt(0)
3948; GFX940-NEXT:    s_setpc_b64 s[30:31]
3949  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3950  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 3, i32 3>
3951  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3952  ret void
3953}
3954
; Shuffle mask <4, 3, 3>. Index 4 refers to lane 0 of the second operand,
; which is poison in this function, so only lanes 1 and 2 of the result
; (both %vec0[3]) carry defined data. %vec0 is produced by an inline-asm def
; with a "=v" constraint, which the assertions show as v[0:1].
; Expected code on every target: no separate shuffle instruction. v1 is
; passed unchanged to the dword store and again to a
; global_store_short_d16_hi at offset 4. The gfx940 assertions differ in the
; scalar base registers and in the "sc0 sc1" modifiers on the stores.
3955define void @v_shuffle_v3i16_v4i16__4_3_3(ptr addrspace(1) inreg %ptr) {
3956; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_3_3:
3957; GFX900:       ; %bb.0:
3958; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3959; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3960; GFX900-NEXT:    ;;#ASMSTART
3961; GFX900-NEXT:    ; def v[0:1]
3962; GFX900-NEXT:    ;;#ASMEND
3963; GFX900-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
3964; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3965; GFX900-NEXT:    s_waitcnt vmcnt(0)
3966; GFX900-NEXT:    s_setpc_b64 s[30:31]
3967;
3968; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_3_3:
3969; GFX90A:       ; %bb.0:
3970; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3971; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3972; GFX90A-NEXT:    ;;#ASMSTART
3973; GFX90A-NEXT:    ; def v[0:1]
3974; GFX90A-NEXT:    ;;#ASMEND
3975; GFX90A-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
3976; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3977; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3978; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3979;
3980; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_3_3:
3981; GFX940:       ; %bb.0:
3982; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3983; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3984; GFX940-NEXT:    ;;#ASMSTART
3985; GFX940-NEXT:    ; def v[0:1]
3986; GFX940-NEXT:    ;;#ASMEND
3987; GFX940-NEXT:    global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1
3988; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3989; GFX940-NEXT:    s_waitcnt vmcnt(0)
3990; GFX940-NEXT:    s_setpc_b64 s[30:31]
3991  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3992  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 3, i32 3>
3993  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3994  ret void
3995}
3996
; Two-input shuffle with mask <5, 3, 3>: the stored <3 x i16> is
; { %vec1[1], %vec0[3], %vec0[3] }. Both inputs are inline-asm defs with a
; "=v" constraint; the assertions show two defs, v[0:1] and v[2:3]
; (presumably %vec0 and %vec1 respectively).
; Expected code on every target: a v_perm_b32 of v1 and v2 with selector
; 0x7060302 feeding the dword store, and v1 shifted right by 16 feeding the
; short store at offset 4. The gfx940 assertions additionally expect an
; "s_nop 0" before the v_perm_b32, use different scalar registers, and carry
; "sc0 sc1" modifiers on the stores.
3997define void @v_shuffle_v3i16_v4i16__5_3_3(ptr addrspace(1) inreg %ptr) {
3998; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_3_3:
3999; GFX900:       ; %bb.0:
4000; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4001; GFX900-NEXT:    ;;#ASMSTART
4002; GFX900-NEXT:    ; def v[0:1]
4003; GFX900-NEXT:    ;;#ASMEND
4004; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4005; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4006; GFX900-NEXT:    ;;#ASMSTART
4007; GFX900-NEXT:    ; def v[2:3]
4008; GFX900-NEXT:    ;;#ASMEND
4009; GFX900-NEXT:    v_perm_b32 v0, v1, v2, s4
4010; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4011; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4012; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4013; GFX900-NEXT:    s_waitcnt vmcnt(0)
4014; GFX900-NEXT:    s_setpc_b64 s[30:31]
4015;
4016; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_3_3:
4017; GFX90A:       ; %bb.0:
4018; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4019; GFX90A-NEXT:    ;;#ASMSTART
4020; GFX90A-NEXT:    ; def v[0:1]
4021; GFX90A-NEXT:    ;;#ASMEND
4022; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4023; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4024; GFX90A-NEXT:    ;;#ASMSTART
4025; GFX90A-NEXT:    ; def v[2:3]
4026; GFX90A-NEXT:    ;;#ASMEND
4027; GFX90A-NEXT:    v_perm_b32 v0, v1, v2, s4
4028; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4029; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4030; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4031; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4032; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4033;
4034; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_3_3:
4035; GFX940:       ; %bb.0:
4036; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4037; GFX940-NEXT:    ;;#ASMSTART
4038; GFX940-NEXT:    ; def v[0:1]
4039; GFX940-NEXT:    ;;#ASMEND
4040; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4041; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4042; GFX940-NEXT:    ;;#ASMSTART
4043; GFX940-NEXT:    ; def v[2:3]
4044; GFX940-NEXT:    ;;#ASMEND
4045; GFX940-NEXT:    s_nop 0
4046; GFX940-NEXT:    v_perm_b32 v0, v1, v2, s2
4047; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4048; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4049; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4050; GFX940-NEXT:    s_waitcnt vmcnt(0)
4051; GFX940-NEXT:    s_setpc_b64 s[30:31]
4052  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4053  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4054  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
4055  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4056  ret void
4057}
4058
; Two-input shuffle with mask <6, 3, 3>: the stored <3 x i16> is
; { %vec1[2], %vec0[3], %vec0[3] }. Both inputs are inline-asm defs with a
; "=v" constraint; the assertions show two defs, v[0:1] and v[2:3]
; (presumably %vec0 and %vec1 respectively).
; Expected code on every target: a v_bfi_b32 with the 0xffff mask over v3 and
; v1 feeding the dword store, and v1 shifted right by 16 feeding the short
; store at offset 4. In this function the short store is expected before the
; dword store. The gfx940 assertions additionally expect an "s_nop 0" before
; the v_bfi_b32, use different scalar registers, and carry "sc0 sc1"
; modifiers on the stores.
4059define void @v_shuffle_v3i16_v4i16__6_3_3(ptr addrspace(1) inreg %ptr) {
4060; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_3_3:
4061; GFX900:       ; %bb.0:
4062; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4063; GFX900-NEXT:    ;;#ASMSTART
4064; GFX900-NEXT:    ; def v[0:1]
4065; GFX900-NEXT:    ;;#ASMEND
4066; GFX900-NEXT:    s_mov_b32 s4, 0xffff
4067; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4068; GFX900-NEXT:    ;;#ASMSTART
4069; GFX900-NEXT:    ; def v[2:3]
4070; GFX900-NEXT:    ;;#ASMEND
4071; GFX900-NEXT:    v_bfi_b32 v0, s4, v3, v1
4072; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4073; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4074; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4075; GFX900-NEXT:    s_waitcnt vmcnt(0)
4076; GFX900-NEXT:    s_setpc_b64 s[30:31]
4077;
4078; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_3_3:
4079; GFX90A:       ; %bb.0:
4080; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4081; GFX90A-NEXT:    ;;#ASMSTART
4082; GFX90A-NEXT:    ; def v[0:1]
4083; GFX90A-NEXT:    ;;#ASMEND
4084; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
4085; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4086; GFX90A-NEXT:    ;;#ASMSTART
4087; GFX90A-NEXT:    ; def v[2:3]
4088; GFX90A-NEXT:    ;;#ASMEND
4089; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v1
4090; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4091; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4092; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4093; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4094; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4095;
4096; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_3_3:
4097; GFX940:       ; %bb.0:
4098; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4099; GFX940-NEXT:    ;;#ASMSTART
4100; GFX940-NEXT:    ; def v[0:1]
4101; GFX940-NEXT:    ;;#ASMEND
4102; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4103; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4104; GFX940-NEXT:    ;;#ASMSTART
4105; GFX940-NEXT:    ; def v[2:3]
4106; GFX940-NEXT:    ;;#ASMEND
4107; GFX940-NEXT:    s_nop 0
4108; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v1
4109; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4110; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4111; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4112; GFX940-NEXT:    s_waitcnt vmcnt(0)
4113; GFX940-NEXT:    s_setpc_b64 s[30:31]
4114  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4115  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4116  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
4117  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4118  ret void
4119}
4120
; Two-input shuffle with mask <7, 3, 3>: the stored <3 x i16> is
; { %vec1[3], %vec0[3], %vec0[3] }. Both inputs are inline-asm defs with a
; "=v" constraint; the assertions show two defs, v[0:1] and v[2:3]
; (presumably %vec0 and %vec1 respectively).
; Expected code on every target: a v_perm_b32 of v1 and v3 with selector
; 0x7060302 feeding the dword store, and v1 shifted right by 16 feeding the
; short store at offset 4. The gfx940 assertions additionally expect an
; "s_nop 0" before the v_perm_b32, use different scalar registers, and carry
; "sc0 sc1" modifiers on the stores.
4121define void @v_shuffle_v3i16_v4i16__7_3_3(ptr addrspace(1) inreg %ptr) {
4122; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_3:
4123; GFX900:       ; %bb.0:
4124; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4125; GFX900-NEXT:    ;;#ASMSTART
4126; GFX900-NEXT:    ; def v[0:1]
4127; GFX900-NEXT:    ;;#ASMEND
4128; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4129; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4130; GFX900-NEXT:    ;;#ASMSTART
4131; GFX900-NEXT:    ; def v[2:3]
4132; GFX900-NEXT:    ;;#ASMEND
4133; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
4134; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4135; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4136; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4137; GFX900-NEXT:    s_waitcnt vmcnt(0)
4138; GFX900-NEXT:    s_setpc_b64 s[30:31]
4139;
4140; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_3:
4141; GFX90A:       ; %bb.0:
4142; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4143; GFX90A-NEXT:    ;;#ASMSTART
4144; GFX90A-NEXT:    ; def v[0:1]
4145; GFX90A-NEXT:    ;;#ASMEND
4146; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4147; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4148; GFX90A-NEXT:    ;;#ASMSTART
4149; GFX90A-NEXT:    ; def v[2:3]
4150; GFX90A-NEXT:    ;;#ASMEND
4151; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
4152; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4153; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4154; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4155; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4156; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4157;
4158; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_3:
4159; GFX940:       ; %bb.0:
4160; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4161; GFX940-NEXT:    ;;#ASMSTART
4162; GFX940-NEXT:    ; def v[0:1]
4163; GFX940-NEXT:    ;;#ASMEND
4164; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4165; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4166; GFX940-NEXT:    ;;#ASMSTART
4167; GFX940-NEXT:    ; def v[2:3]
4168; GFX940-NEXT:    ;;#ASMEND
4169; GFX940-NEXT:    s_nop 0
4170; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
4171; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4172; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4173; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4174; GFX940-NEXT:    s_waitcnt vmcnt(0)
4175; GFX940-NEXT:    s_setpc_b64 s[30:31]
4176  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4177  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4178  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
4179  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4180  ret void
4181}
4182
; Two-input shuffle with mask <7, poison, 3>: the stored <3 x i16> is
; { %vec1[3], poison, %vec0[3] }. Both inputs are inline-asm defs with a
; "=v" constraint; the assertions show two defs, v[0:1] and v[2:3]
; (presumably %vec0 and %vec1 respectively).
; Expected code on every target: a v_alignbit_b32 by 16 whose first source
; is a scalar register (s4, or s0 on gfx940) and whose second source is v3,
; feeding the dword store, plus a global_store_short_d16_hi of v1 at offset
; 4. The scalar source is presumably a placeholder for the poison lane.
; The gfx940 assertions additionally expect an "s_nop 0" before the
; v_alignbit_b32 and carry "sc0 sc1" modifiers on the stores.
4183define void @v_shuffle_v3i16_v4i16__7_u_3(ptr addrspace(1) inreg %ptr) {
4184; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_3:
4185; GFX900:       ; %bb.0:
4186; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4187; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4188; GFX900-NEXT:    ;;#ASMSTART
4189; GFX900-NEXT:    ; def v[0:1]
4190; GFX900-NEXT:    ;;#ASMEND
4191; GFX900-NEXT:    ;;#ASMSTART
4192; GFX900-NEXT:    ; def v[2:3]
4193; GFX900-NEXT:    ;;#ASMEND
4194; GFX900-NEXT:    v_alignbit_b32 v0, s4, v3, 16
4195; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4196; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4197; GFX900-NEXT:    s_waitcnt vmcnt(0)
4198; GFX900-NEXT:    s_setpc_b64 s[30:31]
4199;
4200; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_3:
4201; GFX90A:       ; %bb.0:
4202; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4203; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4204; GFX90A-NEXT:    ;;#ASMSTART
4205; GFX90A-NEXT:    ; def v[0:1]
4206; GFX90A-NEXT:    ;;#ASMEND
4207; GFX90A-NEXT:    ;;#ASMSTART
4208; GFX90A-NEXT:    ; def v[2:3]
4209; GFX90A-NEXT:    ;;#ASMEND
4210; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
4211; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4212; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4213; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4214; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4215;
4216; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_3:
4217; GFX940:       ; %bb.0:
4218; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4219; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4220; GFX940-NEXT:    ;;#ASMSTART
4221; GFX940-NEXT:    ; def v[0:1]
4222; GFX940-NEXT:    ;;#ASMEND
4223; GFX940-NEXT:    ;;#ASMSTART
4224; GFX940-NEXT:    ; def v[2:3]
4225; GFX940-NEXT:    ;;#ASMEND
4226; GFX940-NEXT:    s_nop 0
4227; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
4228; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4229; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4230; GFX940-NEXT:    s_waitcnt vmcnt(0)
4231; GFX940-NEXT:    s_setpc_b64 s[30:31]
4232  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4233  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4234  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
4235  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4236  ret void
4237}
4238
; Two-input shuffle with mask <7, 0, 3>: the stored <3 x i16> is
; { %vec1[3], %vec0[0], %vec0[3] }. Both inputs are inline-asm defs with a
; "=v" constraint; the assertions show two defs, v[0:1] and v[2:3]
; (presumably %vec0 and %vec1 respectively).
; Expected code on every target: a v_alignbit_b32 of v0 and v3 by 16 feeding
; the dword store, plus a global_store_short_d16_hi of v1 at offset 4.
; The gfx940 assertions additionally expect an "s_nop 0" before the
; v_alignbit_b32, use a different scalar base, and carry "sc0 sc1"
; modifiers on the stores.
4239define void @v_shuffle_v3i16_v4i16__7_0_3(ptr addrspace(1) inreg %ptr) {
4240; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_3:
4241; GFX900:       ; %bb.0:
4242; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4243; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4244; GFX900-NEXT:    ;;#ASMSTART
4245; GFX900-NEXT:    ; def v[0:1]
4246; GFX900-NEXT:    ;;#ASMEND
4247; GFX900-NEXT:    ;;#ASMSTART
4248; GFX900-NEXT:    ; def v[2:3]
4249; GFX900-NEXT:    ;;#ASMEND
4250; GFX900-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4251; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4252; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4253; GFX900-NEXT:    s_waitcnt vmcnt(0)
4254; GFX900-NEXT:    s_setpc_b64 s[30:31]
4255;
4256; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_3:
4257; GFX90A:       ; %bb.0:
4258; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4259; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4260; GFX90A-NEXT:    ;;#ASMSTART
4261; GFX90A-NEXT:    ; def v[0:1]
4262; GFX90A-NEXT:    ;;#ASMEND
4263; GFX90A-NEXT:    ;;#ASMSTART
4264; GFX90A-NEXT:    ; def v[2:3]
4265; GFX90A-NEXT:    ;;#ASMEND
4266; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4267; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4268; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4269; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4270; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4271;
4272; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_3:
4273; GFX940:       ; %bb.0:
4274; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4275; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4276; GFX940-NEXT:    ;;#ASMSTART
4277; GFX940-NEXT:    ; def v[0:1]
4278; GFX940-NEXT:    ;;#ASMEND
4279; GFX940-NEXT:    ;;#ASMSTART
4280; GFX940-NEXT:    ; def v[2:3]
4281; GFX940-NEXT:    ;;#ASMEND
4282; GFX940-NEXT:    s_nop 0
4283; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4284; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4285; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4286; GFX940-NEXT:    s_waitcnt vmcnt(0)
4287; GFX940-NEXT:    s_setpc_b64 s[30:31]
4288  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4289  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4290  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
4291  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4292  ret void
4293}
4294
; Two-input shuffle with mask <7, 1, 3>: the stored <3 x i16> is
; { %vec1[3], %vec0[1], %vec0[3] }. Both inputs are inline-asm defs with a
; "=v" constraint; the assertions show two defs, v[0:1] and v[2:3]
; (presumably %vec0 and %vec1 respectively).
; Expected code on every target: a v_perm_b32 of v0 and v3 with selector
; 0x7060302 feeding the dword store, plus a global_store_short_d16_hi of v1
; at offset 4. The gfx940 assertions additionally expect an "s_nop 0" before
; the v_perm_b32, use different scalar registers, and carry "sc0 sc1"
; modifiers on the stores.
4295define void @v_shuffle_v3i16_v4i16__7_1_3(ptr addrspace(1) inreg %ptr) {
4296; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_3:
4297; GFX900:       ; %bb.0:
4298; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4299; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4300; GFX900-NEXT:    ;;#ASMSTART
4301; GFX900-NEXT:    ; def v[0:1]
4302; GFX900-NEXT:    ;;#ASMEND
4303; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4304; GFX900-NEXT:    ;;#ASMSTART
4305; GFX900-NEXT:    ; def v[2:3]
4306; GFX900-NEXT:    ;;#ASMEND
4307; GFX900-NEXT:    v_perm_b32 v0, v0, v3, s4
4308; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4309; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4310; GFX900-NEXT:    s_waitcnt vmcnt(0)
4311; GFX900-NEXT:    s_setpc_b64 s[30:31]
4312;
4313; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_3:
4314; GFX90A:       ; %bb.0:
4315; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4316; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4317; GFX90A-NEXT:    ;;#ASMSTART
4318; GFX90A-NEXT:    ; def v[0:1]
4319; GFX90A-NEXT:    ;;#ASMEND
4320; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4321; GFX90A-NEXT:    ;;#ASMSTART
4322; GFX90A-NEXT:    ; def v[2:3]
4323; GFX90A-NEXT:    ;;#ASMEND
4324; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
4325; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4326; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4327; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4328; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4329;
4330; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_3:
4331; GFX940:       ; %bb.0:
4332; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4333; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4334; GFX940-NEXT:    ;;#ASMSTART
4335; GFX940-NEXT:    ; def v[0:1]
4336; GFX940-NEXT:    ;;#ASMEND
4337; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4338; GFX940-NEXT:    ;;#ASMSTART
4339; GFX940-NEXT:    ; def v[2:3]
4340; GFX940-NEXT:    ;;#ASMEND
4341; GFX940-NEXT:    s_nop 0
4342; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
4343; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4344; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4345; GFX940-NEXT:    s_waitcnt vmcnt(0)
4346; GFX940-NEXT:    s_setpc_b64 s[30:31]
4347  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4348  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4349  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
4350  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4351  ret void
4352}
4353
; Two-input shuffle with mask <7, 2, 3>: the stored <3 x i16> is
; { %vec1[3], %vec0[2], %vec0[3] }. Both inputs are inline-asm defs with a
; "=v" constraint; the assertions show two defs, v[0:1] and v[2:3]
; (presumably %vec0 and %vec1 respectively).
; Expected code on every target: a v_alignbit_b32 of v1 and v3 by 16 feeding
; the dword store, plus a global_store_short_d16_hi of v1 at offset 4.
; The gfx940 assertions additionally expect an "s_nop 0" before the
; v_alignbit_b32, use a different scalar base, and carry "sc0 sc1"
; modifiers on the stores.
4354define void @v_shuffle_v3i16_v4i16__7_2_3(ptr addrspace(1) inreg %ptr) {
4355; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_3:
4356; GFX900:       ; %bb.0:
4357; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4358; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4359; GFX900-NEXT:    ;;#ASMSTART
4360; GFX900-NEXT:    ; def v[0:1]
4361; GFX900-NEXT:    ;;#ASMEND
4362; GFX900-NEXT:    ;;#ASMSTART
4363; GFX900-NEXT:    ; def v[2:3]
4364; GFX900-NEXT:    ;;#ASMEND
4365; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
4366; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4367; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4368; GFX900-NEXT:    s_waitcnt vmcnt(0)
4369; GFX900-NEXT:    s_setpc_b64 s[30:31]
4370;
4371; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_3:
4372; GFX90A:       ; %bb.0:
4373; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4374; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4375; GFX90A-NEXT:    ;;#ASMSTART
4376; GFX90A-NEXT:    ; def v[0:1]
4377; GFX90A-NEXT:    ;;#ASMEND
4378; GFX90A-NEXT:    ;;#ASMSTART
4379; GFX90A-NEXT:    ; def v[2:3]
4380; GFX90A-NEXT:    ;;#ASMEND
4381; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
4382; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4383; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4384; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4385; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4386;
4387; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_3:
4388; GFX940:       ; %bb.0:
4389; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4390; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4391; GFX940-NEXT:    ;;#ASMSTART
4392; GFX940-NEXT:    ; def v[0:1]
4393; GFX940-NEXT:    ;;#ASMEND
4394; GFX940-NEXT:    ;;#ASMSTART
4395; GFX940-NEXT:    ; def v[2:3]
4396; GFX940-NEXT:    ;;#ASMEND
4397; GFX940-NEXT:    s_nop 0
4398; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
4399; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4400; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4401; GFX940-NEXT:    s_waitcnt vmcnt(0)
4402; GFX940-NEXT:    s_setpc_b64 s[30:31]
4403  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4404  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4405  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
4406  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4407  ret void
4408}
4409
; Two-input shuffle with mask <7, 4, 3>: the stored <3 x i16> is
; { %vec1[3], %vec1[0], %vec0[3] }. Both inputs are inline-asm defs with a
; "=v" constraint; the assertions show two defs, v[0:1] and v[2:3]
; (presumably %vec0 and %vec1 respectively).
; Expected code on every target: a v_alignbit_b32 of v2 and v3 by 16 feeding
; the dword store, plus a global_store_short_d16_hi of v1 at offset 4.
; The gfx940 assertions additionally expect an "s_nop 0" before the
; v_alignbit_b32, use a different scalar base, and carry "sc0 sc1"
; modifiers on the stores.
4410define void @v_shuffle_v3i16_v4i16__7_4_3(ptr addrspace(1) inreg %ptr) {
4411; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_3:
4412; GFX900:       ; %bb.0:
4413; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4414; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4415; GFX900-NEXT:    ;;#ASMSTART
4416; GFX900-NEXT:    ; def v[0:1]
4417; GFX900-NEXT:    ;;#ASMEND
4418; GFX900-NEXT:    ;;#ASMSTART
4419; GFX900-NEXT:    ; def v[2:3]
4420; GFX900-NEXT:    ;;#ASMEND
4421; GFX900-NEXT:    v_alignbit_b32 v0, v2, v3, 16
4422; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4423; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4424; GFX900-NEXT:    s_waitcnt vmcnt(0)
4425; GFX900-NEXT:    s_setpc_b64 s[30:31]
4426;
4427; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_3:
4428; GFX90A:       ; %bb.0:
4429; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4430; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4431; GFX90A-NEXT:    ;;#ASMSTART
4432; GFX90A-NEXT:    ; def v[0:1]
4433; GFX90A-NEXT:    ;;#ASMEND
4434; GFX90A-NEXT:    ;;#ASMSTART
4435; GFX90A-NEXT:    ; def v[2:3]
4436; GFX90A-NEXT:    ;;#ASMEND
4437; GFX90A-NEXT:    v_alignbit_b32 v0, v2, v3, 16
4438; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4439; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4440; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4441; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4442;
4443; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_3:
4444; GFX940:       ; %bb.0:
4445; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4446; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4447; GFX940-NEXT:    ;;#ASMSTART
4448; GFX940-NEXT:    ; def v[0:1]
4449; GFX940-NEXT:    ;;#ASMEND
4450; GFX940-NEXT:    ;;#ASMSTART
4451; GFX940-NEXT:    ; def v[2:3]
4452; GFX940-NEXT:    ;;#ASMEND
4453; GFX940-NEXT:    s_nop 0
4454; GFX940-NEXT:    v_alignbit_b32 v0, v2, v3, 16
4455; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4456; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4457; GFX940-NEXT:    s_waitcnt vmcnt(0)
4458; GFX940-NEXT:    s_setpc_b64 s[30:31]
4459  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4460  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4461  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
4462  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4463  ret void
4464}
4465
; Two-input shuffle with mask <7, 5, 3>: the stored <3 x i16> is
; { %vec1[3], %vec1[1], %vec0[3] }. Both inputs are inline-asm defs with a
; "=v" constraint; the assertions show two defs, v[0:1] and v[2:3]
; (presumably %vec0 and %vec1 respectively).
; Expected code on every target: a v_perm_b32 of v2 and v3 with selector
; 0x7060302 feeding the dword store, plus a global_store_short_d16_hi of v1
; at offset 4. The gfx940 assertions additionally expect an "s_nop 0" before
; the v_perm_b32, use different scalar registers, and carry "sc0 sc1"
; modifiers on the stores.
4466define void @v_shuffle_v3i16_v4i16__7_5_3(ptr addrspace(1) inreg %ptr) {
4467; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_3:
4468; GFX900:       ; %bb.0:
4469; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4470; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4471; GFX900-NEXT:    ;;#ASMSTART
4472; GFX900-NEXT:    ; def v[0:1]
4473; GFX900-NEXT:    ;;#ASMEND
4474; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4475; GFX900-NEXT:    ;;#ASMSTART
4476; GFX900-NEXT:    ; def v[2:3]
4477; GFX900-NEXT:    ;;#ASMEND
4478; GFX900-NEXT:    v_perm_b32 v0, v2, v3, s4
4479; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4480; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4481; GFX900-NEXT:    s_waitcnt vmcnt(0)
4482; GFX900-NEXT:    s_setpc_b64 s[30:31]
4483;
4484; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_3:
4485; GFX90A:       ; %bb.0:
4486; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4487; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4488; GFX90A-NEXT:    ;;#ASMSTART
4489; GFX90A-NEXT:    ; def v[0:1]
4490; GFX90A-NEXT:    ;;#ASMEND
4491; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4492; GFX90A-NEXT:    ;;#ASMSTART
4493; GFX90A-NEXT:    ; def v[2:3]
4494; GFX90A-NEXT:    ;;#ASMEND
4495; GFX90A-NEXT:    v_perm_b32 v0, v2, v3, s4
4496; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4497; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4498; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4499; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4500;
4501; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_3:
4502; GFX940:       ; %bb.0:
4503; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4504; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4505; GFX940-NEXT:    ;;#ASMSTART
4506; GFX940-NEXT:    ; def v[0:1]
4507; GFX940-NEXT:    ;;#ASMEND
4508; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4509; GFX940-NEXT:    ;;#ASMSTART
4510; GFX940-NEXT:    ; def v[2:3]
4511; GFX940-NEXT:    ;;#ASMEND
4512; GFX940-NEXT:    s_nop 0
4513; GFX940-NEXT:    v_perm_b32 v0, v2, v3, s2
4514; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4515; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4516; GFX940-NEXT:    s_waitcnt vmcnt(0)
4517; GFX940-NEXT:    s_setpc_b64 s[30:31]
4518  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4519  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4520  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
4521  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4522  ret void
4523}
4524
; Mask <7, 6, 3>: lanes 0 and 1 read %vec1 (mask indices 4-7 address the
; second operand, i.e. its elements 3 and 2) and lane 2 is element 3 of %vec0.
; The expected code builds the first dword with one v_alignbit_b32 of v3 with
; itself and writes lane 2 with a d16_hi short store of v1 at offset 4.
4525define void @v_shuffle_v3i16_v4i16__7_6_3(ptr addrspace(1) inreg %ptr) {
4526; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_3:
4527; GFX900:       ; %bb.0:
4528; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4529; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4530; GFX900-NEXT:    ;;#ASMSTART
4531; GFX900-NEXT:    ; def v[0:1]
4532; GFX900-NEXT:    ;;#ASMEND
4533; GFX900-NEXT:    ;;#ASMSTART
4534; GFX900-NEXT:    ; def v[2:3]
4535; GFX900-NEXT:    ;;#ASMEND
4536; GFX900-NEXT:    v_alignbit_b32 v0, v3, v3, 16
4537; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4538; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4539; GFX900-NEXT:    s_waitcnt vmcnt(0)
4540; GFX900-NEXT:    s_setpc_b64 s[30:31]
4541;
4542; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_3:
4543; GFX90A:       ; %bb.0:
4544; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4545; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4546; GFX90A-NEXT:    ;;#ASMSTART
4547; GFX90A-NEXT:    ; def v[0:1]
4548; GFX90A-NEXT:    ;;#ASMEND
4549; GFX90A-NEXT:    ;;#ASMSTART
4550; GFX90A-NEXT:    ; def v[2:3]
4551; GFX90A-NEXT:    ;;#ASMEND
4552; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v3, 16
4553; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4554; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4555; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4556; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4557;
4558; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_3:
4559; GFX940:       ; %bb.0:
4560; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4561; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4562; GFX940-NEXT:    ;;#ASMSTART
4563; GFX940-NEXT:    ; def v[0:1]
4564; GFX940-NEXT:    ;;#ASMEND
4565; GFX940-NEXT:    ;;#ASMSTART
4566; GFX940-NEXT:    ; def v[2:3]
4567; GFX940-NEXT:    ;;#ASMEND
4568; GFX940-NEXT:    s_nop 0
4569; GFX940-NEXT:    v_alignbit_b32 v0, v3, v3, 16
4570; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4571; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4572; GFX940-NEXT:    s_waitcnt vmcnt(0)
4573; GFX940-NEXT:    s_setpc_b64 s[30:31]
4574  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4575  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4576  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
4577  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4578  ret void
4579}
4580
; Mask <poison, 4, 4> with a poison second operand: index 4 addresses the
; poison vector, so no lane of the result carries a defined value.
; The expected code (shared by all three RUN lines) has no inline-asm def and
; no store, only the entry wait and the return.
4581define void @v_shuffle_v3i16_v4i16__u_4_4(ptr addrspace(1) inreg %ptr) {
4582; GFX9-LABEL: v_shuffle_v3i16_v4i16__u_4_4:
4583; GFX9:       ; %bb.0:
4584; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4585; GFX9-NEXT:    s_setpc_b64 s[30:31]
4586  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4587  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 4, i32 4>
4588  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4589  ret void
4590}
4591
; Mask <0, 4, 4> with a poison second operand: only lane 0 (element 0 of
; %vec0) is defined; lanes 1 and 2 address the poison vector.
; The expected code needs no shuffle instruction: v0 is stored as a dword and
; v1 as a short at offset 4.
4592define void @v_shuffle_v3i16_v4i16__0_4_4(ptr addrspace(1) inreg %ptr) {
4593; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_4_4:
4594; GFX900:       ; %bb.0:
4595; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4596; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4597; GFX900-NEXT:    ;;#ASMSTART
4598; GFX900-NEXT:    ; def v[0:1]
4599; GFX900-NEXT:    ;;#ASMEND
4600; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4601; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4602; GFX900-NEXT:    s_waitcnt vmcnt(0)
4603; GFX900-NEXT:    s_setpc_b64 s[30:31]
4604;
4605; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_4_4:
4606; GFX90A:       ; %bb.0:
4607; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4608; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4609; GFX90A-NEXT:    ;;#ASMSTART
4610; GFX90A-NEXT:    ; def v[0:1]
4611; GFX90A-NEXT:    ;;#ASMEND
4612; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4613; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4614; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4615; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4616;
4617; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_4_4:
4618; GFX940:       ; %bb.0:
4619; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4620; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4621; GFX940-NEXT:    ;;#ASMSTART
4622; GFX940-NEXT:    ; def v[0:1]
4623; GFX940-NEXT:    ;;#ASMEND
4624; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4625; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4626; GFX940-NEXT:    s_waitcnt vmcnt(0)
4627; GFX940-NEXT:    s_setpc_b64 s[30:31]
4628  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4629  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 4, i32 4>
4630  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4631  ret void
4632}
4633
; Mask <1, 4, 4> with a poison second operand: lane 0 is element 1 of %vec0;
; lanes 1 and 2 address the poison vector.
; The expected code shifts element 1 into the low half with a single
; v_alignbit_b32 by 16 on v0 and emits only one dword store (no store at
; offset 4).
4634define void @v_shuffle_v3i16_v4i16__1_4_4(ptr addrspace(1) inreg %ptr) {
4635; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_4_4:
4636; GFX900:       ; %bb.0:
4637; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4638; GFX900-NEXT:    ;;#ASMSTART
4639; GFX900-NEXT:    ; def v[0:1]
4640; GFX900-NEXT:    ;;#ASMEND
4641; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4642; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
4643; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4644; GFX900-NEXT:    s_waitcnt vmcnt(0)
4645; GFX900-NEXT:    s_setpc_b64 s[30:31]
4646;
4647; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_4_4:
4648; GFX90A:       ; %bb.0:
4649; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4650; GFX90A-NEXT:    ;;#ASMSTART
4651; GFX90A-NEXT:    ; def v[0:1]
4652; GFX90A-NEXT:    ;;#ASMEND
4653; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4654; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
4655; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4656; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4657; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4658;
4659; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_4_4:
4660; GFX940:       ; %bb.0:
4661; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4662; GFX940-NEXT:    ;;#ASMSTART
4663; GFX940-NEXT:    ; def v[0:1]
4664; GFX940-NEXT:    ;;#ASMEND
4665; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4666; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
4667; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4668; GFX940-NEXT:    s_waitcnt vmcnt(0)
4669; GFX940-NEXT:    s_setpc_b64 s[30:31]
4670  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4671  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 4, i32 4>
4672  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4673  ret void
4674}
4675
; Mask <2, 4, 4> with a poison second operand: lane 0 is element 2 of %vec0;
; lanes 1 and 2 address the poison vector.
; The expected code stores v1 directly as a single dword, with no shuffle
; instruction and no store at offset 4.
4676define void @v_shuffle_v3i16_v4i16__2_4_4(ptr addrspace(1) inreg %ptr) {
4677; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_4_4:
4678; GFX900:       ; %bb.0:
4679; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4680; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4681; GFX900-NEXT:    ;;#ASMSTART
4682; GFX900-NEXT:    ; def v[0:1]
4683; GFX900-NEXT:    ;;#ASMEND
4684; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4685; GFX900-NEXT:    s_waitcnt vmcnt(0)
4686; GFX900-NEXT:    s_setpc_b64 s[30:31]
4687;
4688; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_4_4:
4689; GFX90A:       ; %bb.0:
4690; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4691; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4692; GFX90A-NEXT:    ;;#ASMSTART
4693; GFX90A-NEXT:    ; def v[0:1]
4694; GFX90A-NEXT:    ;;#ASMEND
4695; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4696; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4697; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4698;
4699; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_4_4:
4700; GFX940:       ; %bb.0:
4701; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4702; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4703; GFX940-NEXT:    ;;#ASMSTART
4704; GFX940-NEXT:    ; def v[0:1]
4705; GFX940-NEXT:    ;;#ASMEND
4706; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4707; GFX940-NEXT:    s_waitcnt vmcnt(0)
4708; GFX940-NEXT:    s_setpc_b64 s[30:31]
4709  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4710  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 4, i32 4>
4711  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4712  ret void
4713}
4714
; Mask <3, 4, 4> with a poison second operand: lane 0 is element 3 of %vec0;
; lanes 1 and 2 address the poison vector.
; The expected code shifts element 3 into the low half with a single
; v_alignbit_b32 by 16 on v1 and emits only one dword store (no store at
; offset 4).
4715define void @v_shuffle_v3i16_v4i16__3_4_4(ptr addrspace(1) inreg %ptr) {
4716; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_4_4:
4717; GFX900:       ; %bb.0:
4718; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4719; GFX900-NEXT:    ;;#ASMSTART
4720; GFX900-NEXT:    ; def v[0:1]
4721; GFX900-NEXT:    ;;#ASMEND
4722; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4723; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
4724; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4725; GFX900-NEXT:    s_waitcnt vmcnt(0)
4726; GFX900-NEXT:    s_setpc_b64 s[30:31]
4727;
4728; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_4_4:
4729; GFX90A:       ; %bb.0:
4730; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4731; GFX90A-NEXT:    ;;#ASMSTART
4732; GFX90A-NEXT:    ; def v[0:1]
4733; GFX90A-NEXT:    ;;#ASMEND
4734; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4735; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
4736; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4737; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4738; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4739;
4740; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_4_4:
4741; GFX940:       ; %bb.0:
4742; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4743; GFX940-NEXT:    ;;#ASMSTART
4744; GFX940-NEXT:    ; def v[0:1]
4745; GFX940-NEXT:    ;;#ASMEND
4746; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4747; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
4748; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4749; GFX940-NEXT:    s_waitcnt vmcnt(0)
4750; GFX940-NEXT:    s_setpc_b64 s[30:31]
4751  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4752  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 4, i32 4>
4753  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4754  ret void
4755}
4756
; Mask <4, 4, 4> with a poison second operand: all three lanes address the
; poison vector, so %vec0 is never read by the shuffle.
; The expected code (shared by all three RUN lines) has no inline-asm def and
; no store, only the entry wait and the return.
4757define void @v_shuffle_v3i16_v4i16__4_4_4(ptr addrspace(1) inreg %ptr) {
4758; GFX9-LABEL: v_shuffle_v3i16_v4i16__4_4_4:
4759; GFX9:       ; %bb.0:
4760; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4761; GFX9-NEXT:    s_setpc_b64 s[30:31]
4762  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4763  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 4, i32 4>
4764  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4765  ret void
4766}
4767
; Mask <5, 4, 4>: every lane reads %vec1 (its elements 1, 0, 0); %vec0 is
; defined in the IR but never selected by the mask.
; The expected code accordingly contains a single inline-asm def, swaps the
; halves of v0 with v_alignbit_b32 for the first dword, and stores v0 as a
; short at offset 4.
4768define void @v_shuffle_v3i16_v4i16__5_4_4(ptr addrspace(1) inreg %ptr) {
4769; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_4_4:
4770; GFX900:       ; %bb.0:
4771; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4772; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4773; GFX900-NEXT:    ;;#ASMSTART
4774; GFX900-NEXT:    ; def v[0:1]
4775; GFX900-NEXT:    ;;#ASMEND
4776; GFX900-NEXT:    v_alignbit_b32 v1, v0, v0, 16
4777; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4778; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4779; GFX900-NEXT:    s_waitcnt vmcnt(0)
4780; GFX900-NEXT:    s_setpc_b64 s[30:31]
4781;
4782; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_4_4:
4783; GFX90A:       ; %bb.0:
4784; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4785; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4786; GFX90A-NEXT:    ;;#ASMSTART
4787; GFX90A-NEXT:    ; def v[0:1]
4788; GFX90A-NEXT:    ;;#ASMEND
4789; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v0, 16
4790; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4791; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4792; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4793; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4794;
4795; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_4_4:
4796; GFX940:       ; %bb.0:
4797; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4798; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4799; GFX940-NEXT:    ;;#ASMSTART
4800; GFX940-NEXT:    ; def v[0:1]
4801; GFX940-NEXT:    ;;#ASMEND
4802; GFX940-NEXT:    s_nop 0
4803; GFX940-NEXT:    v_alignbit_b32 v1, v0, v0, 16
4804; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4805; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4806; GFX940-NEXT:    s_waitcnt vmcnt(0)
4807; GFX940-NEXT:    s_setpc_b64 s[30:31]
4808  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4809  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4810  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
4811  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4812  ret void
4813}
4814
; Mask <6, 4, 4>: every lane reads %vec1 (its elements 2, 0, 0); %vec0 is
; defined in the IR but never selected by the mask.
; The expected code contains a single inline-asm def, combines v0 and v1 with
; v_perm_b32 using selector 0x5040100 for the first dword, and stores v0 as a
; short at offset 4.
4815define void @v_shuffle_v3i16_v4i16__6_4_4(ptr addrspace(1) inreg %ptr) {
4816; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_4_4:
4817; GFX900:       ; %bb.0:
4818; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4819; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4820; GFX900-NEXT:    ;;#ASMSTART
4821; GFX900-NEXT:    ; def v[0:1]
4822; GFX900-NEXT:    ;;#ASMEND
4823; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4824; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
4825; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4826; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4827; GFX900-NEXT:    s_waitcnt vmcnt(0)
4828; GFX900-NEXT:    s_setpc_b64 s[30:31]
4829;
4830; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_4_4:
4831; GFX90A:       ; %bb.0:
4832; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4833; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4834; GFX90A-NEXT:    ;;#ASMSTART
4835; GFX90A-NEXT:    ; def v[0:1]
4836; GFX90A-NEXT:    ;;#ASMEND
4837; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4838; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
4839; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4840; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4841; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4842; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4843;
4844; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_4_4:
4845; GFX940:       ; %bb.0:
4846; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4847; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4848; GFX940-NEXT:    ;;#ASMSTART
4849; GFX940-NEXT:    ; def v[0:1]
4850; GFX940-NEXT:    ;;#ASMEND
4851; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4852; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
4853; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4854; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4855; GFX940-NEXT:    s_waitcnt vmcnt(0)
4856; GFX940-NEXT:    s_setpc_b64 s[30:31]
4857  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4858  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4859  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
4860  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4861  ret void
4862}
4863
; Mask <7, 4, 4>: every lane reads %vec1 (its elements 3, 0, 0); %vec0 is
; defined in the IR but never selected by the mask.
; The expected code contains a single inline-asm def, forms the first dword
; with v_alignbit_b32 over v0 and v1, and stores v0 as a short at offset 4.
4864define void @v_shuffle_v3i16_v4i16__7_4_4(ptr addrspace(1) inreg %ptr) {
4865; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_4:
4866; GFX900:       ; %bb.0:
4867; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4868; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4869; GFX900-NEXT:    ;;#ASMSTART
4870; GFX900-NEXT:    ; def v[0:1]
4871; GFX900-NEXT:    ;;#ASMEND
4872; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
4873; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4874; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4875; GFX900-NEXT:    s_waitcnt vmcnt(0)
4876; GFX900-NEXT:    s_setpc_b64 s[30:31]
4877;
4878; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_4:
4879; GFX90A:       ; %bb.0:
4880; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4881; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4882; GFX90A-NEXT:    ;;#ASMSTART
4883; GFX90A-NEXT:    ; def v[0:1]
4884; GFX90A-NEXT:    ;;#ASMEND
4885; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v1, 16
4886; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4887; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4888; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4889; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4890;
4891; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_4:
4892; GFX940:       ; %bb.0:
4893; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4894; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4895; GFX940-NEXT:    ;;#ASMSTART
4896; GFX940-NEXT:    ; def v[0:1]
4897; GFX940-NEXT:    ;;#ASMEND
4898; GFX940-NEXT:    s_nop 0
4899; GFX940-NEXT:    v_alignbit_b32 v1, v0, v1, 16
4900; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4901; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4902; GFX940-NEXT:    s_waitcnt vmcnt(0)
4903; GFX940-NEXT:    s_setpc_b64 s[30:31]
4904  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4905  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4906  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
4907  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4908  ret void
4909}
4910
; Mask <7, poison, 4>: lanes 0 and 2 read %vec1 (its elements 3 and 0), lane 1
; is poison; %vec0 is defined in the IR but never selected by the mask.
; The expected code contains a single inline-asm def and a v_alignbit_b32
; whose first source is an SGPR (s4 or s0, depending on the RUN line) --
; presumably an arbitrary filler for the poison lane; confirm before relying
; on it. v0 is stored as a short at offset 4.
4911define void @v_shuffle_v3i16_v4i16__7_u_4(ptr addrspace(1) inreg %ptr) {
4912; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_4:
4913; GFX900:       ; %bb.0:
4914; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4915; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4916; GFX900-NEXT:    ;;#ASMSTART
4917; GFX900-NEXT:    ; def v[0:1]
4918; GFX900-NEXT:    ;;#ASMEND
4919; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
4920; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4921; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4922; GFX900-NEXT:    s_waitcnt vmcnt(0)
4923; GFX900-NEXT:    s_setpc_b64 s[30:31]
4924;
4925; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_4:
4926; GFX90A:       ; %bb.0:
4927; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4928; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4929; GFX90A-NEXT:    ;;#ASMSTART
4930; GFX90A-NEXT:    ; def v[0:1]
4931; GFX90A-NEXT:    ;;#ASMEND
4932; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v1, 16
4933; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4934; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4935; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4936; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4937;
4938; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_4:
4939; GFX940:       ; %bb.0:
4940; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4941; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4942; GFX940-NEXT:    ;;#ASMSTART
4943; GFX940-NEXT:    ; def v[0:1]
4944; GFX940-NEXT:    ;;#ASMEND
4945; GFX940-NEXT:    s_nop 0
4946; GFX940-NEXT:    v_alignbit_b32 v1, s0, v1, 16
4947; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4948; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4949; GFX940-NEXT:    s_waitcnt vmcnt(0)
4950; GFX940-NEXT:    s_setpc_b64 s[30:31]
4951  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4952  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4953  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
4954  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4955  ret void
4956}
4957
; Mask <7, 0, 4>: lane 0 is element 3 of %vec1, lane 1 is element 0 of %vec0,
; lane 2 is element 0 of %vec1, so both inline-asm defs are live.
; The expected code forms the first dword with v_alignbit_b32 and stores the
; low register of the second def as a short at offset 4. Register assignment
; differs between RUN lines: the gfx900 expectations place the second def in
; v[1:2] (overlapping v1 of the first def), the others in v[2:3].
4958define void @v_shuffle_v3i16_v4i16__7_0_4(ptr addrspace(1) inreg %ptr) {
4959; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_4:
4960; GFX900:       ; %bb.0:
4961; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4962; GFX900-NEXT:    ;;#ASMSTART
4963; GFX900-NEXT:    ; def v[0:1]
4964; GFX900-NEXT:    ;;#ASMEND
4965; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4966; GFX900-NEXT:    ;;#ASMSTART
4967; GFX900-NEXT:    ; def v[1:2]
4968; GFX900-NEXT:    ;;#ASMEND
4969; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
4970; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
4971; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
4972; GFX900-NEXT:    s_waitcnt vmcnt(0)
4973; GFX900-NEXT:    s_setpc_b64 s[30:31]
4974;
4975; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_4:
4976; GFX90A:       ; %bb.0:
4977; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4978; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4979; GFX90A-NEXT:    ;;#ASMSTART
4980; GFX90A-NEXT:    ; def v[0:1]
4981; GFX90A-NEXT:    ;;#ASMEND
4982; GFX90A-NEXT:    ;;#ASMSTART
4983; GFX90A-NEXT:    ; def v[2:3]
4984; GFX90A-NEXT:    ;;#ASMEND
4985; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4986; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
4987; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4988; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4989; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4990;
4991; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_4:
4992; GFX940:       ; %bb.0:
4993; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4994; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4995; GFX940-NEXT:    ;;#ASMSTART
4996; GFX940-NEXT:    ; def v[0:1]
4997; GFX940-NEXT:    ;;#ASMEND
4998; GFX940-NEXT:    ;;#ASMSTART
4999; GFX940-NEXT:    ; def v[2:3]
5000; GFX940-NEXT:    ;;#ASMEND
5001; GFX940-NEXT:    s_nop 0
5002; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
5003; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5004; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5005; GFX940-NEXT:    s_waitcnt vmcnt(0)
5006; GFX940-NEXT:    s_setpc_b64 s[30:31]
5007  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5008  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5009  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
5010  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5011  ret void
5012}
5013
; Mask <7, 1, 4>: lane 0 is element 3 of %vec1, lane 1 is element 1 of %vec0,
; lane 2 is element 0 of %vec1, so both inline-asm defs are live.
; The expected code forms the first dword with v_perm_b32 using selector
; 0x7060302 and stores the low register of the second def as a short at
; offset 4. The gfx900 expectations place the second def in v[1:2]; the
; others use v[2:3].
5014define void @v_shuffle_v3i16_v4i16__7_1_4(ptr addrspace(1) inreg %ptr) {
5015; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_4:
5016; GFX900:       ; %bb.0:
5017; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5018; GFX900-NEXT:    ;;#ASMSTART
5019; GFX900-NEXT:    ; def v[0:1]
5020; GFX900-NEXT:    ;;#ASMEND
5021; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5022; GFX900-NEXT:    ;;#ASMSTART
5023; GFX900-NEXT:    ; def v[1:2]
5024; GFX900-NEXT:    ;;#ASMEND
5025; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5026; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
5027; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
5028; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5029; GFX900-NEXT:    s_waitcnt vmcnt(0)
5030; GFX900-NEXT:    s_setpc_b64 s[30:31]
5031;
5032; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_4:
5033; GFX90A:       ; %bb.0:
5034; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5035; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5036; GFX90A-NEXT:    ;;#ASMSTART
5037; GFX90A-NEXT:    ; def v[0:1]
5038; GFX90A-NEXT:    ;;#ASMEND
5039; GFX90A-NEXT:    ;;#ASMSTART
5040; GFX90A-NEXT:    ; def v[2:3]
5041; GFX90A-NEXT:    ;;#ASMEND
5042; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5043; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
5044; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5045; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5046; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5047; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5048;
5049; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_4:
5050; GFX940:       ; %bb.0:
5051; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5052; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5053; GFX940-NEXT:    ;;#ASMSTART
5054; GFX940-NEXT:    ; def v[0:1]
5055; GFX940-NEXT:    ;;#ASMEND
5056; GFX940-NEXT:    ;;#ASMSTART
5057; GFX940-NEXT:    ; def v[2:3]
5058; GFX940-NEXT:    ;;#ASMEND
5059; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5060; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
5061; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5062; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5063; GFX940-NEXT:    s_waitcnt vmcnt(0)
5064; GFX940-NEXT:    s_setpc_b64 s[30:31]
5065  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5066  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5067  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
5068  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5069  ret void
5070}
5071
; Mask <7, 2, 4>: lane 0 is element 3 of %vec1, lane 1 is element 2 of %vec0,
; lane 2 is element 0 of %vec1, so both inline-asm defs are live.
; The expected code forms the first dword with v_alignbit_b32 over v1 and v3
; and stores v2 as a short at offset 4.
5072define void @v_shuffle_v3i16_v4i16__7_2_4(ptr addrspace(1) inreg %ptr) {
5073; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_4:
5074; GFX900:       ; %bb.0:
5075; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5076; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5077; GFX900-NEXT:    ;;#ASMSTART
5078; GFX900-NEXT:    ; def v[0:1]
5079; GFX900-NEXT:    ;;#ASMEND
5080; GFX900-NEXT:    ;;#ASMSTART
5081; GFX900-NEXT:    ; def v[2:3]
5082; GFX900-NEXT:    ;;#ASMEND
5083; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5084; GFX900-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5085; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5086; GFX900-NEXT:    s_waitcnt vmcnt(0)
5087; GFX900-NEXT:    s_setpc_b64 s[30:31]
5088;
5089; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_4:
5090; GFX90A:       ; %bb.0:
5091; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5092; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5093; GFX90A-NEXT:    ;;#ASMSTART
5094; GFX90A-NEXT:    ; def v[0:1]
5095; GFX90A-NEXT:    ;;#ASMEND
5096; GFX90A-NEXT:    ;;#ASMSTART
5097; GFX90A-NEXT:    ; def v[2:3]
5098; GFX90A-NEXT:    ;;#ASMEND
5099; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5100; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5101; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5102; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5103; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5104;
5105; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_4:
5106; GFX940:       ; %bb.0:
5107; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5108; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5109; GFX940-NEXT:    ;;#ASMSTART
5110; GFX940-NEXT:    ; def v[0:1]
5111; GFX940-NEXT:    ;;#ASMEND
5112; GFX940-NEXT:    ;;#ASMSTART
5113; GFX940-NEXT:    ; def v[2:3]
5114; GFX940-NEXT:    ;;#ASMEND
5115; GFX940-NEXT:    s_nop 0
5116; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5117; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5118; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5119; GFX940-NEXT:    s_waitcnt vmcnt(0)
5120; GFX940-NEXT:    s_setpc_b64 s[30:31]
5121  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5122  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5123  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
5124  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5125  ret void
5126}
5127
; Mask <7, 3, 4>: lane 0 is element 3 of %vec1, lane 1 is element 3 of %vec0,
; lane 2 is element 0 of %vec1, so both inline-asm defs are live.
; The expected code forms the first dword with v_perm_b32 over v1 and v3 using
; selector 0x7060302 and stores v2 as a short at offset 4.
5128define void @v_shuffle_v3i16_v4i16__7_3_4(ptr addrspace(1) inreg %ptr) {
5129; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_4:
5130; GFX900:       ; %bb.0:
5131; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5132; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5133; GFX900-NEXT:    ;;#ASMSTART
5134; GFX900-NEXT:    ; def v[0:1]
5135; GFX900-NEXT:    ;;#ASMEND
5136; GFX900-NEXT:    ;;#ASMSTART
5137; GFX900-NEXT:    ; def v[2:3]
5138; GFX900-NEXT:    ;;#ASMEND
5139; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5140; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
5141; GFX900-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5142; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5143; GFX900-NEXT:    s_waitcnt vmcnt(0)
5144; GFX900-NEXT:    s_setpc_b64 s[30:31]
5145;
5146; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_4:
5147; GFX90A:       ; %bb.0:
5148; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5149; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5150; GFX90A-NEXT:    ;;#ASMSTART
5151; GFX90A-NEXT:    ; def v[0:1]
5152; GFX90A-NEXT:    ;;#ASMEND
5153; GFX90A-NEXT:    ;;#ASMSTART
5154; GFX90A-NEXT:    ; def v[2:3]
5155; GFX90A-NEXT:    ;;#ASMEND
5156; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5157; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
5158; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5159; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5160; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5161; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5162;
5163; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_4:
5164; GFX940:       ; %bb.0:
5165; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5166; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5167; GFX940-NEXT:    ;;#ASMSTART
5168; GFX940-NEXT:    ; def v[0:1]
5169; GFX940-NEXT:    ;;#ASMEND
5170; GFX940-NEXT:    ;;#ASMSTART
5171; GFX940-NEXT:    ; def v[2:3]
5172; GFX940-NEXT:    ;;#ASMEND
5173; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5174; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
5175; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5176; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5177; GFX940-NEXT:    s_waitcnt vmcnt(0)
5178; GFX940-NEXT:    s_setpc_b64 s[30:31]
5179  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5180  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5181  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
5182  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5183  ret void
5184}
5185
; Mask <7, 5, 4>: every lane reads %vec1 (its elements 3, 1, 0); %vec0 is
; defined in the IR but never selected by the mask.
; The expected code contains a single inline-asm def, forms the first dword
; with v_perm_b32 over v0 and v1 using selector 0x7060302, and stores v0 as a
; short at offset 4.
5186define void @v_shuffle_v3i16_v4i16__7_5_4(ptr addrspace(1) inreg %ptr) {
5187; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_4:
5188; GFX900:       ; %bb.0:
5189; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5190; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5191; GFX900-NEXT:    ;;#ASMSTART
5192; GFX900-NEXT:    ; def v[0:1]
5193; GFX900-NEXT:    ;;#ASMEND
5194; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5195; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
5196; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5197; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5198; GFX900-NEXT:    s_waitcnt vmcnt(0)
5199; GFX900-NEXT:    s_setpc_b64 s[30:31]
5200;
5201; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_4:
5202; GFX90A:       ; %bb.0:
5203; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5204; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5205; GFX90A-NEXT:    ;;#ASMSTART
5206; GFX90A-NEXT:    ; def v[0:1]
5207; GFX90A-NEXT:    ;;#ASMEND
5208; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5209; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
5210; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5211; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5212; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5213; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5214;
5215; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_4:
5216; GFX940:       ; %bb.0:
5217; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5218; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5219; GFX940-NEXT:    ;;#ASMSTART
5220; GFX940-NEXT:    ; def v[0:1]
5221; GFX940-NEXT:    ;;#ASMEND
5222; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5223; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
5224; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5225; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5226; GFX940-NEXT:    s_waitcnt vmcnt(0)
5227; GFX940-NEXT:    s_setpc_b64 s[30:31]
5228  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5229  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5230  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
5231  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5232  ret void
5233}
5234
; Mask <7, 6, 4>: every lane reads %vec1 (its elements 3, 2, 0); %vec0 is
; defined in the IR but never selected by the mask.
; The expected code contains a single inline-asm def, swaps the halves of v1
; with v_alignbit_b32 for the first dword, and stores v0 as a short at
; offset 4.
5235define void @v_shuffle_v3i16_v4i16__7_6_4(ptr addrspace(1) inreg %ptr) {
5236; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_4:
5237; GFX900:       ; %bb.0:
5238; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5239; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5240; GFX900-NEXT:    ;;#ASMSTART
5241; GFX900-NEXT:    ; def v[0:1]
5242; GFX900-NEXT:    ;;#ASMEND
5243; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
5244; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5245; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5246; GFX900-NEXT:    s_waitcnt vmcnt(0)
5247; GFX900-NEXT:    s_setpc_b64 s[30:31]
5248;
5249; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_4:
5250; GFX90A:       ; %bb.0:
5251; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5252; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5253; GFX90A-NEXT:    ;;#ASMSTART
5254; GFX90A-NEXT:    ; def v[0:1]
5255; GFX90A-NEXT:    ;;#ASMEND
5256; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v1, 16
5257; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5258; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5259; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5260; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5261;
5262; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_4:
5263; GFX940:       ; %bb.0:
5264; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5265; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5266; GFX940-NEXT:    ;;#ASMSTART
5267; GFX940-NEXT:    ; def v[0:1]
5268; GFX940-NEXT:    ;;#ASMEND
5269; GFX940-NEXT:    s_nop 0
5270; GFX940-NEXT:    v_alignbit_b32 v1, v1, v1, 16
5271; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5272; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5273; GFX940-NEXT:    s_waitcnt vmcnt(0)
5274; GFX940-NEXT:    s_setpc_b64 s[30:31]
5275  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5276  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5277  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
5278  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5279  ret void
5280}
5281
; Shuffle mask <poison, 5, 5>: lane 0 is unconstrained and lanes 1 and 2 both
; take %vec1[1]; %vec0 is never selected.
; The expected code for each target shows a single "def" asm block and no
; ALU work on the data: a global_store_short_d16_hi of v0 at offset 4 and a
; dword store of v0. The check lines are autogenerated (see the NOTE at the
; top of the file).
5282define void @v_shuffle_v3i16_v4i16__u_5_5(ptr addrspace(1) inreg %ptr) {
5283; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_5_5:
5284; GFX900:       ; %bb.0:
5285; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5286; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5287; GFX900-NEXT:    ;;#ASMSTART
5288; GFX900-NEXT:    ; def v[0:1]
5289; GFX900-NEXT:    ;;#ASMEND
5290; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
5291; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
5292; GFX900-NEXT:    s_waitcnt vmcnt(0)
5293; GFX900-NEXT:    s_setpc_b64 s[30:31]
5294;
5295; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_5_5:
5296; GFX90A:       ; %bb.0:
5297; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5298; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5299; GFX90A-NEXT:    ;;#ASMSTART
5300; GFX90A-NEXT:    ; def v[0:1]
5301; GFX90A-NEXT:    ;;#ASMEND
5302; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
5303; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
5304; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5305; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5306;
5307; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_5_5:
5308; GFX940:       ; %bb.0:
5309; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5310; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5311; GFX940-NEXT:    ;;#ASMSTART
5312; GFX940-NEXT:    ; def v[0:1]
5313; GFX940-NEXT:    ;;#ASMEND
5314; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
5315; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
5316; GFX940-NEXT:    s_waitcnt vmcnt(0)
5317; GFX940-NEXT:    s_setpc_b64 s[30:31]
5318  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5319  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5320  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
5321  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5322  ret void
5323}
5324
; Shuffle mask <0, 5, 5>: the stored <3 x i16> is
; <%vec0[0], %vec1[1], %vec1[1]>, mixing both operands.
; The expected code shows two "def" asm blocks, a v_bfi_b32 driven by a
; 0xffff mask held in an SGPR, a v_lshrrev_b32 by 16, and a dword plus a
; short store. The gfx900 lines place the second def in v[1:2], the other
; two targets in v[2:3]. The check lines are autogenerated (see the NOTE at
; the top of the file).
5325define void @v_shuffle_v3i16_v4i16__0_5_5(ptr addrspace(1) inreg %ptr) {
5326; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_5_5:
5327; GFX900:       ; %bb.0:
5328; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5329; GFX900-NEXT:    ;;#ASMSTART
5330; GFX900-NEXT:    ; def v[0:1]
5331; GFX900-NEXT:    ;;#ASMEND
5332; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5333; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5334; GFX900-NEXT:    ;;#ASMSTART
5335; GFX900-NEXT:    ; def v[1:2]
5336; GFX900-NEXT:    ;;#ASMEND
5337; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
5338; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5339; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
5340; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
5341; GFX900-NEXT:    s_waitcnt vmcnt(0)
5342; GFX900-NEXT:    s_setpc_b64 s[30:31]
5343;
5344; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_5_5:
5345; GFX90A:       ; %bb.0:
5346; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5347; GFX90A-NEXT:    ;;#ASMSTART
5348; GFX90A-NEXT:    ; def v[0:1]
5349; GFX90A-NEXT:    ;;#ASMEND
5350; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5351; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5352; GFX90A-NEXT:    ;;#ASMSTART
5353; GFX90A-NEXT:    ; def v[2:3]
5354; GFX90A-NEXT:    ;;#ASMEND
5355; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v2
5356; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5357; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
5358; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
5359; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5360; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5361;
5362; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_5_5:
5363; GFX940:       ; %bb.0:
5364; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5365; GFX940-NEXT:    ;;#ASMSTART
5366; GFX940-NEXT:    ; def v[0:1]
5367; GFX940-NEXT:    ;;#ASMEND
5368; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5369; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5370; GFX940-NEXT:    ;;#ASMSTART
5371; GFX940-NEXT:    ; def v[2:3]
5372; GFX940-NEXT:    ;;#ASMEND
5373; GFX940-NEXT:    s_nop 0
5374; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v2
5375; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5376; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
5377; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
5378; GFX940-NEXT:    s_waitcnt vmcnt(0)
5379; GFX940-NEXT:    s_setpc_b64 s[30:31]
5380  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5381  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5382  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
5383  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5384  ret void
5385}
5386
; Shuffle mask <1, 5, 5>: the stored <3 x i16> is
; <%vec0[1], %vec1[1], %vec1[1]>, mixing both operands.
; The expected code shows two "def" asm blocks, a v_perm_b32 whose selector
; 0x7060302 is materialised in an SGPR, a v_lshrrev_b32 by 16, and a dword
; plus a short store. The gfx900 lines place the second def in v[1:2], the
; other two targets in v[2:3]. The check lines are autogenerated (see the
; NOTE at the top of the file).
5387define void @v_shuffle_v3i16_v4i16__1_5_5(ptr addrspace(1) inreg %ptr) {
5388; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_5_5:
5389; GFX900:       ; %bb.0:
5390; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5391; GFX900-NEXT:    ;;#ASMSTART
5392; GFX900-NEXT:    ; def v[0:1]
5393; GFX900-NEXT:    ;;#ASMEND
5394; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5395; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5396; GFX900-NEXT:    ;;#ASMSTART
5397; GFX900-NEXT:    ; def v[1:2]
5398; GFX900-NEXT:    ;;#ASMEND
5399; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
5400; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5401; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
5402; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
5403; GFX900-NEXT:    s_waitcnt vmcnt(0)
5404; GFX900-NEXT:    s_setpc_b64 s[30:31]
5405;
5406; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_5_5:
5407; GFX90A:       ; %bb.0:
5408; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5409; GFX90A-NEXT:    ;;#ASMSTART
5410; GFX90A-NEXT:    ; def v[0:1]
5411; GFX90A-NEXT:    ;;#ASMEND
5412; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5413; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5414; GFX90A-NEXT:    ;;#ASMSTART
5415; GFX90A-NEXT:    ; def v[2:3]
5416; GFX90A-NEXT:    ;;#ASMEND
5417; GFX90A-NEXT:    v_perm_b32 v0, v2, v0, s4
5418; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5419; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
5420; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
5421; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5422; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5423;
5424; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_5_5:
5425; GFX940:       ; %bb.0:
5426; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5427; GFX940-NEXT:    ;;#ASMSTART
5428; GFX940-NEXT:    ; def v[0:1]
5429; GFX940-NEXT:    ;;#ASMEND
5430; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5431; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5432; GFX940-NEXT:    ;;#ASMSTART
5433; GFX940-NEXT:    ; def v[2:3]
5434; GFX940-NEXT:    ;;#ASMEND
5435; GFX940-NEXT:    s_nop 0
5436; GFX940-NEXT:    v_perm_b32 v0, v2, v0, s2
5437; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5438; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
5439; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
5440; GFX940-NEXT:    s_waitcnt vmcnt(0)
5441; GFX940-NEXT:    s_setpc_b64 s[30:31]
5442  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5443  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5444  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
5445  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5446  ret void
5447}
5448
; Shuffle mask <2, 5, 5>: the stored <3 x i16> is
; <%vec0[2], %vec1[1], %vec1[1]>, mixing both operands.
; The expected code shows two "def" asm blocks (v[0:1] and v[2:3] on every
; target), a v_bfi_b32 driven by a 0xffff mask held in an SGPR that combines
; v1 with v2, a v_lshrrev_b32 by 16, and a short plus a dword store. The
; check lines are autogenerated (see the NOTE at the top of the file).
5449define void @v_shuffle_v3i16_v4i16__2_5_5(ptr addrspace(1) inreg %ptr) {
5450; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_5_5:
5451; GFX900:       ; %bb.0:
5452; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5453; GFX900-NEXT:    ;;#ASMSTART
5454; GFX900-NEXT:    ; def v[0:1]
5455; GFX900-NEXT:    ;;#ASMEND
5456; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5457; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5458; GFX900-NEXT:    ;;#ASMSTART
5459; GFX900-NEXT:    ; def v[2:3]
5460; GFX900-NEXT:    ;;#ASMEND
5461; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v2
5462; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5463; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
5464; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5465; GFX900-NEXT:    s_waitcnt vmcnt(0)
5466; GFX900-NEXT:    s_setpc_b64 s[30:31]
5467;
5468; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_5_5:
5469; GFX90A:       ; %bb.0:
5470; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5471; GFX90A-NEXT:    ;;#ASMSTART
5472; GFX90A-NEXT:    ; def v[0:1]
5473; GFX90A-NEXT:    ;;#ASMEND
5474; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5475; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5476; GFX90A-NEXT:    ;;#ASMSTART
5477; GFX90A-NEXT:    ; def v[2:3]
5478; GFX90A-NEXT:    ;;#ASMEND
5479; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v2
5480; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5481; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
5482; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5483; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5484; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5485;
5486; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_5_5:
5487; GFX940:       ; %bb.0:
5488; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5489; GFX940-NEXT:    ;;#ASMSTART
5490; GFX940-NEXT:    ; def v[0:1]
5491; GFX940-NEXT:    ;;#ASMEND
5492; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5493; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5494; GFX940-NEXT:    ;;#ASMSTART
5495; GFX940-NEXT:    ; def v[2:3]
5496; GFX940-NEXT:    ;;#ASMEND
5497; GFX940-NEXT:    s_nop 0
5498; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v2
5499; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5500; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
5501; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5502; GFX940-NEXT:    s_waitcnt vmcnt(0)
5503; GFX940-NEXT:    s_setpc_b64 s[30:31]
5504  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5505  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5506  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
5507  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5508  ret void
5509}
5510
; Shuffle mask <3, 5, 5>: the stored <3 x i16> is
; <%vec0[3], %vec1[1], %vec1[1]>, mixing both operands.
; The expected code shows two "def" asm blocks (v[0:1] and v[2:3] on every
; target), a v_perm_b32 of v2 and v1 whose selector 0x7060302 is
; materialised in an SGPR, a v_lshrrev_b32 by 16, and a dword plus a short
; store. The check lines are autogenerated (see the NOTE at the top of the
; file).
5511define void @v_shuffle_v3i16_v4i16__3_5_5(ptr addrspace(1) inreg %ptr) {
5512; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_5_5:
5513; GFX900:       ; %bb.0:
5514; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5515; GFX900-NEXT:    ;;#ASMSTART
5516; GFX900-NEXT:    ; def v[0:1]
5517; GFX900-NEXT:    ;;#ASMEND
5518; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5519; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5520; GFX900-NEXT:    ;;#ASMSTART
5521; GFX900-NEXT:    ; def v[2:3]
5522; GFX900-NEXT:    ;;#ASMEND
5523; GFX900-NEXT:    v_perm_b32 v0, v2, v1, s4
5524; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5525; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5526; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
5527; GFX900-NEXT:    s_waitcnt vmcnt(0)
5528; GFX900-NEXT:    s_setpc_b64 s[30:31]
5529;
5530; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_5_5:
5531; GFX90A:       ; %bb.0:
5532; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5533; GFX90A-NEXT:    ;;#ASMSTART
5534; GFX90A-NEXT:    ; def v[0:1]
5535; GFX90A-NEXT:    ;;#ASMEND
5536; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5537; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5538; GFX90A-NEXT:    ;;#ASMSTART
5539; GFX90A-NEXT:    ; def v[2:3]
5540; GFX90A-NEXT:    ;;#ASMEND
5541; GFX90A-NEXT:    v_perm_b32 v0, v2, v1, s4
5542; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5543; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5544; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
5545; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5546; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5547;
5548; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_5_5:
5549; GFX940:       ; %bb.0:
5550; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5551; GFX940-NEXT:    ;;#ASMSTART
5552; GFX940-NEXT:    ; def v[0:1]
5553; GFX940-NEXT:    ;;#ASMEND
5554; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5555; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5556; GFX940-NEXT:    ;;#ASMSTART
5557; GFX940-NEXT:    ; def v[2:3]
5558; GFX940-NEXT:    ;;#ASMEND
5559; GFX940-NEXT:    s_nop 0
5560; GFX940-NEXT:    v_perm_b32 v0, v2, v1, s2
5561; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5562; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5563; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
5564; GFX940-NEXT:    s_waitcnt vmcnt(0)
5565; GFX940-NEXT:    s_setpc_b64 s[30:31]
5566  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5567  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5568  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
5569  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5570  ret void
5571}
5572
; Shuffle mask <4, 5, 5>: mask indices 4-7 address the second operand, so the
; stored <3 x i16> is <%vec1[0], %vec1[1], %vec1[1]>; %vec0 is never selected.
; The expected code for each target shows a single "def" asm block and no
; ALU work on the data: a global_store_short_d16_hi of v0 at offset 4 and a
; dword store of v0. The check lines are autogenerated (see the NOTE at the
; top of the file).
5573define void @v_shuffle_v3i16_v4i16__4_5_5(ptr addrspace(1) inreg %ptr) {
5574; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_5_5:
5575; GFX900:       ; %bb.0:
5576; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5577; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5578; GFX900-NEXT:    ;;#ASMSTART
5579; GFX900-NEXT:    ; def v[0:1]
5580; GFX900-NEXT:    ;;#ASMEND
5581; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
5582; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
5583; GFX900-NEXT:    s_waitcnt vmcnt(0)
5584; GFX900-NEXT:    s_setpc_b64 s[30:31]
5585;
5586; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_5_5:
5587; GFX90A:       ; %bb.0:
5588; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5589; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5590; GFX90A-NEXT:    ;;#ASMSTART
5591; GFX90A-NEXT:    ; def v[0:1]
5592; GFX90A-NEXT:    ;;#ASMEND
5593; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
5594; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
5595; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5596; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5597;
5598; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_5_5:
5599; GFX940:       ; %bb.0:
5600; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5601; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5602; GFX940-NEXT:    ;;#ASMSTART
5603; GFX940-NEXT:    ; def v[0:1]
5604; GFX940-NEXT:    ;;#ASMEND
5605; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
5606; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
5607; GFX940-NEXT:    s_waitcnt vmcnt(0)
5608; GFX940-NEXT:    s_setpc_b64 s[30:31]
5609  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5610  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5611  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
5612  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5613  ret void
5614}
5615
; Shuffle mask <5, 5, 5>: all three lanes take %vec1[1] (a splat of one
; element of the second operand); %vec0 is never selected.
; The expected code for each target shows a single "def" asm block, a
; v_perm_b32 with v0 as both data sources and selector 0x7060302 in an SGPR,
; a v_lshrrev_b32 by 16, and a dword plus a short store. The check lines are
; autogenerated (see the NOTE at the top of the file).
5616define void @v_shuffle_v3i16_v4i16__5_5_5(ptr addrspace(1) inreg %ptr) {
5617; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_5_5:
5618; GFX900:       ; %bb.0:
5619; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5620; GFX900-NEXT:    ;;#ASMSTART
5621; GFX900-NEXT:    ; def v[0:1]
5622; GFX900-NEXT:    ;;#ASMEND
5623; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5624; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5625; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
5626; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5627; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5628; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5629; GFX900-NEXT:    s_waitcnt vmcnt(0)
5630; GFX900-NEXT:    s_setpc_b64 s[30:31]
5631;
5632; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_5_5:
5633; GFX90A:       ; %bb.0:
5634; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5635; GFX90A-NEXT:    ;;#ASMSTART
5636; GFX90A-NEXT:    ; def v[0:1]
5637; GFX90A-NEXT:    ;;#ASMEND
5638; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5639; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5640; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
5641; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5642; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5643; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5644; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5645; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5646;
5647; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_5_5:
5648; GFX940:       ; %bb.0:
5649; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5650; GFX940-NEXT:    ;;#ASMSTART
5651; GFX940-NEXT:    ; def v[0:1]
5652; GFX940-NEXT:    ;;#ASMEND
5653; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5654; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5655; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
5656; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5657; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5658; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5659; GFX940-NEXT:    s_waitcnt vmcnt(0)
5660; GFX940-NEXT:    s_setpc_b64 s[30:31]
5661  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5662  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5663  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
5664  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5665  ret void
5666}
5667
; Shuffle mask <6, 5, 5>: mask indices 4-7 address the second operand, so the
; stored <3 x i16> is <%vec1[2], %vec1[1], %vec1[1]>; %vec0 is never selected.
; The expected code for each target shows a single "def" asm block, a
; v_bfi_b32 driven by a 0xffff mask held in an SGPR that combines v1 with
; v0, a v_lshrrev_b32 by 16, and a short plus a dword store. The check lines
; are autogenerated (see the NOTE at the top of the file).
5668define void @v_shuffle_v3i16_v4i16__6_5_5(ptr addrspace(1) inreg %ptr) {
5669; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_5_5:
5670; GFX900:       ; %bb.0:
5671; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5672; GFX900-NEXT:    ;;#ASMSTART
5673; GFX900-NEXT:    ; def v[0:1]
5674; GFX900-NEXT:    ;;#ASMEND
5675; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5676; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5677; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
5678; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5679; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5680; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5681; GFX900-NEXT:    s_waitcnt vmcnt(0)
5682; GFX900-NEXT:    s_setpc_b64 s[30:31]
5683;
5684; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_5_5:
5685; GFX90A:       ; %bb.0:
5686; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5687; GFX90A-NEXT:    ;;#ASMSTART
5688; GFX90A-NEXT:    ; def v[0:1]
5689; GFX90A-NEXT:    ;;#ASMEND
5690; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5691; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5692; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v0
5693; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5694; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5695; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5696; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5697; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5698;
5699; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_5_5:
5700; GFX940:       ; %bb.0:
5701; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5702; GFX940-NEXT:    ;;#ASMSTART
5703; GFX940-NEXT:    ; def v[0:1]
5704; GFX940-NEXT:    ;;#ASMEND
5705; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5706; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5707; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v0
5708; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5709; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5710; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5711; GFX940-NEXT:    s_waitcnt vmcnt(0)
5712; GFX940-NEXT:    s_setpc_b64 s[30:31]
5713  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5714  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5715  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
5716  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5717  ret void
5718}
5719
; Shuffle mask <7, 5, 5>: mask indices 4-7 address the second operand, so the
; stored <3 x i16> is <%vec1[3], %vec1[1], %vec1[1]>; %vec0 is never selected.
; The expected code for each target shows a single "def" asm block, a
; v_perm_b32 of v0 and v1 with selector 0x7060302 in an SGPR, a
; v_lshrrev_b32 by 16, and a dword plus a short store. The check lines are
; autogenerated (see the NOTE at the top of the file).
5720define void @v_shuffle_v3i16_v4i16__7_5_5(ptr addrspace(1) inreg %ptr) {
5721; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_5:
5722; GFX900:       ; %bb.0:
5723; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5724; GFX900-NEXT:    ;;#ASMSTART
5725; GFX900-NEXT:    ; def v[0:1]
5726; GFX900-NEXT:    ;;#ASMEND
5727; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5728; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5729; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
5730; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5731; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5732; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5733; GFX900-NEXT:    s_waitcnt vmcnt(0)
5734; GFX900-NEXT:    s_setpc_b64 s[30:31]
5735;
5736; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_5:
5737; GFX90A:       ; %bb.0:
5738; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5739; GFX90A-NEXT:    ;;#ASMSTART
5740; GFX90A-NEXT:    ; def v[0:1]
5741; GFX90A-NEXT:    ;;#ASMEND
5742; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5743; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5744; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
5745; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5746; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5747; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5748; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5749; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5750;
5751; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_5:
5752; GFX940:       ; %bb.0:
5753; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5754; GFX940-NEXT:    ;;#ASMSTART
5755; GFX940-NEXT:    ; def v[0:1]
5756; GFX940-NEXT:    ;;#ASMEND
5757; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5758; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5759; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
5760; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5761; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5762; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5763; GFX940-NEXT:    s_waitcnt vmcnt(0)
5764; GFX940-NEXT:    s_setpc_b64 s[30:31]
5765  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5766  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5767  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
5768  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5769  ret void
5770}
5771
; Shuffle mask <7, poison, 5>: lane 0 is %vec1[3], lane 1 is unconstrained
; and lane 2 is %vec1[1]; %vec0 is never selected.
; The expected code for each target shows a single "def" asm block, a
; v_alignbit_b32 by 16, a global_store_short_d16_hi of v0 at offset 4 and a
; dword store of v1.
; NOTE(review): the first source of that v_alignbit_b32 is an SGPR (s4, or
; s0 on gfx940) that the check lines never show being written; presumably it
; stands in for the poison lane - confirm before relying on it.
; The check lines are autogenerated (see the NOTE at the top of the file).
5772define void @v_shuffle_v3i16_v4i16__7_u_5(ptr addrspace(1) inreg %ptr) {
5773; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_5:
5774; GFX900:       ; %bb.0:
5775; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5776; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5777; GFX900-NEXT:    ;;#ASMSTART
5778; GFX900-NEXT:    ; def v[0:1]
5779; GFX900-NEXT:    ;;#ASMEND
5780; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
5781; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
5782; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5783; GFX900-NEXT:    s_waitcnt vmcnt(0)
5784; GFX900-NEXT:    s_setpc_b64 s[30:31]
5785;
5786; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_5:
5787; GFX90A:       ; %bb.0:
5788; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5789; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5790; GFX90A-NEXT:    ;;#ASMSTART
5791; GFX90A-NEXT:    ; def v[0:1]
5792; GFX90A-NEXT:    ;;#ASMEND
5793; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v1, 16
5794; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
5795; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5796; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5797; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5798;
5799; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_5:
5800; GFX940:       ; %bb.0:
5801; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5802; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5803; GFX940-NEXT:    ;;#ASMSTART
5804; GFX940-NEXT:    ; def v[0:1]
5805; GFX940-NEXT:    ;;#ASMEND
5806; GFX940-NEXT:    s_nop 0
5807; GFX940-NEXT:    v_alignbit_b32 v1, s0, v1, 16
5808; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
5809; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5810; GFX940-NEXT:    s_waitcnt vmcnt(0)
5811; GFX940-NEXT:    s_setpc_b64 s[30:31]
5812  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5813  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5814  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
5815  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5816  ret void
5817}
5818
; Shuffle mask <7, 0, 5>: the stored <3 x i16> is
; <%vec1[3], %vec0[0], %vec1[1]>, mixing both operands.
; The expected code shows two "def" asm blocks, a v_alignbit_b32 by 16 that
; combines v0 with the upper register of the second def, a
; global_store_short_d16_hi at offset 4 and a dword store. The gfx900 lines
; place the second def in v[1:2] and the zero offset in v3; the other two
; targets use v[2:3] and v4. The check lines are autogenerated (see the NOTE
; at the top of the file).
5819define void @v_shuffle_v3i16_v4i16__7_0_5(ptr addrspace(1) inreg %ptr) {
5820; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_5:
5821; GFX900:       ; %bb.0:
5822; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5823; GFX900-NEXT:    ;;#ASMSTART
5824; GFX900-NEXT:    ; def v[0:1]
5825; GFX900-NEXT:    ;;#ASMEND
5826; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5827; GFX900-NEXT:    ;;#ASMSTART
5828; GFX900-NEXT:    ; def v[1:2]
5829; GFX900-NEXT:    ;;#ASMEND
5830; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
5831; GFX900-NEXT:    global_store_short_d16_hi v3, v1, s[16:17] offset:4
5832; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5833; GFX900-NEXT:    s_waitcnt vmcnt(0)
5834; GFX900-NEXT:    s_setpc_b64 s[30:31]
5835;
5836; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_5:
5837; GFX90A:       ; %bb.0:
5838; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5839; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5840; GFX90A-NEXT:    ;;#ASMSTART
5841; GFX90A-NEXT:    ; def v[0:1]
5842; GFX90A-NEXT:    ;;#ASMEND
5843; GFX90A-NEXT:    ;;#ASMSTART
5844; GFX90A-NEXT:    ; def v[2:3]
5845; GFX90A-NEXT:    ;;#ASMEND
5846; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
5847; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
5848; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5849; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5850; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5851;
5852; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_5:
5853; GFX940:       ; %bb.0:
5854; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5855; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5856; GFX940-NEXT:    ;;#ASMSTART
5857; GFX940-NEXT:    ; def v[0:1]
5858; GFX940-NEXT:    ;;#ASMEND
5859; GFX940-NEXT:    ;;#ASMSTART
5860; GFX940-NEXT:    ; def v[2:3]
5861; GFX940-NEXT:    ;;#ASMEND
5862; GFX940-NEXT:    s_nop 0
5863; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
5864; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
5865; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5866; GFX940-NEXT:    s_waitcnt vmcnt(0)
5867; GFX940-NEXT:    s_setpc_b64 s[30:31]
5868  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5869  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5870  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
5871  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5872  ret void
5873}
5874
; Shuffle mask <7, 1, 5>: the stored <3 x i16> is
; <%vec1[3], %vec0[1], %vec1[1]>, mixing both operands.
; The expected code shows two "def" asm blocks, a v_perm_b32 that combines
; v0 with the upper register of the second def using selector 0x7060302 in
; an SGPR, a global_store_short_d16_hi at offset 4 and a dword store. The
; gfx900 lines place the second def in v[1:2] and the zero offset in v3; the
; other two targets use v[2:3] and v4. The check lines are autogenerated
; (see the NOTE at the top of the file).
5875define void @v_shuffle_v3i16_v4i16__7_1_5(ptr addrspace(1) inreg %ptr) {
5876; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_5:
5877; GFX900:       ; %bb.0:
5878; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5879; GFX900-NEXT:    ;;#ASMSTART
5880; GFX900-NEXT:    ; def v[0:1]
5881; GFX900-NEXT:    ;;#ASMEND
5882; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5883; GFX900-NEXT:    ;;#ASMSTART
5884; GFX900-NEXT:    ; def v[1:2]
5885; GFX900-NEXT:    ;;#ASMEND
5886; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5887; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
5888; GFX900-NEXT:    global_store_short_d16_hi v3, v1, s[16:17] offset:4
5889; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5890; GFX900-NEXT:    s_waitcnt vmcnt(0)
5891; GFX900-NEXT:    s_setpc_b64 s[30:31]
5892;
5893; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_5:
5894; GFX90A:       ; %bb.0:
5895; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5896; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5897; GFX90A-NEXT:    ;;#ASMSTART
5898; GFX90A-NEXT:    ; def v[0:1]
5899; GFX90A-NEXT:    ;;#ASMEND
5900; GFX90A-NEXT:    ;;#ASMSTART
5901; GFX90A-NEXT:    ; def v[2:3]
5902; GFX90A-NEXT:    ;;#ASMEND
5903; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5904; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
5905; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
5906; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5907; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5908; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5909;
5910; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_5:
5911; GFX940:       ; %bb.0:
5912; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5913; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5914; GFX940-NEXT:    ;;#ASMSTART
5915; GFX940-NEXT:    ; def v[0:1]
5916; GFX940-NEXT:    ;;#ASMEND
5917; GFX940-NEXT:    ;;#ASMSTART
5918; GFX940-NEXT:    ; def v[2:3]
5919; GFX940-NEXT:    ;;#ASMEND
5920; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5921; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
5922; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
5923; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5924; GFX940-NEXT:    s_waitcnt vmcnt(0)
5925; GFX940-NEXT:    s_setpc_b64 s[30:31]
5926  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5927  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5928  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
5929  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5930  ret void
5931}
5932
; Shuffle mask <7, 2, 5>: the stored <3 x i16> is
; <%vec1[3], %vec0[2], %vec1[1]>, mixing both operands.
; The expected code shows two "def" asm blocks (v[0:1] and v[2:3] on every
; target), a v_alignbit_b32 by 16 combining v1 with v3, a
; global_store_short_d16_hi of v2 at offset 4 and a dword store. The check
; lines are autogenerated (see the NOTE at the top of the file).
5933define void @v_shuffle_v3i16_v4i16__7_2_5(ptr addrspace(1) inreg %ptr) {
5934; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_5:
5935; GFX900:       ; %bb.0:
5936; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5937; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5938; GFX900-NEXT:    ;;#ASMSTART
5939; GFX900-NEXT:    ; def v[0:1]
5940; GFX900-NEXT:    ;;#ASMEND
5941; GFX900-NEXT:    ;;#ASMSTART
5942; GFX900-NEXT:    ; def v[2:3]
5943; GFX900-NEXT:    ;;#ASMEND
5944; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5945; GFX900-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
5946; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5947; GFX900-NEXT:    s_waitcnt vmcnt(0)
5948; GFX900-NEXT:    s_setpc_b64 s[30:31]
5949;
5950; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_5:
5951; GFX90A:       ; %bb.0:
5952; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5953; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5954; GFX90A-NEXT:    ;;#ASMSTART
5955; GFX90A-NEXT:    ; def v[0:1]
5956; GFX90A-NEXT:    ;;#ASMEND
5957; GFX90A-NEXT:    ;;#ASMSTART
5958; GFX90A-NEXT:    ; def v[2:3]
5959; GFX90A-NEXT:    ;;#ASMEND
5960; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5961; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
5962; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5963; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5964; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5965;
5966; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_5:
5967; GFX940:       ; %bb.0:
5968; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5969; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5970; GFX940-NEXT:    ;;#ASMSTART
5971; GFX940-NEXT:    ; def v[0:1]
5972; GFX940-NEXT:    ;;#ASMEND
5973; GFX940-NEXT:    ;;#ASMSTART
5974; GFX940-NEXT:    ; def v[2:3]
5975; GFX940-NEXT:    ;;#ASMEND
5976; GFX940-NEXT:    s_nop 0
5977; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5978; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
5979; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5980; GFX940-NEXT:    s_waitcnt vmcnt(0)
5981; GFX940-NEXT:    s_setpc_b64 s[30:31]
5982  %vec0 = call <4 x i16> asm "; def $0", "=v"()
5983  %vec1 = call <4 x i16> asm "; def $0", "=v"()
5984  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
5985  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
5986  ret void
5987}
5988
; Shuffle mask <7, 3, 5>: the stored <3 x i16> is
; <%vec1[3], %vec0[3], %vec1[1]>, mixing both operands.
; The expected code shows two "def" asm blocks (v[0:1] and v[2:3] on every
; target), a v_perm_b32 combining v1 with v3 using selector 0x7060302 in an
; SGPR, a global_store_short_d16_hi of v2 at offset 4 and a dword store. The
; check lines are autogenerated (see the NOTE at the top of the file).
5989define void @v_shuffle_v3i16_v4i16__7_3_5(ptr addrspace(1) inreg %ptr) {
5990; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_5:
5991; GFX900:       ; %bb.0:
5992; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5993; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5994; GFX900-NEXT:    ;;#ASMSTART
5995; GFX900-NEXT:    ; def v[0:1]
5996; GFX900-NEXT:    ;;#ASMEND
5997; GFX900-NEXT:    ;;#ASMSTART
5998; GFX900-NEXT:    ; def v[2:3]
5999; GFX900-NEXT:    ;;#ASMEND
6000; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6001; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
6002; GFX900-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
6003; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6004; GFX900-NEXT:    s_waitcnt vmcnt(0)
6005; GFX900-NEXT:    s_setpc_b64 s[30:31]
6006;
6007; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_5:
6008; GFX90A:       ; %bb.0:
6009; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6010; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6011; GFX90A-NEXT:    ;;#ASMSTART
6012; GFX90A-NEXT:    ; def v[0:1]
6013; GFX90A-NEXT:    ;;#ASMEND
6014; GFX90A-NEXT:    ;;#ASMSTART
6015; GFX90A-NEXT:    ; def v[2:3]
6016; GFX90A-NEXT:    ;;#ASMEND
6017; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6018; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
6019; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
6020; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6021; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6022; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6023;
6024; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_5:
6025; GFX940:       ; %bb.0:
6026; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6027; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6028; GFX940-NEXT:    ;;#ASMSTART
6029; GFX940-NEXT:    ; def v[0:1]
6030; GFX940-NEXT:    ;;#ASMEND
6031; GFX940-NEXT:    ;;#ASMSTART
6032; GFX940-NEXT:    ; def v[2:3]
6033; GFX940-NEXT:    ;;#ASMEND
6034; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6035; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
6036; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
6037; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6038; GFX940-NEXT:    s_waitcnt vmcnt(0)
6039; GFX940-NEXT:    s_setpc_b64 s[30:31]
6040  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6041  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6042  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
6043  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6044  ret void
6045}
6046
; Shuffle mask <7, 4, 5>: result = { %vec1[3], %vec1[0], %vec1[1] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; %vec0 is never selected by the mask; the expected output contains only one
; "; def" region. The <3 x i16> result is stored to %ptr. The assertion lines
; are autogenerated (see the NOTE at the top of the file).
6047define void @v_shuffle_v3i16_v4i16__7_4_5(ptr addrspace(1) inreg %ptr) {
6048; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_5:
6049; GFX900:       ; %bb.0:
6050; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6051; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6052; GFX900-NEXT:    ;;#ASMSTART
6053; GFX900-NEXT:    ; def v[0:1]
6054; GFX900-NEXT:    ;;#ASMEND
6055; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
6056; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
6057; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
6058; GFX900-NEXT:    s_waitcnt vmcnt(0)
6059; GFX900-NEXT:    s_setpc_b64 s[30:31]
6060;
6061; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_5:
6062; GFX90A:       ; %bb.0:
6063; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6064; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6065; GFX90A-NEXT:    ;;#ASMSTART
6066; GFX90A-NEXT:    ; def v[0:1]
6067; GFX90A-NEXT:    ;;#ASMEND
6068; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v1, 16
6069; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
6070; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
6071; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6072; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6073;
6074; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_5:
6075; GFX940:       ; %bb.0:
6076; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6077; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6078; GFX940-NEXT:    ;;#ASMSTART
6079; GFX940-NEXT:    ; def v[0:1]
6080; GFX940-NEXT:    ;;#ASMEND
6081; GFX940-NEXT:    s_nop 0
6082; GFX940-NEXT:    v_alignbit_b32 v1, v0, v1, 16
6083; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
6084; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
6085; GFX940-NEXT:    s_waitcnt vmcnt(0)
6086; GFX940-NEXT:    s_setpc_b64 s[30:31]
6087  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6088  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6089  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
6090  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6091  ret void
6092}
6093
; Shuffle mask <7, 6, 5>: result = { %vec1[3], %vec1[2], %vec1[1] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; %vec0 is never selected by the mask; the expected output contains only one
; "; def" region. The <3 x i16> result is stored to %ptr. The assertion lines
; are autogenerated (see the NOTE at the top of the file).
6094define void @v_shuffle_v3i16_v4i16__7_6_5(ptr addrspace(1) inreg %ptr) {
6095; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_5:
6096; GFX900:       ; %bb.0:
6097; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6098; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6099; GFX900-NEXT:    ;;#ASMSTART
6100; GFX900-NEXT:    ; def v[0:1]
6101; GFX900-NEXT:    ;;#ASMEND
6102; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
6103; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
6104; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
6105; GFX900-NEXT:    s_waitcnt vmcnt(0)
6106; GFX900-NEXT:    s_setpc_b64 s[30:31]
6107;
6108; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_5:
6109; GFX90A:       ; %bb.0:
6110; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6111; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6112; GFX90A-NEXT:    ;;#ASMSTART
6113; GFX90A-NEXT:    ; def v[0:1]
6114; GFX90A-NEXT:    ;;#ASMEND
6115; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v1, 16
6116; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
6117; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
6118; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6119; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6120;
6121; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_5:
6122; GFX940:       ; %bb.0:
6123; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6124; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6125; GFX940-NEXT:    ;;#ASMSTART
6126; GFX940-NEXT:    ; def v[0:1]
6127; GFX940-NEXT:    ;;#ASMEND
6128; GFX940-NEXT:    s_nop 0
6129; GFX940-NEXT:    v_alignbit_b32 v1, v1, v1, 16
6130; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
6131; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
6132; GFX940-NEXT:    s_waitcnt vmcnt(0)
6133; GFX940-NEXT:    s_setpc_b64 s[30:31]
6134  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6135  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6136  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
6137  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6138  ret void
6139}
6140
; Shuffle mask <poison, 6, 6>: result = { poison, %vec1[2], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; Lane 0 is poison, so its stored value is unspecified. %vec0 is never selected
; by the mask; the expected output contains only one "; def" region.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
6141define void @v_shuffle_v3i16_v4i16__u_6_6(ptr addrspace(1) inreg %ptr) {
6142; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_6_6:
6143; GFX900:       ; %bb.0:
6144; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6145; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6146; GFX900-NEXT:    ;;#ASMSTART
6147; GFX900-NEXT:    ; def v[0:1]
6148; GFX900-NEXT:    ;;#ASMEND
6149; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
6150; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6151; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6152; GFX900-NEXT:    s_waitcnt vmcnt(0)
6153; GFX900-NEXT:    s_setpc_b64 s[30:31]
6154;
6155; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_6_6:
6156; GFX90A:       ; %bb.0:
6157; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6158; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6159; GFX90A-NEXT:    ;;#ASMSTART
6160; GFX90A-NEXT:    ; def v[0:1]
6161; GFX90A-NEXT:    ;;#ASMEND
6162; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
6163; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6164; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6165; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6166; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6167;
6168; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_6_6:
6169; GFX940:       ; %bb.0:
6170; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6171; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6172; GFX940-NEXT:    ;;#ASMSTART
6173; GFX940-NEXT:    ; def v[0:1]
6174; GFX940-NEXT:    ;;#ASMEND
6175; GFX940-NEXT:    s_nop 0
6176; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
6177; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6178; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6179; GFX940-NEXT:    s_waitcnt vmcnt(0)
6180; GFX940-NEXT:    s_setpc_b64 s[30:31]
6181  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6182  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6183  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
6184  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6185  ret void
6186}
6187
; Shuffle mask <0, 6, 6>: result = { %vec0[0], %vec1[2], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; Both inputs are selected, so the expected output contains two "; def" regions.
; The <3 x i16> result is stored to %ptr. The assertion lines are autogenerated
; (see the NOTE at the top of the file); regenerate them instead of hand-editing.
6188define void @v_shuffle_v3i16_v4i16__0_6_6(ptr addrspace(1) inreg %ptr) {
6189; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_6_6:
6190; GFX900:       ; %bb.0:
6191; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6192; GFX900-NEXT:    ;;#ASMSTART
6193; GFX900-NEXT:    ; def v[0:1]
6194; GFX900-NEXT:    ;;#ASMEND
6195; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6196; GFX900-NEXT:    ;;#ASMSTART
6197; GFX900-NEXT:    ; def v[1:2]
6198; GFX900-NEXT:    ;;#ASMEND
6199; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6200; GFX900-NEXT:    v_perm_b32 v0, v2, v0, s4
6201; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
6202; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
6203; GFX900-NEXT:    s_waitcnt vmcnt(0)
6204; GFX900-NEXT:    s_setpc_b64 s[30:31]
6205;
6206; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_6_6:
6207; GFX90A:       ; %bb.0:
6208; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6209; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6210; GFX90A-NEXT:    ;;#ASMSTART
6211; GFX90A-NEXT:    ; def v[0:1]
6212; GFX90A-NEXT:    ;;#ASMEND
6213; GFX90A-NEXT:    ;;#ASMSTART
6214; GFX90A-NEXT:    ; def v[2:3]
6215; GFX90A-NEXT:    ;;#ASMEND
6216; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6217; GFX90A-NEXT:    v_perm_b32 v0, v3, v0, s4
6218; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6219; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6220; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6221; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6222;
6223; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_6_6:
6224; GFX940:       ; %bb.0:
6225; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6226; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6227; GFX940-NEXT:    ;;#ASMSTART
6228; GFX940-NEXT:    ; def v[0:1]
6229; GFX940-NEXT:    ;;#ASMEND
6230; GFX940-NEXT:    ;;#ASMSTART
6231; GFX940-NEXT:    ; def v[2:3]
6232; GFX940-NEXT:    ;;#ASMEND
6233; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6234; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s2
6235; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6236; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6237; GFX940-NEXT:    s_waitcnt vmcnt(0)
6238; GFX940-NEXT:    s_setpc_b64 s[30:31]
6239  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6240  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6241  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
6242  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6243  ret void
6244}
6245
; Shuffle mask <1, 6, 6>: result = { %vec0[1], %vec1[2], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; Both inputs are selected, so the expected output contains two "; def" regions.
; The <3 x i16> result is stored to %ptr. The assertion lines are autogenerated
; (see the NOTE at the top of the file); regenerate them instead of hand-editing.
6246define void @v_shuffle_v3i16_v4i16__1_6_6(ptr addrspace(1) inreg %ptr) {
6247; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_6_6:
6248; GFX900:       ; %bb.0:
6249; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6250; GFX900-NEXT:    ;;#ASMSTART
6251; GFX900-NEXT:    ; def v[0:1]
6252; GFX900-NEXT:    ;;#ASMEND
6253; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6254; GFX900-NEXT:    ;;#ASMSTART
6255; GFX900-NEXT:    ; def v[1:2]
6256; GFX900-NEXT:    ;;#ASMEND
6257; GFX900-NEXT:    v_alignbit_b32 v0, v2, v0, 16
6258; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
6259; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
6260; GFX900-NEXT:    s_waitcnt vmcnt(0)
6261; GFX900-NEXT:    s_setpc_b64 s[30:31]
6262;
6263; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_6_6:
6264; GFX90A:       ; %bb.0:
6265; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6266; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6267; GFX90A-NEXT:    ;;#ASMSTART
6268; GFX90A-NEXT:    ; def v[0:1]
6269; GFX90A-NEXT:    ;;#ASMEND
6270; GFX90A-NEXT:    ;;#ASMSTART
6271; GFX90A-NEXT:    ; def v[2:3]
6272; GFX90A-NEXT:    ;;#ASMEND
6273; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v0, 16
6274; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6275; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6276; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6277; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6278;
6279; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_6_6:
6280; GFX940:       ; %bb.0:
6281; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6282; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6283; GFX940-NEXT:    ;;#ASMSTART
6284; GFX940-NEXT:    ; def v[0:1]
6285; GFX940-NEXT:    ;;#ASMEND
6286; GFX940-NEXT:    ;;#ASMSTART
6287; GFX940-NEXT:    ; def v[2:3]
6288; GFX940-NEXT:    ;;#ASMEND
6289; GFX940-NEXT:    s_nop 0
6290; GFX940-NEXT:    v_alignbit_b32 v0, v3, v0, 16
6291; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6292; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6293; GFX940-NEXT:    s_waitcnt vmcnt(0)
6294; GFX940-NEXT:    s_setpc_b64 s[30:31]
6295  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6296  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6297  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
6298  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6299  ret void
6300}
6301
; Shuffle mask <2, 6, 6>: result = { %vec0[2], %vec1[2], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; Both inputs are selected, so the expected output contains two "; def" regions.
; The <3 x i16> result is stored to %ptr. The assertion lines are autogenerated
; (see the NOTE at the top of the file); regenerate them instead of hand-editing.
6302define void @v_shuffle_v3i16_v4i16__2_6_6(ptr addrspace(1) inreg %ptr) {
6303; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_6_6:
6304; GFX900:       ; %bb.0:
6305; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6306; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6307; GFX900-NEXT:    ;;#ASMSTART
6308; GFX900-NEXT:    ; def v[0:1]
6309; GFX900-NEXT:    ;;#ASMEND
6310; GFX900-NEXT:    ;;#ASMSTART
6311; GFX900-NEXT:    ; def v[2:3]
6312; GFX900-NEXT:    ;;#ASMEND
6313; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6314; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
6315; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6316; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6317; GFX900-NEXT:    s_waitcnt vmcnt(0)
6318; GFX900-NEXT:    s_setpc_b64 s[30:31]
6319;
6320; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_6_6:
6321; GFX90A:       ; %bb.0:
6322; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6323; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6324; GFX90A-NEXT:    ;;#ASMSTART
6325; GFX90A-NEXT:    ; def v[0:1]
6326; GFX90A-NEXT:    ;;#ASMEND
6327; GFX90A-NEXT:    ;;#ASMSTART
6328; GFX90A-NEXT:    ; def v[2:3]
6329; GFX90A-NEXT:    ;;#ASMEND
6330; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6331; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
6332; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6333; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6334; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6335; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6336;
6337; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_6_6:
6338; GFX940:       ; %bb.0:
6339; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6340; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6341; GFX940-NEXT:    ;;#ASMSTART
6342; GFX940-NEXT:    ; def v[0:1]
6343; GFX940-NEXT:    ;;#ASMEND
6344; GFX940-NEXT:    ;;#ASMSTART
6345; GFX940-NEXT:    ; def v[2:3]
6346; GFX940-NEXT:    ;;#ASMEND
6347; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6348; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
6349; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6350; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6351; GFX940-NEXT:    s_waitcnt vmcnt(0)
6352; GFX940-NEXT:    s_setpc_b64 s[30:31]
6353  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6354  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6355  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
6356  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6357  ret void
6358}
6359
; Shuffle mask <3, 6, 6>: result = { %vec0[3], %vec1[2], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; Both inputs are selected, so the expected output contains two "; def" regions.
; The <3 x i16> result is stored to %ptr. The assertion lines are autogenerated
; (see the NOTE at the top of the file); regenerate them instead of hand-editing.
6360define void @v_shuffle_v3i16_v4i16__3_6_6(ptr addrspace(1) inreg %ptr) {
6361; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_6_6:
6362; GFX900:       ; %bb.0:
6363; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6364; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6365; GFX900-NEXT:    ;;#ASMSTART
6366; GFX900-NEXT:    ; def v[0:1]
6367; GFX900-NEXT:    ;;#ASMEND
6368; GFX900-NEXT:    ;;#ASMSTART
6369; GFX900-NEXT:    ; def v[2:3]
6370; GFX900-NEXT:    ;;#ASMEND
6371; GFX900-NEXT:    v_alignbit_b32 v0, v3, v1, 16
6372; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6373; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6374; GFX900-NEXT:    s_waitcnt vmcnt(0)
6375; GFX900-NEXT:    s_setpc_b64 s[30:31]
6376;
6377; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_6_6:
6378; GFX90A:       ; %bb.0:
6379; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6380; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6381; GFX90A-NEXT:    ;;#ASMSTART
6382; GFX90A-NEXT:    ; def v[0:1]
6383; GFX90A-NEXT:    ;;#ASMEND
6384; GFX90A-NEXT:    ;;#ASMSTART
6385; GFX90A-NEXT:    ; def v[2:3]
6386; GFX90A-NEXT:    ;;#ASMEND
6387; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v1, 16
6388; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6389; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6390; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6391; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6392;
6393; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_6_6:
6394; GFX940:       ; %bb.0:
6395; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6396; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6397; GFX940-NEXT:    ;;#ASMSTART
6398; GFX940-NEXT:    ; def v[0:1]
6399; GFX940-NEXT:    ;;#ASMEND
6400; GFX940-NEXT:    ;;#ASMSTART
6401; GFX940-NEXT:    ; def v[2:3]
6402; GFX940-NEXT:    ;;#ASMEND
6403; GFX940-NEXT:    s_nop 0
6404; GFX940-NEXT:    v_alignbit_b32 v0, v3, v1, 16
6405; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6406; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6407; GFX940-NEXT:    s_waitcnt vmcnt(0)
6408; GFX940-NEXT:    s_setpc_b64 s[30:31]
6409  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6410  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6411  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
6412  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6413  ret void
6414}
6415
; Shuffle mask <4, 6, 6>: result = { %vec1[0], %vec1[2], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; %vec0 is never selected by the mask; the expected output contains only one
; "; def" region. The <3 x i16> result is stored to %ptr. The assertion lines
; are autogenerated (see the NOTE at the top of the file).
6416define void @v_shuffle_v3i16_v4i16__4_6_6(ptr addrspace(1) inreg %ptr) {
6417; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_6_6:
6418; GFX900:       ; %bb.0:
6419; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6420; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6421; GFX900-NEXT:    ;;#ASMSTART
6422; GFX900-NEXT:    ; def v[0:1]
6423; GFX900-NEXT:    ;;#ASMEND
6424; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6425; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
6426; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6427; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6428; GFX900-NEXT:    s_waitcnt vmcnt(0)
6429; GFX900-NEXT:    s_setpc_b64 s[30:31]
6430;
6431; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_6_6:
6432; GFX90A:       ; %bb.0:
6433; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6434; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6435; GFX90A-NEXT:    ;;#ASMSTART
6436; GFX90A-NEXT:    ; def v[0:1]
6437; GFX90A-NEXT:    ;;#ASMEND
6438; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6439; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
6440; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6441; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6442; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6443; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6444;
6445; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_6_6:
6446; GFX940:       ; %bb.0:
6447; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6448; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6449; GFX940-NEXT:    ;;#ASMSTART
6450; GFX940-NEXT:    ; def v[0:1]
6451; GFX940-NEXT:    ;;#ASMEND
6452; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6453; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
6454; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6455; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6456; GFX940-NEXT:    s_waitcnt vmcnt(0)
6457; GFX940-NEXT:    s_setpc_b64 s[30:31]
6458  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6459  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6460  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
6461  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6462  ret void
6463}
6464
; Shuffle mask <5, 6, 6>: result = { %vec1[1], %vec1[2], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; %vec0 is never selected by the mask; the expected output contains only one
; "; def" region. The <3 x i16> result is stored to %ptr. The assertion lines
; are autogenerated (see the NOTE at the top of the file).
6465define void @v_shuffle_v3i16_v4i16__5_6_6(ptr addrspace(1) inreg %ptr) {
6466; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_6_6:
6467; GFX900:       ; %bb.0:
6468; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6469; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6470; GFX900-NEXT:    ;;#ASMSTART
6471; GFX900-NEXT:    ; def v[0:1]
6472; GFX900-NEXT:    ;;#ASMEND
6473; GFX900-NEXT:    v_alignbit_b32 v0, v1, v0, 16
6474; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6475; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6476; GFX900-NEXT:    s_waitcnt vmcnt(0)
6477; GFX900-NEXT:    s_setpc_b64 s[30:31]
6478;
6479; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_6_6:
6480; GFX90A:       ; %bb.0:
6481; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6482; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6483; GFX90A-NEXT:    ;;#ASMSTART
6484; GFX90A-NEXT:    ; def v[0:1]
6485; GFX90A-NEXT:    ;;#ASMEND
6486; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v0, 16
6487; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6488; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6489; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6490; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6491;
6492; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_6_6:
6493; GFX940:       ; %bb.0:
6494; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6495; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6496; GFX940-NEXT:    ;;#ASMSTART
6497; GFX940-NEXT:    ; def v[0:1]
6498; GFX940-NEXT:    ;;#ASMEND
6499; GFX940-NEXT:    s_nop 0
6500; GFX940-NEXT:    v_alignbit_b32 v0, v1, v0, 16
6501; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6502; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6503; GFX940-NEXT:    s_waitcnt vmcnt(0)
6504; GFX940-NEXT:    s_setpc_b64 s[30:31]
6505  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6506  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6507  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
6508  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6509  ret void
6510}
6511
; Shuffle mask <6, 6, 6>: every result lane is %vec1[2]
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; %vec0 is never selected by the mask; the expected output contains only one
; "; def" region. The <3 x i16> result is stored to %ptr. The assertion lines
; are autogenerated (see the NOTE at the top of the file).
6512define void @v_shuffle_v3i16_v4i16__6_6_6(ptr addrspace(1) inreg %ptr) {
6513; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_6_6:
6514; GFX900:       ; %bb.0:
6515; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6516; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6517; GFX900-NEXT:    ;;#ASMSTART
6518; GFX900-NEXT:    ; def v[0:1]
6519; GFX900-NEXT:    ;;#ASMEND
6520; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6521; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
6522; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6523; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6524; GFX900-NEXT:    s_waitcnt vmcnt(0)
6525; GFX900-NEXT:    s_setpc_b64 s[30:31]
6526;
6527; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_6_6:
6528; GFX90A:       ; %bb.0:
6529; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6530; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6531; GFX90A-NEXT:    ;;#ASMSTART
6532; GFX90A-NEXT:    ; def v[0:1]
6533; GFX90A-NEXT:    ;;#ASMEND
6534; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6535; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
6536; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6537; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6538; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6539; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6540;
6541; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_6_6:
6542; GFX940:       ; %bb.0:
6543; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6544; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6545; GFX940-NEXT:    ;;#ASMSTART
6546; GFX940-NEXT:    ; def v[0:1]
6547; GFX940-NEXT:    ;;#ASMEND
6548; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6549; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
6550; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6551; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6552; GFX940-NEXT:    s_waitcnt vmcnt(0)
6553; GFX940-NEXT:    s_setpc_b64 s[30:31]
6554  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6555  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6556  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
6557  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6558  ret void
6559}
6560
; Shuffle mask <7, 6, 6>: result = { %vec1[3], %vec1[2], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; %vec0 is never selected by the mask; the expected output contains only one
; "; def" region. The <3 x i16> result is stored to %ptr. The assertion lines
; are autogenerated (see the NOTE at the top of the file).
6561define void @v_shuffle_v3i16_v4i16__7_6_6(ptr addrspace(1) inreg %ptr) {
6562; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_6:
6563; GFX900:       ; %bb.0:
6564; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6565; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6566; GFX900-NEXT:    ;;#ASMSTART
6567; GFX900-NEXT:    ; def v[0:1]
6568; GFX900-NEXT:    ;;#ASMEND
6569; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
6570; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6571; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6572; GFX900-NEXT:    s_waitcnt vmcnt(0)
6573; GFX900-NEXT:    s_setpc_b64 s[30:31]
6574;
6575; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_6:
6576; GFX90A:       ; %bb.0:
6577; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6578; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6579; GFX90A-NEXT:    ;;#ASMSTART
6580; GFX90A-NEXT:    ; def v[0:1]
6581; GFX90A-NEXT:    ;;#ASMEND
6582; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
6583; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6584; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6585; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6586; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6587;
6588; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_6:
6589; GFX940:       ; %bb.0:
6590; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6591; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6592; GFX940-NEXT:    ;;#ASMSTART
6593; GFX940-NEXT:    ; def v[0:1]
6594; GFX940-NEXT:    ;;#ASMEND
6595; GFX940-NEXT:    s_nop 0
6596; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
6597; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6598; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6599; GFX940-NEXT:    s_waitcnt vmcnt(0)
6600; GFX940-NEXT:    s_setpc_b64 s[30:31]
6601  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6602  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6603  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
6604  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6605  ret void
6606}
6607
; Shuffle mask <7, poison, 6>: result = { %vec1[3], poison, %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; Lane 1 is poison, so its stored value is unspecified. %vec0 is never selected
; by the mask; the expected output contains only one "; def" region.
; The assertion lines are autogenerated (see the NOTE at the top of the file).
6608define void @v_shuffle_v3i16_v4i16__7_u_6(ptr addrspace(1) inreg %ptr) {
6609; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_6:
6610; GFX900:       ; %bb.0:
6611; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6612; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6613; GFX900-NEXT:    ;;#ASMSTART
6614; GFX900-NEXT:    ; def v[0:1]
6615; GFX900-NEXT:    ;;#ASMEND
6616; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
6617; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6618; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6619; GFX900-NEXT:    s_waitcnt vmcnt(0)
6620; GFX900-NEXT:    s_setpc_b64 s[30:31]
6621;
6622; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_6:
6623; GFX90A:       ; %bb.0:
6624; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6625; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6626; GFX90A-NEXT:    ;;#ASMSTART
6627; GFX90A-NEXT:    ; def v[0:1]
6628; GFX90A-NEXT:    ;;#ASMEND
6629; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
6630; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6631; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6632; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6633; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6634;
6635; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_6:
6636; GFX940:       ; %bb.0:
6637; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6638; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6639; GFX940-NEXT:    ;;#ASMSTART
6640; GFX940-NEXT:    ; def v[0:1]
6641; GFX940-NEXT:    ;;#ASMEND
6642; GFX940-NEXT:    s_nop 0
6643; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
6644; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6645; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6646; GFX940-NEXT:    s_waitcnt vmcnt(0)
6647; GFX940-NEXT:    s_setpc_b64 s[30:31]
6648  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6649  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6650  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
6651  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6652  ret void
6653}
6654
; Shuffle mask <7, 0, 6>: result = { %vec1[3], %vec0[0], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; Both inputs are selected, so the expected output contains two "; def" regions.
; The <3 x i16> result is stored to %ptr. The assertion lines are autogenerated
; (see the NOTE at the top of the file); regenerate them instead of hand-editing.
6655define void @v_shuffle_v3i16_v4i16__7_0_6(ptr addrspace(1) inreg %ptr) {
6656; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_6:
6657; GFX900:       ; %bb.0:
6658; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6659; GFX900-NEXT:    ;;#ASMSTART
6660; GFX900-NEXT:    ; def v[0:1]
6661; GFX900-NEXT:    ;;#ASMEND
6662; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6663; GFX900-NEXT:    ;;#ASMSTART
6664; GFX900-NEXT:    ; def v[1:2]
6665; GFX900-NEXT:    ;;#ASMEND
6666; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
6667; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
6668; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
6669; GFX900-NEXT:    s_waitcnt vmcnt(0)
6670; GFX900-NEXT:    s_setpc_b64 s[30:31]
6671;
6672; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_6:
6673; GFX90A:       ; %bb.0:
6674; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6675; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6676; GFX90A-NEXT:    ;;#ASMSTART
6677; GFX90A-NEXT:    ; def v[0:1]
6678; GFX90A-NEXT:    ;;#ASMEND
6679; GFX90A-NEXT:    ;;#ASMSTART
6680; GFX90A-NEXT:    ; def v[2:3]
6681; GFX90A-NEXT:    ;;#ASMEND
6682; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
6683; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6684; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6685; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6686; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6687;
6688; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_6:
6689; GFX940:       ; %bb.0:
6690; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6691; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6692; GFX940-NEXT:    ;;#ASMSTART
6693; GFX940-NEXT:    ; def v[0:1]
6694; GFX940-NEXT:    ;;#ASMEND
6695; GFX940-NEXT:    ;;#ASMSTART
6696; GFX940-NEXT:    ; def v[2:3]
6697; GFX940-NEXT:    ;;#ASMEND
6698; GFX940-NEXT:    s_nop 0
6699; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
6700; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6701; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6702; GFX940-NEXT:    s_waitcnt vmcnt(0)
6703; GFX940-NEXT:    s_setpc_b64 s[30:31]
6704  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6705  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6706  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 6>
6707  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6708  ret void
6709}
6710
; Shuffle mask <7, 1, 6>: result = { %vec1[3], %vec0[1], %vec1[2] }
; (mask indices 0-3 select from %vec0, indices 4-7 select from %vec1).
; Both inputs are selected, so the expected output contains two "; def" regions.
; The <3 x i16> result is stored to %ptr. The assertion lines are autogenerated
; (see the NOTE at the top of the file); regenerate them instead of hand-editing.
6711define void @v_shuffle_v3i16_v4i16__7_1_6(ptr addrspace(1) inreg %ptr) {
6712; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_6:
6713; GFX900:       ; %bb.0:
6714; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6715; GFX900-NEXT:    ;;#ASMSTART
6716; GFX900-NEXT:    ; def v[0:1]
6717; GFX900-NEXT:    ;;#ASMEND
6718; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6719; GFX900-NEXT:    ;;#ASMSTART
6720; GFX900-NEXT:    ; def v[1:2]
6721; GFX900-NEXT:    ;;#ASMEND
6722; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6723; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
6724; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
6725; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
6726; GFX900-NEXT:    s_waitcnt vmcnt(0)
6727; GFX900-NEXT:    s_setpc_b64 s[30:31]
6728;
6729; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_6:
6730; GFX90A:       ; %bb.0:
6731; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6732; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6733; GFX90A-NEXT:    ;;#ASMSTART
6734; GFX90A-NEXT:    ; def v[0:1]
6735; GFX90A-NEXT:    ;;#ASMEND
6736; GFX90A-NEXT:    ;;#ASMSTART
6737; GFX90A-NEXT:    ; def v[2:3]
6738; GFX90A-NEXT:    ;;#ASMEND
6739; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6740; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
6741; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6742; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6743; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6744; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6745;
6746; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_6:
6747; GFX940:       ; %bb.0:
6748; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6749; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6750; GFX940-NEXT:    ;;#ASMSTART
6751; GFX940-NEXT:    ; def v[0:1]
6752; GFX940-NEXT:    ;;#ASMEND
6753; GFX940-NEXT:    ;;#ASMSTART
6754; GFX940-NEXT:    ; def v[2:3]
6755; GFX940-NEXT:    ;;#ASMEND
6756; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6757; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
6758; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6759; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6760; GFX940-NEXT:    s_waitcnt vmcnt(0)
6761; GFX940-NEXT:    s_setpc_b64 s[30:31]
6762  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6763  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6764  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 6>
6765  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6766  ret void
6767}
6768
; Shuffle mask <7, 2, 6>: result lanes are %vec1[3], %vec0[2], %vec1[2].
; The checked output forms the first dword with one v_alignbit_b32 (shift 16)
; and stores the third lane as a short at offset 4.
6769define void @v_shuffle_v3i16_v4i16__7_2_6(ptr addrspace(1) inreg %ptr) {
6770; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_6:
6771; GFX900:       ; %bb.0:
6772; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6773; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6774; GFX900-NEXT:    ;;#ASMSTART
6775; GFX900-NEXT:    ; def v[0:1]
6776; GFX900-NEXT:    ;;#ASMEND
6777; GFX900-NEXT:    ;;#ASMSTART
6778; GFX900-NEXT:    ; def v[2:3]
6779; GFX900-NEXT:    ;;#ASMEND
6780; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
6781; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6782; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6783; GFX900-NEXT:    s_waitcnt vmcnt(0)
6784; GFX900-NEXT:    s_setpc_b64 s[30:31]
6785;
6786; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_6:
6787; GFX90A:       ; %bb.0:
6788; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6789; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6790; GFX90A-NEXT:    ;;#ASMSTART
6791; GFX90A-NEXT:    ; def v[0:1]
6792; GFX90A-NEXT:    ;;#ASMEND
6793; GFX90A-NEXT:    ;;#ASMSTART
6794; GFX90A-NEXT:    ; def v[2:3]
6795; GFX90A-NEXT:    ;;#ASMEND
6796; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
6797; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6798; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6799; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6800; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6801;
6802; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_6:
6803; GFX940:       ; %bb.0:
6804; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6805; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6806; GFX940-NEXT:    ;;#ASMSTART
6807; GFX940-NEXT:    ; def v[0:1]
6808; GFX940-NEXT:    ;;#ASMEND
6809; GFX940-NEXT:    ;;#ASMSTART
6810; GFX940-NEXT:    ; def v[2:3]
6811; GFX940-NEXT:    ;;#ASMEND
6812; GFX940-NEXT:    s_nop 0
6813; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
6814; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6815; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6816; GFX940-NEXT:    s_waitcnt vmcnt(0)
6817; GFX940-NEXT:    s_setpc_b64 s[30:31]
6818  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6819  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6820  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 6>
6821  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6822  ret void
6823}
6824
; Shuffle mask <7, 3, 6>: result lanes are %vec1[3], %vec0[3], %vec1[2].
; The checked output forms the first dword with v_perm_b32 using selector
; 0x7060302 and stores the third lane as a short at offset 4.
6825define void @v_shuffle_v3i16_v4i16__7_3_6(ptr addrspace(1) inreg %ptr) {
6826; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_6:
6827; GFX900:       ; %bb.0:
6828; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6829; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6830; GFX900-NEXT:    ;;#ASMSTART
6831; GFX900-NEXT:    ; def v[0:1]
6832; GFX900-NEXT:    ;;#ASMEND
6833; GFX900-NEXT:    ;;#ASMSTART
6834; GFX900-NEXT:    ; def v[2:3]
6835; GFX900-NEXT:    ;;#ASMEND
6836; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6837; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
6838; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6839; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6840; GFX900-NEXT:    s_waitcnt vmcnt(0)
6841; GFX900-NEXT:    s_setpc_b64 s[30:31]
6842;
6843; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_6:
6844; GFX90A:       ; %bb.0:
6845; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6846; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6847; GFX90A-NEXT:    ;;#ASMSTART
6848; GFX90A-NEXT:    ; def v[0:1]
6849; GFX90A-NEXT:    ;;#ASMEND
6850; GFX90A-NEXT:    ;;#ASMSTART
6851; GFX90A-NEXT:    ; def v[2:3]
6852; GFX90A-NEXT:    ;;#ASMEND
6853; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6854; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
6855; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6856; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6857; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6858; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6859;
6860; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_6:
6861; GFX940:       ; %bb.0:
6862; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6863; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6864; GFX940-NEXT:    ;;#ASMSTART
6865; GFX940-NEXT:    ; def v[0:1]
6866; GFX940-NEXT:    ;;#ASMEND
6867; GFX940-NEXT:    ;;#ASMSTART
6868; GFX940-NEXT:    ; def v[2:3]
6869; GFX940-NEXT:    ;;#ASMEND
6870; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6871; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
6872; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6873; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6874; GFX940-NEXT:    s_waitcnt vmcnt(0)
6875; GFX940-NEXT:    s_setpc_b64 s[30:31]
6876  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6877  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6878  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 6>
6879  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6880  ret void
6881}
6882
; Shuffle mask <7, 4, 6>: result lanes are %vec1[3], %vec1[0], %vec1[2].
; Every selected lane comes from %vec1, and the checked output contains only
; one inline-asm def. The first dword is formed with v_alignbit_b32 (shift 16).
6883define void @v_shuffle_v3i16_v4i16__7_4_6(ptr addrspace(1) inreg %ptr) {
6884; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_6:
6885; GFX900:       ; %bb.0:
6886; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6887; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6888; GFX900-NEXT:    ;;#ASMSTART
6889; GFX900-NEXT:    ; def v[0:1]
6890; GFX900-NEXT:    ;;#ASMEND
6891; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
6892; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6893; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6894; GFX900-NEXT:    s_waitcnt vmcnt(0)
6895; GFX900-NEXT:    s_setpc_b64 s[30:31]
6896;
6897; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_6:
6898; GFX90A:       ; %bb.0:
6899; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6900; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6901; GFX90A-NEXT:    ;;#ASMSTART
6902; GFX90A-NEXT:    ; def v[0:1]
6903; GFX90A-NEXT:    ;;#ASMEND
6904; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v1, 16
6905; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6906; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6907; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6908; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6909;
6910; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_6:
6911; GFX940:       ; %bb.0:
6912; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6913; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6914; GFX940-NEXT:    ;;#ASMSTART
6915; GFX940-NEXT:    ; def v[0:1]
6916; GFX940-NEXT:    ;;#ASMEND
6917; GFX940-NEXT:    s_nop 0
6918; GFX940-NEXT:    v_alignbit_b32 v0, v0, v1, 16
6919; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6920; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6921; GFX940-NEXT:    s_waitcnt vmcnt(0)
6922; GFX940-NEXT:    s_setpc_b64 s[30:31]
6923  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6924  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6925  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
6926  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6927  ret void
6928}
6929
; Shuffle mask <7, 5, 6>: result lanes are %vec1[3], %vec1[1], %vec1[2].
; Every selected lane comes from %vec1, and the checked output contains only
; one inline-asm def. The first dword is formed with v_perm_b32 (selector
; 0x7060302).
6930define void @v_shuffle_v3i16_v4i16__7_5_6(ptr addrspace(1) inreg %ptr) {
6931; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_6:
6932; GFX900:       ; %bb.0:
6933; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6934; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6935; GFX900-NEXT:    ;;#ASMSTART
6936; GFX900-NEXT:    ; def v[0:1]
6937; GFX900-NEXT:    ;;#ASMEND
6938; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6939; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
6940; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6941; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6942; GFX900-NEXT:    s_waitcnt vmcnt(0)
6943; GFX900-NEXT:    s_setpc_b64 s[30:31]
6944;
6945; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_6:
6946; GFX90A:       ; %bb.0:
6947; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6948; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6949; GFX90A-NEXT:    ;;#ASMSTART
6950; GFX90A-NEXT:    ; def v[0:1]
6951; GFX90A-NEXT:    ;;#ASMEND
6952; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6953; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
6954; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6955; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6956; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6957; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6958;
6959; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_6:
6960; GFX940:       ; %bb.0:
6961; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6962; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6963; GFX940-NEXT:    ;;#ASMSTART
6964; GFX940-NEXT:    ; def v[0:1]
6965; GFX940-NEXT:    ;;#ASMEND
6966; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6967; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
6968; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6969; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6970; GFX940-NEXT:    s_waitcnt vmcnt(0)
6971; GFX940-NEXT:    s_setpc_b64 s[30:31]
6972  %vec0 = call <4 x i16> asm "; def $0", "=v"()
6973  %vec1 = call <4 x i16> asm "; def $0", "=v"()
6974  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
6975  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
6976  ret void
6977}
6978
; Shuffle mask <poison, 7, 7>: lane 0 is unspecified, lanes 1 and 2 are both
; %vec1[3]. The checked output needs no shuffle instruction: it stores the
; register holding %vec1's upper half as a dword, plus its high 16 bits
; (global_store_short_d16_hi) at offset 4.
6979define void @v_shuffle_v3i16_v4i16__u_7_7(ptr addrspace(1) inreg %ptr) {
6980; GFX900-LABEL: v_shuffle_v3i16_v4i16__u_7_7:
6981; GFX900:       ; %bb.0:
6982; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6983; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6984; GFX900-NEXT:    ;;#ASMSTART
6985; GFX900-NEXT:    ; def v[0:1]
6986; GFX900-NEXT:    ;;#ASMEND
6987; GFX900-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
6988; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
6989; GFX900-NEXT:    s_waitcnt vmcnt(0)
6990; GFX900-NEXT:    s_setpc_b64 s[30:31]
6991;
6992; GFX90A-LABEL: v_shuffle_v3i16_v4i16__u_7_7:
6993; GFX90A:       ; %bb.0:
6994; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6995; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6996; GFX90A-NEXT:    ;;#ASMSTART
6997; GFX90A-NEXT:    ; def v[0:1]
6998; GFX90A-NEXT:    ;;#ASMEND
6999; GFX90A-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
7000; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
7001; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7002; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7003;
7004; GFX940-LABEL: v_shuffle_v3i16_v4i16__u_7_7:
7005; GFX940:       ; %bb.0:
7006; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7007; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7008; GFX940-NEXT:    ;;#ASMSTART
7009; GFX940-NEXT:    ; def v[0:1]
7010; GFX940-NEXT:    ;;#ASMEND
7011; GFX940-NEXT:    global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1
7012; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
7013; GFX940-NEXT:    s_waitcnt vmcnt(0)
7014; GFX940-NEXT:    s_setpc_b64 s[30:31]
7015  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7016  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7017  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 7, i32 7>
7018  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7019  ret void
7020}
7021
; Shuffle mask <0, 7, 7>: result lanes are %vec0[0], %vec1[3], %vec1[3].
; The checked output merges the two sources with v_bfi_b32 under mask 0xffff
; and obtains the third lane with a 16-bit right shift of %vec1's upper dword.
7022define void @v_shuffle_v3i16_v4i16__0_7_7(ptr addrspace(1) inreg %ptr) {
7023; GFX900-LABEL: v_shuffle_v3i16_v4i16__0_7_7:
7024; GFX900:       ; %bb.0:
7025; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7026; GFX900-NEXT:    ;;#ASMSTART
7027; GFX900-NEXT:    ; def v[0:1]
7028; GFX900-NEXT:    ;;#ASMEND
7029; GFX900-NEXT:    s_mov_b32 s4, 0xffff
7030; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7031; GFX900-NEXT:    ;;#ASMSTART
7032; GFX900-NEXT:    ; def v[1:2]
7033; GFX900-NEXT:    ;;#ASMEND
7034; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v2
7035; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
7036; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
7037; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
7038; GFX900-NEXT:    s_waitcnt vmcnt(0)
7039; GFX900-NEXT:    s_setpc_b64 s[30:31]
7040;
7041; GFX90A-LABEL: v_shuffle_v3i16_v4i16__0_7_7:
7042; GFX90A:       ; %bb.0:
7043; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7044; GFX90A-NEXT:    ;;#ASMSTART
7045; GFX90A-NEXT:    ; def v[0:1]
7046; GFX90A-NEXT:    ;;#ASMEND
7047; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
7048; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7049; GFX90A-NEXT:    ;;#ASMSTART
7050; GFX90A-NEXT:    ; def v[2:3]
7051; GFX90A-NEXT:    ;;#ASMEND
7052; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v3
7053; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7054; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7055; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7056; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7057; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7058;
7059; GFX940-LABEL: v_shuffle_v3i16_v4i16__0_7_7:
7060; GFX940:       ; %bb.0:
7061; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7062; GFX940-NEXT:    ;;#ASMSTART
7063; GFX940-NEXT:    ; def v[0:1]
7064; GFX940-NEXT:    ;;#ASMEND
7065; GFX940-NEXT:    s_mov_b32 s2, 0xffff
7066; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7067; GFX940-NEXT:    ;;#ASMSTART
7068; GFX940-NEXT:    ; def v[2:3]
7069; GFX940-NEXT:    ;;#ASMEND
7070; GFX940-NEXT:    s_nop 0
7071; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v3
7072; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7073; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7074; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7075; GFX940-NEXT:    s_waitcnt vmcnt(0)
7076; GFX940-NEXT:    s_setpc_b64 s[30:31]
7077  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7078  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7079  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 7, i32 7>
7080  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7081  ret void
7082}
7083
; Shuffle mask <1, 7, 7>: result lanes are %vec0[1], %vec1[3], %vec1[3].
; The checked output forms the first dword with v_perm_b32 (selector
; 0x7060302) and obtains the third lane with a 16-bit right shift.
7084define void @v_shuffle_v3i16_v4i16__1_7_7(ptr addrspace(1) inreg %ptr) {
7085; GFX900-LABEL: v_shuffle_v3i16_v4i16__1_7_7:
7086; GFX900:       ; %bb.0:
7087; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7088; GFX900-NEXT:    ;;#ASMSTART
7089; GFX900-NEXT:    ; def v[0:1]
7090; GFX900-NEXT:    ;;#ASMEND
7091; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7092; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7093; GFX900-NEXT:    ;;#ASMSTART
7094; GFX900-NEXT:    ; def v[1:2]
7095; GFX900-NEXT:    ;;#ASMEND
7096; GFX900-NEXT:    v_perm_b32 v0, v2, v0, s4
7097; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
7098; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
7099; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
7100; GFX900-NEXT:    s_waitcnt vmcnt(0)
7101; GFX900-NEXT:    s_setpc_b64 s[30:31]
7102;
7103; GFX90A-LABEL: v_shuffle_v3i16_v4i16__1_7_7:
7104; GFX90A:       ; %bb.0:
7105; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7106; GFX90A-NEXT:    ;;#ASMSTART
7107; GFX90A-NEXT:    ; def v[0:1]
7108; GFX90A-NEXT:    ;;#ASMEND
7109; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7110; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7111; GFX90A-NEXT:    ;;#ASMSTART
7112; GFX90A-NEXT:    ; def v[2:3]
7113; GFX90A-NEXT:    ;;#ASMEND
7114; GFX90A-NEXT:    v_perm_b32 v0, v3, v0, s4
7115; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7116; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7117; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7118; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7119; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7120;
7121; GFX940-LABEL: v_shuffle_v3i16_v4i16__1_7_7:
7122; GFX940:       ; %bb.0:
7123; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7124; GFX940-NEXT:    ;;#ASMSTART
7125; GFX940-NEXT:    ; def v[0:1]
7126; GFX940-NEXT:    ;;#ASMEND
7127; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7128; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7129; GFX940-NEXT:    ;;#ASMSTART
7130; GFX940-NEXT:    ; def v[2:3]
7131; GFX940-NEXT:    ;;#ASMEND
7132; GFX940-NEXT:    s_nop 0
7133; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s2
7134; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7135; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7136; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7137; GFX940-NEXT:    s_waitcnt vmcnt(0)
7138; GFX940-NEXT:    s_setpc_b64 s[30:31]
7139  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7140  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7141  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 7, i32 7>
7142  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7143  ret void
7144}
7145
; Shuffle mask <2, 7, 7>: result lanes are %vec0[2], %vec1[3], %vec1[3].
; The checked output merges the two sources with v_bfi_b32 under mask 0xffff
; and obtains the third lane with a 16-bit right shift of %vec1's upper dword.
7146define void @v_shuffle_v3i16_v4i16__2_7_7(ptr addrspace(1) inreg %ptr) {
7147; GFX900-LABEL: v_shuffle_v3i16_v4i16__2_7_7:
7148; GFX900:       ; %bb.0:
7149; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7150; GFX900-NEXT:    ;;#ASMSTART
7151; GFX900-NEXT:    ; def v[0:1]
7152; GFX900-NEXT:    ;;#ASMEND
7153; GFX900-NEXT:    s_mov_b32 s4, 0xffff
7154; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7155; GFX900-NEXT:    ;;#ASMSTART
7156; GFX900-NEXT:    ; def v[2:3]
7157; GFX900-NEXT:    ;;#ASMEND
7158; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v3
7159; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7160; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7161; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
7162; GFX900-NEXT:    s_waitcnt vmcnt(0)
7163; GFX900-NEXT:    s_setpc_b64 s[30:31]
7164;
7165; GFX90A-LABEL: v_shuffle_v3i16_v4i16__2_7_7:
7166; GFX90A:       ; %bb.0:
7167; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7168; GFX90A-NEXT:    ;;#ASMSTART
7169; GFX90A-NEXT:    ; def v[0:1]
7170; GFX90A-NEXT:    ;;#ASMEND
7171; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
7172; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7173; GFX90A-NEXT:    ;;#ASMSTART
7174; GFX90A-NEXT:    ; def v[2:3]
7175; GFX90A-NEXT:    ;;#ASMEND
7176; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v3
7177; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7178; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7179; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7180; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7181; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7182;
7183; GFX940-LABEL: v_shuffle_v3i16_v4i16__2_7_7:
7184; GFX940:       ; %bb.0:
7185; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7186; GFX940-NEXT:    ;;#ASMSTART
7187; GFX940-NEXT:    ; def v[0:1]
7188; GFX940-NEXT:    ;;#ASMEND
7189; GFX940-NEXT:    s_mov_b32 s2, 0xffff
7190; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7191; GFX940-NEXT:    ;;#ASMSTART
7192; GFX940-NEXT:    ; def v[2:3]
7193; GFX940-NEXT:    ;;#ASMEND
7194; GFX940-NEXT:    s_nop 0
7195; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v3
7196; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7197; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7198; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7199; GFX940-NEXT:    s_waitcnt vmcnt(0)
7200; GFX940-NEXT:    s_setpc_b64 s[30:31]
7201  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7202  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7203  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 7, i32 7>
7204  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7205  ret void
7206}
7207
; Shuffle mask <3, 7, 7>: result lanes are %vec0[3], %vec1[3], %vec1[3].
; The checked output forms the first dword with v_perm_b32 (selector
; 0x7060302) and obtains the third lane with a 16-bit right shift.
7208define void @v_shuffle_v3i16_v4i16__3_7_7(ptr addrspace(1) inreg %ptr) {
7209; GFX900-LABEL: v_shuffle_v3i16_v4i16__3_7_7:
7210; GFX900:       ; %bb.0:
7211; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7212; GFX900-NEXT:    ;;#ASMSTART
7213; GFX900-NEXT:    ; def v[0:1]
7214; GFX900-NEXT:    ;;#ASMEND
7215; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7216; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7217; GFX900-NEXT:    ;;#ASMSTART
7218; GFX900-NEXT:    ; def v[2:3]
7219; GFX900-NEXT:    ;;#ASMEND
7220; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
7221; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7222; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
7223; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7224; GFX900-NEXT:    s_waitcnt vmcnt(0)
7225; GFX900-NEXT:    s_setpc_b64 s[30:31]
7226;
7227; GFX90A-LABEL: v_shuffle_v3i16_v4i16__3_7_7:
7228; GFX90A:       ; %bb.0:
7229; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7230; GFX90A-NEXT:    ;;#ASMSTART
7231; GFX90A-NEXT:    ; def v[0:1]
7232; GFX90A-NEXT:    ;;#ASMEND
7233; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7234; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7235; GFX90A-NEXT:    ;;#ASMSTART
7236; GFX90A-NEXT:    ; def v[2:3]
7237; GFX90A-NEXT:    ;;#ASMEND
7238; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
7239; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7240; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7241; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7242; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7243; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7244;
7245; GFX940-LABEL: v_shuffle_v3i16_v4i16__3_7_7:
7246; GFX940:       ; %bb.0:
7247; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7248; GFX940-NEXT:    ;;#ASMSTART
7249; GFX940-NEXT:    ; def v[0:1]
7250; GFX940-NEXT:    ;;#ASMEND
7251; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7252; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7253; GFX940-NEXT:    ;;#ASMSTART
7254; GFX940-NEXT:    ; def v[2:3]
7255; GFX940-NEXT:    ;;#ASMEND
7256; GFX940-NEXT:    s_nop 0
7257; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
7258; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7259; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7260; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7261; GFX940-NEXT:    s_waitcnt vmcnt(0)
7262; GFX940-NEXT:    s_setpc_b64 s[30:31]
7263  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7264  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7265  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 7, i32 7>
7266  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7267  ret void
7268}
7269
; Shuffle mask <4, 7, 7>: result lanes are %vec1[0], %vec1[3], %vec1[3].
; Every selected lane comes from %vec1, and the checked output contains only
; one inline-asm def. The two dwords of that value are merged with v_bfi_b32
; under mask 0xffff; the third lane comes from a 16-bit right shift.
7270define void @v_shuffle_v3i16_v4i16__4_7_7(ptr addrspace(1) inreg %ptr) {
7271; GFX900-LABEL: v_shuffle_v3i16_v4i16__4_7_7:
7272; GFX900:       ; %bb.0:
7273; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7274; GFX900-NEXT:    ;;#ASMSTART
7275; GFX900-NEXT:    ; def v[0:1]
7276; GFX900-NEXT:    ;;#ASMEND
7277; GFX900-NEXT:    s_mov_b32 s4, 0xffff
7278; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7279; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
7280; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7281; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7282; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7283; GFX900-NEXT:    s_waitcnt vmcnt(0)
7284; GFX900-NEXT:    s_setpc_b64 s[30:31]
7285;
7286; GFX90A-LABEL: v_shuffle_v3i16_v4i16__4_7_7:
7287; GFX90A:       ; %bb.0:
7288; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7289; GFX90A-NEXT:    ;;#ASMSTART
7290; GFX90A-NEXT:    ; def v[0:1]
7291; GFX90A-NEXT:    ;;#ASMEND
7292; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
7293; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7294; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
7295; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7296; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7297; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7298; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7299; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7300;
7301; GFX940-LABEL: v_shuffle_v3i16_v4i16__4_7_7:
7302; GFX940:       ; %bb.0:
7303; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7304; GFX940-NEXT:    ;;#ASMSTART
7305; GFX940-NEXT:    ; def v[0:1]
7306; GFX940-NEXT:    ;;#ASMEND
7307; GFX940-NEXT:    s_mov_b32 s2, 0xffff
7308; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7309; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
7310; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7311; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7312; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7313; GFX940-NEXT:    s_waitcnt vmcnt(0)
7314; GFX940-NEXT:    s_setpc_b64 s[30:31]
7315  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7316  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7317  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
7318  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7319  ret void
7320}
7321
; Shuffle mask <5, 7, 7>: result lanes are %vec1[1], %vec1[3], %vec1[3].
; Every selected lane comes from %vec1, and the checked output contains only
; one inline-asm def. The first dword is formed with v_perm_b32 (selector
; 0x7060302); the third lane comes from a 16-bit right shift.
7322define void @v_shuffle_v3i16_v4i16__5_7_7(ptr addrspace(1) inreg %ptr) {
7323; GFX900-LABEL: v_shuffle_v3i16_v4i16__5_7_7:
7324; GFX900:       ; %bb.0:
7325; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7326; GFX900-NEXT:    ;;#ASMSTART
7327; GFX900-NEXT:    ; def v[0:1]
7328; GFX900-NEXT:    ;;#ASMEND
7329; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7330; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7331; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
7332; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7333; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7334; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7335; GFX900-NEXT:    s_waitcnt vmcnt(0)
7336; GFX900-NEXT:    s_setpc_b64 s[30:31]
7337;
7338; GFX90A-LABEL: v_shuffle_v3i16_v4i16__5_7_7:
7339; GFX90A:       ; %bb.0:
7340; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7341; GFX90A-NEXT:    ;;#ASMSTART
7342; GFX90A-NEXT:    ; def v[0:1]
7343; GFX90A-NEXT:    ;;#ASMEND
7344; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7345; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7346; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
7347; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7348; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7349; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7350; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7351; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7352;
7353; GFX940-LABEL: v_shuffle_v3i16_v4i16__5_7_7:
7354; GFX940:       ; %bb.0:
7355; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7356; GFX940-NEXT:    ;;#ASMSTART
7357; GFX940-NEXT:    ; def v[0:1]
7358; GFX940-NEXT:    ;;#ASMEND
7359; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7360; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7361; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
7362; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7363; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7364; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7365; GFX940-NEXT:    s_waitcnt vmcnt(0)
7366; GFX940-NEXT:    s_setpc_b64 s[30:31]
7367  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7368  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7369  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 7, i32 7>
7370  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7371  ret void
7372}
7373
; Shuffle mask <6, 7, 7>: result lanes are %vec1[2], %vec1[3], %vec1[3].
; Lanes 0-1 are exactly %vec1's upper dword, so the checked output needs no
; shuffle instruction: it stores that register as a dword, plus its high
; 16 bits (global_store_short_d16_hi) at offset 4.
7374define void @v_shuffle_v3i16_v4i16__6_7_7(ptr addrspace(1) inreg %ptr) {
7375; GFX900-LABEL: v_shuffle_v3i16_v4i16__6_7_7:
7376; GFX900:       ; %bb.0:
7377; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7378; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7379; GFX900-NEXT:    ;;#ASMSTART
7380; GFX900-NEXT:    ; def v[0:1]
7381; GFX900-NEXT:    ;;#ASMEND
7382; GFX900-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
7383; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
7384; GFX900-NEXT:    s_waitcnt vmcnt(0)
7385; GFX900-NEXT:    s_setpc_b64 s[30:31]
7386;
7387; GFX90A-LABEL: v_shuffle_v3i16_v4i16__6_7_7:
7388; GFX90A:       ; %bb.0:
7389; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7390; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7391; GFX90A-NEXT:    ;;#ASMSTART
7392; GFX90A-NEXT:    ; def v[0:1]
7393; GFX90A-NEXT:    ;;#ASMEND
7394; GFX90A-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
7395; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
7396; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7397; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7398;
7399; GFX940-LABEL: v_shuffle_v3i16_v4i16__6_7_7:
7400; GFX940:       ; %bb.0:
7401; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7402; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7403; GFX940-NEXT:    ;;#ASMSTART
7404; GFX940-NEXT:    ; def v[0:1]
7405; GFX940-NEXT:    ;;#ASMEND
7406; GFX940-NEXT:    global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1
7407; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
7408; GFX940-NEXT:    s_waitcnt vmcnt(0)
7409; GFX940-NEXT:    s_setpc_b64 s[30:31]
7410  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7411  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7412  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 7, i32 7>
7413  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7414  ret void
7415}
7416
; Shuffle mask <7, poison, 7>: lanes 0 and 2 are both %vec1[3]; lane 1 is
; unspecified. The checked output shifts %vec1's upper dword right by 16 for
; the third lane and uses v_alignbit_b32 (shift 16) for the first dword.
; NOTE(review): the scalar first source of that v_alignbit_b32 appears to
; supply only the poison lane's bits, so its value should not matter - confirm.
7417define void @v_shuffle_v3i16_v4i16__7_u_7(ptr addrspace(1) inreg %ptr) {
7418; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_u_7:
7419; GFX900:       ; %bb.0:
7420; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7421; GFX900-NEXT:    ;;#ASMSTART
7422; GFX900-NEXT:    ; def v[0:1]
7423; GFX900-NEXT:    ;;#ASMEND
7424; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7425; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
7426; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
7427; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
7428; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
7429; GFX900-NEXT:    s_waitcnt vmcnt(0)
7430; GFX900-NEXT:    s_setpc_b64 s[30:31]
7431;
7432; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_u_7:
7433; GFX90A:       ; %bb.0:
7434; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7435; GFX90A-NEXT:    ;;#ASMSTART
7436; GFX90A-NEXT:    ; def v[0:1]
7437; GFX90A-NEXT:    ;;#ASMEND
7438; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7439; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
7440; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v1, 16
7441; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
7442; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
7443; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7444; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7445;
7446; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_u_7:
7447; GFX940:       ; %bb.0:
7448; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7449; GFX940-NEXT:    ;;#ASMSTART
7450; GFX940-NEXT:    ; def v[0:1]
7451; GFX940-NEXT:    ;;#ASMEND
7452; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7453; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
7454; GFX940-NEXT:    v_alignbit_b32 v1, s0, v1, 16
7455; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
7456; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
7457; GFX940-NEXT:    s_waitcnt vmcnt(0)
7458; GFX940-NEXT:    s_setpc_b64 s[30:31]
7459  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7460  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7461  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 7>
7462  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7463  ret void
7464}
7465
; Shuffle mask <7, 0, 7>: result lanes are %vec1[3], %vec0[0], %vec1[3].
; The checked output forms the first dword with v_alignbit_b32 (shift 16) and
; obtains the third lane with a 16-bit right shift of %vec1's upper dword.
7466define void @v_shuffle_v3i16_v4i16__7_0_7(ptr addrspace(1) inreg %ptr) {
7467; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_0_7:
7468; GFX900:       ; %bb.0:
7469; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7470; GFX900-NEXT:    ;;#ASMSTART
7471; GFX900-NEXT:    ; def v[0:1]
7472; GFX900-NEXT:    ;;#ASMEND
7473; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7474; GFX900-NEXT:    ;;#ASMSTART
7475; GFX900-NEXT:    ; def v[1:2]
7476; GFX900-NEXT:    ;;#ASMEND
7477; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
7478; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
7479; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
7480; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
7481; GFX900-NEXT:    s_waitcnt vmcnt(0)
7482; GFX900-NEXT:    s_setpc_b64 s[30:31]
7483;
7484; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_0_7:
7485; GFX90A:       ; %bb.0:
7486; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7487; GFX90A-NEXT:    ;;#ASMSTART
7488; GFX90A-NEXT:    ; def v[0:1]
7489; GFX90A-NEXT:    ;;#ASMEND
7490; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7491; GFX90A-NEXT:    ;;#ASMSTART
7492; GFX90A-NEXT:    ; def v[2:3]
7493; GFX90A-NEXT:    ;;#ASMEND
7494; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
7495; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7496; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7497; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7498; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7499; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7500;
7501; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_0_7:
7502; GFX940:       ; %bb.0:
7503; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7504; GFX940-NEXT:    ;;#ASMSTART
7505; GFX940-NEXT:    ; def v[0:1]
7506; GFX940-NEXT:    ;;#ASMEND
7507; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7508; GFX940-NEXT:    ;;#ASMSTART
7509; GFX940-NEXT:    ; def v[2:3]
7510; GFX940-NEXT:    ;;#ASMEND
7511; GFX940-NEXT:    s_nop 0
7512; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
7513; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7514; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7515; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7516; GFX940-NEXT:    s_waitcnt vmcnt(0)
7517; GFX940-NEXT:    s_setpc_b64 s[30:31]
7518  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7519  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7520  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 7>
7521  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7522  ret void
7523}
7524
; VGPR test: shuffles two <4 x i16> inline-asm defs with mask <7, 1, 7>.
; Mask indices 0-3 select lanes of %vec0 and 4-7 select lanes of %vec1, so the
; stored <3 x i16> is <%vec1[3], %vec0[1], %vec1[3]>. The checks below expect a
; v_perm_b32 with selector 0x7060302 for the dword stored at offset 0 and
; v_lshrrev_b32 for the short stored at offset 4.
7525define void @v_shuffle_v3i16_v4i16__7_1_7(ptr addrspace(1) inreg %ptr) {
7526; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_1_7:
7527; GFX900:       ; %bb.0:
7528; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7529; GFX900-NEXT:    ;;#ASMSTART
7530; GFX900-NEXT:    ; def v[0:1]
7531; GFX900-NEXT:    ;;#ASMEND
7532; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7533; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7534; GFX900-NEXT:    ;;#ASMSTART
7535; GFX900-NEXT:    ; def v[1:2]
7536; GFX900-NEXT:    ;;#ASMEND
7537; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
7538; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
7539; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
7540; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
7541; GFX900-NEXT:    s_waitcnt vmcnt(0)
7542; GFX900-NEXT:    s_setpc_b64 s[30:31]
7543;
7544; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_1_7:
7545; GFX90A:       ; %bb.0:
7546; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7547; GFX90A-NEXT:    ;;#ASMSTART
7548; GFX90A-NEXT:    ; def v[0:1]
7549; GFX90A-NEXT:    ;;#ASMEND
7550; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7551; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7552; GFX90A-NEXT:    ;;#ASMSTART
7553; GFX90A-NEXT:    ; def v[2:3]
7554; GFX90A-NEXT:    ;;#ASMEND
7555; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
7556; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7557; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7558; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7559; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7560; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7561;
7562; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_1_7:
7563; GFX940:       ; %bb.0:
7564; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7565; GFX940-NEXT:    ;;#ASMSTART
7566; GFX940-NEXT:    ; def v[0:1]
7567; GFX940-NEXT:    ;;#ASMEND
7568; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7569; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7570; GFX940-NEXT:    ;;#ASMSTART
7571; GFX940-NEXT:    ; def v[2:3]
7572; GFX940-NEXT:    ;;#ASMEND
7573; GFX940-NEXT:    s_nop 0
7574; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
7575; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7576; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7577; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7578; GFX940-NEXT:    s_waitcnt vmcnt(0)
7579; GFX940-NEXT:    s_setpc_b64 s[30:31]
7580  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7581  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7582  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 7>
7583  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7584  ret void
7585}
7586
; VGPR test: shuffles two <4 x i16> inline-asm defs with mask <7, 2, 7>.
; Mask indices 0-3 select lanes of %vec0 and 4-7 select lanes of %vec1, so the
; stored <3 x i16> is <%vec1[3], %vec0[2], %vec1[3]>. The checks below expect
; v_alignbit_b32 on the second register of each def for the dword stored at
; offset 0, and v_lshrrev_b32 for the short stored at offset 4.
7587define void @v_shuffle_v3i16_v4i16__7_2_7(ptr addrspace(1) inreg %ptr) {
7588; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_2_7:
7589; GFX900:       ; %bb.0:
7590; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7591; GFX900-NEXT:    ;;#ASMSTART
7592; GFX900-NEXT:    ; def v[0:1]
7593; GFX900-NEXT:    ;;#ASMEND
7594; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7595; GFX900-NEXT:    ;;#ASMSTART
7596; GFX900-NEXT:    ; def v[2:3]
7597; GFX900-NEXT:    ;;#ASMEND
7598; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
7599; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7600; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7601; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
7602; GFX900-NEXT:    s_waitcnt vmcnt(0)
7603; GFX900-NEXT:    s_setpc_b64 s[30:31]
7604;
7605; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_2_7:
7606; GFX90A:       ; %bb.0:
7607; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7608; GFX90A-NEXT:    ;;#ASMSTART
7609; GFX90A-NEXT:    ; def v[0:1]
7610; GFX90A-NEXT:    ;;#ASMEND
7611; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7612; GFX90A-NEXT:    ;;#ASMSTART
7613; GFX90A-NEXT:    ; def v[2:3]
7614; GFX90A-NEXT:    ;;#ASMEND
7615; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
7616; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7617; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7618; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7619; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7620; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7621;
7622; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_2_7:
7623; GFX940:       ; %bb.0:
7624; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7625; GFX940-NEXT:    ;;#ASMSTART
7626; GFX940-NEXT:    ; def v[0:1]
7627; GFX940-NEXT:    ;;#ASMEND
7628; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7629; GFX940-NEXT:    ;;#ASMSTART
7630; GFX940-NEXT:    ; def v[2:3]
7631; GFX940-NEXT:    ;;#ASMEND
7632; GFX940-NEXT:    s_nop 0
7633; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
7634; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7635; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7636; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7637; GFX940-NEXT:    s_waitcnt vmcnt(0)
7638; GFX940-NEXT:    s_setpc_b64 s[30:31]
7639  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7640  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7641  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 7>
7642  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7643  ret void
7644}
7645
; VGPR test: shuffles two <4 x i16> inline-asm defs with mask <7, 3, 7>.
; Mask indices 0-3 select lanes of %vec0 and 4-7 select lanes of %vec1, so the
; stored <3 x i16> is <%vec1[3], %vec0[3], %vec1[3]>. The checks below expect a
; v_perm_b32 with selector 0x7060302 on the second register of each def, plus
; v_lshrrev_b32 for the short stored at offset 4.
7646define void @v_shuffle_v3i16_v4i16__7_3_7(ptr addrspace(1) inreg %ptr) {
7647; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_3_7:
7648; GFX900:       ; %bb.0:
7649; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7650; GFX900-NEXT:    ;;#ASMSTART
7651; GFX900-NEXT:    ; def v[0:1]
7652; GFX900-NEXT:    ;;#ASMEND
7653; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7654; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7655; GFX900-NEXT:    ;;#ASMSTART
7656; GFX900-NEXT:    ; def v[2:3]
7657; GFX900-NEXT:    ;;#ASMEND
7658; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
7659; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7660; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
7661; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7662; GFX900-NEXT:    s_waitcnt vmcnt(0)
7663; GFX900-NEXT:    s_setpc_b64 s[30:31]
7664;
7665; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_3_7:
7666; GFX90A:       ; %bb.0:
7667; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7668; GFX90A-NEXT:    ;;#ASMSTART
7669; GFX90A-NEXT:    ; def v[0:1]
7670; GFX90A-NEXT:    ;;#ASMEND
7671; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7672; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7673; GFX90A-NEXT:    ;;#ASMSTART
7674; GFX90A-NEXT:    ; def v[2:3]
7675; GFX90A-NEXT:    ;;#ASMEND
7676; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
7677; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7678; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7679; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7680; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7681; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7682;
7683; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_3_7:
7684; GFX940:       ; %bb.0:
7685; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7686; GFX940-NEXT:    ;;#ASMSTART
7687; GFX940-NEXT:    ; def v[0:1]
7688; GFX940-NEXT:    ;;#ASMEND
7689; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7690; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7691; GFX940-NEXT:    ;;#ASMSTART
7692; GFX940-NEXT:    ; def v[2:3]
7693; GFX940-NEXT:    ;;#ASMEND
7694; GFX940-NEXT:    s_nop 0
7695; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
7696; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7697; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7698; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7699; GFX940-NEXT:    s_waitcnt vmcnt(0)
7700; GFX940-NEXT:    s_setpc_b64 s[30:31]
7701  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7702  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7703  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 7>
7704  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7705  ret void
7706}
7707
; VGPR test: shuffle mask <7, 4, 7>. Every mask index is >= 4, so all three
; result lanes come from %vec1: <%vec1[3], %vec1[0], %vec1[3]>. %vec0 is never
; read, and the checks below contain only one "def" asm block. The dword
; stored at offset 0 is expected from v_alignbit_b32 over both registers of
; that def.
7708define void @v_shuffle_v3i16_v4i16__7_4_7(ptr addrspace(1) inreg %ptr) {
7709; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_4_7:
7710; GFX900:       ; %bb.0:
7711; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7712; GFX900-NEXT:    ;;#ASMSTART
7713; GFX900-NEXT:    ; def v[0:1]
7714; GFX900-NEXT:    ;;#ASMEND
7715; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7716; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
7717; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
7718; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7719; GFX900-NEXT:    global_store_short v2, v3, s[16:17] offset:4
7720; GFX900-NEXT:    s_waitcnt vmcnt(0)
7721; GFX900-NEXT:    s_setpc_b64 s[30:31]
7722;
7723; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_4_7:
7724; GFX90A:       ; %bb.0:
7725; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7726; GFX90A-NEXT:    ;;#ASMSTART
7727; GFX90A-NEXT:    ; def v[0:1]
7728; GFX90A-NEXT:    ;;#ASMEND
7729; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7730; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v1, 16
7731; GFX90A-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
7732; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7733; GFX90A-NEXT:    global_store_short v2, v3, s[16:17] offset:4
7734; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7735; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7736;
7737; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_4_7:
7738; GFX940:       ; %bb.0:
7739; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7740; GFX940-NEXT:    ;;#ASMSTART
7741; GFX940-NEXT:    ; def v[0:1]
7742; GFX940-NEXT:    ;;#ASMEND
7743; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7744; GFX940-NEXT:    v_alignbit_b32 v0, v0, v1, 16
7745; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
7746; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7747; GFX940-NEXT:    global_store_short v2, v3, s[0:1] offset:4 sc0 sc1
7748; GFX940-NEXT:    s_waitcnt vmcnt(0)
7749; GFX940-NEXT:    s_setpc_b64 s[30:31]
7750  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7751  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7752  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 7>
7753  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7754  ret void
7755}
7756
; VGPR test: shuffle mask <7, 5, 7>. Every mask index is >= 4, so all three
; result lanes come from %vec1: <%vec1[3], %vec1[1], %vec1[3]>. %vec0 is never
; read, and the checks below contain only one "def" asm block. The dword
; stored at offset 0 is expected from v_perm_b32 with selector 0x7060302.
7757define void @v_shuffle_v3i16_v4i16__7_5_7(ptr addrspace(1) inreg %ptr) {
7758; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_5_7:
7759; GFX900:       ; %bb.0:
7760; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7761; GFX900-NEXT:    ;;#ASMSTART
7762; GFX900-NEXT:    ; def v[0:1]
7763; GFX900-NEXT:    ;;#ASMEND
7764; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7765; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7766; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
7767; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7768; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7769; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7770; GFX900-NEXT:    s_waitcnt vmcnt(0)
7771; GFX900-NEXT:    s_setpc_b64 s[30:31]
7772;
7773; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_5_7:
7774; GFX90A:       ; %bb.0:
7775; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7776; GFX90A-NEXT:    ;;#ASMSTART
7777; GFX90A-NEXT:    ; def v[0:1]
7778; GFX90A-NEXT:    ;;#ASMEND
7779; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7780; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7781; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
7782; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7783; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7784; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7785; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7786; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7787;
7788; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_5_7:
7789; GFX940:       ; %bb.0:
7790; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7791; GFX940-NEXT:    ;;#ASMSTART
7792; GFX940-NEXT:    ; def v[0:1]
7793; GFX940-NEXT:    ;;#ASMEND
7794; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7795; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7796; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
7797; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7798; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7799; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7800; GFX940-NEXT:    s_waitcnt vmcnt(0)
7801; GFX940-NEXT:    s_setpc_b64 s[30:31]
7802  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7803  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7804  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
7805  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7806  ret void
7807}
7808
; VGPR test: shuffle mask <7, 6, 7>. Every mask index is >= 4, so all three
; result lanes come from %vec1: <%vec1[3], %vec1[2], %vec1[3]>. %vec0 is never
; read, and the checks below contain only one "def" asm block. Both inputs of
; the expected v_alignbit_b32 are the second register of that def.
7809define void @v_shuffle_v3i16_v4i16__7_6_7(ptr addrspace(1) inreg %ptr) {
7810; GFX900-LABEL: v_shuffle_v3i16_v4i16__7_6_7:
7811; GFX900:       ; %bb.0:
7812; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7813; GFX900-NEXT:    ;;#ASMSTART
7814; GFX900-NEXT:    ; def v[0:1]
7815; GFX900-NEXT:    ;;#ASMEND
7816; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7817; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
7818; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7819; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7820; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7821; GFX900-NEXT:    s_waitcnt vmcnt(0)
7822; GFX900-NEXT:    s_setpc_b64 s[30:31]
7823;
7824; GFX90A-LABEL: v_shuffle_v3i16_v4i16__7_6_7:
7825; GFX90A:       ; %bb.0:
7826; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7827; GFX90A-NEXT:    ;;#ASMSTART
7828; GFX90A-NEXT:    ; def v[0:1]
7829; GFX90A-NEXT:    ;;#ASMEND
7830; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7831; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
7832; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7833; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7834; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7835; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7836; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7837;
7838; GFX940-LABEL: v_shuffle_v3i16_v4i16__7_6_7:
7839; GFX940:       ; %bb.0:
7840; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7841; GFX940-NEXT:    ;;#ASMSTART
7842; GFX940-NEXT:    ; def v[0:1]
7843; GFX940-NEXT:    ;;#ASMEND
7844; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7845; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
7846; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7847; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7848; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7849; GFX940-NEXT:    s_waitcnt vmcnt(0)
7850; GFX940-NEXT:    s_setpc_b64 s[30:31]
7851  %vec0 = call <4 x i16> asm "; def $0", "=v"()
7852  %vec1 = call <4 x i16> asm "; def $0", "=v"()
7853  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 7>
7854  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
7855  ret void
7856}
7857
; SGPR test: the shuffle mask is entirely poison, so no lane of %vec0 is read.
; The <3 x i16> result is widened to <4 x i16> (fourth lane poison) and passed
; to a side-effecting "use" asm pinned to s[8:9]. The checks below contain
; only that "use" block and no "def" block.
7858define void @s_shuffle_v3i16_v4i16__u_u_u() {
7859; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_u_u:
7860; GFX9:       ; %bb.0:
7861; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7862; GFX9-NEXT:    ;;#ASMSTART
7863; GFX9-NEXT:    ; use s[8:9]
7864; GFX9-NEXT:    ;;#ASMEND
7865; GFX9-NEXT:    s_setpc_b64 s[30:31]
7866  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7867  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> poison
7868  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7869  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7870  ret void
7871}
7872
; SGPR test: mask <0, poison, poison> keeps %vec0[0] in lane 0; the other
; lanes are poison. The result is widened to <4 x i16> and passed to a
; side-effecting "use" asm pinned to s[8:9]. The checks below expect the
; "def" asm to be emitted directly into s[8:9]; the gfx940 run additionally
; expects an s_nop 0 between the def and the use.
7873define void @s_shuffle_v3i16_v4i16__0_u_u() {
7874; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_u_u:
7875; GFX900:       ; %bb.0:
7876; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7877; GFX900-NEXT:    ;;#ASMSTART
7878; GFX900-NEXT:    ; def s[8:9]
7879; GFX900-NEXT:    ;;#ASMEND
7880; GFX900-NEXT:    ;;#ASMSTART
7881; GFX900-NEXT:    ; use s[8:9]
7882; GFX900-NEXT:    ;;#ASMEND
7883; GFX900-NEXT:    s_setpc_b64 s[30:31]
7884;
7885; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_u_u:
7886; GFX90A:       ; %bb.0:
7887; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7888; GFX90A-NEXT:    ;;#ASMSTART
7889; GFX90A-NEXT:    ; def s[8:9]
7890; GFX90A-NEXT:    ;;#ASMEND
7891; GFX90A-NEXT:    ;;#ASMSTART
7892; GFX90A-NEXT:    ; use s[8:9]
7893; GFX90A-NEXT:    ;;#ASMEND
7894; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7895;
7896; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_u_u:
7897; GFX940:       ; %bb.0:
7898; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7899; GFX940-NEXT:    ;;#ASMSTART
7900; GFX940-NEXT:    ; def s[8:9]
7901; GFX940-NEXT:    ;;#ASMEND
7902; GFX940-NEXT:    s_nop 0
7903; GFX940-NEXT:    ;;#ASMSTART
7904; GFX940-NEXT:    ; use s[8:9]
7905; GFX940-NEXT:    ;;#ASMEND
7906; GFX940-NEXT:    s_setpc_b64 s[30:31]
7907  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7908  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
7909  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7910  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7911  ret void
7912}
7913
; SGPR test: mask <1, poison, poison> moves %vec0[1] into lane 0; the other
; lanes are poison. The result is widened to <4 x i16> and passed to a
; side-effecting "use" asm pinned to s[8:9]. The checks below expect a single
; s_lshr_b32 by 16 of the first register of the def, written to s8.
7914define void @s_shuffle_v3i16_v4i16__1_u_u() {
7915; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_u_u:
7916; GFX900:       ; %bb.0:
7917; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7918; GFX900-NEXT:    ;;#ASMSTART
7919; GFX900-NEXT:    ; def s[4:5]
7920; GFX900-NEXT:    ;;#ASMEND
7921; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
7922; GFX900-NEXT:    ;;#ASMSTART
7923; GFX900-NEXT:    ; use s[8:9]
7924; GFX900-NEXT:    ;;#ASMEND
7925; GFX900-NEXT:    s_setpc_b64 s[30:31]
7926;
7927; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_u_u:
7928; GFX90A:       ; %bb.0:
7929; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7930; GFX90A-NEXT:    ;;#ASMSTART
7931; GFX90A-NEXT:    ; def s[4:5]
7932; GFX90A-NEXT:    ;;#ASMEND
7933; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
7934; GFX90A-NEXT:    ;;#ASMSTART
7935; GFX90A-NEXT:    ; use s[8:9]
7936; GFX90A-NEXT:    ;;#ASMEND
7937; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7938;
7939; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_u_u:
7940; GFX940:       ; %bb.0:
7941; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7942; GFX940-NEXT:    ;;#ASMSTART
7943; GFX940-NEXT:    ; def s[0:1]
7944; GFX940-NEXT:    ;;#ASMEND
7945; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
7946; GFX940-NEXT:    ;;#ASMSTART
7947; GFX940-NEXT:    ; use s[8:9]
7948; GFX940-NEXT:    ;;#ASMEND
7949; GFX940-NEXT:    s_setpc_b64 s[30:31]
7950  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7951  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
7952  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7953  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7954  ret void
7955}
7956
; SGPR test: mask <2, poison, poison> moves %vec0[2] into lane 0; the other
; lanes are poison. The result is widened to <4 x i16> and passed to a
; side-effecting "use" asm pinned to s[8:9]. The checks below expect a single
; s_mov_b32 copying the second register of the def into s8.
7957define void @s_shuffle_v3i16_v4i16__2_u_u() {
7958; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_u_u:
7959; GFX900:       ; %bb.0:
7960; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7961; GFX900-NEXT:    ;;#ASMSTART
7962; GFX900-NEXT:    ; def s[4:5]
7963; GFX900-NEXT:    ;;#ASMEND
7964; GFX900-NEXT:    s_mov_b32 s8, s5
7965; GFX900-NEXT:    ;;#ASMSTART
7966; GFX900-NEXT:    ; use s[8:9]
7967; GFX900-NEXT:    ;;#ASMEND
7968; GFX900-NEXT:    s_setpc_b64 s[30:31]
7969;
7970; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_u_u:
7971; GFX90A:       ; %bb.0:
7972; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7973; GFX90A-NEXT:    ;;#ASMSTART
7974; GFX90A-NEXT:    ; def s[4:5]
7975; GFX90A-NEXT:    ;;#ASMEND
7976; GFX90A-NEXT:    s_mov_b32 s8, s5
7977; GFX90A-NEXT:    ;;#ASMSTART
7978; GFX90A-NEXT:    ; use s[8:9]
7979; GFX90A-NEXT:    ;;#ASMEND
7980; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7981;
7982; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_u_u:
7983; GFX940:       ; %bb.0:
7984; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7985; GFX940-NEXT:    ;;#ASMSTART
7986; GFX940-NEXT:    ; def s[0:1]
7987; GFX940-NEXT:    ;;#ASMEND
7988; GFX940-NEXT:    s_mov_b32 s8, s1
7989; GFX940-NEXT:    ;;#ASMSTART
7990; GFX940-NEXT:    ; use s[8:9]
7991; GFX940-NEXT:    ;;#ASMEND
7992; GFX940-NEXT:    s_setpc_b64 s[30:31]
7993  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7994  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
7995  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7996  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7997  ret void
7998}
7999
; SGPR test: mask <3, poison, poison> moves %vec0[3] into lane 0; the other
; lanes are poison. The result is widened to <4 x i16> and passed to a
; side-effecting "use" asm pinned to s[8:9]. The checks below expect a single
; s_lshr_b32 by 16 of the second register of the def, written to s8.
8000define void @s_shuffle_v3i16_v4i16__3_u_u() {
8001; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_u_u:
8002; GFX900:       ; %bb.0:
8003; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8004; GFX900-NEXT:    ;;#ASMSTART
8005; GFX900-NEXT:    ; def s[4:5]
8006; GFX900-NEXT:    ;;#ASMEND
8007; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
8008; GFX900-NEXT:    ;;#ASMSTART
8009; GFX900-NEXT:    ; use s[8:9]
8010; GFX900-NEXT:    ;;#ASMEND
8011; GFX900-NEXT:    s_setpc_b64 s[30:31]
8012;
8013; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_u_u:
8014; GFX90A:       ; %bb.0:
8015; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8016; GFX90A-NEXT:    ;;#ASMSTART
8017; GFX90A-NEXT:    ; def s[4:5]
8018; GFX90A-NEXT:    ;;#ASMEND
8019; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
8020; GFX90A-NEXT:    ;;#ASMSTART
8021; GFX90A-NEXT:    ; use s[8:9]
8022; GFX90A-NEXT:    ;;#ASMEND
8023; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8024;
8025; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_u_u:
8026; GFX940:       ; %bb.0:
8027; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8028; GFX940-NEXT:    ;;#ASMSTART
8029; GFX940-NEXT:    ; def s[0:1]
8030; GFX940-NEXT:    ;;#ASMEND
8031; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
8032; GFX940-NEXT:    ;;#ASMSTART
8033; GFX940-NEXT:    ; use s[8:9]
8034; GFX940-NEXT:    ;;#ASMEND
8035; GFX940-NEXT:    s_setpc_b64 s[30:31]
8036  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8037  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
8038  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8039  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8040  ret void
8041}
8042
; SGPR test: mask <4, poison, poison>. Mask index 4 selects lane 0 of the
; second shuffle operand, which is poison here, so no lane of %vec0 is read.
; The checks below contain only the "use" asm block and no "def" block, the
; same shape as the all-poison-mask SGPR test.
; NOTE(review): the sibling tests __5_u_u, __6_u_u and __7_u_u pass %vec1 as
; the second operand; confirm that poison is intended for this case.
8043define void @s_shuffle_v3i16_v4i16__4_u_u() {
8044; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_u_u:
8045; GFX9:       ; %bb.0:
8046; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8047; GFX9-NEXT:    ;;#ASMSTART
8048; GFX9-NEXT:    ; use s[8:9]
8049; GFX9-NEXT:    ;;#ASMEND
8050; GFX9-NEXT:    s_setpc_b64 s[30:31]
8051  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8052  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 poison, i32 poison>
8053  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8054  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8055  ret void
8056}
8057
; SGPR test: mask <5, poison, poison> moves %vec1[1] into lane 0 (indices 4-7
; select from the second operand); the other lanes are poison. %vec0 is never
; read, and the checks below contain only one "def" asm block. They expect a
; single s_lshr_b32 by 16 of the first register of that def, written to s8.
8058define void @s_shuffle_v3i16_v4i16__5_u_u() {
8059; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_u_u:
8060; GFX900:       ; %bb.0:
8061; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8062; GFX900-NEXT:    ;;#ASMSTART
8063; GFX900-NEXT:    ; def s[4:5]
8064; GFX900-NEXT:    ;;#ASMEND
8065; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
8066; GFX900-NEXT:    ;;#ASMSTART
8067; GFX900-NEXT:    ; use s[8:9]
8068; GFX900-NEXT:    ;;#ASMEND
8069; GFX900-NEXT:    s_setpc_b64 s[30:31]
8070;
8071; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_u_u:
8072; GFX90A:       ; %bb.0:
8073; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8074; GFX90A-NEXT:    ;;#ASMSTART
8075; GFX90A-NEXT:    ; def s[4:5]
8076; GFX90A-NEXT:    ;;#ASMEND
8077; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
8078; GFX90A-NEXT:    ;;#ASMSTART
8079; GFX90A-NEXT:    ; use s[8:9]
8080; GFX90A-NEXT:    ;;#ASMEND
8081; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8082;
8083; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_u_u:
8084; GFX940:       ; %bb.0:
8085; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8086; GFX940-NEXT:    ;;#ASMSTART
8087; GFX940-NEXT:    ; def s[0:1]
8088; GFX940-NEXT:    ;;#ASMEND
8089; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
8090; GFX940-NEXT:    ;;#ASMSTART
8091; GFX940-NEXT:    ; use s[8:9]
8092; GFX940-NEXT:    ;;#ASMEND
8093; GFX940-NEXT:    s_setpc_b64 s[30:31]
8094  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8095  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8096  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison>
8097  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8098  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8099  ret void
8100}
8101
; SGPR test: mask <6, poison, poison> moves %vec1[2] into lane 0 (indices 4-7
; select from the second operand); the other lanes are poison. %vec0 is never
; read, and the checks below contain only one "def" asm block. They expect a
; single s_mov_b32 copying the second register of that def into s8.
8102define void @s_shuffle_v3i16_v4i16__6_u_u() {
8103; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_u_u:
8104; GFX900:       ; %bb.0:
8105; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8106; GFX900-NEXT:    ;;#ASMSTART
8107; GFX900-NEXT:    ; def s[4:5]
8108; GFX900-NEXT:    ;;#ASMEND
8109; GFX900-NEXT:    s_mov_b32 s8, s5
8110; GFX900-NEXT:    ;;#ASMSTART
8111; GFX900-NEXT:    ; use s[8:9]
8112; GFX900-NEXT:    ;;#ASMEND
8113; GFX900-NEXT:    s_setpc_b64 s[30:31]
8114;
8115; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_u_u:
8116; GFX90A:       ; %bb.0:
8117; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8118; GFX90A-NEXT:    ;;#ASMSTART
8119; GFX90A-NEXT:    ; def s[4:5]
8120; GFX90A-NEXT:    ;;#ASMEND
8121; GFX90A-NEXT:    s_mov_b32 s8, s5
8122; GFX90A-NEXT:    ;;#ASMSTART
8123; GFX90A-NEXT:    ; use s[8:9]
8124; GFX90A-NEXT:    ;;#ASMEND
8125; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8126;
8127; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_u_u:
8128; GFX940:       ; %bb.0:
8129; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8130; GFX940-NEXT:    ;;#ASMSTART
8131; GFX940-NEXT:    ; def s[0:1]
8132; GFX940-NEXT:    ;;#ASMEND
8133; GFX940-NEXT:    s_mov_b32 s8, s1
8134; GFX940-NEXT:    ;;#ASMSTART
8135; GFX940-NEXT:    ; use s[8:9]
8136; GFX940-NEXT:    ;;#ASMEND
8137; GFX940-NEXT:    s_setpc_b64 s[30:31]
8138  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8139  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8140  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison>
8141  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8142  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8143  ret void
8144}
8145
; SGPR test: mask <7, poison, poison> moves %vec1[3] into lane 0 (indices 4-7
; select from the second operand); the other lanes are poison. %vec0 is never
; read, and the checks below contain only one "def" asm block. They expect a
; single s_lshr_b32 by 16 of the second register of that def, written to s8.
8146define void @s_shuffle_v3i16_v4i16__7_u_u() {
8147; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_u:
8148; GFX900:       ; %bb.0:
8149; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8150; GFX900-NEXT:    ;;#ASMSTART
8151; GFX900-NEXT:    ; def s[4:5]
8152; GFX900-NEXT:    ;;#ASMEND
8153; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
8154; GFX900-NEXT:    ;;#ASMSTART
8155; GFX900-NEXT:    ; use s[8:9]
8156; GFX900-NEXT:    ;;#ASMEND
8157; GFX900-NEXT:    s_setpc_b64 s[30:31]
8158;
8159; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_u:
8160; GFX90A:       ; %bb.0:
8161; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8162; GFX90A-NEXT:    ;;#ASMSTART
8163; GFX90A-NEXT:    ; def s[4:5]
8164; GFX90A-NEXT:    ;;#ASMEND
8165; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
8166; GFX90A-NEXT:    ;;#ASMSTART
8167; GFX90A-NEXT:    ; use s[8:9]
8168; GFX90A-NEXT:    ;;#ASMEND
8169; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8170;
8171; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_u:
8172; GFX940:       ; %bb.0:
8173; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8174; GFX940-NEXT:    ;;#ASMSTART
8175; GFX940-NEXT:    ; def s[0:1]
8176; GFX940-NEXT:    ;;#ASMEND
8177; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
8178; GFX940-NEXT:    ;;#ASMSTART
8179; GFX940-NEXT:    ; use s[8:9]
8180; GFX940-NEXT:    ;;#ASMEND
8181; GFX940-NEXT:    s_setpc_b64 s[30:31]
8182  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8183  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8184  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison>
8185  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8186  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8187  ret void
8188}
8189
; SGPR test: mask <7, 0, poison> produces <%vec1[3], %vec0[0], poison>
; (indices 0-3 select from %vec0, 4-7 from %vec1). The result is widened to
; <4 x i16> and passed to a side-effecting "use" asm pinned to s[8:9]. The
; checks below expect an s_lshr_b32 by 16 of the second register of the
; second def, followed by s_pack_ll_b32_b16 into s8.
8190define void @s_shuffle_v3i16_v4i16__7_0_u() {
8191; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_u:
8192; GFX900:       ; %bb.0:
8193; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8194; GFX900-NEXT:    ;;#ASMSTART
8195; GFX900-NEXT:    ; def s[4:5]
8196; GFX900-NEXT:    ;;#ASMEND
8197; GFX900-NEXT:    ;;#ASMSTART
8198; GFX900-NEXT:    ; def s[6:7]
8199; GFX900-NEXT:    ;;#ASMEND
8200; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
8201; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8202; GFX900-NEXT:    ;;#ASMSTART
8203; GFX900-NEXT:    ; use s[8:9]
8204; GFX900-NEXT:    ;;#ASMEND
8205; GFX900-NEXT:    s_setpc_b64 s[30:31]
8206;
8207; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_u:
8208; GFX90A:       ; %bb.0:
8209; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8210; GFX90A-NEXT:    ;;#ASMSTART
8211; GFX90A-NEXT:    ; def s[4:5]
8212; GFX90A-NEXT:    ;;#ASMEND
8213; GFX90A-NEXT:    ;;#ASMSTART
8214; GFX90A-NEXT:    ; def s[6:7]
8215; GFX90A-NEXT:    ;;#ASMEND
8216; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
8217; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8218; GFX90A-NEXT:    ;;#ASMSTART
8219; GFX90A-NEXT:    ; use s[8:9]
8220; GFX90A-NEXT:    ;;#ASMEND
8221; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8222;
8223; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_u:
8224; GFX940:       ; %bb.0:
8225; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8226; GFX940-NEXT:    ;;#ASMSTART
8227; GFX940-NEXT:    ; def s[0:1]
8228; GFX940-NEXT:    ;;#ASMEND
8229; GFX940-NEXT:    ;;#ASMSTART
8230; GFX940-NEXT:    ; def s[2:3]
8231; GFX940-NEXT:    ;;#ASMEND
8232; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
8233; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8234; GFX940-NEXT:    ;;#ASMSTART
8235; GFX940-NEXT:    ; use s[8:9]
8236; GFX940-NEXT:    ;;#ASMEND
8237; GFX940-NEXT:    s_setpc_b64 s[30:31]
8238  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8239  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8240  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 poison>
8241  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8242  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8243  ret void
8244}
8245
; SGPR test: mask <7, 1, poison> produces <%vec1[3], %vec0[1], poison>
; (indices 0-3 select from %vec0, 4-7 from %vec1). The result is widened to
; <4 x i16> and passed to a side-effecting "use" asm pinned to s[8:9]. The
; checks below expect a single s_pack_hh_b32_b16 into s8, with no separate
; shift instruction.
8246define void @s_shuffle_v3i16_v4i16__7_1_u() {
8247; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_u:
8248; GFX900:       ; %bb.0:
8249; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8250; GFX900-NEXT:    ;;#ASMSTART
8251; GFX900-NEXT:    ; def s[4:5]
8252; GFX900-NEXT:    ;;#ASMEND
8253; GFX900-NEXT:    ;;#ASMSTART
8254; GFX900-NEXT:    ; def s[6:7]
8255; GFX900-NEXT:    ;;#ASMEND
8256; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
8257; GFX900-NEXT:    ;;#ASMSTART
8258; GFX900-NEXT:    ; use s[8:9]
8259; GFX900-NEXT:    ;;#ASMEND
8260; GFX900-NEXT:    s_setpc_b64 s[30:31]
8261;
8262; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_u:
8263; GFX90A:       ; %bb.0:
8264; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8265; GFX90A-NEXT:    ;;#ASMSTART
8266; GFX90A-NEXT:    ; def s[4:5]
8267; GFX90A-NEXT:    ;;#ASMEND
8268; GFX90A-NEXT:    ;;#ASMSTART
8269; GFX90A-NEXT:    ; def s[6:7]
8270; GFX90A-NEXT:    ;;#ASMEND
8271; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
8272; GFX90A-NEXT:    ;;#ASMSTART
8273; GFX90A-NEXT:    ; use s[8:9]
8274; GFX90A-NEXT:    ;;#ASMEND
8275; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8276;
8277; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_u:
8278; GFX940:       ; %bb.0:
8279; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8280; GFX940-NEXT:    ;;#ASMSTART
8281; GFX940-NEXT:    ; def s[0:1]
8282; GFX940-NEXT:    ;;#ASMEND
8283; GFX940-NEXT:    ;;#ASMSTART
8284; GFX940-NEXT:    ; def s[2:3]
8285; GFX940-NEXT:    ;;#ASMEND
8286; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
8287; GFX940-NEXT:    ;;#ASMSTART
8288; GFX940-NEXT:    ; use s[8:9]
8289; GFX940-NEXT:    ;;#ASMEND
8290; GFX940-NEXT:    s_setpc_b64 s[30:31]
8291  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8292  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8293  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 poison>
8294  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8295  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8296  ret void
8297}
8298
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 2, poison>:
; lane 0 <- %vec1[3] (mask index 7), lane 1 <- %vec0[2], lane 2 is poison.
; The expected output shifts the register holding %vec1[3] right by 16 and
; then merges it with %vec0's second register using s_pack_ll_b32_b16.
; The widened <4 x i16> result is consumed by a "use" asm pinned to s[8:9].
8299define void @s_shuffle_v3i16_v4i16__7_2_u() {
8300; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_u:
8301; GFX900:       ; %bb.0:
8302; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8303; GFX900-NEXT:    ;;#ASMSTART
8304; GFX900-NEXT:    ; def s[4:5]
8305; GFX900-NEXT:    ;;#ASMEND
8306; GFX900-NEXT:    ;;#ASMSTART
8307; GFX900-NEXT:    ; def s[6:7]
8308; GFX900-NEXT:    ;;#ASMEND
8309; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
8310; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
8311; GFX900-NEXT:    ;;#ASMSTART
8312; GFX900-NEXT:    ; use s[8:9]
8313; GFX900-NEXT:    ;;#ASMEND
8314; GFX900-NEXT:    s_setpc_b64 s[30:31]
8315;
8316; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_u:
8317; GFX90A:       ; %bb.0:
8318; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8319; GFX90A-NEXT:    ;;#ASMSTART
8320; GFX90A-NEXT:    ; def s[4:5]
8321; GFX90A-NEXT:    ;;#ASMEND
8322; GFX90A-NEXT:    ;;#ASMSTART
8323; GFX90A-NEXT:    ; def s[6:7]
8324; GFX90A-NEXT:    ;;#ASMEND
8325; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
8326; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
8327; GFX90A-NEXT:    ;;#ASMSTART
8328; GFX90A-NEXT:    ; use s[8:9]
8329; GFX90A-NEXT:    ;;#ASMEND
8330; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8331;
8332; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_u:
8333; GFX940:       ; %bb.0:
8334; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8335; GFX940-NEXT:    ;;#ASMSTART
8336; GFX940-NEXT:    ; def s[0:1]
8337; GFX940-NEXT:    ;;#ASMEND
8338; GFX940-NEXT:    ;;#ASMSTART
8339; GFX940-NEXT:    ; def s[2:3]
8340; GFX940-NEXT:    ;;#ASMEND
8341; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
8342; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
8343; GFX940-NEXT:    ;;#ASMSTART
8344; GFX940-NEXT:    ; use s[8:9]
8345; GFX940-NEXT:    ;;#ASMEND
8346; GFX940-NEXT:    s_setpc_b64 s[30:31]
8347  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8348  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8349  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 poison>
8350  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8351  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8352  ret void
8353}
8354
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 3, poison>:
; lane 0 <- %vec1[3] (mask index 7), lane 1 <- %vec0[3], lane 2 is poison.
; The expected output for every target is a single s_pack_hh_b32_b16 that
; reads the second register of each input pair. The widened <4 x i16> result
; is consumed by a "use" asm pinned to s[8:9].
8355define void @s_shuffle_v3i16_v4i16__7_3_u() {
8356; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_u:
8357; GFX900:       ; %bb.0:
8358; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8359; GFX900-NEXT:    ;;#ASMSTART
8360; GFX900-NEXT:    ; def s[4:5]
8361; GFX900-NEXT:    ;;#ASMEND
8362; GFX900-NEXT:    ;;#ASMSTART
8363; GFX900-NEXT:    ; def s[6:7]
8364; GFX900-NEXT:    ;;#ASMEND
8365; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
8366; GFX900-NEXT:    ;;#ASMSTART
8367; GFX900-NEXT:    ; use s[8:9]
8368; GFX900-NEXT:    ;;#ASMEND
8369; GFX900-NEXT:    s_setpc_b64 s[30:31]
8370;
8371; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_u:
8372; GFX90A:       ; %bb.0:
8373; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8374; GFX90A-NEXT:    ;;#ASMSTART
8375; GFX90A-NEXT:    ; def s[4:5]
8376; GFX90A-NEXT:    ;;#ASMEND
8377; GFX90A-NEXT:    ;;#ASMSTART
8378; GFX90A-NEXT:    ; def s[6:7]
8379; GFX90A-NEXT:    ;;#ASMEND
8380; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
8381; GFX90A-NEXT:    ;;#ASMSTART
8382; GFX90A-NEXT:    ; use s[8:9]
8383; GFX90A-NEXT:    ;;#ASMEND
8384; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8385;
8386; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_u:
8387; GFX940:       ; %bb.0:
8388; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8389; GFX940-NEXT:    ;;#ASMSTART
8390; GFX940-NEXT:    ; def s[0:1]
8391; GFX940-NEXT:    ;;#ASMEND
8392; GFX940-NEXT:    ;;#ASMSTART
8393; GFX940-NEXT:    ; def s[2:3]
8394; GFX940-NEXT:    ;;#ASMEND
8395; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
8396; GFX940-NEXT:    ;;#ASMSTART
8397; GFX940-NEXT:    ; use s[8:9]
8398; GFX940-NEXT:    ;;#ASMEND
8399; GFX940-NEXT:    s_setpc_b64 s[30:31]
8400  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8401  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8402  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 poison>
8403  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8404  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8405  ret void
8406}
8407
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 4, poison>:
; lane 0 <- %vec1[3] (mask index 7), lane 1 <- %vec1[0] (mask index 4),
; lane 2 is poison. Only %vec1 is read by the mask, and the expected output
; contains a single "def" block. The expected sequence is s_lshr_b32 by 16
; followed by s_pack_ll_b32_b16. The widened <4 x i16> result is consumed by
; a "use" asm pinned to s[8:9].
8408define void @s_shuffle_v3i16_v4i16__7_4_u() {
8409; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_u:
8410; GFX900:       ; %bb.0:
8411; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8412; GFX900-NEXT:    ;;#ASMSTART
8413; GFX900-NEXT:    ; def s[4:5]
8414; GFX900-NEXT:    ;;#ASMEND
8415; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
8416; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8417; GFX900-NEXT:    ;;#ASMSTART
8418; GFX900-NEXT:    ; use s[8:9]
8419; GFX900-NEXT:    ;;#ASMEND
8420; GFX900-NEXT:    s_setpc_b64 s[30:31]
8421;
8422; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_u:
8423; GFX90A:       ; %bb.0:
8424; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8425; GFX90A-NEXT:    ;;#ASMSTART
8426; GFX90A-NEXT:    ; def s[4:5]
8427; GFX90A-NEXT:    ;;#ASMEND
8428; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
8429; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8430; GFX90A-NEXT:    ;;#ASMSTART
8431; GFX90A-NEXT:    ; use s[8:9]
8432; GFX90A-NEXT:    ;;#ASMEND
8433; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8434;
8435; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_u:
8436; GFX940:       ; %bb.0:
8437; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8438; GFX940-NEXT:    ;;#ASMSTART
8439; GFX940-NEXT:    ; def s[0:1]
8440; GFX940-NEXT:    ;;#ASMEND
8441; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
8442; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8443; GFX940-NEXT:    ;;#ASMSTART
8444; GFX940-NEXT:    ; use s[8:9]
8445; GFX940-NEXT:    ;;#ASMEND
8446; GFX940-NEXT:    s_setpc_b64 s[30:31]
8447  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8448  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8449  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 poison>
8450  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8451  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8452  ret void
8453}
8454
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 5, poison>:
; lane 0 <- %vec1[3] (mask index 7), lane 1 <- %vec1[1] (mask index 5),
; lane 2 is poison. Only %vec1 is read by the mask, and the expected output
; contains a single "def" block followed by one s_pack_hh_b32_b16.
; The widened <4 x i16> result is consumed by a "use" asm pinned to s[8:9].
8455define void @s_shuffle_v3i16_v4i16__7_5_u() {
8456; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_u:
8457; GFX900:       ; %bb.0:
8458; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8459; GFX900-NEXT:    ;;#ASMSTART
8460; GFX900-NEXT:    ; def s[4:5]
8461; GFX900-NEXT:    ;;#ASMEND
8462; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
8463; GFX900-NEXT:    ;;#ASMSTART
8464; GFX900-NEXT:    ; use s[8:9]
8465; GFX900-NEXT:    ;;#ASMEND
8466; GFX900-NEXT:    s_setpc_b64 s[30:31]
8467;
8468; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_u:
8469; GFX90A:       ; %bb.0:
8470; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8471; GFX90A-NEXT:    ;;#ASMSTART
8472; GFX90A-NEXT:    ; def s[4:5]
8473; GFX90A-NEXT:    ;;#ASMEND
8474; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
8475; GFX90A-NEXT:    ;;#ASMSTART
8476; GFX90A-NEXT:    ; use s[8:9]
8477; GFX90A-NEXT:    ;;#ASMEND
8478; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8479;
8480; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_u:
8481; GFX940:       ; %bb.0:
8482; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8483; GFX940-NEXT:    ;;#ASMSTART
8484; GFX940-NEXT:    ; def s[0:1]
8485; GFX940-NEXT:    ;;#ASMEND
8486; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
8487; GFX940-NEXT:    ;;#ASMSTART
8488; GFX940-NEXT:    ; use s[8:9]
8489; GFX940-NEXT:    ;;#ASMEND
8490; GFX940-NEXT:    s_setpc_b64 s[30:31]
8491  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8492  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8493  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
8494  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8495  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8496  ret void
8497}
8498
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 6, poison>:
; lane 0 <- %vec1[3] (mask index 7), lane 1 <- %vec1[2] (mask index 6),
; lane 2 is poison, i.e. the two upper lanes of %vec1 swapped. Only %vec1 is
; read, and the expected output contains a single "def" block followed by
; s_lshr_b32 by 16 and s_pack_ll_b32_b16, both reading the same source
; register. The widened <4 x i16> result is consumed by a "use" asm pinned to
; s[8:9].
8499define void @s_shuffle_v3i16_v4i16__7_6_u() {
8500; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_u:
8501; GFX900:       ; %bb.0:
8502; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8503; GFX900-NEXT:    ;;#ASMSTART
8504; GFX900-NEXT:    ; def s[4:5]
8505; GFX900-NEXT:    ;;#ASMEND
8506; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
8507; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
8508; GFX900-NEXT:    ;;#ASMSTART
8509; GFX900-NEXT:    ; use s[8:9]
8510; GFX900-NEXT:    ;;#ASMEND
8511; GFX900-NEXT:    s_setpc_b64 s[30:31]
8512;
8513; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_u:
8514; GFX90A:       ; %bb.0:
8515; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8516; GFX90A-NEXT:    ;;#ASMSTART
8517; GFX90A-NEXT:    ; def s[4:5]
8518; GFX90A-NEXT:    ;;#ASMEND
8519; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
8520; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
8521; GFX90A-NEXT:    ;;#ASMSTART
8522; GFX90A-NEXT:    ; use s[8:9]
8523; GFX90A-NEXT:    ;;#ASMEND
8524; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8525;
8526; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_u:
8527; GFX940:       ; %bb.0:
8528; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8529; GFX940-NEXT:    ;;#ASMSTART
8530; GFX940-NEXT:    ; def s[0:1]
8531; GFX940-NEXT:    ;;#ASMEND
8532; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
8533; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
8534; GFX940-NEXT:    ;;#ASMSTART
8535; GFX940-NEXT:    ; use s[8:9]
8536; GFX940-NEXT:    ;;#ASMEND
8537; GFX940-NEXT:    s_setpc_b64 s[30:31]
8538  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8539  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8540  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 poison>
8541  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8542  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8543  ret void
8544}
8545
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 7, poison>:
; lanes 0 and 1 both <- %vec1[3] (mask index 7), lane 2 is poison.
; Only %vec1 is read, and the expected output contains a single "def" block
; followed by one s_pack_hh_b32_b16 whose two sources are the same register.
; The widened <4 x i16> result is consumed by a "use" asm pinned to s[8:9].
8546define void @s_shuffle_v3i16_v4i16__7_7_u() {
8547; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_u:
8548; GFX900:       ; %bb.0:
8549; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8550; GFX900-NEXT:    ;;#ASMSTART
8551; GFX900-NEXT:    ; def s[4:5]
8552; GFX900-NEXT:    ;;#ASMEND
8553; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8554; GFX900-NEXT:    ;;#ASMSTART
8555; GFX900-NEXT:    ; use s[8:9]
8556; GFX900-NEXT:    ;;#ASMEND
8557; GFX900-NEXT:    s_setpc_b64 s[30:31]
8558;
8559; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_u:
8560; GFX90A:       ; %bb.0:
8561; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8562; GFX90A-NEXT:    ;;#ASMSTART
8563; GFX90A-NEXT:    ; def s[4:5]
8564; GFX90A-NEXT:    ;;#ASMEND
8565; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8566; GFX90A-NEXT:    ;;#ASMSTART
8567; GFX90A-NEXT:    ; use s[8:9]
8568; GFX90A-NEXT:    ;;#ASMEND
8569; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8570;
8571; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_u:
8572; GFX940:       ; %bb.0:
8573; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8574; GFX940-NEXT:    ;;#ASMSTART
8575; GFX940-NEXT:    ; def s[0:1]
8576; GFX940-NEXT:    ;;#ASMEND
8577; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
8578; GFX940-NEXT:    ;;#ASMSTART
8579; GFX940-NEXT:    ; use s[8:9]
8580; GFX940-NEXT:    ;;#ASMEND
8581; GFX940-NEXT:    s_setpc_b64 s[30:31]
8582  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8583  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8584  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
8585  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8586  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8587  ret void
8588}
8589
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 7, 0>:
; lanes 0 and 1 both <- %vec1[3] (mask index 7), lane 2 <- %vec0[0].
; The expected output builds s8 with s_pack_hh_b32_b16 from %vec1's second
; register and copies %vec0's first register into s9 with s_mov_b32.
; The widened <4 x i16> result is consumed by a "use" asm pinned to s[8:9].
8590define void @s_shuffle_v3i16_v4i16__7_7_0() {
8591; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_0:
8592; GFX900:       ; %bb.0:
8593; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8594; GFX900-NEXT:    ;;#ASMSTART
8595; GFX900-NEXT:    ; def s[4:5]
8596; GFX900-NEXT:    ;;#ASMEND
8597; GFX900-NEXT:    ;;#ASMSTART
8598; GFX900-NEXT:    ; def s[6:7]
8599; GFX900-NEXT:    ;;#ASMEND
8600; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
8601; GFX900-NEXT:    s_mov_b32 s9, s4
8602; GFX900-NEXT:    ;;#ASMSTART
8603; GFX900-NEXT:    ; use s[8:9]
8604; GFX900-NEXT:    ;;#ASMEND
8605; GFX900-NEXT:    s_setpc_b64 s[30:31]
8606;
8607; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_0:
8608; GFX90A:       ; %bb.0:
8609; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8610; GFX90A-NEXT:    ;;#ASMSTART
8611; GFX90A-NEXT:    ; def s[4:5]
8612; GFX90A-NEXT:    ;;#ASMEND
8613; GFX90A-NEXT:    ;;#ASMSTART
8614; GFX90A-NEXT:    ; def s[6:7]
8615; GFX90A-NEXT:    ;;#ASMEND
8616; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
8617; GFX90A-NEXT:    s_mov_b32 s9, s4
8618; GFX90A-NEXT:    ;;#ASMSTART
8619; GFX90A-NEXT:    ; use s[8:9]
8620; GFX90A-NEXT:    ;;#ASMEND
8621; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8622;
8623; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_0:
8624; GFX940:       ; %bb.0:
8625; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8626; GFX940-NEXT:    ;;#ASMSTART
8627; GFX940-NEXT:    ; def s[0:1]
8628; GFX940-NEXT:    ;;#ASMEND
8629; GFX940-NEXT:    ;;#ASMSTART
8630; GFX940-NEXT:    ; def s[2:3]
8631; GFX940-NEXT:    ;;#ASMEND
8632; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
8633; GFX940-NEXT:    s_mov_b32 s9, s0
8634; GFX940-NEXT:    ;;#ASMSTART
8635; GFX940-NEXT:    ; use s[8:9]
8636; GFX940-NEXT:    ;;#ASMEND
8637; GFX940-NEXT:    s_setpc_b64 s[30:31]
8638  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8639  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8640  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
8641  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8642  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8643  ret void
8644}
8645
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 7, 1>:
; lanes 0 and 1 both <- %vec1[3] (mask index 7), lane 2 <- %vec0[1].
; The expected output shifts %vec0's first register right by 16 into s9 and
; builds s8 with s_pack_hh_b32_b16 from %vec1's second register.
; The widened <4 x i16> result is consumed by a "use" asm pinned to s[8:9].
8646define void @s_shuffle_v3i16_v4i16__7_7_1() {
8647; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_1:
8648; GFX900:       ; %bb.0:
8649; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8650; GFX900-NEXT:    ;;#ASMSTART
8651; GFX900-NEXT:    ; def s[4:5]
8652; GFX900-NEXT:    ;;#ASMEND
8653; GFX900-NEXT:    ;;#ASMSTART
8654; GFX900-NEXT:    ; def s[6:7]
8655; GFX900-NEXT:    ;;#ASMEND
8656; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
8657; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
8658; GFX900-NEXT:    ;;#ASMSTART
8659; GFX900-NEXT:    ; use s[8:9]
8660; GFX900-NEXT:    ;;#ASMEND
8661; GFX900-NEXT:    s_setpc_b64 s[30:31]
8662;
8663; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_1:
8664; GFX90A:       ; %bb.0:
8665; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8666; GFX90A-NEXT:    ;;#ASMSTART
8667; GFX90A-NEXT:    ; def s[4:5]
8668; GFX90A-NEXT:    ;;#ASMEND
8669; GFX90A-NEXT:    ;;#ASMSTART
8670; GFX90A-NEXT:    ; def s[6:7]
8671; GFX90A-NEXT:    ;;#ASMEND
8672; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
8673; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
8674; GFX90A-NEXT:    ;;#ASMSTART
8675; GFX90A-NEXT:    ; use s[8:9]
8676; GFX90A-NEXT:    ;;#ASMEND
8677; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8678;
8679; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_1:
8680; GFX940:       ; %bb.0:
8681; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8682; GFX940-NEXT:    ;;#ASMSTART
8683; GFX940-NEXT:    ; def s[0:1]
8684; GFX940-NEXT:    ;;#ASMEND
8685; GFX940-NEXT:    ;;#ASMSTART
8686; GFX940-NEXT:    ; def s[2:3]
8687; GFX940-NEXT:    ;;#ASMEND
8688; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
8689; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
8690; GFX940-NEXT:    ;;#ASMSTART
8691; GFX940-NEXT:    ; use s[8:9]
8692; GFX940-NEXT:    ;;#ASMEND
8693; GFX940-NEXT:    s_setpc_b64 s[30:31]
8694  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8695  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8696  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 1>
8697  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8698  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8699  ret void
8700}
8701
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 7, 2>:
; lanes 0 and 1 both <- %vec1[3] (mask index 7), lane 2 <- %vec0[2].
; In the expected output %vec0 is defined directly in s[8:9], the same pair
; the "use" asm is pinned to, so s9 is never rewritten and only s8 is
; produced, by one s_pack_hh_b32_b16 reading %vec1's second register.
8702define void @s_shuffle_v3i16_v4i16__7_7_2() {
8703; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_2:
8704; GFX900:       ; %bb.0:
8705; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8706; GFX900-NEXT:    ;;#ASMSTART
8707; GFX900-NEXT:    ; def s[8:9]
8708; GFX900-NEXT:    ;;#ASMEND
8709; GFX900-NEXT:    ;;#ASMSTART
8710; GFX900-NEXT:    ; def s[4:5]
8711; GFX900-NEXT:    ;;#ASMEND
8712; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8713; GFX900-NEXT:    ;;#ASMSTART
8714; GFX900-NEXT:    ; use s[8:9]
8715; GFX900-NEXT:    ;;#ASMEND
8716; GFX900-NEXT:    s_setpc_b64 s[30:31]
8717;
8718; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_2:
8719; GFX90A:       ; %bb.0:
8720; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8721; GFX90A-NEXT:    ;;#ASMSTART
8722; GFX90A-NEXT:    ; def s[8:9]
8723; GFX90A-NEXT:    ;;#ASMEND
8724; GFX90A-NEXT:    ;;#ASMSTART
8725; GFX90A-NEXT:    ; def s[4:5]
8726; GFX90A-NEXT:    ;;#ASMEND
8727; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8728; GFX90A-NEXT:    ;;#ASMSTART
8729; GFX90A-NEXT:    ; use s[8:9]
8730; GFX90A-NEXT:    ;;#ASMEND
8731; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8732;
8733; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_2:
8734; GFX940:       ; %bb.0:
8735; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8736; GFX940-NEXT:    ;;#ASMSTART
8737; GFX940-NEXT:    ; def s[8:9]
8738; GFX940-NEXT:    ;;#ASMEND
8739; GFX940-NEXT:    ;;#ASMSTART
8740; GFX940-NEXT:    ; def s[0:1]
8741; GFX940-NEXT:    ;;#ASMEND
8742; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
8743; GFX940-NEXT:    ;;#ASMSTART
8744; GFX940-NEXT:    ; use s[8:9]
8745; GFX940-NEXT:    ;;#ASMEND
8746; GFX940-NEXT:    s_setpc_b64 s[30:31]
8747  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8748  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8749  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 2>
8750  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8751  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8752  ret void
8753}
8754
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 7, 3>:
; lanes 0 and 1 both <- %vec1[3] (mask index 7), lane 2 <- %vec0[3].
; The expected output shifts %vec0's second register right by 16 into s9 and
; builds s8 with s_pack_hh_b32_b16 from %vec1's second register.
; The widened <4 x i16> result is consumed by a "use" asm pinned to s[8:9].
8755define void @s_shuffle_v3i16_v4i16__7_7_3() {
8756; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_3:
8757; GFX900:       ; %bb.0:
8758; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8759; GFX900-NEXT:    ;;#ASMSTART
8760; GFX900-NEXT:    ; def s[4:5]
8761; GFX900-NEXT:    ;;#ASMEND
8762; GFX900-NEXT:    ;;#ASMSTART
8763; GFX900-NEXT:    ; def s[6:7]
8764; GFX900-NEXT:    ;;#ASMEND
8765; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
8766; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
8767; GFX900-NEXT:    ;;#ASMSTART
8768; GFX900-NEXT:    ; use s[8:9]
8769; GFX900-NEXT:    ;;#ASMEND
8770; GFX900-NEXT:    s_setpc_b64 s[30:31]
8771;
8772; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_3:
8773; GFX90A:       ; %bb.0:
8774; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8775; GFX90A-NEXT:    ;;#ASMSTART
8776; GFX90A-NEXT:    ; def s[4:5]
8777; GFX90A-NEXT:    ;;#ASMEND
8778; GFX90A-NEXT:    ;;#ASMSTART
8779; GFX90A-NEXT:    ; def s[6:7]
8780; GFX90A-NEXT:    ;;#ASMEND
8781; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
8782; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s7
8783; GFX90A-NEXT:    ;;#ASMSTART
8784; GFX90A-NEXT:    ; use s[8:9]
8785; GFX90A-NEXT:    ;;#ASMEND
8786; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8787;
8788; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_3:
8789; GFX940:       ; %bb.0:
8790; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8791; GFX940-NEXT:    ;;#ASMSTART
8792; GFX940-NEXT:    ; def s[0:1]
8793; GFX940-NEXT:    ;;#ASMEND
8794; GFX940-NEXT:    ;;#ASMSTART
8795; GFX940-NEXT:    ; def s[2:3]
8796; GFX940-NEXT:    ;;#ASMEND
8797; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
8798; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s3
8799; GFX940-NEXT:    ;;#ASMSTART
8800; GFX940-NEXT:    ; use s[8:9]
8801; GFX940-NEXT:    ;;#ASMEND
8802; GFX940-NEXT:    s_setpc_b64 s[30:31]
8803  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8804  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8805  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 3>
8806  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8807  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8808  ret void
8809}
8810
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 7, 4>:
; lanes 0 and 1 both <- %vec1[3] (mask index 7), lane 2 <- %vec1[0]
; (mask index 4). Only %vec1 is read, and the expected output contains a
; single "def" block, then s_pack_hh_b32_b16 for s8 and an s_mov_b32 of the
; first input register into s9. The widened <4 x i16> result is consumed by a
; "use" asm pinned to s[8:9].
8811define void @s_shuffle_v3i16_v4i16__7_7_4() {
8812; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_4:
8813; GFX900:       ; %bb.0:
8814; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8815; GFX900-NEXT:    ;;#ASMSTART
8816; GFX900-NEXT:    ; def s[4:5]
8817; GFX900-NEXT:    ;;#ASMEND
8818; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8819; GFX900-NEXT:    s_mov_b32 s9, s4
8820; GFX900-NEXT:    ;;#ASMSTART
8821; GFX900-NEXT:    ; use s[8:9]
8822; GFX900-NEXT:    ;;#ASMEND
8823; GFX900-NEXT:    s_setpc_b64 s[30:31]
8824;
8825; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_4:
8826; GFX90A:       ; %bb.0:
8827; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8828; GFX90A-NEXT:    ;;#ASMSTART
8829; GFX90A-NEXT:    ; def s[4:5]
8830; GFX90A-NEXT:    ;;#ASMEND
8831; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8832; GFX90A-NEXT:    s_mov_b32 s9, s4
8833; GFX90A-NEXT:    ;;#ASMSTART
8834; GFX90A-NEXT:    ; use s[8:9]
8835; GFX90A-NEXT:    ;;#ASMEND
8836; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8837;
8838; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_4:
8839; GFX940:       ; %bb.0:
8840; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8841; GFX940-NEXT:    ;;#ASMSTART
8842; GFX940-NEXT:    ; def s[0:1]
8843; GFX940-NEXT:    ;;#ASMEND
8844; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
8845; GFX940-NEXT:    s_mov_b32 s9, s0
8846; GFX940-NEXT:    ;;#ASMSTART
8847; GFX940-NEXT:    ; use s[8:9]
8848; GFX940-NEXT:    ;;#ASMEND
8849; GFX940-NEXT:    s_setpc_b64 s[30:31]
8850  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8851  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8852  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 4>
8853  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8854  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8855  ret void
8856}
8857
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 7, 5>:
; lanes 0 and 1 both <- %vec1[3] (mask index 7), lane 2 <- %vec1[1]
; (mask index 5). Only %vec1 is read, and the expected output contains a
; single "def" block, then an s_lshr_b32 by 16 of the first input register
; into s9 and s_pack_hh_b32_b16 for s8. The widened <4 x i16> result is
; consumed by a "use" asm pinned to s[8:9].
8858define void @s_shuffle_v3i16_v4i16__7_7_5() {
8859; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_5:
8860; GFX900:       ; %bb.0:
8861; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8862; GFX900-NEXT:    ;;#ASMSTART
8863; GFX900-NEXT:    ; def s[4:5]
8864; GFX900-NEXT:    ;;#ASMEND
8865; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
8866; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8867; GFX900-NEXT:    ;;#ASMSTART
8868; GFX900-NEXT:    ; use s[8:9]
8869; GFX900-NEXT:    ;;#ASMEND
8870; GFX900-NEXT:    s_setpc_b64 s[30:31]
8871;
8872; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_5:
8873; GFX90A:       ; %bb.0:
8874; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8875; GFX90A-NEXT:    ;;#ASMSTART
8876; GFX90A-NEXT:    ; def s[4:5]
8877; GFX90A-NEXT:    ;;#ASMEND
8878; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
8879; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8880; GFX90A-NEXT:    ;;#ASMSTART
8881; GFX90A-NEXT:    ; use s[8:9]
8882; GFX90A-NEXT:    ;;#ASMEND
8883; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8884;
8885; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_5:
8886; GFX940:       ; %bb.0:
8887; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8888; GFX940-NEXT:    ;;#ASMSTART
8889; GFX940-NEXT:    ; def s[0:1]
8890; GFX940-NEXT:    ;;#ASMEND
8891; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
8892; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
8893; GFX940-NEXT:    ;;#ASMSTART
8894; GFX940-NEXT:    ; use s[8:9]
8895; GFX940-NEXT:    ;;#ASMEND
8896; GFX940-NEXT:    s_setpc_b64 s[30:31]
8897  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8898  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8899  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
8900  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8901  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8902  ret void
8903}
8904
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 7, 6>:
; lanes 0 and 1 both <- %vec1[3] (mask index 7), lane 2 <- %vec1[2]
; (mask index 6). Only %vec1 is read. The expected output is identical on all
; three targets, so the checks use the shared GFX9 prefix: the input is
; defined directly in s[8:9] and only s8 is rewritten, by one
; s_pack_hh_b32_b16 that reads s9 for both sources.
8905define void @s_shuffle_v3i16_v4i16__7_7_6() {
8906; GFX9-LABEL: s_shuffle_v3i16_v4i16__7_7_6:
8907; GFX9:       ; %bb.0:
8908; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8909; GFX9-NEXT:    ;;#ASMSTART
8910; GFX9-NEXT:    ; def s[8:9]
8911; GFX9-NEXT:    ;;#ASMEND
8912; GFX9-NEXT:    s_pack_hh_b32_b16 s8, s9, s9
8913; GFX9-NEXT:    ;;#ASMSTART
8914; GFX9-NEXT:    ; use s[8:9]
8915; GFX9-NEXT:    ;;#ASMEND
8916; GFX9-NEXT:    s_setpc_b64 s[30:31]
8917  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8918  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8919  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
8920  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8921  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8922  ret void
8923}
8924
; Scalar ("=s" inline-asm inputs) shuffle with mask <7, 7, 7>:
; all three lanes <- %vec1[3] (mask index 7), i.e. a splat of the last lane of
; the second operand. Only %vec1 is read, and the expected output contains a
; single "def" block, then an s_lshr_b32 by 16 into s9 and s_pack_hh_b32_b16
; for s8, all reading the same source register. The widened <4 x i16> result
; is consumed by a "use" asm pinned to s[8:9].
8925define void @s_shuffle_v3i16_v4i16__7_7_7() {
8926; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_7_7:
8927; GFX900:       ; %bb.0:
8928; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8929; GFX900-NEXT:    ;;#ASMSTART
8930; GFX900-NEXT:    ; def s[4:5]
8931; GFX900-NEXT:    ;;#ASMEND
8932; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
8933; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8934; GFX900-NEXT:    ;;#ASMSTART
8935; GFX900-NEXT:    ; use s[8:9]
8936; GFX900-NEXT:    ;;#ASMEND
8937; GFX900-NEXT:    s_setpc_b64 s[30:31]
8938;
8939; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_7_7:
8940; GFX90A:       ; %bb.0:
8941; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8942; GFX90A-NEXT:    ;;#ASMSTART
8943; GFX90A-NEXT:    ; def s[4:5]
8944; GFX90A-NEXT:    ;;#ASMEND
8945; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
8946; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
8947; GFX90A-NEXT:    ;;#ASMSTART
8948; GFX90A-NEXT:    ; use s[8:9]
8949; GFX90A-NEXT:    ;;#ASMEND
8950; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8951;
8952; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_7_7:
8953; GFX940:       ; %bb.0:
8954; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8955; GFX940-NEXT:    ;;#ASMSTART
8956; GFX940-NEXT:    ; def s[0:1]
8957; GFX940-NEXT:    ;;#ASMEND
8958; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
8959; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
8960; GFX940-NEXT:    ;;#ASMSTART
8961; GFX940-NEXT:    ; use s[8:9]
8962; GFX940-NEXT:    ;;#ASMEND
8963; GFX940-NEXT:    s_setpc_b64 s[30:31]
8964  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8965  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8966  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
8967  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8968  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8969  ret void
8970}
8971
; Scalar ("=s" inline-asm input) single-source shuffle with mask
; <poison, 0, 0>: lane 0 is poison, lanes 1 and 2 both <- %vec0[0]. The second
; shufflevector operand is poison, so only one "def" asm exists.
; The expected output produces s8 with s_lshl_b32 by 16 of the first input
; register and copies that same register into s9 with s_mov_b32.
; The widened <4 x i16> result is consumed by a "use" asm pinned to s[8:9].
8972define void @s_shuffle_v3i16_v4i16__u_0_0() {
8973; GFX900-LABEL: s_shuffle_v3i16_v4i16__u_0_0:
8974; GFX900:       ; %bb.0:
8975; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8976; GFX900-NEXT:    ;;#ASMSTART
8977; GFX900-NEXT:    ; def s[4:5]
8978; GFX900-NEXT:    ;;#ASMEND
8979; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
8980; GFX900-NEXT:    s_mov_b32 s9, s4
8981; GFX900-NEXT:    ;;#ASMSTART
8982; GFX900-NEXT:    ; use s[8:9]
8983; GFX900-NEXT:    ;;#ASMEND
8984; GFX900-NEXT:    s_setpc_b64 s[30:31]
8985;
8986; GFX90A-LABEL: s_shuffle_v3i16_v4i16__u_0_0:
8987; GFX90A:       ; %bb.0:
8988; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8989; GFX90A-NEXT:    ;;#ASMSTART
8990; GFX90A-NEXT:    ; def s[4:5]
8991; GFX90A-NEXT:    ;;#ASMEND
8992; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
8993; GFX90A-NEXT:    s_mov_b32 s9, s4
8994; GFX90A-NEXT:    ;;#ASMSTART
8995; GFX90A-NEXT:    ; use s[8:9]
8996; GFX90A-NEXT:    ;;#ASMEND
8997; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8998;
8999; GFX940-LABEL: s_shuffle_v3i16_v4i16__u_0_0:
9000; GFX940:       ; %bb.0:
9001; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9002; GFX940-NEXT:    ;;#ASMSTART
9003; GFX940-NEXT:    ; def s[0:1]
9004; GFX940-NEXT:    ;;#ASMEND
9005; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
9006; GFX940-NEXT:    s_mov_b32 s9, s0
9007; GFX940-NEXT:    ;;#ASMSTART
9008; GFX940-NEXT:    ; use s[8:9]
9009; GFX940-NEXT:    ;;#ASMEND
9010; GFX940-NEXT:    s_setpc_b64 s[30:31]
9011  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9012  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 0, i32 0>
9013  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9014  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9015  ret void
9016}
9017
; Scalar ("=s" inline-asm input) single-source shuffle with a zeroinitializer
; mask, i.e. <0, 0, 0>: all three lanes <- %vec0[0] (a splat of lane 0).
; The expected output builds s8 with s_pack_ll_b32_b16 using the first input
; register for both sources and copies that register into s9 with s_mov_b32.
; The widened <4 x i16> result is consumed by a "use" asm pinned to s[8:9].
9018define void @s_shuffle_v3i16_v4i16__0_0_0() {
9019; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_0_0:
9020; GFX900:       ; %bb.0:
9021; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9022; GFX900-NEXT:    ;;#ASMSTART
9023; GFX900-NEXT:    ; def s[4:5]
9024; GFX900-NEXT:    ;;#ASMEND
9025; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
9026; GFX900-NEXT:    s_mov_b32 s9, s4
9027; GFX900-NEXT:    ;;#ASMSTART
9028; GFX900-NEXT:    ; use s[8:9]
9029; GFX900-NEXT:    ;;#ASMEND
9030; GFX900-NEXT:    s_setpc_b64 s[30:31]
9031;
9032; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_0_0:
9033; GFX90A:       ; %bb.0:
9034; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9035; GFX90A-NEXT:    ;;#ASMSTART
9036; GFX90A-NEXT:    ; def s[4:5]
9037; GFX90A-NEXT:    ;;#ASMEND
9038; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
9039; GFX90A-NEXT:    s_mov_b32 s9, s4
9040; GFX90A-NEXT:    ;;#ASMSTART
9041; GFX90A-NEXT:    ; use s[8:9]
9042; GFX90A-NEXT:    ;;#ASMEND
9043; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9044;
9045; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_0_0:
9046; GFX940:       ; %bb.0:
9047; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9048; GFX940-NEXT:    ;;#ASMSTART
9049; GFX940-NEXT:    ; def s[0:1]
9050; GFX940-NEXT:    ;;#ASMEND
9051; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
9052; GFX940-NEXT:    s_mov_b32 s9, s0
9053; GFX940-NEXT:    ;;#ASMSTART
9054; GFX940-NEXT:    ; use s[8:9]
9055; GFX940-NEXT:    ;;#ASMEND
9056; GFX940-NEXT:    s_setpc_b64 s[30:31]
9057  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9058  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> zeroinitializer
9059  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9060  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9061  ret void
9062}
9063
; Scalar ("=s" inline-asm input) single-source shuffle with mask <1, 0, 0>:
; lane 0 <- %vec0[1], lanes 1 and 2 both <- %vec0[0].
; The expected output shifts the first input register right by 16, merges the
; shifted value with the original using s_pack_ll_b32_b16 to form s8, and
; copies the original register into s9 with s_mov_b32.
; The widened <4 x i16> result is consumed by a "use" asm pinned to s[8:9].
9064define void @s_shuffle_v3i16_v4i16__1_0_0() {
9065; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_0_0:
9066; GFX900:       ; %bb.0:
9067; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9068; GFX900-NEXT:    ;;#ASMSTART
9069; GFX900-NEXT:    ; def s[4:5]
9070; GFX900-NEXT:    ;;#ASMEND
9071; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
9072; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9073; GFX900-NEXT:    s_mov_b32 s9, s4
9074; GFX900-NEXT:    ;;#ASMSTART
9075; GFX900-NEXT:    ; use s[8:9]
9076; GFX900-NEXT:    ;;#ASMEND
9077; GFX900-NEXT:    s_setpc_b64 s[30:31]
9078;
9079; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_0_0:
9080; GFX90A:       ; %bb.0:
9081; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9082; GFX90A-NEXT:    ;;#ASMSTART
9083; GFX90A-NEXT:    ; def s[4:5]
9084; GFX90A-NEXT:    ;;#ASMEND
9085; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
9086; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9087; GFX90A-NEXT:    s_mov_b32 s9, s4
9088; GFX90A-NEXT:    ;;#ASMSTART
9089; GFX90A-NEXT:    ; use s[8:9]
9090; GFX90A-NEXT:    ;;#ASMEND
9091; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9092;
9093; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_0_0:
9094; GFX940:       ; %bb.0:
9095; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9096; GFX940-NEXT:    ;;#ASMSTART
9097; GFX940-NEXT:    ; def s[0:1]
9098; GFX940-NEXT:    ;;#ASMEND
9099; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
9100; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9101; GFX940-NEXT:    s_mov_b32 s9, s0
9102; GFX940-NEXT:    ;;#ASMSTART
9103; GFX940-NEXT:    ; use s[8:9]
9104; GFX940-NEXT:    ;;#ASMEND
9105; GFX940-NEXT:    s_setpc_b64 s[30:31]
9106  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9107  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 0, i32 0>
9108  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9109  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9110  ret void
9111}
9112
; SGPR shuffle test, mask <2, 0, 0>: result lanes are vec0[2], vec0[0], vec0[0].
; %vec0 is produced by inline asm ("=s"); the <3 x i16> result is widened to
; <4 x i16> (last lane poison) and handed to inline asm pinned to s[8:9].
9113define void @s_shuffle_v3i16_v4i16__2_0_0() {
9114; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_0_0:
9115; GFX900:       ; %bb.0:
9116; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9117; GFX900-NEXT:    ;;#ASMSTART
9118; GFX900-NEXT:    ; def s[4:5]
9119; GFX900-NEXT:    ;;#ASMEND
9120; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9121; GFX900-NEXT:    s_mov_b32 s9, s4
9122; GFX900-NEXT:    ;;#ASMSTART
9123; GFX900-NEXT:    ; use s[8:9]
9124; GFX900-NEXT:    ;;#ASMEND
9125; GFX900-NEXT:    s_setpc_b64 s[30:31]
9126;
9127; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_0_0:
9128; GFX90A:       ; %bb.0:
9129; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9130; GFX90A-NEXT:    ;;#ASMSTART
9131; GFX90A-NEXT:    ; def s[4:5]
9132; GFX90A-NEXT:    ;;#ASMEND
9133; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9134; GFX90A-NEXT:    s_mov_b32 s9, s4
9135; GFX90A-NEXT:    ;;#ASMSTART
9136; GFX90A-NEXT:    ; use s[8:9]
9137; GFX90A-NEXT:    ;;#ASMEND
9138; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9139;
9140; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_0_0:
9141; GFX940:       ; %bb.0:
9142; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9143; GFX940-NEXT:    ;;#ASMSTART
9144; GFX940-NEXT:    ; def s[0:1]
9145; GFX940-NEXT:    ;;#ASMEND
9146; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9147; GFX940-NEXT:    s_mov_b32 s9, s0
9148; GFX940-NEXT:    ;;#ASMSTART
9149; GFX940-NEXT:    ; use s[8:9]
9150; GFX940-NEXT:    ;;#ASMEND
9151; GFX940-NEXT:    s_setpc_b64 s[30:31]
9152  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9153  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 0, i32 0>
9154  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9155  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9156  ret void
9157}
9158
; SGPR shuffle test, mask <3, 0, 0>: result lanes are vec0[3], vec0[0], vec0[0].
; %vec0 is produced by inline asm ("=s"); the <3 x i16> result is widened to
; <4 x i16> (last lane poison) and handed to inline asm pinned to s[8:9].
9159define void @s_shuffle_v3i16_v4i16__3_0_0() {
9160; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_0_0:
9161; GFX900:       ; %bb.0:
9162; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9163; GFX900-NEXT:    ;;#ASMSTART
9164; GFX900-NEXT:    ; def s[4:5]
9165; GFX900-NEXT:    ;;#ASMEND
9166; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
9167; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9168; GFX900-NEXT:    s_mov_b32 s9, s4
9169; GFX900-NEXT:    ;;#ASMSTART
9170; GFX900-NEXT:    ; use s[8:9]
9171; GFX900-NEXT:    ;;#ASMEND
9172; GFX900-NEXT:    s_setpc_b64 s[30:31]
9173;
9174; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_0_0:
9175; GFX90A:       ; %bb.0:
9176; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9177; GFX90A-NEXT:    ;;#ASMSTART
9178; GFX90A-NEXT:    ; def s[4:5]
9179; GFX90A-NEXT:    ;;#ASMEND
9180; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
9181; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9182; GFX90A-NEXT:    s_mov_b32 s9, s4
9183; GFX90A-NEXT:    ;;#ASMSTART
9184; GFX90A-NEXT:    ; use s[8:9]
9185; GFX90A-NEXT:    ;;#ASMEND
9186; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9187;
9188; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_0_0:
9189; GFX940:       ; %bb.0:
9190; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9191; GFX940-NEXT:    ;;#ASMSTART
9192; GFX940-NEXT:    ; def s[0:1]
9193; GFX940-NEXT:    ;;#ASMEND
9194; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
9195; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9196; GFX940-NEXT:    s_mov_b32 s9, s0
9197; GFX940-NEXT:    ;;#ASMSTART
9198; GFX940-NEXT:    ; use s[8:9]
9199; GFX940-NEXT:    ;;#ASMEND
9200; GFX940-NEXT:    s_setpc_b64 s[30:31]
9201  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9202  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 0, i32 0>
9203  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9204  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9205  ret void
9206}
9207
; SGPR shuffle test, mask <4, 0, 0>: index 4 selects lane 0 of the second
; operand, which is poison here, so only lanes 1 and 2 (both vec0[0]) carry
; defined data. The widened <4 x i16> is handed to inline asm pinned to s[8:9].
9208define void @s_shuffle_v3i16_v4i16__4_0_0() {
9209; GFX900-LABEL: s_shuffle_v3i16_v4i16__4_0_0:
9210; GFX900:       ; %bb.0:
9211; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9212; GFX900-NEXT:    ;;#ASMSTART
9213; GFX900-NEXT:    ; def s[4:5]
9214; GFX900-NEXT:    ;;#ASMEND
9215; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
9216; GFX900-NEXT:    s_mov_b32 s9, s4
9217; GFX900-NEXT:    ;;#ASMSTART
9218; GFX900-NEXT:    ; use s[8:9]
9219; GFX900-NEXT:    ;;#ASMEND
9220; GFX900-NEXT:    s_setpc_b64 s[30:31]
9221;
9222; GFX90A-LABEL: s_shuffle_v3i16_v4i16__4_0_0:
9223; GFX90A:       ; %bb.0:
9224; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9225; GFX90A-NEXT:    ;;#ASMSTART
9226; GFX90A-NEXT:    ; def s[4:5]
9227; GFX90A-NEXT:    ;;#ASMEND
9228; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
9229; GFX90A-NEXT:    s_mov_b32 s9, s4
9230; GFX90A-NEXT:    ;;#ASMSTART
9231; GFX90A-NEXT:    ; use s[8:9]
9232; GFX90A-NEXT:    ;;#ASMEND
9233; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9234;
9235; GFX940-LABEL: s_shuffle_v3i16_v4i16__4_0_0:
9236; GFX940:       ; %bb.0:
9237; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9238; GFX940-NEXT:    ;;#ASMSTART
9239; GFX940-NEXT:    ; def s[0:1]
9240; GFX940-NEXT:    ;;#ASMEND
9241; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
9242; GFX940-NEXT:    s_mov_b32 s9, s0
9243; GFX940-NEXT:    ;;#ASMSTART
9244; GFX940-NEXT:    ; use s[8:9]
9245; GFX940-NEXT:    ;;#ASMEND
9246; GFX940-NEXT:    s_setpc_b64 s[30:31]
9247  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9248  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 0, i32 0>
9249  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9250  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9251  ret void
9252}
9253
; Two-input SGPR shuffle test, mask <5, 0, 0>: result lanes are vec1[1],
; vec0[0], vec0[0]. Both inputs come from inline asm ("=s"); the <3 x i16>
; result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
9254define void @s_shuffle_v3i16_v4i16__5_0_0() {
9255; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_0_0:
9256; GFX900:       ; %bb.0:
9257; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9258; GFX900-NEXT:    ;;#ASMSTART
9259; GFX900-NEXT:    ; def s[4:5]
9260; GFX900-NEXT:    ;;#ASMEND
9261; GFX900-NEXT:    ;;#ASMSTART
9262; GFX900-NEXT:    ; def s[6:7]
9263; GFX900-NEXT:    ;;#ASMEND
9264; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
9265; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9266; GFX900-NEXT:    s_mov_b32 s9, s4
9267; GFX900-NEXT:    ;;#ASMSTART
9268; GFX900-NEXT:    ; use s[8:9]
9269; GFX900-NEXT:    ;;#ASMEND
9270; GFX900-NEXT:    s_setpc_b64 s[30:31]
9271;
9272; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_0_0:
9273; GFX90A:       ; %bb.0:
9274; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9275; GFX90A-NEXT:    ;;#ASMSTART
9276; GFX90A-NEXT:    ; def s[4:5]
9277; GFX90A-NEXT:    ;;#ASMEND
9278; GFX90A-NEXT:    ;;#ASMSTART
9279; GFX90A-NEXT:    ; def s[6:7]
9280; GFX90A-NEXT:    ;;#ASMEND
9281; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
9282; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9283; GFX90A-NEXT:    s_mov_b32 s9, s4
9284; GFX90A-NEXT:    ;;#ASMSTART
9285; GFX90A-NEXT:    ; use s[8:9]
9286; GFX90A-NEXT:    ;;#ASMEND
9287; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9288;
9289; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_0_0:
9290; GFX940:       ; %bb.0:
9291; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9292; GFX940-NEXT:    ;;#ASMSTART
9293; GFX940-NEXT:    ; def s[0:1]
9294; GFX940-NEXT:    ;;#ASMEND
9295; GFX940-NEXT:    ;;#ASMSTART
9296; GFX940-NEXT:    ; def s[2:3]
9297; GFX940-NEXT:    ;;#ASMEND
9298; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
9299; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9300; GFX940-NEXT:    s_mov_b32 s9, s0
9301; GFX940-NEXT:    ;;#ASMSTART
9302; GFX940-NEXT:    ; use s[8:9]
9303; GFX940-NEXT:    ;;#ASMEND
9304; GFX940-NEXT:    s_setpc_b64 s[30:31]
9305  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9306  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9307  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
9308  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9309  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9310  ret void
9311}
9312
; Two-input SGPR shuffle test, mask <6, 0, 0>: result lanes are vec1[2],
; vec0[0], vec0[0]. Both inputs come from inline asm ("=s"); the <3 x i16>
; result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
9313define void @s_shuffle_v3i16_v4i16__6_0_0() {
9314; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_0_0:
9315; GFX900:       ; %bb.0:
9316; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9317; GFX900-NEXT:    ;;#ASMSTART
9318; GFX900-NEXT:    ; def s[4:5]
9319; GFX900-NEXT:    ;;#ASMEND
9320; GFX900-NEXT:    ;;#ASMSTART
9321; GFX900-NEXT:    ; def s[6:7]
9322; GFX900-NEXT:    ;;#ASMEND
9323; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
9324; GFX900-NEXT:    s_mov_b32 s9, s4
9325; GFX900-NEXT:    ;;#ASMSTART
9326; GFX900-NEXT:    ; use s[8:9]
9327; GFX900-NEXT:    ;;#ASMEND
9328; GFX900-NEXT:    s_setpc_b64 s[30:31]
9329;
9330; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_0_0:
9331; GFX90A:       ; %bb.0:
9332; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9333; GFX90A-NEXT:    ;;#ASMSTART
9334; GFX90A-NEXT:    ; def s[4:5]
9335; GFX90A-NEXT:    ;;#ASMEND
9336; GFX90A-NEXT:    ;;#ASMSTART
9337; GFX90A-NEXT:    ; def s[6:7]
9338; GFX90A-NEXT:    ;;#ASMEND
9339; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
9340; GFX90A-NEXT:    s_mov_b32 s9, s4
9341; GFX90A-NEXT:    ;;#ASMSTART
9342; GFX90A-NEXT:    ; use s[8:9]
9343; GFX90A-NEXT:    ;;#ASMEND
9344; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9345;
9346; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_0_0:
9347; GFX940:       ; %bb.0:
9348; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9349; GFX940-NEXT:    ;;#ASMSTART
9350; GFX940-NEXT:    ; def s[0:1]
9351; GFX940-NEXT:    ;;#ASMEND
9352; GFX940-NEXT:    ;;#ASMSTART
9353; GFX940-NEXT:    ; def s[2:3]
9354; GFX940-NEXT:    ;;#ASMEND
9355; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
9356; GFX940-NEXT:    s_mov_b32 s9, s0
9357; GFX940-NEXT:    ;;#ASMSTART
9358; GFX940-NEXT:    ; use s[8:9]
9359; GFX940-NEXT:    ;;#ASMEND
9360; GFX940-NEXT:    s_setpc_b64 s[30:31]
9361  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9362  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9363  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
9364  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9365  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9366  ret void
9367}
9368
; Two-input SGPR shuffle test, mask <7, 0, 0>: result lanes are vec1[3],
; vec0[0], vec0[0]. Both inputs come from inline asm ("=s"); the <3 x i16>
; result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
9369define void @s_shuffle_v3i16_v4i16__7_0_0() {
9370; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_0:
9371; GFX900:       ; %bb.0:
9372; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9373; GFX900-NEXT:    ;;#ASMSTART
9374; GFX900-NEXT:    ; def s[4:5]
9375; GFX900-NEXT:    ;;#ASMEND
9376; GFX900-NEXT:    ;;#ASMSTART
9377; GFX900-NEXT:    ; def s[6:7]
9378; GFX900-NEXT:    ;;#ASMEND
9379; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
9380; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9381; GFX900-NEXT:    s_mov_b32 s9, s4
9382; GFX900-NEXT:    ;;#ASMSTART
9383; GFX900-NEXT:    ; use s[8:9]
9384; GFX900-NEXT:    ;;#ASMEND
9385; GFX900-NEXT:    s_setpc_b64 s[30:31]
9386;
9387; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_0:
9388; GFX90A:       ; %bb.0:
9389; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9390; GFX90A-NEXT:    ;;#ASMSTART
9391; GFX90A-NEXT:    ; def s[4:5]
9392; GFX90A-NEXT:    ;;#ASMEND
9393; GFX90A-NEXT:    ;;#ASMSTART
9394; GFX90A-NEXT:    ; def s[6:7]
9395; GFX90A-NEXT:    ;;#ASMEND
9396; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
9397; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9398; GFX90A-NEXT:    s_mov_b32 s9, s4
9399; GFX90A-NEXT:    ;;#ASMSTART
9400; GFX90A-NEXT:    ; use s[8:9]
9401; GFX90A-NEXT:    ;;#ASMEND
9402; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9403;
9404; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_0:
9405; GFX940:       ; %bb.0:
9406; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9407; GFX940-NEXT:    ;;#ASMSTART
9408; GFX940-NEXT:    ; def s[0:1]
9409; GFX940-NEXT:    ;;#ASMEND
9410; GFX940-NEXT:    ;;#ASMSTART
9411; GFX940-NEXT:    ; def s[2:3]
9412; GFX940-NEXT:    ;;#ASMEND
9413; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
9414; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9415; GFX940-NEXT:    s_mov_b32 s9, s0
9416; GFX940-NEXT:    ;;#ASMSTART
9417; GFX940-NEXT:    ; use s[8:9]
9418; GFX940-NEXT:    ;;#ASMEND
9419; GFX940-NEXT:    s_setpc_b64 s[30:31]
9420  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9421  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9422  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
9423  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9424  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9425  ret void
9426}
9427
; Two-input SGPR shuffle test, mask <7, poison, 0>: result lanes are vec1[3],
; an unconstrained lane, and vec0[0]. Both inputs come from inline asm ("=s");
; the widened <4 x i16> is handed to inline asm pinned to s[8:9].
9428define void @s_shuffle_v3i16_v4i16__7_u_0() {
9429; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_0:
9430; GFX900:       ; %bb.0:
9431; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9432; GFX900-NEXT:    ;;#ASMSTART
9433; GFX900-NEXT:    ; def s[4:5]
9434; GFX900-NEXT:    ;;#ASMEND
9435; GFX900-NEXT:    ;;#ASMSTART
9436; GFX900-NEXT:    ; def s[6:7]
9437; GFX900-NEXT:    ;;#ASMEND
9438; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
9439; GFX900-NEXT:    s_mov_b32 s9, s4
9440; GFX900-NEXT:    ;;#ASMSTART
9441; GFX900-NEXT:    ; use s[8:9]
9442; GFX900-NEXT:    ;;#ASMEND
9443; GFX900-NEXT:    s_setpc_b64 s[30:31]
9444;
9445; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_0:
9446; GFX90A:       ; %bb.0:
9447; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9448; GFX90A-NEXT:    ;;#ASMSTART
9449; GFX90A-NEXT:    ; def s[4:5]
9450; GFX90A-NEXT:    ;;#ASMEND
9451; GFX90A-NEXT:    ;;#ASMSTART
9452; GFX90A-NEXT:    ; def s[6:7]
9453; GFX90A-NEXT:    ;;#ASMEND
9454; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
9455; GFX90A-NEXT:    s_mov_b32 s9, s4
9456; GFX90A-NEXT:    ;;#ASMSTART
9457; GFX90A-NEXT:    ; use s[8:9]
9458; GFX90A-NEXT:    ;;#ASMEND
9459; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9460;
9461; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_0:
9462; GFX940:       ; %bb.0:
9463; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9464; GFX940-NEXT:    ;;#ASMSTART
9465; GFX940-NEXT:    ; def s[0:1]
9466; GFX940-NEXT:    ;;#ASMEND
9467; GFX940-NEXT:    ;;#ASMSTART
9468; GFX940-NEXT:    ; def s[2:3]
9469; GFX940-NEXT:    ;;#ASMEND
9470; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
9471; GFX940-NEXT:    s_mov_b32 s9, s0
9472; GFX940-NEXT:    ;;#ASMSTART
9473; GFX940-NEXT:    ; use s[8:9]
9474; GFX940-NEXT:    ;;#ASMEND
9475; GFX940-NEXT:    s_setpc_b64 s[30:31]
9476  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9477  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9478  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
9479  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9480  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9481  ret void
9482}
9483
; Two-input SGPR shuffle test, mask <7, 1, 0>: result lanes are vec1[3],
; vec0[1], vec0[0]. Both inputs come from inline asm ("=s"); the <3 x i16>
; result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
9484define void @s_shuffle_v3i16_v4i16__7_1_0() {
9485; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_0:
9486; GFX900:       ; %bb.0:
9487; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9488; GFX900-NEXT:    ;;#ASMSTART
9489; GFX900-NEXT:    ; def s[4:5]
9490; GFX900-NEXT:    ;;#ASMEND
9491; GFX900-NEXT:    ;;#ASMSTART
9492; GFX900-NEXT:    ; def s[6:7]
9493; GFX900-NEXT:    ;;#ASMEND
9494; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
9495; GFX900-NEXT:    s_mov_b32 s9, s4
9496; GFX900-NEXT:    ;;#ASMSTART
9497; GFX900-NEXT:    ; use s[8:9]
9498; GFX900-NEXT:    ;;#ASMEND
9499; GFX900-NEXT:    s_setpc_b64 s[30:31]
9500;
9501; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_0:
9502; GFX90A:       ; %bb.0:
9503; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9504; GFX90A-NEXT:    ;;#ASMSTART
9505; GFX90A-NEXT:    ; def s[4:5]
9506; GFX90A-NEXT:    ;;#ASMEND
9507; GFX90A-NEXT:    ;;#ASMSTART
9508; GFX90A-NEXT:    ; def s[6:7]
9509; GFX90A-NEXT:    ;;#ASMEND
9510; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
9511; GFX90A-NEXT:    s_mov_b32 s9, s4
9512; GFX90A-NEXT:    ;;#ASMSTART
9513; GFX90A-NEXT:    ; use s[8:9]
9514; GFX90A-NEXT:    ;;#ASMEND
9515; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9516;
9517; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_0:
9518; GFX940:       ; %bb.0:
9519; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9520; GFX940-NEXT:    ;;#ASMSTART
9521; GFX940-NEXT:    ; def s[0:1]
9522; GFX940-NEXT:    ;;#ASMEND
9523; GFX940-NEXT:    ;;#ASMSTART
9524; GFX940-NEXT:    ; def s[2:3]
9525; GFX940-NEXT:    ;;#ASMEND
9526; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
9527; GFX940-NEXT:    s_mov_b32 s9, s0
9528; GFX940-NEXT:    ;;#ASMSTART
9529; GFX940-NEXT:    ; use s[8:9]
9530; GFX940-NEXT:    ;;#ASMEND
9531; GFX940-NEXT:    s_setpc_b64 s[30:31]
9532  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9533  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9534  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
9535  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9536  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9537  ret void
9538}
9539
; Two-input SGPR shuffle test, mask <7, 2, 0>: result lanes are vec1[3],
; vec0[2], vec0[0]. Both inputs come from inline asm ("=s"); the <3 x i16>
; result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
; Note the expected output emits "def s[6:7]" before "def s[4:5]" here.
9540define void @s_shuffle_v3i16_v4i16__7_2_0() {
9541; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_0:
9542; GFX900:       ; %bb.0:
9543; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9544; GFX900-NEXT:    ;;#ASMSTART
9545; GFX900-NEXT:    ; def s[6:7]
9546; GFX900-NEXT:    ;;#ASMEND
9547; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
9548; GFX900-NEXT:    ;;#ASMSTART
9549; GFX900-NEXT:    ; def s[4:5]
9550; GFX900-NEXT:    ;;#ASMEND
9551; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9552; GFX900-NEXT:    s_mov_b32 s9, s4
9553; GFX900-NEXT:    ;;#ASMSTART
9554; GFX900-NEXT:    ; use s[8:9]
9555; GFX900-NEXT:    ;;#ASMEND
9556; GFX900-NEXT:    s_setpc_b64 s[30:31]
9557;
9558; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_0:
9559; GFX90A:       ; %bb.0:
9560; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9561; GFX90A-NEXT:    ;;#ASMSTART
9562; GFX90A-NEXT:    ; def s[6:7]
9563; GFX90A-NEXT:    ;;#ASMEND
9564; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
9565; GFX90A-NEXT:    ;;#ASMSTART
9566; GFX90A-NEXT:    ; def s[4:5]
9567; GFX90A-NEXT:    ;;#ASMEND
9568; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9569; GFX90A-NEXT:    s_mov_b32 s9, s4
9570; GFX90A-NEXT:    ;;#ASMSTART
9571; GFX90A-NEXT:    ; use s[8:9]
9572; GFX90A-NEXT:    ;;#ASMEND
9573; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9574;
9575; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_0:
9576; GFX940:       ; %bb.0:
9577; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9578; GFX940-NEXT:    ;;#ASMSTART
9579; GFX940-NEXT:    ; def s[2:3]
9580; GFX940-NEXT:    ;;#ASMEND
9581; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
9582; GFX940-NEXT:    ;;#ASMSTART
9583; GFX940-NEXT:    ; def s[0:1]
9584; GFX940-NEXT:    ;;#ASMEND
9585; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
9586; GFX940-NEXT:    s_mov_b32 s9, s0
9587; GFX940-NEXT:    ;;#ASMSTART
9588; GFX940-NEXT:    ; use s[8:9]
9589; GFX940-NEXT:    ;;#ASMEND
9590; GFX940-NEXT:    s_setpc_b64 s[30:31]
9591  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9592  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9593  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
9594  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9595  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9596  ret void
9597}
9598
; Two-input SGPR shuffle test, mask <7, 3, 0>: result lanes are vec1[3],
; vec0[3], vec0[0]. Both inputs come from inline asm ("=s"); the <3 x i16>
; result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
9599define void @s_shuffle_v3i16_v4i16__7_3_0() {
9600; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_0:
9601; GFX900:       ; %bb.0:
9602; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9603; GFX900-NEXT:    ;;#ASMSTART
9604; GFX900-NEXT:    ; def s[4:5]
9605; GFX900-NEXT:    ;;#ASMEND
9606; GFX900-NEXT:    ;;#ASMSTART
9607; GFX900-NEXT:    ; def s[6:7]
9608; GFX900-NEXT:    ;;#ASMEND
9609; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
9610; GFX900-NEXT:    s_mov_b32 s9, s4
9611; GFX900-NEXT:    ;;#ASMSTART
9612; GFX900-NEXT:    ; use s[8:9]
9613; GFX900-NEXT:    ;;#ASMEND
9614; GFX900-NEXT:    s_setpc_b64 s[30:31]
9615;
9616; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_0:
9617; GFX90A:       ; %bb.0:
9618; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9619; GFX90A-NEXT:    ;;#ASMSTART
9620; GFX90A-NEXT:    ; def s[4:5]
9621; GFX90A-NEXT:    ;;#ASMEND
9622; GFX90A-NEXT:    ;;#ASMSTART
9623; GFX90A-NEXT:    ; def s[6:7]
9624; GFX90A-NEXT:    ;;#ASMEND
9625; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
9626; GFX90A-NEXT:    s_mov_b32 s9, s4
9627; GFX90A-NEXT:    ;;#ASMSTART
9628; GFX90A-NEXT:    ; use s[8:9]
9629; GFX90A-NEXT:    ;;#ASMEND
9630; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9631;
9632; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_0:
9633; GFX940:       ; %bb.0:
9634; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9635; GFX940-NEXT:    ;;#ASMSTART
9636; GFX940-NEXT:    ; def s[0:1]
9637; GFX940-NEXT:    ;;#ASMEND
9638; GFX940-NEXT:    ;;#ASMSTART
9639; GFX940-NEXT:    ; def s[2:3]
9640; GFX940-NEXT:    ;;#ASMEND
9641; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
9642; GFX940-NEXT:    s_mov_b32 s9, s0
9643; GFX940-NEXT:    ;;#ASMSTART
9644; GFX940-NEXT:    ; use s[8:9]
9645; GFX940-NEXT:    ;;#ASMEND
9646; GFX940-NEXT:    s_setpc_b64 s[30:31]
9647  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9648  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9649  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
9650  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9651  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9652  ret void
9653}
9654
; Two-input SGPR shuffle test, mask <7, 4, 0>: result lanes are vec1[3],
; vec1[0], vec0[0]. Both inputs come from inline asm ("=s"); the <3 x i16>
; result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
9655define void @s_shuffle_v3i16_v4i16__7_4_0() {
9656; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_0:
9657; GFX900:       ; %bb.0:
9658; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9659; GFX900-NEXT:    ;;#ASMSTART
9660; GFX900-NEXT:    ; def s[4:5]
9661; GFX900-NEXT:    ;;#ASMEND
9662; GFX900-NEXT:    ;;#ASMSTART
9663; GFX900-NEXT:    ; def s[6:7]
9664; GFX900-NEXT:    ;;#ASMEND
9665; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
9666; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
9667; GFX900-NEXT:    s_mov_b32 s9, s4
9668; GFX900-NEXT:    ;;#ASMSTART
9669; GFX900-NEXT:    ; use s[8:9]
9670; GFX900-NEXT:    ;;#ASMEND
9671; GFX900-NEXT:    s_setpc_b64 s[30:31]
9672;
9673; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_0:
9674; GFX90A:       ; %bb.0:
9675; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9676; GFX90A-NEXT:    ;;#ASMSTART
9677; GFX90A-NEXT:    ; def s[4:5]
9678; GFX90A-NEXT:    ;;#ASMEND
9679; GFX90A-NEXT:    ;;#ASMSTART
9680; GFX90A-NEXT:    ; def s[6:7]
9681; GFX90A-NEXT:    ;;#ASMEND
9682; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
9683; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
9684; GFX90A-NEXT:    s_mov_b32 s9, s4
9685; GFX90A-NEXT:    ;;#ASMSTART
9686; GFX90A-NEXT:    ; use s[8:9]
9687; GFX90A-NEXT:    ;;#ASMEND
9688; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9689;
9690; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_0:
9691; GFX940:       ; %bb.0:
9692; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9693; GFX940-NEXT:    ;;#ASMSTART
9694; GFX940-NEXT:    ; def s[0:1]
9695; GFX940-NEXT:    ;;#ASMEND
9696; GFX940-NEXT:    ;;#ASMSTART
9697; GFX940-NEXT:    ; def s[2:3]
9698; GFX940-NEXT:    ;;#ASMEND
9699; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
9700; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s2
9701; GFX940-NEXT:    s_mov_b32 s9, s0
9702; GFX940-NEXT:    ;;#ASMSTART
9703; GFX940-NEXT:    ; use s[8:9]
9704; GFX940-NEXT:    ;;#ASMEND
9705; GFX940-NEXT:    s_setpc_b64 s[30:31]
9706  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9707  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9708  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
9709  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9710  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9711  ret void
9712}
9713
; Two-input SGPR shuffle test, mask <7, 5, 0>: result lanes are vec1[3],
; vec1[1], vec0[0]. Both inputs come from inline asm ("=s"); the <3 x i16>
; result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
9714define void @s_shuffle_v3i16_v4i16__7_5_0() {
9715; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_0:
9716; GFX900:       ; %bb.0:
9717; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9718; GFX900-NEXT:    ;;#ASMSTART
9719; GFX900-NEXT:    ; def s[4:5]
9720; GFX900-NEXT:    ;;#ASMEND
9721; GFX900-NEXT:    ;;#ASMSTART
9722; GFX900-NEXT:    ; def s[6:7]
9723; GFX900-NEXT:    ;;#ASMEND
9724; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
9725; GFX900-NEXT:    s_mov_b32 s9, s4
9726; GFX900-NEXT:    ;;#ASMSTART
9727; GFX900-NEXT:    ; use s[8:9]
9728; GFX900-NEXT:    ;;#ASMEND
9729; GFX900-NEXT:    s_setpc_b64 s[30:31]
9730;
9731; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_0:
9732; GFX90A:       ; %bb.0:
9733; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9734; GFX90A-NEXT:    ;;#ASMSTART
9735; GFX90A-NEXT:    ; def s[4:5]
9736; GFX90A-NEXT:    ;;#ASMEND
9737; GFX90A-NEXT:    ;;#ASMSTART
9738; GFX90A-NEXT:    ; def s[6:7]
9739; GFX90A-NEXT:    ;;#ASMEND
9740; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
9741; GFX90A-NEXT:    s_mov_b32 s9, s4
9742; GFX90A-NEXT:    ;;#ASMSTART
9743; GFX90A-NEXT:    ; use s[8:9]
9744; GFX90A-NEXT:    ;;#ASMEND
9745; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9746;
9747; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_0:
9748; GFX940:       ; %bb.0:
9749; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9750; GFX940-NEXT:    ;;#ASMSTART
9751; GFX940-NEXT:    ; def s[0:1]
9752; GFX940-NEXT:    ;;#ASMEND
9753; GFX940-NEXT:    ;;#ASMSTART
9754; GFX940-NEXT:    ; def s[2:3]
9755; GFX940-NEXT:    ;;#ASMEND
9756; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s2
9757; GFX940-NEXT:    s_mov_b32 s9, s0
9758; GFX940-NEXT:    ;;#ASMSTART
9759; GFX940-NEXT:    ; use s[8:9]
9760; GFX940-NEXT:    ;;#ASMEND
9761; GFX940-NEXT:    s_setpc_b64 s[30:31]
9762  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9763  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9764  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
9765  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9766  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9767  ret void
9768}
9769
; Two-input SGPR shuffle test, mask <7, 6, 0>: result lanes are vec1[3],
; vec1[2], vec0[0]. Both inputs come from inline asm ("=s"); the <3 x i16>
; result is widened to <4 x i16> and handed to inline asm pinned to s[8:9].
9770define void @s_shuffle_v3i16_v4i16__7_6_0() {
9771; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_0:
9772; GFX900:       ; %bb.0:
9773; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9774; GFX900-NEXT:    ;;#ASMSTART
9775; GFX900-NEXT:    ; def s[4:5]
9776; GFX900-NEXT:    ;;#ASMEND
9777; GFX900-NEXT:    ;;#ASMSTART
9778; GFX900-NEXT:    ; def s[6:7]
9779; GFX900-NEXT:    ;;#ASMEND
9780; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
9781; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
9782; GFX900-NEXT:    s_mov_b32 s9, s4
9783; GFX900-NEXT:    ;;#ASMSTART
9784; GFX900-NEXT:    ; use s[8:9]
9785; GFX900-NEXT:    ;;#ASMEND
9786; GFX900-NEXT:    s_setpc_b64 s[30:31]
9787;
9788; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_0:
9789; GFX90A:       ; %bb.0:
9790; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9791; GFX90A-NEXT:    ;;#ASMSTART
9792; GFX90A-NEXT:    ; def s[4:5]
9793; GFX90A-NEXT:    ;;#ASMEND
9794; GFX90A-NEXT:    ;;#ASMSTART
9795; GFX90A-NEXT:    ; def s[6:7]
9796; GFX90A-NEXT:    ;;#ASMEND
9797; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
9798; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
9799; GFX90A-NEXT:    s_mov_b32 s9, s4
9800; GFX90A-NEXT:    ;;#ASMSTART
9801; GFX90A-NEXT:    ; use s[8:9]
9802; GFX90A-NEXT:    ;;#ASMEND
9803; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9804;
9805; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_0:
9806; GFX940:       ; %bb.0:
9807; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9808; GFX940-NEXT:    ;;#ASMSTART
9809; GFX940-NEXT:    ; def s[0:1]
9810; GFX940-NEXT:    ;;#ASMEND
9811; GFX940-NEXT:    ;;#ASMSTART
9812; GFX940-NEXT:    ; def s[2:3]
9813; GFX940-NEXT:    ;;#ASMEND
9814; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
9815; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s3
9816; GFX940-NEXT:    s_mov_b32 s9, s0
9817; GFX940-NEXT:    ;;#ASMSTART
9818; GFX940-NEXT:    ; use s[8:9]
9819; GFX940-NEXT:    ;;#ASMEND
9820; GFX940-NEXT:    s_setpc_b64 s[30:31]
9821  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9822  %vec1 = call <4 x i16> asm "; def $0", "=s"()
9823  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
9824  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9825  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9826  ret void
9827}
9828
; SGPR shuffle test, mask <poison, 1, 1>: lane 0 is unconstrained, lanes 1 and
; 2 are both vec0[1]. All three run lines share one GFX9 expectation, in which
; the input is defined directly in s[8:9] and only s9 is recomputed.
9829define void @s_shuffle_v3i16_v4i16__u_1_1() {
9830; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_1_1:
9831; GFX9:       ; %bb.0:
9832; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9833; GFX9-NEXT:    ;;#ASMSTART
9834; GFX9-NEXT:    ; def s[8:9]
9835; GFX9-NEXT:    ;;#ASMEND
9836; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
9837; GFX9-NEXT:    ;;#ASMSTART
9838; GFX9-NEXT:    ; use s[8:9]
9839; GFX9-NEXT:    ;;#ASMEND
9840; GFX9-NEXT:    s_setpc_b64 s[30:31]
9841  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9842  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 1, i32 1>
9843  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9844  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9845  ret void
9846}
9847
; SGPR shuffle test, mask <0, 1, 1>: result lanes are vec0[0], vec0[1],
; vec0[1]. All three run lines share one GFX9 expectation, in which the input
; is defined directly in s[8:9] and only s9 is recomputed.
9848define void @s_shuffle_v3i16_v4i16__0_1_1() {
9849; GFX9-LABEL: s_shuffle_v3i16_v4i16__0_1_1:
9850; GFX9:       ; %bb.0:
9851; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9852; GFX9-NEXT:    ;;#ASMSTART
9853; GFX9-NEXT:    ; def s[8:9]
9854; GFX9-NEXT:    ;;#ASMEND
9855; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
9856; GFX9-NEXT:    ;;#ASMSTART
9857; GFX9-NEXT:    ; use s[8:9]
9858; GFX9-NEXT:    ;;#ASMEND
9859; GFX9-NEXT:    s_setpc_b64 s[30:31]
9860  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9861  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 1>
9862  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9863  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9864  ret void
9865}
9866
; SGPR shuffle test, mask <1, 1, 1>: vec0[1] is splatted into all three lanes.
; %vec0 is produced by inline asm ("=s"); the <3 x i16> result is widened to
; <4 x i16> (last lane poison) and handed to inline asm pinned to s[8:9].
9867define void @s_shuffle_v3i16_v4i16__1_1_1() {
9868; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_1_1:
9869; GFX900:       ; %bb.0:
9870; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9871; GFX900-NEXT:    ;;#ASMSTART
9872; GFX900-NEXT:    ; def s[4:5]
9873; GFX900-NEXT:    ;;#ASMEND
9874; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
9875; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
9876; GFX900-NEXT:    ;;#ASMSTART
9877; GFX900-NEXT:    ; use s[8:9]
9878; GFX900-NEXT:    ;;#ASMEND
9879; GFX900-NEXT:    s_setpc_b64 s[30:31]
9880;
9881; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_1_1:
9882; GFX90A:       ; %bb.0:
9883; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9884; GFX90A-NEXT:    ;;#ASMSTART
9885; GFX90A-NEXT:    ; def s[4:5]
9886; GFX90A-NEXT:    ;;#ASMEND
9887; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
9888; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
9889; GFX90A-NEXT:    ;;#ASMSTART
9890; GFX90A-NEXT:    ; use s[8:9]
9891; GFX90A-NEXT:    ;;#ASMEND
9892; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9893;
9894; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_1_1:
9895; GFX940:       ; %bb.0:
9896; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9897; GFX940-NEXT:    ;;#ASMSTART
9898; GFX940-NEXT:    ; def s[0:1]
9899; GFX940-NEXT:    ;;#ASMEND
9900; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
9901; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s0
9902; GFX940-NEXT:    ;;#ASMSTART
9903; GFX940-NEXT:    ; use s[8:9]
9904; GFX940-NEXT:    ;;#ASMEND
9905; GFX940-NEXT:    s_setpc_b64 s[30:31]
9906  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9907  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 1, i32 1>
9908  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9909  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9910  ret void
9911}
9912
; Scalar-register shuffle test, mask <2, 1, 1> over a single inline-asm-defined
; <4 x i16> input (second shuffle operand is poison). The result is widened to
; <4 x i16> with a poison last lane and consumed by an inline-asm use pinned to
; s[8:9].
; Expected code: s_pack_lh_b32_b16 into s8 (sources: high then low register of
; the def) and s_lshr_b32 of the low register by 16 into s9. The per-subtarget
; groups differ only in the def registers (s[4:5] vs. s[0:1] for gfx940).
9913define void @s_shuffle_v3i16_v4i16__2_1_1() {
9914; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_1_1:
9915; GFX900:       ; %bb.0:
9916; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9917; GFX900-NEXT:    ;;#ASMSTART
9918; GFX900-NEXT:    ; def s[4:5]
9919; GFX900-NEXT:    ;;#ASMEND
9920; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
9921; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
9922; GFX900-NEXT:    ;;#ASMSTART
9923; GFX900-NEXT:    ; use s[8:9]
9924; GFX900-NEXT:    ;;#ASMEND
9925; GFX900-NEXT:    s_setpc_b64 s[30:31]
9926;
9927; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_1_1:
9928; GFX90A:       ; %bb.0:
9929; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9930; GFX90A-NEXT:    ;;#ASMSTART
9931; GFX90A-NEXT:    ; def s[4:5]
9932; GFX90A-NEXT:    ;;#ASMEND
9933; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
9934; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
9935; GFX90A-NEXT:    ;;#ASMSTART
9936; GFX90A-NEXT:    ; use s[8:9]
9937; GFX90A-NEXT:    ;;#ASMEND
9938; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9939;
9940; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_1_1:
9941; GFX940:       ; %bb.0:
9942; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9943; GFX940-NEXT:    ;;#ASMSTART
9944; GFX940-NEXT:    ; def s[0:1]
9945; GFX940-NEXT:    ;;#ASMEND
9946; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
9947; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
9948; GFX940-NEXT:    ;;#ASMSTART
9949; GFX940-NEXT:    ; use s[8:9]
9950; GFX940-NEXT:    ;;#ASMEND
9951; GFX940-NEXT:    s_setpc_b64 s[30:31]
9952  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9953  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 1, i32 1>
9954  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9955  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
9956  ret void
9957}
9958
; Scalar-register shuffle test, mask <3, 1, 1> over a single inline-asm-defined
; <4 x i16> input (second shuffle operand is poison). The result is widened to
; <4 x i16> with a poison last lane and consumed by an inline-asm use pinned to
; s[8:9].
; Expected code: s_pack_hh_b32_b16 into s8 (sources: high then low register of
; the def) and s_lshr_b32 of the low register by 16 into s9. The per-subtarget
; groups differ only in the def registers (s[4:5] vs. s[0:1] for gfx940).
9959define void @s_shuffle_v3i16_v4i16__3_1_1() {
9960; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_1_1:
9961; GFX900:       ; %bb.0:
9962; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9963; GFX900-NEXT:    ;;#ASMSTART
9964; GFX900-NEXT:    ; def s[4:5]
9965; GFX900-NEXT:    ;;#ASMEND
9966; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
9967; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
9968; GFX900-NEXT:    ;;#ASMSTART
9969; GFX900-NEXT:    ; use s[8:9]
9970; GFX900-NEXT:    ;;#ASMEND
9971; GFX900-NEXT:    s_setpc_b64 s[30:31]
9972;
9973; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_1_1:
9974; GFX90A:       ; %bb.0:
9975; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9976; GFX90A-NEXT:    ;;#ASMSTART
9977; GFX90A-NEXT:    ; def s[4:5]
9978; GFX90A-NEXT:    ;;#ASMEND
9979; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
9980; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
9981; GFX90A-NEXT:    ;;#ASMSTART
9982; GFX90A-NEXT:    ; use s[8:9]
9983; GFX90A-NEXT:    ;;#ASMEND
9984; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9985;
9986; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_1_1:
9987; GFX940:       ; %bb.0:
9988; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9989; GFX940-NEXT:    ;;#ASMSTART
9990; GFX940-NEXT:    ; def s[0:1]
9991; GFX940-NEXT:    ;;#ASMEND
9992; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
9993; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
9994; GFX940-NEXT:    ;;#ASMSTART
9995; GFX940-NEXT:    ; use s[8:9]
9996; GFX940-NEXT:    ;;#ASMEND
9997; GFX940-NEXT:    s_setpc_b64 s[30:31]
9998  %vec0 = call <4 x i16> asm "; def $0", "=s"()
9999  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 1, i32 1>
10000  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10001  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10002  ret void
10003}
10004
; Scalar-register shuffle test, mask <4, 1, 1>. The second shuffle operand is
; poison, so mask index 4 (its lane 0) selects a poison lane; only lanes 1 and 2
; of the result carry data from %vec0. The result is widened to <4 x i16> with a
; poison last lane and consumed by an inline-asm use pinned to s[8:9].
; Expected code: one s_lshr_b32 of s8 by 16 into s9.
10005define void @s_shuffle_v3i16_v4i16__4_1_1() {
10006; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_1_1:
10007; GFX9:       ; %bb.0:
10008; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10009; GFX9-NEXT:    ;;#ASMSTART
10010; GFX9-NEXT:    ; def s[8:9]
10011; GFX9-NEXT:    ;;#ASMEND
10012; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
10013; GFX9-NEXT:    ;;#ASMSTART
10014; GFX9-NEXT:    ; use s[8:9]
10015; GFX9-NEXT:    ;;#ASMEND
10016; GFX9-NEXT:    s_setpc_b64 s[30:31]
10017  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10018  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 1, i32 1>
10019  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10020  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10021  ret void
10022}
10023
; Scalar-register two-input shuffle test, mask <5, 1, 1>: lane 0 comes from
; %vec1 (mask index 5 = its lane 1), lanes 1 and 2 from lane 1 of %vec0. Both
; inputs are separate inline-asm defs. The result is widened to <4 x i16> with a
; poison last lane and consumed by an inline-asm use pinned to s[8:9].
; Expected code: s_pack_hh_b32_b16 into s8 from the low registers of the second
; and first def, then s_lshr_b32 of the first def's low register by 16 into s9.
; The gfx940 group uses s[0:1]/s[2:3] where the others use s[4:5]/s[6:7].
10024define void @s_shuffle_v3i16_v4i16__5_1_1() {
10025; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_1_1:
10026; GFX900:       ; %bb.0:
10027; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10028; GFX900-NEXT:    ;;#ASMSTART
10029; GFX900-NEXT:    ; def s[4:5]
10030; GFX900-NEXT:    ;;#ASMEND
10031; GFX900-NEXT:    ;;#ASMSTART
10032; GFX900-NEXT:    ; def s[6:7]
10033; GFX900-NEXT:    ;;#ASMEND
10034; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s6, s4
10035; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10036; GFX900-NEXT:    ;;#ASMSTART
10037; GFX900-NEXT:    ; use s[8:9]
10038; GFX900-NEXT:    ;;#ASMEND
10039; GFX900-NEXT:    s_setpc_b64 s[30:31]
10040;
10041; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_1_1:
10042; GFX90A:       ; %bb.0:
10043; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10044; GFX90A-NEXT:    ;;#ASMSTART
10045; GFX90A-NEXT:    ; def s[4:5]
10046; GFX90A-NEXT:    ;;#ASMEND
10047; GFX90A-NEXT:    ;;#ASMSTART
10048; GFX90A-NEXT:    ; def s[6:7]
10049; GFX90A-NEXT:    ;;#ASMEND
10050; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s6, s4
10051; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10052; GFX90A-NEXT:    ;;#ASMSTART
10053; GFX90A-NEXT:    ; use s[8:9]
10054; GFX90A-NEXT:    ;;#ASMEND
10055; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10056;
10057; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_1_1:
10058; GFX940:       ; %bb.0:
10059; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10060; GFX940-NEXT:    ;;#ASMSTART
10061; GFX940-NEXT:    ; def s[0:1]
10062; GFX940-NEXT:    ;;#ASMEND
10063; GFX940-NEXT:    ;;#ASMSTART
10064; GFX940-NEXT:    ; def s[2:3]
10065; GFX940-NEXT:    ;;#ASMEND
10066; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s2, s0
10067; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10068; GFX940-NEXT:    ;;#ASMSTART
10069; GFX940-NEXT:    ; use s[8:9]
10070; GFX940-NEXT:    ;;#ASMEND
10071; GFX940-NEXT:    s_setpc_b64 s[30:31]
10072  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10073  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10074  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
10075  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10076  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10077  ret void
10078}
10079
; Scalar-register two-input shuffle test, mask <6, 1, 1>: lane 0 comes from
; %vec1 (mask index 6 = its lane 2), lanes 1 and 2 from lane 1 of %vec0. Both
; inputs are separate inline-asm defs. The result is widened to <4 x i16> with a
; poison last lane and consumed by an inline-asm use pinned to s[8:9].
; Expected code: s_pack_lh_b32_b16 into s8 from the second def's high register
; and the first def's low register, then s_lshr_b32 of the first def's low
; register by 16 into s9. The gfx940 group uses s[0:1]/s[2:3] where the others
; use s[4:5]/s[6:7].
10080define void @s_shuffle_v3i16_v4i16__6_1_1() {
10081; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_1_1:
10082; GFX900:       ; %bb.0:
10083; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10084; GFX900-NEXT:    ;;#ASMSTART
10085; GFX900-NEXT:    ; def s[4:5]
10086; GFX900-NEXT:    ;;#ASMEND
10087; GFX900-NEXT:    ;;#ASMSTART
10088; GFX900-NEXT:    ; def s[6:7]
10089; GFX900-NEXT:    ;;#ASMEND
10090; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
10091; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10092; GFX900-NEXT:    ;;#ASMSTART
10093; GFX900-NEXT:    ; use s[8:9]
10094; GFX900-NEXT:    ;;#ASMEND
10095; GFX900-NEXT:    s_setpc_b64 s[30:31]
10096;
10097; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_1_1:
10098; GFX90A:       ; %bb.0:
10099; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10100; GFX90A-NEXT:    ;;#ASMSTART
10101; GFX90A-NEXT:    ; def s[4:5]
10102; GFX90A-NEXT:    ;;#ASMEND
10103; GFX90A-NEXT:    ;;#ASMSTART
10104; GFX90A-NEXT:    ; def s[6:7]
10105; GFX90A-NEXT:    ;;#ASMEND
10106; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
10107; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10108; GFX90A-NEXT:    ;;#ASMSTART
10109; GFX90A-NEXT:    ; use s[8:9]
10110; GFX90A-NEXT:    ;;#ASMEND
10111; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10112;
10113; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_1_1:
10114; GFX940:       ; %bb.0:
10115; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10116; GFX940-NEXT:    ;;#ASMSTART
10117; GFX940-NEXT:    ; def s[0:1]
10118; GFX940-NEXT:    ;;#ASMEND
10119; GFX940-NEXT:    ;;#ASMSTART
10120; GFX940-NEXT:    ; def s[2:3]
10121; GFX940-NEXT:    ;;#ASMEND
10122; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
10123; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10124; GFX940-NEXT:    ;;#ASMSTART
10125; GFX940-NEXT:    ; use s[8:9]
10126; GFX940-NEXT:    ;;#ASMEND
10127; GFX940-NEXT:    s_setpc_b64 s[30:31]
10128  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10129  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10130  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
10131  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10132  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10133  ret void
10134}
10135
; Scalar-register two-input shuffle test, mask <7, 1, 1>: lane 0 comes from
; %vec1 (mask index 7 = its lane 3), lanes 1 and 2 from lane 1 of %vec0. Both
; inputs are separate inline-asm defs. The result is widened to <4 x i16> with a
; poison last lane and consumed by an inline-asm use pinned to s[8:9].
; Expected code: s_pack_hh_b32_b16 into s8 from the second def's high register
; and the first def's low register, then s_lshr_b32 of the first def's low
; register by 16 into s9. The gfx940 group uses s[0:1]/s[2:3] where the others
; use s[4:5]/s[6:7].
10136define void @s_shuffle_v3i16_v4i16__7_1_1() {
10137; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_1:
10138; GFX900:       ; %bb.0:
10139; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10140; GFX900-NEXT:    ;;#ASMSTART
10141; GFX900-NEXT:    ; def s[4:5]
10142; GFX900-NEXT:    ;;#ASMEND
10143; GFX900-NEXT:    ;;#ASMSTART
10144; GFX900-NEXT:    ; def s[6:7]
10145; GFX900-NEXT:    ;;#ASMEND
10146; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
10147; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10148; GFX900-NEXT:    ;;#ASMSTART
10149; GFX900-NEXT:    ; use s[8:9]
10150; GFX900-NEXT:    ;;#ASMEND
10151; GFX900-NEXT:    s_setpc_b64 s[30:31]
10152;
10153; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_1:
10154; GFX90A:       ; %bb.0:
10155; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10156; GFX90A-NEXT:    ;;#ASMSTART
10157; GFX90A-NEXT:    ; def s[4:5]
10158; GFX90A-NEXT:    ;;#ASMEND
10159; GFX90A-NEXT:    ;;#ASMSTART
10160; GFX90A-NEXT:    ; def s[6:7]
10161; GFX90A-NEXT:    ;;#ASMEND
10162; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
10163; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10164; GFX90A-NEXT:    ;;#ASMSTART
10165; GFX90A-NEXT:    ; use s[8:9]
10166; GFX90A-NEXT:    ;;#ASMEND
10167; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10168;
10169; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_1:
10170; GFX940:       ; %bb.0:
10171; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10172; GFX940-NEXT:    ;;#ASMSTART
10173; GFX940-NEXT:    ; def s[0:1]
10174; GFX940-NEXT:    ;;#ASMEND
10175; GFX940-NEXT:    ;;#ASMSTART
10176; GFX940-NEXT:    ; def s[2:3]
10177; GFX940-NEXT:    ;;#ASMEND
10178; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
10179; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10180; GFX940-NEXT:    ;;#ASMSTART
10181; GFX940-NEXT:    ; use s[8:9]
10182; GFX940-NEXT:    ;;#ASMEND
10183; GFX940-NEXT:    s_setpc_b64 s[30:31]
10184  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10185  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10186  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
10187  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10188  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10189  ret void
10190}
10191
; Scalar-register two-input shuffle test, mask <7, poison, 1>: lane 0 comes
; from %vec1 (mask index 7 = its lane 3), lane 1 is poison, lane 2 is lane 1 of
; %vec0. Both inputs are separate inline-asm defs. The result is widened to
; <4 x i16> with a poison last lane and consumed by an inline-asm use pinned to
; s[8:9].
; Expected code: two s_lshr_b32 by 16, one from the first def's low register
; into s9 and one from the second def's high register into s8; no pack is
; expected. The gfx940 group uses s[0:1]/s[2:3] where the others use
; s[4:5]/s[6:7].
10192define void @s_shuffle_v3i16_v4i16__7_u_1() {
10193; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_1:
10194; GFX900:       ; %bb.0:
10195; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10196; GFX900-NEXT:    ;;#ASMSTART
10197; GFX900-NEXT:    ; def s[4:5]
10198; GFX900-NEXT:    ;;#ASMEND
10199; GFX900-NEXT:    ;;#ASMSTART
10200; GFX900-NEXT:    ; def s[6:7]
10201; GFX900-NEXT:    ;;#ASMEND
10202; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10203; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
10204; GFX900-NEXT:    ;;#ASMSTART
10205; GFX900-NEXT:    ; use s[8:9]
10206; GFX900-NEXT:    ;;#ASMEND
10207; GFX900-NEXT:    s_setpc_b64 s[30:31]
10208;
10209; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_1:
10210; GFX90A:       ; %bb.0:
10211; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10212; GFX90A-NEXT:    ;;#ASMSTART
10213; GFX90A-NEXT:    ; def s[4:5]
10214; GFX90A-NEXT:    ;;#ASMEND
10215; GFX90A-NEXT:    ;;#ASMSTART
10216; GFX90A-NEXT:    ; def s[6:7]
10217; GFX90A-NEXT:    ;;#ASMEND
10218; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10219; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
10220; GFX90A-NEXT:    ;;#ASMSTART
10221; GFX90A-NEXT:    ; use s[8:9]
10222; GFX90A-NEXT:    ;;#ASMEND
10223; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10224;
10225; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_1:
10226; GFX940:       ; %bb.0:
10227; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10228; GFX940-NEXT:    ;;#ASMSTART
10229; GFX940-NEXT:    ; def s[0:1]
10230; GFX940-NEXT:    ;;#ASMEND
10231; GFX940-NEXT:    ;;#ASMSTART
10232; GFX940-NEXT:    ; def s[2:3]
10233; GFX940-NEXT:    ;;#ASMEND
10234; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10235; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
10236; GFX940-NEXT:    ;;#ASMSTART
10237; GFX940-NEXT:    ; use s[8:9]
10238; GFX940-NEXT:    ;;#ASMEND
10239; GFX940-NEXT:    s_setpc_b64 s[30:31]
10240  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10241  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10242  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
10243  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10244  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10245  ret void
10246}
10247
; Scalar-register two-input shuffle test, mask <7, 0, 1>: lane 0 comes from
; %vec1 (mask index 7 = its lane 3), lanes 1 and 2 are lanes 0 and 1 of %vec0.
; Both inputs are separate inline-asm defs. The result is widened to <4 x i16>
; with a poison last lane and consumed by an inline-asm use pinned to s[8:9].
; Expected code: s_lshr_b32 of the second def's high register by 16 into a
; temporary (the expected output reuses the first def's high register for it),
; s_pack_ll_b32_b16 of that temporary with the first def's low register into
; s8, then s_lshr_b32 of the first def's low register by 16 into s9. The gfx940
; group uses s[0:1]/s[2:3] where the others use s[4:5]/s[6:7].
10248define void @s_shuffle_v3i16_v4i16__7_0_1() {
10249; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_1:
10250; GFX900:       ; %bb.0:
10251; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10252; GFX900-NEXT:    ;;#ASMSTART
10253; GFX900-NEXT:    ; def s[4:5]
10254; GFX900-NEXT:    ;;#ASMEND
10255; GFX900-NEXT:    ;;#ASMSTART
10256; GFX900-NEXT:    ; def s[6:7]
10257; GFX900-NEXT:    ;;#ASMEND
10258; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
10259; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
10260; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10261; GFX900-NEXT:    ;;#ASMSTART
10262; GFX900-NEXT:    ; use s[8:9]
10263; GFX900-NEXT:    ;;#ASMEND
10264; GFX900-NEXT:    s_setpc_b64 s[30:31]
10265;
10266; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_1:
10267; GFX90A:       ; %bb.0:
10268; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10269; GFX90A-NEXT:    ;;#ASMSTART
10270; GFX90A-NEXT:    ; def s[4:5]
10271; GFX90A-NEXT:    ;;#ASMEND
10272; GFX90A-NEXT:    ;;#ASMSTART
10273; GFX90A-NEXT:    ; def s[6:7]
10274; GFX90A-NEXT:    ;;#ASMEND
10275; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
10276; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
10277; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10278; GFX90A-NEXT:    ;;#ASMSTART
10279; GFX90A-NEXT:    ; use s[8:9]
10280; GFX90A-NEXT:    ;;#ASMEND
10281; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10282;
10283; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_1:
10284; GFX940:       ; %bb.0:
10285; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10286; GFX940-NEXT:    ;;#ASMSTART
10287; GFX940-NEXT:    ; def s[0:1]
10288; GFX940-NEXT:    ;;#ASMEND
10289; GFX940-NEXT:    ;;#ASMSTART
10290; GFX940-NEXT:    ; def s[2:3]
10291; GFX940-NEXT:    ;;#ASMEND
10292; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
10293; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
10294; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10295; GFX940-NEXT:    ;;#ASMSTART
10296; GFX940-NEXT:    ; use s[8:9]
10297; GFX940-NEXT:    ;;#ASMEND
10298; GFX940-NEXT:    s_setpc_b64 s[30:31]
10299  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10300  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10301  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
10302  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10303  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10304  ret void
10305}
10306
; Scalar-register two-input shuffle test, mask <7, 2, 1>: lane 0 comes from
; %vec1 (mask index 7 = its lane 3), lane 1 is lane 2 of %vec0, lane 2 is lane 1
; of %vec0. Both inputs are separate inline-asm defs. The result is widened to
; <4 x i16> with a poison last lane and consumed by an inline-asm use pinned to
; s[8:9].
; Expected code: note that the expected output lists the s[6:7] def (s[2:3] on
; gfx940) first, shifts its high register right by 16 into its low register,
; and only then shows the second def; s_pack_ll_b32_b16 then forms s8 and
; s_lshr_b32 by 16 forms s9.
10307define void @s_shuffle_v3i16_v4i16__7_2_1() {
10308; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_1:
10309; GFX900:       ; %bb.0:
10310; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10311; GFX900-NEXT:    ;;#ASMSTART
10312; GFX900-NEXT:    ; def s[6:7]
10313; GFX900-NEXT:    ;;#ASMEND
10314; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
10315; GFX900-NEXT:    ;;#ASMSTART
10316; GFX900-NEXT:    ; def s[4:5]
10317; GFX900-NEXT:    ;;#ASMEND
10318; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
10319; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10320; GFX900-NEXT:    ;;#ASMSTART
10321; GFX900-NEXT:    ; use s[8:9]
10322; GFX900-NEXT:    ;;#ASMEND
10323; GFX900-NEXT:    s_setpc_b64 s[30:31]
10324;
10325; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_1:
10326; GFX90A:       ; %bb.0:
10327; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10328; GFX90A-NEXT:    ;;#ASMSTART
10329; GFX90A-NEXT:    ; def s[6:7]
10330; GFX90A-NEXT:    ;;#ASMEND
10331; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
10332; GFX90A-NEXT:    ;;#ASMSTART
10333; GFX90A-NEXT:    ; def s[4:5]
10334; GFX90A-NEXT:    ;;#ASMEND
10335; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
10336; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10337; GFX90A-NEXT:    ;;#ASMSTART
10338; GFX90A-NEXT:    ; use s[8:9]
10339; GFX90A-NEXT:    ;;#ASMEND
10340; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10341;
10342; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_1:
10343; GFX940:       ; %bb.0:
10344; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10345; GFX940-NEXT:    ;;#ASMSTART
10346; GFX940-NEXT:    ; def s[2:3]
10347; GFX940-NEXT:    ;;#ASMEND
10348; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
10349; GFX940-NEXT:    ;;#ASMSTART
10350; GFX940-NEXT:    ; def s[0:1]
10351; GFX940-NEXT:    ;;#ASMEND
10352; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
10353; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10354; GFX940-NEXT:    ;;#ASMSTART
10355; GFX940-NEXT:    ; use s[8:9]
10356; GFX940-NEXT:    ;;#ASMEND
10357; GFX940-NEXT:    s_setpc_b64 s[30:31]
10358  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10359  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10360  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
10361  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10362  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10363  ret void
10364}
10365
; Scalar-register two-input shuffle test, mask <7, 3, 1>: lane 0 comes from
; %vec1 (mask index 7 = its lane 3), lane 1 is lane 3 of %vec0, lane 2 is lane 1
; of %vec0. Both inputs are separate inline-asm defs. The result is widened to
; <4 x i16> with a poison last lane and consumed by an inline-asm use pinned to
; s[8:9].
; Expected code: s_pack_hh_b32_b16 into s8 from the high registers of the
; second and first def, then s_lshr_b32 of the first def's low register by 16
; into s9. The gfx940 group uses s[0:1]/s[2:3] where the others use
; s[4:5]/s[6:7].
10366define void @s_shuffle_v3i16_v4i16__7_3_1() {
10367; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_1:
10368; GFX900:       ; %bb.0:
10369; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10370; GFX900-NEXT:    ;;#ASMSTART
10371; GFX900-NEXT:    ; def s[4:5]
10372; GFX900-NEXT:    ;;#ASMEND
10373; GFX900-NEXT:    ;;#ASMSTART
10374; GFX900-NEXT:    ; def s[6:7]
10375; GFX900-NEXT:    ;;#ASMEND
10376; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
10377; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10378; GFX900-NEXT:    ;;#ASMSTART
10379; GFX900-NEXT:    ; use s[8:9]
10380; GFX900-NEXT:    ;;#ASMEND
10381; GFX900-NEXT:    s_setpc_b64 s[30:31]
10382;
10383; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_1:
10384; GFX90A:       ; %bb.0:
10385; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10386; GFX90A-NEXT:    ;;#ASMSTART
10387; GFX90A-NEXT:    ; def s[4:5]
10388; GFX90A-NEXT:    ;;#ASMEND
10389; GFX90A-NEXT:    ;;#ASMSTART
10390; GFX90A-NEXT:    ; def s[6:7]
10391; GFX90A-NEXT:    ;;#ASMEND
10392; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
10393; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10394; GFX90A-NEXT:    ;;#ASMSTART
10395; GFX90A-NEXT:    ; use s[8:9]
10396; GFX90A-NEXT:    ;;#ASMEND
10397; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10398;
10399; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_1:
10400; GFX940:       ; %bb.0:
10401; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10402; GFX940-NEXT:    ;;#ASMSTART
10403; GFX940-NEXT:    ; def s[0:1]
10404; GFX940-NEXT:    ;;#ASMEND
10405; GFX940-NEXT:    ;;#ASMSTART
10406; GFX940-NEXT:    ; def s[2:3]
10407; GFX940-NEXT:    ;;#ASMEND
10408; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
10409; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10410; GFX940-NEXT:    ;;#ASMSTART
10411; GFX940-NEXT:    ; use s[8:9]
10412; GFX940-NEXT:    ;;#ASMEND
10413; GFX940-NEXT:    s_setpc_b64 s[30:31]
10414  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10415  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10416  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
10417  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10418  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10419  ret void
10420}
10421
; Scalar-register two-input shuffle test, mask <7, 4, 1>: lanes 0 and 1 come
; from %vec1 (mask indices 7 and 4 = its lanes 3 and 0), lane 2 is lane 1 of
; %vec0. Both inputs are separate inline-asm defs. The result is widened to
; <4 x i16> with a poison last lane and consumed by an inline-asm use pinned to
; s[8:9].
; Expected code: s_lshr_b32 of the second def's high register by 16 into a
; temporary, s_pack_ll_b32_b16 of that temporary with the second def's low
; register into s8, then s_lshr_b32 of the first def's low register by 16 into
; s9. The gfx940 group uses s[0:1]/s[2:3] where the others use s[4:5]/s[6:7].
10422define void @s_shuffle_v3i16_v4i16__7_4_1() {
10423; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_1:
10424; GFX900:       ; %bb.0:
10425; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10426; GFX900-NEXT:    ;;#ASMSTART
10427; GFX900-NEXT:    ; def s[4:5]
10428; GFX900-NEXT:    ;;#ASMEND
10429; GFX900-NEXT:    ;;#ASMSTART
10430; GFX900-NEXT:    ; def s[6:7]
10431; GFX900-NEXT:    ;;#ASMEND
10432; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
10433; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
10434; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10435; GFX900-NEXT:    ;;#ASMSTART
10436; GFX900-NEXT:    ; use s[8:9]
10437; GFX900-NEXT:    ;;#ASMEND
10438; GFX900-NEXT:    s_setpc_b64 s[30:31]
10439;
10440; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_1:
10441; GFX90A:       ; %bb.0:
10442; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10443; GFX90A-NEXT:    ;;#ASMSTART
10444; GFX90A-NEXT:    ; def s[4:5]
10445; GFX90A-NEXT:    ;;#ASMEND
10446; GFX90A-NEXT:    ;;#ASMSTART
10447; GFX90A-NEXT:    ; def s[6:7]
10448; GFX90A-NEXT:    ;;#ASMEND
10449; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
10450; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
10451; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10452; GFX90A-NEXT:    ;;#ASMSTART
10453; GFX90A-NEXT:    ; use s[8:9]
10454; GFX90A-NEXT:    ;;#ASMEND
10455; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10456;
10457; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_1:
10458; GFX940:       ; %bb.0:
10459; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10460; GFX940-NEXT:    ;;#ASMSTART
10461; GFX940-NEXT:    ; def s[0:1]
10462; GFX940-NEXT:    ;;#ASMEND
10463; GFX940-NEXT:    ;;#ASMSTART
10464; GFX940-NEXT:    ; def s[2:3]
10465; GFX940-NEXT:    ;;#ASMEND
10466; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
10467; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s2
10468; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10469; GFX940-NEXT:    ;;#ASMSTART
10470; GFX940-NEXT:    ; use s[8:9]
10471; GFX940-NEXT:    ;;#ASMEND
10472; GFX940-NEXT:    s_setpc_b64 s[30:31]
10473  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10474  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10475  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
10476  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10477  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10478  ret void
10479}
10480
; Scalar-register two-input shuffle test, mask <7, 5, 1>: lanes 0 and 1 come
; from %vec1 (mask indices 7 and 5 = its lanes 3 and 1), lane 2 is lane 1 of
; %vec0. Both inputs are separate inline-asm defs. The result is widened to
; <4 x i16> with a poison last lane and consumed by an inline-asm use pinned to
; s[8:9].
; Expected code: s_pack_hh_b32_b16 into s8 from the high and low registers of
; the second def, then s_lshr_b32 of the first def's low register by 16 into
; s9. The gfx940 group uses s[0:1]/s[2:3] where the others use s[4:5]/s[6:7].
10481define void @s_shuffle_v3i16_v4i16__7_5_1() {
10482; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_1:
10483; GFX900:       ; %bb.0:
10484; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10485; GFX900-NEXT:    ;;#ASMSTART
10486; GFX900-NEXT:    ; def s[4:5]
10487; GFX900-NEXT:    ;;#ASMEND
10488; GFX900-NEXT:    ;;#ASMSTART
10489; GFX900-NEXT:    ; def s[6:7]
10490; GFX900-NEXT:    ;;#ASMEND
10491; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
10492; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10493; GFX900-NEXT:    ;;#ASMSTART
10494; GFX900-NEXT:    ; use s[8:9]
10495; GFX900-NEXT:    ;;#ASMEND
10496; GFX900-NEXT:    s_setpc_b64 s[30:31]
10497;
10498; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_1:
10499; GFX90A:       ; %bb.0:
10500; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10501; GFX90A-NEXT:    ;;#ASMSTART
10502; GFX90A-NEXT:    ; def s[4:5]
10503; GFX90A-NEXT:    ;;#ASMEND
10504; GFX90A-NEXT:    ;;#ASMSTART
10505; GFX90A-NEXT:    ; def s[6:7]
10506; GFX90A-NEXT:    ;;#ASMEND
10507; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
10508; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10509; GFX90A-NEXT:    ;;#ASMSTART
10510; GFX90A-NEXT:    ; use s[8:9]
10511; GFX90A-NEXT:    ;;#ASMEND
10512; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10513;
10514; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_1:
10515; GFX940:       ; %bb.0:
10516; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10517; GFX940-NEXT:    ;;#ASMSTART
10518; GFX940-NEXT:    ; def s[0:1]
10519; GFX940-NEXT:    ;;#ASMEND
10520; GFX940-NEXT:    ;;#ASMSTART
10521; GFX940-NEXT:    ; def s[2:3]
10522; GFX940-NEXT:    ;;#ASMEND
10523; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s2
10524; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10525; GFX940-NEXT:    ;;#ASMSTART
10526; GFX940-NEXT:    ; use s[8:9]
10527; GFX940-NEXT:    ;;#ASMEND
10528; GFX940-NEXT:    s_setpc_b64 s[30:31]
10529  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10530  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10531  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
10532  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10533  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10534  ret void
10535}
10536
; Scalar-register two-input shuffle test, mask <7, 6, 1>: lanes 0 and 1 come
; from %vec1 (mask indices 7 and 6 = its lanes 3 and 2, i.e. swapped), lane 2 is
; lane 1 of %vec0. Both inputs are separate inline-asm defs. The result is
; widened to <4 x i16> with a poison last lane and consumed by an inline-asm use
; pinned to s[8:9].
; Expected code: s_lshr_b32 of the second def's high register by 16 into a
; temporary, s_pack_ll_b32_b16 of that temporary with the same high register
; into s8, then s_lshr_b32 of the first def's low register by 16 into s9. The
; gfx940 group uses s[0:1]/s[2:3] where the others use s[4:5]/s[6:7].
10537define void @s_shuffle_v3i16_v4i16__7_6_1() {
10538; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_1:
10539; GFX900:       ; %bb.0:
10540; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10541; GFX900-NEXT:    ;;#ASMSTART
10542; GFX900-NEXT:    ; def s[4:5]
10543; GFX900-NEXT:    ;;#ASMEND
10544; GFX900-NEXT:    ;;#ASMSTART
10545; GFX900-NEXT:    ; def s[6:7]
10546; GFX900-NEXT:    ;;#ASMEND
10547; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
10548; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
10549; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10550; GFX900-NEXT:    ;;#ASMSTART
10551; GFX900-NEXT:    ; use s[8:9]
10552; GFX900-NEXT:    ;;#ASMEND
10553; GFX900-NEXT:    s_setpc_b64 s[30:31]
10554;
10555; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_1:
10556; GFX90A:       ; %bb.0:
10557; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10558; GFX90A-NEXT:    ;;#ASMSTART
10559; GFX90A-NEXT:    ; def s[4:5]
10560; GFX90A-NEXT:    ;;#ASMEND
10561; GFX90A-NEXT:    ;;#ASMSTART
10562; GFX90A-NEXT:    ; def s[6:7]
10563; GFX90A-NEXT:    ;;#ASMEND
10564; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
10565; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
10566; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10567; GFX90A-NEXT:    ;;#ASMSTART
10568; GFX90A-NEXT:    ; use s[8:9]
10569; GFX90A-NEXT:    ;;#ASMEND
10570; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10571;
10572; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_1:
10573; GFX940:       ; %bb.0:
10574; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10575; GFX940-NEXT:    ;;#ASMSTART
10576; GFX940-NEXT:    ; def s[0:1]
10577; GFX940-NEXT:    ;;#ASMEND
10578; GFX940-NEXT:    ;;#ASMSTART
10579; GFX940-NEXT:    ; def s[2:3]
10580; GFX940-NEXT:    ;;#ASMEND
10581; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
10582; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s3
10583; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10584; GFX940-NEXT:    ;;#ASMSTART
10585; GFX940-NEXT:    ; use s[8:9]
10586; GFX940-NEXT:    ;;#ASMEND
10587; GFX940-NEXT:    s_setpc_b64 s[30:31]
10588  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10589  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10590  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
10591  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10592  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10593  ret void
10594}
10595
; Scalar-register shuffle test, mask <poison, 2, 2> over a single
; inline-asm-defined <4 x i16> input (second shuffle operand is poison): lane 0
; is unconstrained, lanes 1 and 2 both take lane 2 of %vec0. The result is
; widened to <4 x i16> with a poison last lane and consumed by an inline-asm use
; pinned to s[8:9].
; Expected code: one s_lshl_b32 of s9 by 16 into s8; s9 itself is left as
; defined.
10596define void @s_shuffle_v3i16_v4i16__u_2_2() {
10597; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_2_2:
10598; GFX9:       ; %bb.0:
10599; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10600; GFX9-NEXT:    ;;#ASMSTART
10601; GFX9-NEXT:    ; def s[8:9]
10602; GFX9-NEXT:    ;;#ASMEND
10603; GFX9-NEXT:    s_lshl_b32 s8, s9, 16
10604; GFX9-NEXT:    ;;#ASMSTART
10605; GFX9-NEXT:    ; use s[8:9]
10606; GFX9-NEXT:    ;;#ASMEND
10607; GFX9-NEXT:    s_setpc_b64 s[30:31]
10608  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10609  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 2, i32 2>
10610  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10611  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10612  ret void
10613}
10614
; Scalar-register shuffle test, mask <0, 2, 2> over a single inline-asm-defined
; <4 x i16> input (second shuffle operand is poison). The result is widened to
; <4 x i16> with a poison last lane and consumed by an inline-asm use pinned to
; s[8:9].
; Expected code: one s_pack_ll_b32_b16 of s8 and s9 written back into s8; s9 is
; left as defined.
10615define void @s_shuffle_v3i16_v4i16__0_2_2() {
10616; GFX9-LABEL: s_shuffle_v3i16_v4i16__0_2_2:
10617; GFX9:       ; %bb.0:
10618; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10619; GFX9-NEXT:    ;;#ASMSTART
10620; GFX9-NEXT:    ; def s[8:9]
10621; GFX9-NEXT:    ;;#ASMEND
10622; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s8, s9
10623; GFX9-NEXT:    ;;#ASMSTART
10624; GFX9-NEXT:    ; use s[8:9]
10625; GFX9-NEXT:    ;;#ASMEND
10626; GFX9-NEXT:    s_setpc_b64 s[30:31]
10627  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10628  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 2, i32 2>
10629  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10630  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10631  ret void
10632}
10633
; Scalar-register shuffle test, mask <1, 2, 2> over a single inline-asm-defined
; <4 x i16> input (second shuffle operand is poison). The result is widened to
; <4 x i16> with a poison last lane and consumed by an inline-asm use pinned to
; s[8:9].
; Expected code: s_lshr_b32 of s8 by 16 into a scratch register, then
; s_pack_ll_b32_b16 of that scratch with s9 into s8; s9 is left as defined. The
; per-subtarget groups differ only in the scratch register (s4 vs. s0 for
; gfx940).
10634define void @s_shuffle_v3i16_v4i16__1_2_2() {
10635; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_2_2:
10636; GFX900:       ; %bb.0:
10637; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10638; GFX900-NEXT:    ;;#ASMSTART
10639; GFX900-NEXT:    ; def s[8:9]
10640; GFX900-NEXT:    ;;#ASMEND
10641; GFX900-NEXT:    s_lshr_b32 s4, s8, 16
10642; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10643; GFX900-NEXT:    ;;#ASMSTART
10644; GFX900-NEXT:    ; use s[8:9]
10645; GFX900-NEXT:    ;;#ASMEND
10646; GFX900-NEXT:    s_setpc_b64 s[30:31]
10647;
10648; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_2_2:
10649; GFX90A:       ; %bb.0:
10650; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10651; GFX90A-NEXT:    ;;#ASMSTART
10652; GFX90A-NEXT:    ; def s[8:9]
10653; GFX90A-NEXT:    ;;#ASMEND
10654; GFX90A-NEXT:    s_lshr_b32 s4, s8, 16
10655; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10656; GFX90A-NEXT:    ;;#ASMSTART
10657; GFX90A-NEXT:    ; use s[8:9]
10658; GFX90A-NEXT:    ;;#ASMEND
10659; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10660;
10661; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_2_2:
10662; GFX940:       ; %bb.0:
10663; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10664; GFX940-NEXT:    ;;#ASMSTART
10665; GFX940-NEXT:    ; def s[8:9]
10666; GFX940-NEXT:    ;;#ASMEND
10667; GFX940-NEXT:    s_lshr_b32 s0, s8, 16
10668; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10669; GFX940-NEXT:    ;;#ASMSTART
10670; GFX940-NEXT:    ; use s[8:9]
10671; GFX940-NEXT:    ;;#ASMEND
10672; GFX940-NEXT:    s_setpc_b64 s[30:31]
10673  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10674  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 2, i32 2>
10675  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10676  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10677  ret void
10678}
10679
; SGPR shuffle, mask <2, 2, 2> over a single <4 x i16> inline-asm result
; (the second shuffle operand is poison): lane 2 is replicated into all three
; result lanes.
; One shared check group covers every RUN line; the only instruction expected
; between the two asm blocks is s_pack_ll_b32_b16 s8, s9, s9.
10680define void @s_shuffle_v3i16_v4i16__2_2_2() {
10681; GFX9-LABEL: s_shuffle_v3i16_v4i16__2_2_2:
10682; GFX9:       ; %bb.0:
10683; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10684; GFX9-NEXT:    ;;#ASMSTART
10685; GFX9-NEXT:    ; def s[8:9]
10686; GFX9-NEXT:    ;;#ASMEND
10687; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
10688; GFX9-NEXT:    ;;#ASMSTART
10689; GFX9-NEXT:    ; use s[8:9]
10690; GFX9-NEXT:    ;;#ASMEND
10691; GFX9-NEXT:    s_setpc_b64 s[30:31]
10692  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10693  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 2, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
10694  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10695  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10696  ret void
10697}
10698
; SGPR shuffle, mask <3, 2, 2> over a single <4 x i16> inline-asm result
; (the second shuffle operand is poison).
; Expected shape on every subtarget: s_lshr_b32 of s9 by 16 into a scratch
; SGPR, then s_pack_ll_b32_b16 of that scratch with s9 into s8. gfx940 uses s0
; as the scratch where gfx900/gfx90a use s4.
10699define void @s_shuffle_v3i16_v4i16__3_2_2() {
10700; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_2_2:
10701; GFX900:       ; %bb.0:
10702; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10703; GFX900-NEXT:    ;;#ASMSTART
10704; GFX900-NEXT:    ; def s[8:9]
10705; GFX900-NEXT:    ;;#ASMEND
10706; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
10707; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10708; GFX900-NEXT:    ;;#ASMSTART
10709; GFX900-NEXT:    ; use s[8:9]
10710; GFX900-NEXT:    ;;#ASMEND
10711; GFX900-NEXT:    s_setpc_b64 s[30:31]
10712;
10713; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_2_2:
10714; GFX90A:       ; %bb.0:
10715; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10716; GFX90A-NEXT:    ;;#ASMSTART
10717; GFX90A-NEXT:    ; def s[8:9]
10718; GFX90A-NEXT:    ;;#ASMEND
10719; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
10720; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10721; GFX90A-NEXT:    ;;#ASMSTART
10722; GFX90A-NEXT:    ; use s[8:9]
10723; GFX90A-NEXT:    ;;#ASMEND
10724; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10725;
10726; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_2_2:
10727; GFX940:       ; %bb.0:
10728; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10729; GFX940-NEXT:    ;;#ASMSTART
10730; GFX940-NEXT:    ; def s[8:9]
10731; GFX940-NEXT:    ;;#ASMEND
10732; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
10733; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10734; GFX940-NEXT:    ;;#ASMSTART
10735; GFX940-NEXT:    ; use s[8:9]
10736; GFX940-NEXT:    ;;#ASMEND
10737; GFX940-NEXT:    s_setpc_b64 s[30:31]
10738  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10739  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 2, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
10740  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10741  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10742  ret void
10743}
10744
; SGPR shuffle, mask <4, 2, 2> over a single <4 x i16> inline-asm result.
; Mask index 4 names lane 0 of the second shuffle operand, which is poison in
; this test, so only result lanes 1 and 2 (both lane 2 of %vec0) are defined.
; One shared check group covers every RUN line; the only instruction expected
; between the two asm blocks is s_lshl_b32 s8, s9, 16.
10745define void @s_shuffle_v3i16_v4i16__4_2_2() {
10746; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_2_2:
10747; GFX9:       ; %bb.0:
10748; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10749; GFX9-NEXT:    ;;#ASMSTART
10750; GFX9-NEXT:    ; def s[8:9]
10751; GFX9-NEXT:    ;;#ASMEND
10752; GFX9-NEXT:    s_lshl_b32 s8, s9, 16
10753; GFX9-NEXT:    ;;#ASMSTART
10754; GFX9-NEXT:    ; use s[8:9]
10755; GFX9-NEXT:    ;;#ASMEND
10756; GFX9-NEXT:    s_setpc_b64 s[30:31]
10757  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10758  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 2, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
10759  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10760  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10761  ret void
10762}
10763
; SGPR shuffle, mask <5, 2, 2> over two <4 x i16> inline-asm results:
; result lane 0 is lane 1 of %vec1 (mask index 5 = 4 + 1), lanes 1 and 2 are
; both lane 2 of %vec0.
; Expected shape on every subtarget: s_lshr_b32 by 16 on the low register of
; the asm result that is not in s[8:9], then s_pack_ll_b32_b16 of that value
; with s9 into s8. That other asm result is s[4:5] on gfx900/gfx90a and s[0:1]
; on gfx940.
10764define void @s_shuffle_v3i16_v4i16__5_2_2() {
10765; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_2_2:
10766; GFX900:       ; %bb.0:
10767; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10768; GFX900-NEXT:    ;;#ASMSTART
10769; GFX900-NEXT:    ; def s[4:5]
10770; GFX900-NEXT:    ;;#ASMEND
10771; GFX900-NEXT:    ;;#ASMSTART
10772; GFX900-NEXT:    ; def s[8:9]
10773; GFX900-NEXT:    ;;#ASMEND
10774; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
10775; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10776; GFX900-NEXT:    ;;#ASMSTART
10777; GFX900-NEXT:    ; use s[8:9]
10778; GFX900-NEXT:    ;;#ASMEND
10779; GFX900-NEXT:    s_setpc_b64 s[30:31]
10780;
10781; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_2_2:
10782; GFX90A:       ; %bb.0:
10783; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10784; GFX90A-NEXT:    ;;#ASMSTART
10785; GFX90A-NEXT:    ; def s[4:5]
10786; GFX90A-NEXT:    ;;#ASMEND
10787; GFX90A-NEXT:    ;;#ASMSTART
10788; GFX90A-NEXT:    ; def s[8:9]
10789; GFX90A-NEXT:    ;;#ASMEND
10790; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
10791; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10792; GFX90A-NEXT:    ;;#ASMSTART
10793; GFX90A-NEXT:    ; use s[8:9]
10794; GFX90A-NEXT:    ;;#ASMEND
10795; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10796;
10797; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_2_2:
10798; GFX940:       ; %bb.0:
10799; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10800; GFX940-NEXT:    ;;#ASMSTART
10801; GFX940-NEXT:    ; def s[0:1]
10802; GFX940-NEXT:    ;;#ASMEND
10803; GFX940-NEXT:    ;;#ASMSTART
10804; GFX940-NEXT:    ; def s[8:9]
10805; GFX940-NEXT:    ;;#ASMEND
10806; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
10807; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10808; GFX940-NEXT:    ;;#ASMSTART
10809; GFX940-NEXT:    ; use s[8:9]
10810; GFX940-NEXT:    ;;#ASMEND
10811; GFX940-NEXT:    s_setpc_b64 s[30:31]
10812  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10813  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10814  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
10815  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10816  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10817  ret void
10818}
10819
; SGPR shuffle, mask <6, 2, 2> over two <4 x i16> inline-asm results:
; result lane 0 is lane 2 of %vec1 (mask index 6 = 4 + 2), lanes 1 and 2 are
; both lane 2 of %vec0.
; Expected shape on every subtarget: a single s_pack_ll_b32_b16 into s8 of the
; high register of the asm result that is not in s[8:9] (s5 on gfx900/gfx90a,
; s1 on gfx940) with s9. No shift is expected.
10820define void @s_shuffle_v3i16_v4i16__6_2_2() {
10821; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_2_2:
10822; GFX900:       ; %bb.0:
10823; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10824; GFX900-NEXT:    ;;#ASMSTART
10825; GFX900-NEXT:    ; def s[8:9]
10826; GFX900-NEXT:    ;;#ASMEND
10827; GFX900-NEXT:    ;;#ASMSTART
10828; GFX900-NEXT:    ; def s[4:5]
10829; GFX900-NEXT:    ;;#ASMEND
10830; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
10831; GFX900-NEXT:    ;;#ASMSTART
10832; GFX900-NEXT:    ; use s[8:9]
10833; GFX900-NEXT:    ;;#ASMEND
10834; GFX900-NEXT:    s_setpc_b64 s[30:31]
10835;
10836; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_2_2:
10837; GFX90A:       ; %bb.0:
10838; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10839; GFX90A-NEXT:    ;;#ASMSTART
10840; GFX90A-NEXT:    ; def s[8:9]
10841; GFX90A-NEXT:    ;;#ASMEND
10842; GFX90A-NEXT:    ;;#ASMSTART
10843; GFX90A-NEXT:    ; def s[4:5]
10844; GFX90A-NEXT:    ;;#ASMEND
10845; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
10846; GFX90A-NEXT:    ;;#ASMSTART
10847; GFX90A-NEXT:    ; use s[8:9]
10848; GFX90A-NEXT:    ;;#ASMEND
10849; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10850;
10851; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_2_2:
10852; GFX940:       ; %bb.0:
10853; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10854; GFX940-NEXT:    ;;#ASMSTART
10855; GFX940-NEXT:    ; def s[8:9]
10856; GFX940-NEXT:    ;;#ASMEND
10857; GFX940-NEXT:    ;;#ASMSTART
10858; GFX940-NEXT:    ; def s[0:1]
10859; GFX940-NEXT:    ;;#ASMEND
10860; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
10861; GFX940-NEXT:    ;;#ASMSTART
10862; GFX940-NEXT:    ; use s[8:9]
10863; GFX940-NEXT:    ;;#ASMEND
10864; GFX940-NEXT:    s_setpc_b64 s[30:31]
10865  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10866  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10867  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
10868  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10869  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10870  ret void
10871}
10872
; SGPR shuffle, mask <7, 2, 2> over two <4 x i16> inline-asm results:
; result lane 0 is lane 3 of %vec1 (mask index 7 = 4 + 3), lanes 1 and 2 are
; both lane 2 of %vec0.
; Expected shape on every subtarget: s_lshr_b32 by 16 on the high register of
; the asm result that is not in s[8:9] (s5 on gfx900/gfx90a, s1 on gfx940),
; then s_pack_ll_b32_b16 of that value with s9 into s8.
10873define void @s_shuffle_v3i16_v4i16__7_2_2() {
10874; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_2:
10875; GFX900:       ; %bb.0:
10876; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10877; GFX900-NEXT:    ;;#ASMSTART
10878; GFX900-NEXT:    ; def s[4:5]
10879; GFX900-NEXT:    ;;#ASMEND
10880; GFX900-NEXT:    ;;#ASMSTART
10881; GFX900-NEXT:    ; def s[8:9]
10882; GFX900-NEXT:    ;;#ASMEND
10883; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
10884; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10885; GFX900-NEXT:    ;;#ASMSTART
10886; GFX900-NEXT:    ; use s[8:9]
10887; GFX900-NEXT:    ;;#ASMEND
10888; GFX900-NEXT:    s_setpc_b64 s[30:31]
10889;
10890; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_2:
10891; GFX90A:       ; %bb.0:
10892; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10893; GFX90A-NEXT:    ;;#ASMSTART
10894; GFX90A-NEXT:    ; def s[4:5]
10895; GFX90A-NEXT:    ;;#ASMEND
10896; GFX90A-NEXT:    ;;#ASMSTART
10897; GFX90A-NEXT:    ; def s[8:9]
10898; GFX90A-NEXT:    ;;#ASMEND
10899; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
10900; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10901; GFX90A-NEXT:    ;;#ASMSTART
10902; GFX90A-NEXT:    ; use s[8:9]
10903; GFX90A-NEXT:    ;;#ASMEND
10904; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10905;
10906; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_2:
10907; GFX940:       ; %bb.0:
10908; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10909; GFX940-NEXT:    ;;#ASMSTART
10910; GFX940-NEXT:    ; def s[0:1]
10911; GFX940-NEXT:    ;;#ASMEND
10912; GFX940-NEXT:    ;;#ASMSTART
10913; GFX940-NEXT:    ; def s[8:9]
10914; GFX940-NEXT:    ;;#ASMEND
10915; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
10916; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10917; GFX940-NEXT:    ;;#ASMSTART
10918; GFX940-NEXT:    ; use s[8:9]
10919; GFX940-NEXT:    ;;#ASMEND
10920; GFX940-NEXT:    s_setpc_b64 s[30:31]
10921  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10922  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10923  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
10924  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10925  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10926  ret void
10927}
10928
; SGPR shuffle, mask <7, poison, 2> over two <4 x i16> inline-asm results:
; result lane 0 is lane 3 of %vec1 (mask index 7 = 4 + 3), lane 1 is
; unspecified, lane 2 is lane 2 of %vec0.
; Expected shape on every subtarget: a single s_lshr_b32 by 16 into s8 from
; the high register of the asm result that is not in s[8:9] (s5 on
; gfx900/gfx90a, s1 on gfx940). No pack instruction is expected.
10929define void @s_shuffle_v3i16_v4i16__7_u_2() {
10930; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_2:
10931; GFX900:       ; %bb.0:
10932; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10933; GFX900-NEXT:    ;;#ASMSTART
10934; GFX900-NEXT:    ; def s[8:9]
10935; GFX900-NEXT:    ;;#ASMEND
10936; GFX900-NEXT:    ;;#ASMSTART
10937; GFX900-NEXT:    ; def s[4:5]
10938; GFX900-NEXT:    ;;#ASMEND
10939; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
10940; GFX900-NEXT:    ;;#ASMSTART
10941; GFX900-NEXT:    ; use s[8:9]
10942; GFX900-NEXT:    ;;#ASMEND
10943; GFX900-NEXT:    s_setpc_b64 s[30:31]
10944;
10945; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_2:
10946; GFX90A:       ; %bb.0:
10947; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10948; GFX90A-NEXT:    ;;#ASMSTART
10949; GFX90A-NEXT:    ; def s[8:9]
10950; GFX90A-NEXT:    ;;#ASMEND
10951; GFX90A-NEXT:    ;;#ASMSTART
10952; GFX90A-NEXT:    ; def s[4:5]
10953; GFX90A-NEXT:    ;;#ASMEND
10954; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
10955; GFX90A-NEXT:    ;;#ASMSTART
10956; GFX90A-NEXT:    ; use s[8:9]
10957; GFX90A-NEXT:    ;;#ASMEND
10958; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10959;
10960; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_2:
10961; GFX940:       ; %bb.0:
10962; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10963; GFX940-NEXT:    ;;#ASMSTART
10964; GFX940-NEXT:    ; def s[8:9]
10965; GFX940-NEXT:    ;;#ASMEND
10966; GFX940-NEXT:    ;;#ASMSTART
10967; GFX940-NEXT:    ; def s[0:1]
10968; GFX940-NEXT:    ;;#ASMEND
10969; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
10970; GFX940-NEXT:    ;;#ASMSTART
10971; GFX940-NEXT:    ; use s[8:9]
10972; GFX940-NEXT:    ;;#ASMEND
10973; GFX940-NEXT:    s_setpc_b64 s[30:31]
10974  %vec0 = call <4 x i16> asm "; def $0", "=s"()
10975  %vec1 = call <4 x i16> asm "; def $0", "=s"()
10976  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
10977  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10978  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
10979  ret void
10980}
10981
; SGPR shuffle, mask <7, 0, 2> over two <4 x i16> inline-asm results:
; result lane 0 is lane 3 of %vec1 (mask index 7 = 4 + 3), lanes 1 and 2 are
; lanes 0 and 2 of %vec0.
; Expected shape on every subtarget: s_lshr_b32 by 16 on the high register of
; the asm result that is not in s[8:9] (s5 on gfx900/gfx90a, s1 on gfx940),
; then s_pack_ll_b32_b16 of that value with s8 into s8.
10982define void @s_shuffle_v3i16_v4i16__7_0_2() {
10983; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_2:
10984; GFX900:       ; %bb.0:
10985; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10986; GFX900-NEXT:    ;;#ASMSTART
10987; GFX900-NEXT:    ; def s[4:5]
10988; GFX900-NEXT:    ;;#ASMEND
10989; GFX900-NEXT:    ;;#ASMSTART
10990; GFX900-NEXT:    ; def s[8:9]
10991; GFX900-NEXT:    ;;#ASMEND
10992; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
10993; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s8
10994; GFX900-NEXT:    ;;#ASMSTART
10995; GFX900-NEXT:    ; use s[8:9]
10996; GFX900-NEXT:    ;;#ASMEND
10997; GFX900-NEXT:    s_setpc_b64 s[30:31]
10998;
10999; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_2:
11000; GFX90A:       ; %bb.0:
11001; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11002; GFX90A-NEXT:    ;;#ASMSTART
11003; GFX90A-NEXT:    ; def s[4:5]
11004; GFX90A-NEXT:    ;;#ASMEND
11005; GFX90A-NEXT:    ;;#ASMSTART
11006; GFX90A-NEXT:    ; def s[8:9]
11007; GFX90A-NEXT:    ;;#ASMEND
11008; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
11009; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s8
11010; GFX90A-NEXT:    ;;#ASMSTART
11011; GFX90A-NEXT:    ; use s[8:9]
11012; GFX90A-NEXT:    ;;#ASMEND
11013; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11014;
11015; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_2:
11016; GFX940:       ; %bb.0:
11017; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11018; GFX940-NEXT:    ;;#ASMSTART
11019; GFX940-NEXT:    ; def s[0:1]
11020; GFX940-NEXT:    ;;#ASMEND
11021; GFX940-NEXT:    ;;#ASMSTART
11022; GFX940-NEXT:    ; def s[8:9]
11023; GFX940-NEXT:    ;;#ASMEND
11024; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
11025; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s8
11026; GFX940-NEXT:    ;;#ASMSTART
11027; GFX940-NEXT:    ; use s[8:9]
11028; GFX940-NEXT:    ;;#ASMEND
11029; GFX940-NEXT:    s_setpc_b64 s[30:31]
11030  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11031  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11032  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11033  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11034  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11035  ret void
11036}
11037
; SGPR shuffle, mask <7, 1, 2> over two <4 x i16> inline-asm results:
; result lane 0 is lane 3 of %vec1 (mask index 7 = 4 + 3), lanes 1 and 2 are
; lanes 1 and 2 of %vec0.
; Expected shape on every subtarget: a single s_pack_hh_b32_b16 into s8 of the
; high register of the asm result that is not in s[8:9] (s5 on gfx900/gfx90a,
; s1 on gfx940) with s8. No separate shift is expected.
11038define void @s_shuffle_v3i16_v4i16__7_1_2() {
11039; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_2:
11040; GFX900:       ; %bb.0:
11041; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11042; GFX900-NEXT:    ;;#ASMSTART
11043; GFX900-NEXT:    ; def s[8:9]
11044; GFX900-NEXT:    ;;#ASMEND
11045; GFX900-NEXT:    ;;#ASMSTART
11046; GFX900-NEXT:    ; def s[4:5]
11047; GFX900-NEXT:    ;;#ASMEND
11048; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s8
11049; GFX900-NEXT:    ;;#ASMSTART
11050; GFX900-NEXT:    ; use s[8:9]
11051; GFX900-NEXT:    ;;#ASMEND
11052; GFX900-NEXT:    s_setpc_b64 s[30:31]
11053;
11054; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_2:
11055; GFX90A:       ; %bb.0:
11056; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11057; GFX90A-NEXT:    ;;#ASMSTART
11058; GFX90A-NEXT:    ; def s[8:9]
11059; GFX90A-NEXT:    ;;#ASMEND
11060; GFX90A-NEXT:    ;;#ASMSTART
11061; GFX90A-NEXT:    ; def s[4:5]
11062; GFX90A-NEXT:    ;;#ASMEND
11063; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s8
11064; GFX90A-NEXT:    ;;#ASMSTART
11065; GFX90A-NEXT:    ; use s[8:9]
11066; GFX90A-NEXT:    ;;#ASMEND
11067; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11068;
11069; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_2:
11070; GFX940:       ; %bb.0:
11071; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11072; GFX940-NEXT:    ;;#ASMSTART
11073; GFX940-NEXT:    ; def s[8:9]
11074; GFX940-NEXT:    ;;#ASMEND
11075; GFX940-NEXT:    ;;#ASMSTART
11076; GFX940-NEXT:    ; def s[0:1]
11077; GFX940-NEXT:    ;;#ASMEND
11078; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s8
11079; GFX940-NEXT:    ;;#ASMSTART
11080; GFX940-NEXT:    ; use s[8:9]
11081; GFX940-NEXT:    ;;#ASMEND
11082; GFX940-NEXT:    s_setpc_b64 s[30:31]
11083  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11084  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11085  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11086  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11087  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11088  ret void
11089}
11090
; SGPR shuffle, mask <7, 3, 2> over two <4 x i16> inline-asm results:
; result lane 0 is lane 3 of %vec1 (mask index 7 = 4 + 3), lanes 1 and 2 are
; lanes 3 and 2 of %vec0.
; Expected shape on every subtarget: a single s_pack_hh_b32_b16 into s8 of the
; high register of the asm result that is not in s[8:9] (s5 on gfx900/gfx90a,
; s1 on gfx940) with s9.
11091define void @s_shuffle_v3i16_v4i16__7_3_2() {
11092; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_2:
11093; GFX900:       ; %bb.0:
11094; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11095; GFX900-NEXT:    ;;#ASMSTART
11096; GFX900-NEXT:    ; def s[8:9]
11097; GFX900-NEXT:    ;;#ASMEND
11098; GFX900-NEXT:    ;;#ASMSTART
11099; GFX900-NEXT:    ; def s[4:5]
11100; GFX900-NEXT:    ;;#ASMEND
11101; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s9
11102; GFX900-NEXT:    ;;#ASMSTART
11103; GFX900-NEXT:    ; use s[8:9]
11104; GFX900-NEXT:    ;;#ASMEND
11105; GFX900-NEXT:    s_setpc_b64 s[30:31]
11106;
11107; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_2:
11108; GFX90A:       ; %bb.0:
11109; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11110; GFX90A-NEXT:    ;;#ASMSTART
11111; GFX90A-NEXT:    ; def s[8:9]
11112; GFX90A-NEXT:    ;;#ASMEND
11113; GFX90A-NEXT:    ;;#ASMSTART
11114; GFX90A-NEXT:    ; def s[4:5]
11115; GFX90A-NEXT:    ;;#ASMEND
11116; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s9
11117; GFX90A-NEXT:    ;;#ASMSTART
11118; GFX90A-NEXT:    ; use s[8:9]
11119; GFX90A-NEXT:    ;;#ASMEND
11120; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11121;
11122; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_2:
11123; GFX940:       ; %bb.0:
11124; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11125; GFX940-NEXT:    ;;#ASMSTART
11126; GFX940-NEXT:    ; def s[8:9]
11127; GFX940-NEXT:    ;;#ASMEND
11128; GFX940-NEXT:    ;;#ASMSTART
11129; GFX940-NEXT:    ; def s[0:1]
11130; GFX940-NEXT:    ;;#ASMEND
11131; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s9
11132; GFX940-NEXT:    ;;#ASMSTART
11133; GFX940-NEXT:    ; use s[8:9]
11134; GFX940-NEXT:    ;;#ASMEND
11135; GFX940-NEXT:    s_setpc_b64 s[30:31]
11136  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11137  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11138  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11139  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11140  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11141  ret void
11142}
11143
; SGPR shuffle, mask <7, 4, 2> over two <4 x i16> inline-asm results:
; result lanes 0 and 1 are lanes 3 and 0 of %vec1 (mask indices 7 and 4),
; lane 2 is lane 2 of %vec0.
; Expected shape on every subtarget: s_lshr_b32 by 16, in place, on the high
; register of the asm result that is not in s[8:9], then s_pack_ll_b32_b16 of
; that register with the same pair's low register into s8. The pair is s[4:5]
; on gfx900/gfx90a and s[0:1] on gfx940.
11144define void @s_shuffle_v3i16_v4i16__7_4_2() {
11145; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_2:
11146; GFX900:       ; %bb.0:
11147; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11148; GFX900-NEXT:    ;;#ASMSTART
11149; GFX900-NEXT:    ; def s[4:5]
11150; GFX900-NEXT:    ;;#ASMEND
11151; GFX900-NEXT:    ;;#ASMSTART
11152; GFX900-NEXT:    ; def s[8:9]
11153; GFX900-NEXT:    ;;#ASMEND
11154; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
11155; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11156; GFX900-NEXT:    ;;#ASMSTART
11157; GFX900-NEXT:    ; use s[8:9]
11158; GFX900-NEXT:    ;;#ASMEND
11159; GFX900-NEXT:    s_setpc_b64 s[30:31]
11160;
11161; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_2:
11162; GFX90A:       ; %bb.0:
11163; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11164; GFX90A-NEXT:    ;;#ASMSTART
11165; GFX90A-NEXT:    ; def s[4:5]
11166; GFX90A-NEXT:    ;;#ASMEND
11167; GFX90A-NEXT:    ;;#ASMSTART
11168; GFX90A-NEXT:    ; def s[8:9]
11169; GFX90A-NEXT:    ;;#ASMEND
11170; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
11171; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11172; GFX90A-NEXT:    ;;#ASMSTART
11173; GFX90A-NEXT:    ; use s[8:9]
11174; GFX90A-NEXT:    ;;#ASMEND
11175; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11176;
11177; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_2:
11178; GFX940:       ; %bb.0:
11179; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11180; GFX940-NEXT:    ;;#ASMSTART
11181; GFX940-NEXT:    ; def s[0:1]
11182; GFX940-NEXT:    ;;#ASMEND
11183; GFX940-NEXT:    ;;#ASMSTART
11184; GFX940-NEXT:    ; def s[8:9]
11185; GFX940-NEXT:    ;;#ASMEND
11186; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
11187; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
11188; GFX940-NEXT:    ;;#ASMSTART
11189; GFX940-NEXT:    ; use s[8:9]
11190; GFX940-NEXT:    ;;#ASMEND
11191; GFX940-NEXT:    s_setpc_b64 s[30:31]
11192  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11193  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11194  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11195  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11196  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11197  ret void
11198}
11199
; SGPR shuffle, mask <7, 5, 2> over two <4 x i16> inline-asm results:
; result lanes 0 and 1 are lanes 3 and 1 of %vec1 (mask indices 7 and 5),
; lane 2 is lane 2 of %vec0.
; Expected shape on every subtarget: a single s_pack_hh_b32_b16 into s8 of the
; high and low registers of the asm result that is not in s[8:9] (s5, s4 on
; gfx900/gfx90a; s1, s0 on gfx940).
11200define void @s_shuffle_v3i16_v4i16__7_5_2() {
11201; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_2:
11202; GFX900:       ; %bb.0:
11203; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11204; GFX900-NEXT:    ;;#ASMSTART
11205; GFX900-NEXT:    ; def s[8:9]
11206; GFX900-NEXT:    ;;#ASMEND
11207; GFX900-NEXT:    ;;#ASMSTART
11208; GFX900-NEXT:    ; def s[4:5]
11209; GFX900-NEXT:    ;;#ASMEND
11210; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
11211; GFX900-NEXT:    ;;#ASMSTART
11212; GFX900-NEXT:    ; use s[8:9]
11213; GFX900-NEXT:    ;;#ASMEND
11214; GFX900-NEXT:    s_setpc_b64 s[30:31]
11215;
11216; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_2:
11217; GFX90A:       ; %bb.0:
11218; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11219; GFX90A-NEXT:    ;;#ASMSTART
11220; GFX90A-NEXT:    ; def s[8:9]
11221; GFX90A-NEXT:    ;;#ASMEND
11222; GFX90A-NEXT:    ;;#ASMSTART
11223; GFX90A-NEXT:    ; def s[4:5]
11224; GFX90A-NEXT:    ;;#ASMEND
11225; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
11226; GFX90A-NEXT:    ;;#ASMSTART
11227; GFX90A-NEXT:    ; use s[8:9]
11228; GFX90A-NEXT:    ;;#ASMEND
11229; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11230;
11231; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_2:
11232; GFX940:       ; %bb.0:
11233; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11234; GFX940-NEXT:    ;;#ASMSTART
11235; GFX940-NEXT:    ; def s[8:9]
11236; GFX940-NEXT:    ;;#ASMEND
11237; GFX940-NEXT:    ;;#ASMSTART
11238; GFX940-NEXT:    ; def s[0:1]
11239; GFX940-NEXT:    ;;#ASMEND
11240; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
11241; GFX940-NEXT:    ;;#ASMSTART
11242; GFX940-NEXT:    ; use s[8:9]
11243; GFX940-NEXT:    ;;#ASMEND
11244; GFX940-NEXT:    s_setpc_b64 s[30:31]
11245  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11246  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11247  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11248  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11249  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11250  ret void
11251}
11252
; SGPR shuffle, mask <7, 6, 2> over two <4 x i16> inline-asm results:
; result lanes 0 and 1 are lanes 3 and 2 of %vec1 (mask indices 7 and 6),
; lane 2 is lane 2 of %vec0.
; Expected shape on every subtarget: s_lshr_b32 by 16 of the high register of
; the asm result that is not in s[8:9] into that pair's low register, then
; s_pack_ll_b32_b16 of the shifted value with the unshifted high register into
; s8. The pair is s[4:5] on gfx900/gfx90a and s[0:1] on gfx940.
11253define void @s_shuffle_v3i16_v4i16__7_6_2() {
11254; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_2:
11255; GFX900:       ; %bb.0:
11256; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11257; GFX900-NEXT:    ;;#ASMSTART
11258; GFX900-NEXT:    ; def s[4:5]
11259; GFX900-NEXT:    ;;#ASMEND
11260; GFX900-NEXT:    ;;#ASMSTART
11261; GFX900-NEXT:    ; def s[8:9]
11262; GFX900-NEXT:    ;;#ASMEND
11263; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
11264; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
11265; GFX900-NEXT:    ;;#ASMSTART
11266; GFX900-NEXT:    ; use s[8:9]
11267; GFX900-NEXT:    ;;#ASMEND
11268; GFX900-NEXT:    s_setpc_b64 s[30:31]
11269;
11270; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_2:
11271; GFX90A:       ; %bb.0:
11272; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11273; GFX90A-NEXT:    ;;#ASMSTART
11274; GFX90A-NEXT:    ; def s[4:5]
11275; GFX90A-NEXT:    ;;#ASMEND
11276; GFX90A-NEXT:    ;;#ASMSTART
11277; GFX90A-NEXT:    ; def s[8:9]
11278; GFX90A-NEXT:    ;;#ASMEND
11279; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
11280; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
11281; GFX90A-NEXT:    ;;#ASMSTART
11282; GFX90A-NEXT:    ; use s[8:9]
11283; GFX90A-NEXT:    ;;#ASMEND
11284; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11285;
11286; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_2:
11287; GFX940:       ; %bb.0:
11288; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11289; GFX940-NEXT:    ;;#ASMSTART
11290; GFX940-NEXT:    ; def s[0:1]
11291; GFX940-NEXT:    ;;#ASMEND
11292; GFX940-NEXT:    ;;#ASMSTART
11293; GFX940-NEXT:    ; def s[8:9]
11294; GFX940-NEXT:    ;;#ASMEND
11295; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
11296; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
11297; GFX940-NEXT:    ;;#ASMSTART
11298; GFX940-NEXT:    ; use s[8:9]
11299; GFX940-NEXT:    ;;#ASMEND
11300; GFX940-NEXT:    s_setpc_b64 s[30:31]
11301  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11302  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11303  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11304  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11305  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11306  ret void
11307}
11308
; SGPR shuffle, mask <poison, 3, 3> over a single <4 x i16> inline-asm result
; (the second shuffle operand is poison): lane 0 is unspecified, lanes 1 and 2
; are both lane 3 of %vec0.
; Here the asm result is not placed in s[8:9] (s[4:5] on gfx900/gfx90a, s[0:1]
; on gfx940). Expected shape: s_lshr_b32 by 16 of the pair's high register
; into s9, then s_mov_b32 of that same high register into s8.
11309define void @s_shuffle_v3i16_v4i16__u_3_3() {
11310; GFX900-LABEL: s_shuffle_v3i16_v4i16__u_3_3:
11311; GFX900:       ; %bb.0:
11312; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11313; GFX900-NEXT:    ;;#ASMSTART
11314; GFX900-NEXT:    ; def s[4:5]
11315; GFX900-NEXT:    ;;#ASMEND
11316; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11317; GFX900-NEXT:    s_mov_b32 s8, s5
11318; GFX900-NEXT:    ;;#ASMSTART
11319; GFX900-NEXT:    ; use s[8:9]
11320; GFX900-NEXT:    ;;#ASMEND
11321; GFX900-NEXT:    s_setpc_b64 s[30:31]
11322;
11323; GFX90A-LABEL: s_shuffle_v3i16_v4i16__u_3_3:
11324; GFX90A:       ; %bb.0:
11325; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11326; GFX90A-NEXT:    ;;#ASMSTART
11327; GFX90A-NEXT:    ; def s[4:5]
11328; GFX90A-NEXT:    ;;#ASMEND
11329; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11330; GFX90A-NEXT:    s_mov_b32 s8, s5
11331; GFX90A-NEXT:    ;;#ASMSTART
11332; GFX90A-NEXT:    ; use s[8:9]
11333; GFX90A-NEXT:    ;;#ASMEND
11334; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11335;
11336; GFX940-LABEL: s_shuffle_v3i16_v4i16__u_3_3:
11337; GFX940:       ; %bb.0:
11338; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11339; GFX940-NEXT:    ;;#ASMSTART
11340; GFX940-NEXT:    ; def s[0:1]
11341; GFX940-NEXT:    ;;#ASMEND
11342; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11343; GFX940-NEXT:    s_mov_b32 s8, s1
11344; GFX940-NEXT:    ;;#ASMSTART
11345; GFX940-NEXT:    ; use s[8:9]
11346; GFX940-NEXT:    ;;#ASMEND
11347; GFX940-NEXT:    s_setpc_b64 s[30:31]
11348  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11349  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 3, i32 3>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11350  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11351  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11352  ret void
11353}
11354
; SGPR shuffle, mask <0, 3, 3> over a single <4 x i16> inline-asm result
; (the second shuffle operand is poison).
; Here the asm result is not placed in s[8:9] (s[4:5] on gfx900/gfx90a, s[0:1]
; on gfx940). Expected shape: s_pack_lh_b32_b16 of the pair's low and high
; registers into s8, then s_lshr_b32 by 16 of the high register into s9.
11355define void @s_shuffle_v3i16_v4i16__0_3_3() {
11356; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_3_3:
11357; GFX900:       ; %bb.0:
11358; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11359; GFX900-NEXT:    ;;#ASMSTART
11360; GFX900-NEXT:    ; def s[4:5]
11361; GFX900-NEXT:    ;;#ASMEND
11362; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
11363; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11364; GFX900-NEXT:    ;;#ASMSTART
11365; GFX900-NEXT:    ; use s[8:9]
11366; GFX900-NEXT:    ;;#ASMEND
11367; GFX900-NEXT:    s_setpc_b64 s[30:31]
11368;
11369; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_3_3:
11370; GFX90A:       ; %bb.0:
11371; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11372; GFX90A-NEXT:    ;;#ASMSTART
11373; GFX90A-NEXT:    ; def s[4:5]
11374; GFX90A-NEXT:    ;;#ASMEND
11375; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
11376; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11377; GFX90A-NEXT:    ;;#ASMSTART
11378; GFX90A-NEXT:    ; use s[8:9]
11379; GFX90A-NEXT:    ;;#ASMEND
11380; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11381;
11382; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_3_3:
11383; GFX940:       ; %bb.0:
11384; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11385; GFX940-NEXT:    ;;#ASMSTART
11386; GFX940-NEXT:    ; def s[0:1]
11387; GFX940-NEXT:    ;;#ASMEND
11388; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s1
11389; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11390; GFX940-NEXT:    ;;#ASMSTART
11391; GFX940-NEXT:    ; use s[8:9]
11392; GFX940-NEXT:    ;;#ASMEND
11393; GFX940-NEXT:    s_setpc_b64 s[30:31]
11394  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11395  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 3, i32 3>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11396  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11397  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11398  ret void
11399}
11400
; SGPR shuffle, mask <1, 3, 3> over a single <4 x i16> inline-asm result
; (the second shuffle operand is poison).
; Here the asm result is not placed in s[8:9] (s[4:5] on gfx900/gfx90a, s[0:1]
; on gfx940). Expected shape: s_pack_hh_b32_b16 of the pair's low and high
; registers into s8, then s_lshr_b32 by 16 of the high register into s9.
11401define void @s_shuffle_v3i16_v4i16__1_3_3() {
11402; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_3_3:
11403; GFX900:       ; %bb.0:
11404; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11405; GFX900-NEXT:    ;;#ASMSTART
11406; GFX900-NEXT:    ; def s[4:5]
11407; GFX900-NEXT:    ;;#ASMEND
11408; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
11409; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11410; GFX900-NEXT:    ;;#ASMSTART
11411; GFX900-NEXT:    ; use s[8:9]
11412; GFX900-NEXT:    ;;#ASMEND
11413; GFX900-NEXT:    s_setpc_b64 s[30:31]
11414;
11415; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_3_3:
11416; GFX90A:       ; %bb.0:
11417; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11418; GFX90A-NEXT:    ;;#ASMSTART
11419; GFX90A-NEXT:    ; def s[4:5]
11420; GFX90A-NEXT:    ;;#ASMEND
11421; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
11422; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11423; GFX90A-NEXT:    ;;#ASMSTART
11424; GFX90A-NEXT:    ; use s[8:9]
11425; GFX90A-NEXT:    ;;#ASMEND
11426; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11427;
11428; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_3_3:
11429; GFX940:       ; %bb.0:
11430; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11431; GFX940-NEXT:    ;;#ASMSTART
11432; GFX940-NEXT:    ; def s[0:1]
11433; GFX940-NEXT:    ;;#ASMEND
11434; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s1
11435; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11436; GFX940-NEXT:    ;;#ASMSTART
11437; GFX940-NEXT:    ; use s[8:9]
11438; GFX940-NEXT:    ;;#ASMEND
11439; GFX940-NEXT:    s_setpc_b64 s[30:31]
11440  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11441  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 3, i32 3>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11442  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11443  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11444  ret void
11445}
11446
; SGPR shuffle, mask <2, 3, 3> over a single <4 x i16> inline-asm result
; (the second shuffle operand is poison): result lanes 0 and 1 are lanes 2 and
; 3 of %vec0, and lane 2 repeats lane 3.
; Here the asm result is not placed in s[8:9] (s[4:5] on gfx900/gfx90a, s[0:1]
; on gfx940). Expected shape: s_lshr_b32 by 16 of the pair's high register
; into s9, then s_mov_b32 of that same high register into s8.
11447define void @s_shuffle_v3i16_v4i16__2_3_3() {
11448; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_3_3:
11449; GFX900:       ; %bb.0:
11450; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11451; GFX900-NEXT:    ;;#ASMSTART
11452; GFX900-NEXT:    ; def s[4:5]
11453; GFX900-NEXT:    ;;#ASMEND
11454; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11455; GFX900-NEXT:    s_mov_b32 s8, s5
11456; GFX900-NEXT:    ;;#ASMSTART
11457; GFX900-NEXT:    ; use s[8:9]
11458; GFX900-NEXT:    ;;#ASMEND
11459; GFX900-NEXT:    s_setpc_b64 s[30:31]
11460;
11461; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_3_3:
11462; GFX90A:       ; %bb.0:
11463; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11464; GFX90A-NEXT:    ;;#ASMSTART
11465; GFX90A-NEXT:    ; def s[4:5]
11466; GFX90A-NEXT:    ;;#ASMEND
11467; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11468; GFX90A-NEXT:    s_mov_b32 s8, s5
11469; GFX90A-NEXT:    ;;#ASMSTART
11470; GFX90A-NEXT:    ; use s[8:9]
11471; GFX90A-NEXT:    ;;#ASMEND
11472; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11473;
11474; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_3_3:
11475; GFX940:       ; %bb.0:
11476; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11477; GFX940-NEXT:    ;;#ASMSTART
11478; GFX940-NEXT:    ; def s[0:1]
11479; GFX940-NEXT:    ;;#ASMEND
11480; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11481; GFX940-NEXT:    s_mov_b32 s8, s1
11482; GFX940-NEXT:    ;;#ASMSTART
11483; GFX940-NEXT:    ; use s[8:9]
11484; GFX940-NEXT:    ;;#ASMEND
11485; GFX940-NEXT:    s_setpc_b64 s[30:31]
11486  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11487  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 3, i32 3>
; Widen back to <4 x i16> (lane 3 poison); this is the value passed to the
; asm operand pinned to s[8:9].
11488  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11489  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11490  ret void
11491}
11492
; Scalar-register shuffle test, mask <3, 3, 3>: every result lane is %vec0[3].
; The assertions expect s9 to be the input's second register shifted right by
; 16 and s8 to be an s_pack_hh_b32_b16 of that register with itself (input in
; s[4:5] on gfx900/gfx90a, s[0:1] on gfx940).
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11493define void @s_shuffle_v3i16_v4i16__3_3_3() {
11494; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_3_3:
11495; GFX900:       ; %bb.0:
11496; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11497; GFX900-NEXT:    ;;#ASMSTART
11498; GFX900-NEXT:    ; def s[4:5]
11499; GFX900-NEXT:    ;;#ASMEND
11500; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11501; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
11502; GFX900-NEXT:    ;;#ASMSTART
11503; GFX900-NEXT:    ; use s[8:9]
11504; GFX900-NEXT:    ;;#ASMEND
11505; GFX900-NEXT:    s_setpc_b64 s[30:31]
11506;
11507; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_3_3:
11508; GFX90A:       ; %bb.0:
11509; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11510; GFX90A-NEXT:    ;;#ASMSTART
11511; GFX90A-NEXT:    ; def s[4:5]
11512; GFX90A-NEXT:    ;;#ASMEND
11513; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11514; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s5
11515; GFX90A-NEXT:    ;;#ASMSTART
11516; GFX90A-NEXT:    ; use s[8:9]
11517; GFX90A-NEXT:    ;;#ASMEND
11518; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11519;
11520; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_3_3:
11521; GFX940:       ; %bb.0:
11522; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11523; GFX940-NEXT:    ;;#ASMSTART
11524; GFX940-NEXT:    ; def s[0:1]
11525; GFX940-NEXT:    ;;#ASMEND
11526; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11527; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s1
11528; GFX940-NEXT:    ;;#ASMSTART
11529; GFX940-NEXT:    ; use s[8:9]
11530; GFX940-NEXT:    ;;#ASMEND
11531; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The input is defined by inline asm with an "=s" output constraint; the
  ; second shuffle operand is poison and is never selected by this mask.
11532  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11533  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 3, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
11534  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11535  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11536  ret void
11537}
11538
; Scalar-register shuffle test, mask <4, 3, 3>. Mask index 4 names lane 0 of
; the second shuffle operand, which is poison in this function, so only lanes
; 1 and 2 of the result (both %vec0[3]) carry defined data.
; The assertions expect s8 to be a plain copy of the input's second register
; and s9 to be that register shifted right by 16 (input in s[4:5] on
; gfx900/gfx90a, s[0:1] on gfx940).
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11539define void @s_shuffle_v3i16_v4i16__4_3_3() {
11540; GFX900-LABEL: s_shuffle_v3i16_v4i16__4_3_3:
11541; GFX900:       ; %bb.0:
11542; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11543; GFX900-NEXT:    ;;#ASMSTART
11544; GFX900-NEXT:    ; def s[4:5]
11545; GFX900-NEXT:    ;;#ASMEND
11546; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11547; GFX900-NEXT:    s_mov_b32 s8, s5
11548; GFX900-NEXT:    ;;#ASMSTART
11549; GFX900-NEXT:    ; use s[8:9]
11550; GFX900-NEXT:    ;;#ASMEND
11551; GFX900-NEXT:    s_setpc_b64 s[30:31]
11552;
11553; GFX90A-LABEL: s_shuffle_v3i16_v4i16__4_3_3:
11554; GFX90A:       ; %bb.0:
11555; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11556; GFX90A-NEXT:    ;;#ASMSTART
11557; GFX90A-NEXT:    ; def s[4:5]
11558; GFX90A-NEXT:    ;;#ASMEND
11559; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11560; GFX90A-NEXT:    s_mov_b32 s8, s5
11561; GFX90A-NEXT:    ;;#ASMSTART
11562; GFX90A-NEXT:    ; use s[8:9]
11563; GFX90A-NEXT:    ;;#ASMEND
11564; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11565;
11566; GFX940-LABEL: s_shuffle_v3i16_v4i16__4_3_3:
11567; GFX940:       ; %bb.0:
11568; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11569; GFX940-NEXT:    ;;#ASMSTART
11570; GFX940-NEXT:    ; def s[0:1]
11571; GFX940-NEXT:    ;;#ASMEND
11572; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11573; GFX940-NEXT:    s_mov_b32 s8, s1
11574; GFX940-NEXT:    ;;#ASMSTART
11575; GFX940-NEXT:    ; use s[8:9]
11576; GFX940-NEXT:    ;;#ASMEND
11577; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The input is defined by inline asm with an "=s" output constraint. There
  ; is no %vec1 here: the second shuffle operand is poison.
11578  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11579  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 3, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
11580  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11581  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11582  ret void
11583}
11584
; Scalar-register two-input shuffle test, mask <5, 3, 3>: the result lanes are
; %vec1[1], %vec0[3], %vec0[3].
; The assertions expect a single s_pack_hh_b32_b16 into s8 (operands s6, s5 on
; gfx900/gfx90a; s2, s1 on gfx940) and s9 to be s5 (s1 on gfx940) shifted
; right by 16.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11585define void @s_shuffle_v3i16_v4i16__5_3_3() {
11586; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_3_3:
11587; GFX900:       ; %bb.0:
11588; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11589; GFX900-NEXT:    ;;#ASMSTART
11590; GFX900-NEXT:    ; def s[4:5]
11591; GFX900-NEXT:    ;;#ASMEND
11592; GFX900-NEXT:    ;;#ASMSTART
11593; GFX900-NEXT:    ; def s[6:7]
11594; GFX900-NEXT:    ;;#ASMEND
11595; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s6, s5
11596; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11597; GFX900-NEXT:    ;;#ASMSTART
11598; GFX900-NEXT:    ; use s[8:9]
11599; GFX900-NEXT:    ;;#ASMEND
11600; GFX900-NEXT:    s_setpc_b64 s[30:31]
11601;
11602; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_3_3:
11603; GFX90A:       ; %bb.0:
11604; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11605; GFX90A-NEXT:    ;;#ASMSTART
11606; GFX90A-NEXT:    ; def s[4:5]
11607; GFX90A-NEXT:    ;;#ASMEND
11608; GFX90A-NEXT:    ;;#ASMSTART
11609; GFX90A-NEXT:    ; def s[6:7]
11610; GFX90A-NEXT:    ;;#ASMEND
11611; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s6, s5
11612; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11613; GFX90A-NEXT:    ;;#ASMSTART
11614; GFX90A-NEXT:    ; use s[8:9]
11615; GFX90A-NEXT:    ;;#ASMEND
11616; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11617;
11618; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_3_3:
11619; GFX940:       ; %bb.0:
11620; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11621; GFX940-NEXT:    ;;#ASMSTART
11622; GFX940-NEXT:    ; def s[0:1]
11623; GFX940-NEXT:    ;;#ASMEND
11624; GFX940-NEXT:    ;;#ASMSTART
11625; GFX940-NEXT:    ; def s[2:3]
11626; GFX940-NEXT:    ;;#ASMEND
11627; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s2, s1
11628; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11629; GFX940-NEXT:    ;;#ASMSTART
11630; GFX940-NEXT:    ; use s[8:9]
11631; GFX940-NEXT:    ;;#ASMEND
11632; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
11633  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11634  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11635  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
11636  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11637  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11638  ret void
11639}
11640
; Scalar-register two-input shuffle test, mask <6, 3, 3>: the result lanes are
; %vec1[2], %vec0[3], %vec0[3].
; The assertions expect a single s_pack_lh_b32_b16 into s8 (operands s7, s5 on
; gfx900/gfx90a; s3, s1 on gfx940) and s9 to be s5 (s1 on gfx940) shifted
; right by 16.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11641define void @s_shuffle_v3i16_v4i16__6_3_3() {
11642; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_3_3:
11643; GFX900:       ; %bb.0:
11644; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11645; GFX900-NEXT:    ;;#ASMSTART
11646; GFX900-NEXT:    ; def s[4:5]
11647; GFX900-NEXT:    ;;#ASMEND
11648; GFX900-NEXT:    ;;#ASMSTART
11649; GFX900-NEXT:    ; def s[6:7]
11650; GFX900-NEXT:    ;;#ASMEND
11651; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s5
11652; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11653; GFX900-NEXT:    ;;#ASMSTART
11654; GFX900-NEXT:    ; use s[8:9]
11655; GFX900-NEXT:    ;;#ASMEND
11656; GFX900-NEXT:    s_setpc_b64 s[30:31]
11657;
11658; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_3_3:
11659; GFX90A:       ; %bb.0:
11660; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11661; GFX90A-NEXT:    ;;#ASMSTART
11662; GFX90A-NEXT:    ; def s[4:5]
11663; GFX90A-NEXT:    ;;#ASMEND
11664; GFX90A-NEXT:    ;;#ASMSTART
11665; GFX90A-NEXT:    ; def s[6:7]
11666; GFX90A-NEXT:    ;;#ASMEND
11667; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s5
11668; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11669; GFX90A-NEXT:    ;;#ASMSTART
11670; GFX90A-NEXT:    ; use s[8:9]
11671; GFX90A-NEXT:    ;;#ASMEND
11672; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11673;
11674; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_3_3:
11675; GFX940:       ; %bb.0:
11676; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11677; GFX940-NEXT:    ;;#ASMSTART
11678; GFX940-NEXT:    ; def s[0:1]
11679; GFX940-NEXT:    ;;#ASMEND
11680; GFX940-NEXT:    ;;#ASMSTART
11681; GFX940-NEXT:    ; def s[2:3]
11682; GFX940-NEXT:    ;;#ASMEND
11683; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s1
11684; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11685; GFX940-NEXT:    ;;#ASMSTART
11686; GFX940-NEXT:    ; use s[8:9]
11687; GFX940-NEXT:    ;;#ASMEND
11688; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
11689  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11690  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11691  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
11692  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11693  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11694  ret void
11695}
11696
; Scalar-register two-input shuffle test, mask <7, 3, 3>: the result lanes are
; %vec1[3], %vec0[3], %vec0[3].
; The assertions expect a single s_pack_hh_b32_b16 into s8 (operands s7, s5 on
; gfx900/gfx90a; s3, s1 on gfx940) and s9 to be s5 (s1 on gfx940) shifted
; right by 16.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11697define void @s_shuffle_v3i16_v4i16__7_3_3() {
11698; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_3:
11699; GFX900:       ; %bb.0:
11700; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11701; GFX900-NEXT:    ;;#ASMSTART
11702; GFX900-NEXT:    ; def s[4:5]
11703; GFX900-NEXT:    ;;#ASMEND
11704; GFX900-NEXT:    ;;#ASMSTART
11705; GFX900-NEXT:    ; def s[6:7]
11706; GFX900-NEXT:    ;;#ASMEND
11707; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
11708; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11709; GFX900-NEXT:    ;;#ASMSTART
11710; GFX900-NEXT:    ; use s[8:9]
11711; GFX900-NEXT:    ;;#ASMEND
11712; GFX900-NEXT:    s_setpc_b64 s[30:31]
11713;
11714; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_3:
11715; GFX90A:       ; %bb.0:
11716; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11717; GFX90A-NEXT:    ;;#ASMSTART
11718; GFX90A-NEXT:    ; def s[4:5]
11719; GFX90A-NEXT:    ;;#ASMEND
11720; GFX90A-NEXT:    ;;#ASMSTART
11721; GFX90A-NEXT:    ; def s[6:7]
11722; GFX90A-NEXT:    ;;#ASMEND
11723; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
11724; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11725; GFX90A-NEXT:    ;;#ASMSTART
11726; GFX90A-NEXT:    ; use s[8:9]
11727; GFX90A-NEXT:    ;;#ASMEND
11728; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11729;
11730; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_3:
11731; GFX940:       ; %bb.0:
11732; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11733; GFX940-NEXT:    ;;#ASMSTART
11734; GFX940-NEXT:    ; def s[0:1]
11735; GFX940-NEXT:    ;;#ASMEND
11736; GFX940-NEXT:    ;;#ASMSTART
11737; GFX940-NEXT:    ; def s[2:3]
11738; GFX940-NEXT:    ;;#ASMEND
11739; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
11740; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11741; GFX940-NEXT:    ;;#ASMSTART
11742; GFX940-NEXT:    ; use s[8:9]
11743; GFX940-NEXT:    ;;#ASMEND
11744; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
11745  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11746  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11747  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
11748  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11749  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11750  ret void
11751}
11752
; Scalar-register two-input shuffle test, mask <7, poison, 3>: lane 0 is
; %vec1[3], lane 1 is unconstrained, lane 2 is %vec0[3].
; The assertions expect no pack instruction, only two right shifts by 16:
; s9 from s5 and s8 from s7 (s1 and s3 respectively on gfx940).
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11753define void @s_shuffle_v3i16_v4i16__7_u_3() {
11754; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_3:
11755; GFX900:       ; %bb.0:
11756; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11757; GFX900-NEXT:    ;;#ASMSTART
11758; GFX900-NEXT:    ; def s[4:5]
11759; GFX900-NEXT:    ;;#ASMEND
11760; GFX900-NEXT:    ;;#ASMSTART
11761; GFX900-NEXT:    ; def s[6:7]
11762; GFX900-NEXT:    ;;#ASMEND
11763; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11764; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
11765; GFX900-NEXT:    ;;#ASMSTART
11766; GFX900-NEXT:    ; use s[8:9]
11767; GFX900-NEXT:    ;;#ASMEND
11768; GFX900-NEXT:    s_setpc_b64 s[30:31]
11769;
11770; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_3:
11771; GFX90A:       ; %bb.0:
11772; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11773; GFX90A-NEXT:    ;;#ASMSTART
11774; GFX90A-NEXT:    ; def s[4:5]
11775; GFX90A-NEXT:    ;;#ASMEND
11776; GFX90A-NEXT:    ;;#ASMSTART
11777; GFX90A-NEXT:    ; def s[6:7]
11778; GFX90A-NEXT:    ;;#ASMEND
11779; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11780; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
11781; GFX90A-NEXT:    ;;#ASMSTART
11782; GFX90A-NEXT:    ; use s[8:9]
11783; GFX90A-NEXT:    ;;#ASMEND
11784; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11785;
11786; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_3:
11787; GFX940:       ; %bb.0:
11788; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11789; GFX940-NEXT:    ;;#ASMSTART
11790; GFX940-NEXT:    ; def s[0:1]
11791; GFX940-NEXT:    ;;#ASMEND
11792; GFX940-NEXT:    ;;#ASMSTART
11793; GFX940-NEXT:    ; def s[2:3]
11794; GFX940-NEXT:    ;;#ASMEND
11795; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11796; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
11797; GFX940-NEXT:    ;;#ASMSTART
11798; GFX940-NEXT:    ; use s[8:9]
11799; GFX940-NEXT:    ;;#ASMEND
11800; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
11801  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11802  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11803  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
11804  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11805  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11806  ret void
11807}
11808
; Scalar-register two-input shuffle test, mask <7, 0, 3>: the result lanes are
; %vec1[3], %vec0[0], %vec0[3].
; The assertions expect the two asm defs in the opposite order from the IR
; (s[6:7] first, then s[4:5] on gfx900/gfx90a; s[2:3] then s[0:1] on gfx940),
; with a right shift by 16 placed between them. s8 is then built with
; s_pack_ll_b32_b16 and s9 is s5 (s1 on gfx940) shifted right by 16.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11809define void @s_shuffle_v3i16_v4i16__7_0_3() {
11810; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_3:
11811; GFX900:       ; %bb.0:
11812; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11813; GFX900-NEXT:    ;;#ASMSTART
11814; GFX900-NEXT:    ; def s[6:7]
11815; GFX900-NEXT:    ;;#ASMEND
11816; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
11817; GFX900-NEXT:    ;;#ASMSTART
11818; GFX900-NEXT:    ; def s[4:5]
11819; GFX900-NEXT:    ;;#ASMEND
11820; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
11821; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11822; GFX900-NEXT:    ;;#ASMSTART
11823; GFX900-NEXT:    ; use s[8:9]
11824; GFX900-NEXT:    ;;#ASMEND
11825; GFX900-NEXT:    s_setpc_b64 s[30:31]
11826;
11827; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_3:
11828; GFX90A:       ; %bb.0:
11829; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11830; GFX90A-NEXT:    ;;#ASMSTART
11831; GFX90A-NEXT:    ; def s[6:7]
11832; GFX90A-NEXT:    ;;#ASMEND
11833; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
11834; GFX90A-NEXT:    ;;#ASMSTART
11835; GFX90A-NEXT:    ; def s[4:5]
11836; GFX90A-NEXT:    ;;#ASMEND
11837; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
11838; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11839; GFX90A-NEXT:    ;;#ASMSTART
11840; GFX90A-NEXT:    ; use s[8:9]
11841; GFX90A-NEXT:    ;;#ASMEND
11842; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11843;
11844; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_3:
11845; GFX940:       ; %bb.0:
11846; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11847; GFX940-NEXT:    ;;#ASMSTART
11848; GFX940-NEXT:    ; def s[2:3]
11849; GFX940-NEXT:    ;;#ASMEND
11850; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
11851; GFX940-NEXT:    ;;#ASMSTART
11852; GFX940-NEXT:    ; def s[0:1]
11853; GFX940-NEXT:    ;;#ASMEND
11854; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s0
11855; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11856; GFX940-NEXT:    ;;#ASMSTART
11857; GFX940-NEXT:    ; use s[8:9]
11858; GFX940-NEXT:    ;;#ASMEND
11859; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
11860  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11861  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11862  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
11863  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11864  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11865  ret void
11866}
11867
; Scalar-register two-input shuffle test, mask <7, 1, 3>: the result lanes are
; %vec1[3], %vec0[1], %vec0[3].
; The assertions expect a single s_pack_hh_b32_b16 into s8 (operands s7, s4 on
; gfx900/gfx90a; s3, s0 on gfx940) and s9 to be s5 (s1 on gfx940) shifted
; right by 16.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11868define void @s_shuffle_v3i16_v4i16__7_1_3() {
11869; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_3:
11870; GFX900:       ; %bb.0:
11871; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11872; GFX900-NEXT:    ;;#ASMSTART
11873; GFX900-NEXT:    ; def s[4:5]
11874; GFX900-NEXT:    ;;#ASMEND
11875; GFX900-NEXT:    ;;#ASMSTART
11876; GFX900-NEXT:    ; def s[6:7]
11877; GFX900-NEXT:    ;;#ASMEND
11878; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
11879; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11880; GFX900-NEXT:    ;;#ASMSTART
11881; GFX900-NEXT:    ; use s[8:9]
11882; GFX900-NEXT:    ;;#ASMEND
11883; GFX900-NEXT:    s_setpc_b64 s[30:31]
11884;
11885; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_3:
11886; GFX90A:       ; %bb.0:
11887; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11888; GFX90A-NEXT:    ;;#ASMSTART
11889; GFX90A-NEXT:    ; def s[4:5]
11890; GFX90A-NEXT:    ;;#ASMEND
11891; GFX90A-NEXT:    ;;#ASMSTART
11892; GFX90A-NEXT:    ; def s[6:7]
11893; GFX90A-NEXT:    ;;#ASMEND
11894; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
11895; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11896; GFX90A-NEXT:    ;;#ASMSTART
11897; GFX90A-NEXT:    ; use s[8:9]
11898; GFX90A-NEXT:    ;;#ASMEND
11899; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11900;
11901; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_3:
11902; GFX940:       ; %bb.0:
11903; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11904; GFX940-NEXT:    ;;#ASMSTART
11905; GFX940-NEXT:    ; def s[0:1]
11906; GFX940-NEXT:    ;;#ASMEND
11907; GFX940-NEXT:    ;;#ASMSTART
11908; GFX940-NEXT:    ; def s[2:3]
11909; GFX940-NEXT:    ;;#ASMEND
11910; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
11911; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11912; GFX940-NEXT:    ;;#ASMSTART
11913; GFX940-NEXT:    ; use s[8:9]
11914; GFX940-NEXT:    ;;#ASMEND
11915; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
11916  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11917  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11918  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
11919  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11920  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11921  ret void
11922}
11923
; Scalar-register two-input shuffle test, mask <7, 2, 3>: the result lanes are
; %vec1[3], %vec0[2], %vec0[3].
; The assertions expect three instructions: s7 shifted right by 16 into a
; scratch register (s4), an s_pack_ll_b32_b16 of that scratch with s5 into s8,
; and s5 shifted right by 16 into s9. On gfx940 the same sequence uses s3, s0
; and s1.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11924define void @s_shuffle_v3i16_v4i16__7_2_3() {
11925; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_3:
11926; GFX900:       ; %bb.0:
11927; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11928; GFX900-NEXT:    ;;#ASMSTART
11929; GFX900-NEXT:    ; def s[4:5]
11930; GFX900-NEXT:    ;;#ASMEND
11931; GFX900-NEXT:    ;;#ASMSTART
11932; GFX900-NEXT:    ; def s[6:7]
11933; GFX900-NEXT:    ;;#ASMEND
11934; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
11935; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
11936; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11937; GFX900-NEXT:    ;;#ASMSTART
11938; GFX900-NEXT:    ; use s[8:9]
11939; GFX900-NEXT:    ;;#ASMEND
11940; GFX900-NEXT:    s_setpc_b64 s[30:31]
11941;
11942; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_3:
11943; GFX90A:       ; %bb.0:
11944; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11945; GFX90A-NEXT:    ;;#ASMSTART
11946; GFX90A-NEXT:    ; def s[4:5]
11947; GFX90A-NEXT:    ;;#ASMEND
11948; GFX90A-NEXT:    ;;#ASMSTART
11949; GFX90A-NEXT:    ; def s[6:7]
11950; GFX90A-NEXT:    ;;#ASMEND
11951; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
11952; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
11953; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11954; GFX90A-NEXT:    ;;#ASMSTART
11955; GFX90A-NEXT:    ; use s[8:9]
11956; GFX90A-NEXT:    ;;#ASMEND
11957; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11958;
11959; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_3:
11960; GFX940:       ; %bb.0:
11961; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11962; GFX940-NEXT:    ;;#ASMSTART
11963; GFX940-NEXT:    ; def s[0:1]
11964; GFX940-NEXT:    ;;#ASMEND
11965; GFX940-NEXT:    ;;#ASMSTART
11966; GFX940-NEXT:    ; def s[2:3]
11967; GFX940-NEXT:    ;;#ASMEND
11968; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
11969; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
11970; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11971; GFX940-NEXT:    ;;#ASMSTART
11972; GFX940-NEXT:    ; use s[8:9]
11973; GFX940-NEXT:    ;;#ASMEND
11974; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
11975  %vec0 = call <4 x i16> asm "; def $0", "=s"()
11976  %vec1 = call <4 x i16> asm "; def $0", "=s"()
11977  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
11978  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11979  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
11980  ret void
11981}
11982
; Scalar-register two-input shuffle test, mask <7, 4, 3>: the result lanes are
; %vec1[3], %vec1[0], %vec0[3].
; The assertions expect three instructions: s7 shifted right by 16 into a
; scratch register (s4), an s_pack_ll_b32_b16 of that scratch with s6 into s8,
; and s5 shifted right by 16 into s9. On gfx940 the same sequence uses s3, s0,
; s2 and s1.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
11983define void @s_shuffle_v3i16_v4i16__7_4_3() {
11984; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_3:
11985; GFX900:       ; %bb.0:
11986; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11987; GFX900-NEXT:    ;;#ASMSTART
11988; GFX900-NEXT:    ; def s[4:5]
11989; GFX900-NEXT:    ;;#ASMEND
11990; GFX900-NEXT:    ;;#ASMSTART
11991; GFX900-NEXT:    ; def s[6:7]
11992; GFX900-NEXT:    ;;#ASMEND
11993; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
11994; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s6
11995; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11996; GFX900-NEXT:    ;;#ASMSTART
11997; GFX900-NEXT:    ; use s[8:9]
11998; GFX900-NEXT:    ;;#ASMEND
11999; GFX900-NEXT:    s_setpc_b64 s[30:31]
12000;
12001; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_3:
12002; GFX90A:       ; %bb.0:
12003; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12004; GFX90A-NEXT:    ;;#ASMSTART
12005; GFX90A-NEXT:    ; def s[4:5]
12006; GFX90A-NEXT:    ;;#ASMEND
12007; GFX90A-NEXT:    ;;#ASMSTART
12008; GFX90A-NEXT:    ; def s[6:7]
12009; GFX90A-NEXT:    ;;#ASMEND
12010; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
12011; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s6
12012; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
12013; GFX90A-NEXT:    ;;#ASMSTART
12014; GFX90A-NEXT:    ; use s[8:9]
12015; GFX90A-NEXT:    ;;#ASMEND
12016; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12017;
12018; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_3:
12019; GFX940:       ; %bb.0:
12020; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12021; GFX940-NEXT:    ;;#ASMSTART
12022; GFX940-NEXT:    ; def s[0:1]
12023; GFX940-NEXT:    ;;#ASMEND
12024; GFX940-NEXT:    ;;#ASMSTART
12025; GFX940-NEXT:    ; def s[2:3]
12026; GFX940-NEXT:    ;;#ASMEND
12027; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
12028; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s2
12029; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
12030; GFX940-NEXT:    ;;#ASMSTART
12031; GFX940-NEXT:    ; use s[8:9]
12032; GFX940-NEXT:    ;;#ASMEND
12033; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
12034  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12035  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12036  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
12037  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12038  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12039  ret void
12040}
12041
; Scalar-register two-input shuffle test, mask <7, 5, 3>: the result lanes are
; %vec1[3], %vec1[1], %vec0[3].
; The assertions expect a single s_pack_hh_b32_b16 into s8 (operands s7, s6 on
; gfx900/gfx90a; s3, s2 on gfx940) and s9 to be s5 (s1 on gfx940) shifted
; right by 16.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
12042define void @s_shuffle_v3i16_v4i16__7_5_3() {
12043; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_3:
12044; GFX900:       ; %bb.0:
12045; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12046; GFX900-NEXT:    ;;#ASMSTART
12047; GFX900-NEXT:    ; def s[4:5]
12048; GFX900-NEXT:    ;;#ASMEND
12049; GFX900-NEXT:    ;;#ASMSTART
12050; GFX900-NEXT:    ; def s[6:7]
12051; GFX900-NEXT:    ;;#ASMEND
12052; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
12053; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
12054; GFX900-NEXT:    ;;#ASMSTART
12055; GFX900-NEXT:    ; use s[8:9]
12056; GFX900-NEXT:    ;;#ASMEND
12057; GFX900-NEXT:    s_setpc_b64 s[30:31]
12058;
12059; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_3:
12060; GFX90A:       ; %bb.0:
12061; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12062; GFX90A-NEXT:    ;;#ASMSTART
12063; GFX90A-NEXT:    ; def s[4:5]
12064; GFX90A-NEXT:    ;;#ASMEND
12065; GFX90A-NEXT:    ;;#ASMSTART
12066; GFX90A-NEXT:    ; def s[6:7]
12067; GFX90A-NEXT:    ;;#ASMEND
12068; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s6
12069; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
12070; GFX90A-NEXT:    ;;#ASMSTART
12071; GFX90A-NEXT:    ; use s[8:9]
12072; GFX90A-NEXT:    ;;#ASMEND
12073; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12074;
12075; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_3:
12076; GFX940:       ; %bb.0:
12077; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12078; GFX940-NEXT:    ;;#ASMSTART
12079; GFX940-NEXT:    ; def s[0:1]
12080; GFX940-NEXT:    ;;#ASMEND
12081; GFX940-NEXT:    ;;#ASMSTART
12082; GFX940-NEXT:    ; def s[2:3]
12083; GFX940-NEXT:    ;;#ASMEND
12084; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s2
12085; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
12086; GFX940-NEXT:    ;;#ASMSTART
12087; GFX940-NEXT:    ; use s[8:9]
12088; GFX940-NEXT:    ;;#ASMEND
12089; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
12090  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12091  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12092  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
12093  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12094  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12095  ret void
12096}
12097
; Scalar-register two-input shuffle test, mask <7, 6, 3>: the result lanes are
; %vec1[3], %vec1[2], %vec0[3].
; The assertions expect three instructions: s7 shifted right by 16 into a
; scratch register (s4), an s_pack_ll_b32_b16 of that scratch with s7 into s8,
; and s5 shifted right by 16 into s9. On gfx940 the same sequence uses s3, s0
; and s1.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
12098define void @s_shuffle_v3i16_v4i16__7_6_3() {
12099; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_3:
12100; GFX900:       ; %bb.0:
12101; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12102; GFX900-NEXT:    ;;#ASMSTART
12103; GFX900-NEXT:    ; def s[4:5]
12104; GFX900-NEXT:    ;;#ASMEND
12105; GFX900-NEXT:    ;;#ASMSTART
12106; GFX900-NEXT:    ; def s[6:7]
12107; GFX900-NEXT:    ;;#ASMEND
12108; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
12109; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
12110; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
12111; GFX900-NEXT:    ;;#ASMSTART
12112; GFX900-NEXT:    ; use s[8:9]
12113; GFX900-NEXT:    ;;#ASMEND
12114; GFX900-NEXT:    s_setpc_b64 s[30:31]
12115;
12116; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_3:
12117; GFX90A:       ; %bb.0:
12118; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12119; GFX90A-NEXT:    ;;#ASMSTART
12120; GFX90A-NEXT:    ; def s[4:5]
12121; GFX90A-NEXT:    ;;#ASMEND
12122; GFX90A-NEXT:    ;;#ASMSTART
12123; GFX90A-NEXT:    ; def s[6:7]
12124; GFX90A-NEXT:    ;;#ASMEND
12125; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
12126; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
12127; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
12128; GFX90A-NEXT:    ;;#ASMSTART
12129; GFX90A-NEXT:    ; use s[8:9]
12130; GFX90A-NEXT:    ;;#ASMEND
12131; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12132;
12133; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_3:
12134; GFX940:       ; %bb.0:
12135; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12136; GFX940-NEXT:    ;;#ASMSTART
12137; GFX940-NEXT:    ; def s[0:1]
12138; GFX940-NEXT:    ;;#ASMEND
12139; GFX940-NEXT:    ;;#ASMSTART
12140; GFX940-NEXT:    ; def s[2:3]
12141; GFX940-NEXT:    ;;#ASMEND
12142; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
12143; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
12144; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
12145; GFX940-NEXT:    ;;#ASMSTART
12146; GFX940-NEXT:    ; use s[8:9]
12147; GFX940-NEXT:    ;;#ASMEND
12148; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are defined by inline asm with "=s" output constraints; mask
  ; indices 4-7 select lanes of %vec1.
12149  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12150  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12151  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
12152  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12153  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12154  ret void
12155}
12156
; Scalar-register shuffle test, mask <poison, 4, 4>. Mask index 4 names lane 0
; of the second shuffle operand, which is poison in this function, so no lane
; of the result depends on %vec0.
; The shared assertions (one prefix covers all three RUN lines) expect no asm
; def and no shuffle code at all: only the asm use of s[8:9] and the return.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
12157define void @s_shuffle_v3i16_v4i16__u_4_4() {
12158; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_4_4:
12159; GFX9:       ; %bb.0:
12160; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12161; GFX9-NEXT:    ;;#ASMSTART
12162; GFX9-NEXT:    ; use s[8:9]
12163; GFX9-NEXT:    ;;#ASMEND
12164; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; This asm def is not marked sideeffect; the expected output above contains
  ; no corresponding def.
12165  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12166  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 poison, i32 4, i32 4>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
12167  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12168  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12169  ret void
12170}
12171
; Scalar-register shuffle test, mask <0, 4, 4>: lane 0 is %vec0[0]; mask index
; 4 names lane 0 of the second shuffle operand, which is poison here, so lanes
; 1 and 2 are unconstrained.
; The assertions expect the asm def to land directly in s[8:9] with no copy or
; shuffle instructions before the use. The gfx940 assertions additionally
; expect an s_nop 0 between the def and the use.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
12172define void @s_shuffle_v3i16_v4i16__0_4_4() {
12173; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_4_4:
12174; GFX900:       ; %bb.0:
12175; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12176; GFX900-NEXT:    ;;#ASMSTART
12177; GFX900-NEXT:    ; def s[8:9]
12178; GFX900-NEXT:    ;;#ASMEND
12179; GFX900-NEXT:    ;;#ASMSTART
12180; GFX900-NEXT:    ; use s[8:9]
12181; GFX900-NEXT:    ;;#ASMEND
12182; GFX900-NEXT:    s_setpc_b64 s[30:31]
12183;
12184; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_4_4:
12185; GFX90A:       ; %bb.0:
12186; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12187; GFX90A-NEXT:    ;;#ASMSTART
12188; GFX90A-NEXT:    ; def s[8:9]
12189; GFX90A-NEXT:    ;;#ASMEND
12190; GFX90A-NEXT:    ;;#ASMSTART
12191; GFX90A-NEXT:    ; use s[8:9]
12192; GFX90A-NEXT:    ;;#ASMEND
12193; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12194;
12195; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_4_4:
12196; GFX940:       ; %bb.0:
12197; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12198; GFX940-NEXT:    ;;#ASMSTART
12199; GFX940-NEXT:    ; def s[8:9]
12200; GFX940-NEXT:    ;;#ASMEND
12201; GFX940-NEXT:    s_nop 0
12202; GFX940-NEXT:    ;;#ASMSTART
12203; GFX940-NEXT:    ; use s[8:9]
12204; GFX940-NEXT:    ;;#ASMEND
12205; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The input is defined by inline asm with an "=s" output constraint. There
  ; is no %vec1 here: the second shuffle operand is poison.
12206  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12207  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 4, i32 4>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
12208  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12209  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12210  ret void
12211}
12212
; Scalar-register shuffle test, mask <1, 4, 4>: lane 0 is %vec0[1]; mask index
; 4 names lane 0 of the second shuffle operand, which is poison here, so lanes
; 1 and 2 are unconstrained.
; The assertions expect a single instruction between def and use: the input's
; first register (s4 on gfx900/gfx90a, s0 on gfx940) shifted right by 16 into
; s8. No write to s9 is expected.
; The assertions are autogenerated (see the NOTE on the first line of this
; file); regenerate them with the update script instead of editing by hand.
12213define void @s_shuffle_v3i16_v4i16__1_4_4() {
12214; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_4_4:
12215; GFX900:       ; %bb.0:
12216; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12217; GFX900-NEXT:    ;;#ASMSTART
12218; GFX900-NEXT:    ; def s[4:5]
12219; GFX900-NEXT:    ;;#ASMEND
12220; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
12221; GFX900-NEXT:    ;;#ASMSTART
12222; GFX900-NEXT:    ; use s[8:9]
12223; GFX900-NEXT:    ;;#ASMEND
12224; GFX900-NEXT:    s_setpc_b64 s[30:31]
12225;
12226; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_4_4:
12227; GFX90A:       ; %bb.0:
12228; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12229; GFX90A-NEXT:    ;;#ASMSTART
12230; GFX90A-NEXT:    ; def s[4:5]
12231; GFX90A-NEXT:    ;;#ASMEND
12232; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
12233; GFX90A-NEXT:    ;;#ASMSTART
12234; GFX90A-NEXT:    ; use s[8:9]
12235; GFX90A-NEXT:    ;;#ASMEND
12236; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12237;
12238; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_4_4:
12239; GFX940:       ; %bb.0:
12240; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12241; GFX940-NEXT:    ;;#ASMSTART
12242; GFX940-NEXT:    ; def s[0:1]
12243; GFX940-NEXT:    ;;#ASMEND
12244; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
12245; GFX940-NEXT:    ;;#ASMSTART
12246; GFX940-NEXT:    ; use s[8:9]
12247; GFX940-NEXT:    ;;#ASMEND
12248; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The input is defined by inline asm with an "=s" output constraint. There
  ; is no %vec1 here: the second shuffle operand is poison.
12249  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12250  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 1, i32 4, i32 4>
  ; Widen the 3-lane result to 4 lanes (last lane poison) and hand it to an
  ; asm use that is constrained to s[8:9].
12251  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12252  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12253  ret void
12254}
12255
; SGPR case, mask <2, 4, 4>: lane 0 takes element 2 of %vec0 (the low half of
; the second input dword); lanes 1 and 2 index the poison second operand.
; The expected code is a plain copy of the second input dword into s8.
12256define void @s_shuffle_v3i16_v4i16__2_4_4() {
12257; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_4_4:
12258; GFX900:       ; %bb.0:
12259; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12260; GFX900-NEXT:    ;;#ASMSTART
12261; GFX900-NEXT:    ; def s[4:5]
12262; GFX900-NEXT:    ;;#ASMEND
12263; GFX900-NEXT:    s_mov_b32 s8, s5
12264; GFX900-NEXT:    ;;#ASMSTART
12265; GFX900-NEXT:    ; use s[8:9]
12266; GFX900-NEXT:    ;;#ASMEND
12267; GFX900-NEXT:    s_setpc_b64 s[30:31]
12268;
12269; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_4_4:
12270; GFX90A:       ; %bb.0:
12271; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12272; GFX90A-NEXT:    ;;#ASMSTART
12273; GFX90A-NEXT:    ; def s[4:5]
12274; GFX90A-NEXT:    ;;#ASMEND
12275; GFX90A-NEXT:    s_mov_b32 s8, s5
12276; GFX90A-NEXT:    ;;#ASMSTART
12277; GFX90A-NEXT:    ; use s[8:9]
12278; GFX90A-NEXT:    ;;#ASMEND
12279; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12280;
12281; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_4_4:
12282; GFX940:       ; %bb.0:
12283; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12284; GFX940-NEXT:    ;;#ASMSTART
12285; GFX940-NEXT:    ; def s[0:1]
12286; GFX940-NEXT:    ;;#ASMEND
12287; GFX940-NEXT:    s_mov_b32 s8, s1
12288; GFX940-NEXT:    ;;#ASMSTART
12289; GFX940-NEXT:    ; use s[8:9]
12290; GFX940-NEXT:    ;;#ASMEND
12291; GFX940-NEXT:    s_setpc_b64 s[30:31]
12292  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12293  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 2, i32 4, i32 4>
12294  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12295  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12296  ret void
12297}
12298
; SGPR case, mask <3, 4, 4>: lane 0 takes element 3 of %vec0 (the high half of
; the second input dword); lanes 1 and 2 index the poison second operand.
; The expected code shifts the second input dword right by 16 into s8.
12299define void @s_shuffle_v3i16_v4i16__3_4_4() {
12300; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_4_4:
12301; GFX900:       ; %bb.0:
12302; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12303; GFX900-NEXT:    ;;#ASMSTART
12304; GFX900-NEXT:    ; def s[4:5]
12305; GFX900-NEXT:    ;;#ASMEND
12306; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
12307; GFX900-NEXT:    ;;#ASMSTART
12308; GFX900-NEXT:    ; use s[8:9]
12309; GFX900-NEXT:    ;;#ASMEND
12310; GFX900-NEXT:    s_setpc_b64 s[30:31]
12311;
12312; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_4_4:
12313; GFX90A:       ; %bb.0:
12314; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12315; GFX90A-NEXT:    ;;#ASMSTART
12316; GFX90A-NEXT:    ; def s[4:5]
12317; GFX90A-NEXT:    ;;#ASMEND
12318; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
12319; GFX90A-NEXT:    ;;#ASMSTART
12320; GFX90A-NEXT:    ; use s[8:9]
12321; GFX90A-NEXT:    ;;#ASMEND
12322; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12323;
12324; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_4_4:
12325; GFX940:       ; %bb.0:
12326; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12327; GFX940-NEXT:    ;;#ASMSTART
12328; GFX940-NEXT:    ; def s[0:1]
12329; GFX940-NEXT:    ;;#ASMEND
12330; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
12331; GFX940-NEXT:    ;;#ASMSTART
12332; GFX940-NEXT:    ; use s[8:9]
12333; GFX940-NEXT:    ;;#ASMEND
12334; GFX940-NEXT:    s_setpc_b64 s[30:31]
12335  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12336  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 3, i32 4, i32 4>
12337  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12338  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12339  ret void
12340}
12341
; SGPR case, mask <4, 4, 4>: every lane indexes the second operand, which is
; poison, so %vec0 is never read. The expected output, shared by all three run
; lines, contains no asm def and no data movement, only the use of s[8:9].
12342define void @s_shuffle_v3i16_v4i16__4_4_4() {
12343; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_4_4:
12344; GFX9:       ; %bb.0:
12345; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12346; GFX9-NEXT:    ;;#ASMSTART
12347; GFX9-NEXT:    ; use s[8:9]
12348; GFX9-NEXT:    ;;#ASMEND
12349; GFX9-NEXT:    s_setpc_b64 s[30:31]
12350  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12351  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 4, i32 4, i32 4>
12352  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12353  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12354  ret void
12355}
12356
; SGPR case, mask <5, 4, 4>: all lanes come from %vec1 (elements 1, 0, 0).
; %vec0 is never selected, and only a single asm def appears in the expected
; output. Expected code: shift out element 1, pack it with element 0 to form
; s8, then copy the first input dword to s9 so its low half supplies lane 2.
12357define void @s_shuffle_v3i16_v4i16__5_4_4() {
12358; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_4_4:
12359; GFX900:       ; %bb.0:
12360; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12361; GFX900-NEXT:    ;;#ASMSTART
12362; GFX900-NEXT:    ; def s[4:5]
12363; GFX900-NEXT:    ;;#ASMEND
12364; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
12365; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12366; GFX900-NEXT:    s_mov_b32 s9, s4
12367; GFX900-NEXT:    ;;#ASMSTART
12368; GFX900-NEXT:    ; use s[8:9]
12369; GFX900-NEXT:    ;;#ASMEND
12370; GFX900-NEXT:    s_setpc_b64 s[30:31]
12371;
12372; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_4_4:
12373; GFX90A:       ; %bb.0:
12374; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12375; GFX90A-NEXT:    ;;#ASMSTART
12376; GFX90A-NEXT:    ; def s[4:5]
12377; GFX90A-NEXT:    ;;#ASMEND
12378; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
12379; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12380; GFX90A-NEXT:    s_mov_b32 s9, s4
12381; GFX90A-NEXT:    ;;#ASMSTART
12382; GFX90A-NEXT:    ; use s[8:9]
12383; GFX90A-NEXT:    ;;#ASMEND
12384; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12385;
12386; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_4_4:
12387; GFX940:       ; %bb.0:
12388; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12389; GFX940-NEXT:    ;;#ASMSTART
12390; GFX940-NEXT:    ; def s[0:1]
12391; GFX940-NEXT:    ;;#ASMEND
12392; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
12393; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12394; GFX940-NEXT:    s_mov_b32 s9, s0
12395; GFX940-NEXT:    ;;#ASMSTART
12396; GFX940-NEXT:    ; use s[8:9]
12397; GFX940-NEXT:    ;;#ASMEND
12398; GFX940-NEXT:    s_setpc_b64 s[30:31]
12399  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12400  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12401  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
12402  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12403  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12404  ret void
12405}
12406
; SGPR case, mask <6, 4, 4>: all lanes come from %vec1 (elements 2, 0, 0).
; %vec0 is never selected, and only a single asm def appears in the expected
; output. Expected code: pack the low halves of the second and first input
; dwords into s8, then copy the first input dword to s9 for lane 2.
12407define void @s_shuffle_v3i16_v4i16__6_4_4() {
12408; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_4_4:
12409; GFX900:       ; %bb.0:
12410; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12411; GFX900-NEXT:    ;;#ASMSTART
12412; GFX900-NEXT:    ; def s[4:5]
12413; GFX900-NEXT:    ;;#ASMEND
12414; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12415; GFX900-NEXT:    s_mov_b32 s9, s4
12416; GFX900-NEXT:    ;;#ASMSTART
12417; GFX900-NEXT:    ; use s[8:9]
12418; GFX900-NEXT:    ;;#ASMEND
12419; GFX900-NEXT:    s_setpc_b64 s[30:31]
12420;
12421; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_4_4:
12422; GFX90A:       ; %bb.0:
12423; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12424; GFX90A-NEXT:    ;;#ASMSTART
12425; GFX90A-NEXT:    ; def s[4:5]
12426; GFX90A-NEXT:    ;;#ASMEND
12427; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12428; GFX90A-NEXT:    s_mov_b32 s9, s4
12429; GFX90A-NEXT:    ;;#ASMSTART
12430; GFX90A-NEXT:    ; use s[8:9]
12431; GFX90A-NEXT:    ;;#ASMEND
12432; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12433;
12434; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_4_4:
12435; GFX940:       ; %bb.0:
12436; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12437; GFX940-NEXT:    ;;#ASMSTART
12438; GFX940-NEXT:    ; def s[0:1]
12439; GFX940-NEXT:    ;;#ASMEND
12440; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12441; GFX940-NEXT:    s_mov_b32 s9, s0
12442; GFX940-NEXT:    ;;#ASMSTART
12443; GFX940-NEXT:    ; use s[8:9]
12444; GFX940-NEXT:    ;;#ASMEND
12445; GFX940-NEXT:    s_setpc_b64 s[30:31]
12446  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12447  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12448  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
12449  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12450  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12451  ret void
12452}
12453
; SGPR case, mask <7, 4, 4>: all lanes come from %vec1 (elements 3, 0, 0).
; %vec0 is never selected, and only a single asm def appears in the expected
; output. Expected code: shift element 3 down, pack it with element 0 to form
; s8, then copy the first input dword to s9 for lane 2.
12454define void @s_shuffle_v3i16_v4i16__7_4_4() {
12455; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_4:
12456; GFX900:       ; %bb.0:
12457; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12458; GFX900-NEXT:    ;;#ASMSTART
12459; GFX900-NEXT:    ; def s[4:5]
12460; GFX900-NEXT:    ;;#ASMEND
12461; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
12462; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12463; GFX900-NEXT:    s_mov_b32 s9, s4
12464; GFX900-NEXT:    ;;#ASMSTART
12465; GFX900-NEXT:    ; use s[8:9]
12466; GFX900-NEXT:    ;;#ASMEND
12467; GFX900-NEXT:    s_setpc_b64 s[30:31]
12468;
12469; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_4:
12470; GFX90A:       ; %bb.0:
12471; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12472; GFX90A-NEXT:    ;;#ASMSTART
12473; GFX90A-NEXT:    ; def s[4:5]
12474; GFX90A-NEXT:    ;;#ASMEND
12475; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
12476; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12477; GFX90A-NEXT:    s_mov_b32 s9, s4
12478; GFX90A-NEXT:    ;;#ASMSTART
12479; GFX90A-NEXT:    ; use s[8:9]
12480; GFX90A-NEXT:    ;;#ASMEND
12481; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12482;
12483; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_4:
12484; GFX940:       ; %bb.0:
12485; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12486; GFX940-NEXT:    ;;#ASMSTART
12487; GFX940-NEXT:    ; def s[0:1]
12488; GFX940-NEXT:    ;;#ASMEND
12489; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
12490; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12491; GFX940-NEXT:    s_mov_b32 s9, s0
12492; GFX940-NEXT:    ;;#ASMSTART
12493; GFX940-NEXT:    ; use s[8:9]
12494; GFX940-NEXT:    ;;#ASMEND
12495; GFX940-NEXT:    s_setpc_b64 s[30:31]
12496  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12497  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12498  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
12499  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12500  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12501  ret void
12502}
12503
; SGPR case, mask <7, poison, 4>: lanes 0 and 2 come from %vec1 (elements 3
; and 0); lane 1 is poison. %vec0 is never selected, and only a single asm def
; appears in the expected output. With lane 1 undefined no pack is expected:
; just a shift of the second input dword into s8 and a copy of the first to s9.
12504define void @s_shuffle_v3i16_v4i16__7_u_4() {
12505; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_4:
12506; GFX900:       ; %bb.0:
12507; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12508; GFX900-NEXT:    ;;#ASMSTART
12509; GFX900-NEXT:    ; def s[4:5]
12510; GFX900-NEXT:    ;;#ASMEND
12511; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
12512; GFX900-NEXT:    s_mov_b32 s9, s4
12513; GFX900-NEXT:    ;;#ASMSTART
12514; GFX900-NEXT:    ; use s[8:9]
12515; GFX900-NEXT:    ;;#ASMEND
12516; GFX900-NEXT:    s_setpc_b64 s[30:31]
12517;
12518; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_4:
12519; GFX90A:       ; %bb.0:
12520; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12521; GFX90A-NEXT:    ;;#ASMSTART
12522; GFX90A-NEXT:    ; def s[4:5]
12523; GFX90A-NEXT:    ;;#ASMEND
12524; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
12525; GFX90A-NEXT:    s_mov_b32 s9, s4
12526; GFX90A-NEXT:    ;;#ASMSTART
12527; GFX90A-NEXT:    ; use s[8:9]
12528; GFX90A-NEXT:    ;;#ASMEND
12529; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12530;
12531; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_4:
12532; GFX940:       ; %bb.0:
12533; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12534; GFX940-NEXT:    ;;#ASMSTART
12535; GFX940-NEXT:    ; def s[0:1]
12536; GFX940-NEXT:    ;;#ASMEND
12537; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
12538; GFX940-NEXT:    s_mov_b32 s9, s0
12539; GFX940-NEXT:    ;;#ASMSTART
12540; GFX940-NEXT:    ; use s[8:9]
12541; GFX940-NEXT:    ;;#ASMEND
12542; GFX940-NEXT:    s_setpc_b64 s[30:31]
12543  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12544  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12545  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
12546  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12547  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12548  ret void
12549}
12550
; SGPR case, mask <7, 0, 4>: lane 0 = %vec1[3], lane 1 = %vec0[0],
; lane 2 = %vec1[0]. Both inputs are live, so two asm defs are expected.
; Expected code: shift %vec1's element 3 down, pack it with the low half of
; %vec0's first dword into s8, and copy %vec1's first dword to s9.
12551define void @s_shuffle_v3i16_v4i16__7_0_4() {
12552; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_4:
12553; GFX900:       ; %bb.0:
12554; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12555; GFX900-NEXT:    ;;#ASMSTART
12556; GFX900-NEXT:    ; def s[4:5]
12557; GFX900-NEXT:    ;;#ASMEND
12558; GFX900-NEXT:    ;;#ASMSTART
12559; GFX900-NEXT:    ; def s[6:7]
12560; GFX900-NEXT:    ;;#ASMEND
12561; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
12562; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12563; GFX900-NEXT:    s_mov_b32 s9, s6
12564; GFX900-NEXT:    ;;#ASMSTART
12565; GFX900-NEXT:    ; use s[8:9]
12566; GFX900-NEXT:    ;;#ASMEND
12567; GFX900-NEXT:    s_setpc_b64 s[30:31]
12568;
12569; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_4:
12570; GFX90A:       ; %bb.0:
12571; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12572; GFX90A-NEXT:    ;;#ASMSTART
12573; GFX90A-NEXT:    ; def s[4:5]
12574; GFX90A-NEXT:    ;;#ASMEND
12575; GFX90A-NEXT:    ;;#ASMSTART
12576; GFX90A-NEXT:    ; def s[6:7]
12577; GFX90A-NEXT:    ;;#ASMEND
12578; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
12579; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12580; GFX90A-NEXT:    s_mov_b32 s9, s6
12581; GFX90A-NEXT:    ;;#ASMSTART
12582; GFX90A-NEXT:    ; use s[8:9]
12583; GFX90A-NEXT:    ;;#ASMEND
12584; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12585;
12586; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_4:
12587; GFX940:       ; %bb.0:
12588; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12589; GFX940-NEXT:    ;;#ASMSTART
12590; GFX940-NEXT:    ; def s[0:1]
12591; GFX940-NEXT:    ;;#ASMEND
12592; GFX940-NEXT:    ;;#ASMSTART
12593; GFX940-NEXT:    ; def s[2:3]
12594; GFX940-NEXT:    ;;#ASMEND
12595; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
12596; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12597; GFX940-NEXT:    s_mov_b32 s9, s2
12598; GFX940-NEXT:    ;;#ASMSTART
12599; GFX940-NEXT:    ; use s[8:9]
12600; GFX940-NEXT:    ;;#ASMEND
12601; GFX940-NEXT:    s_setpc_b64 s[30:31]
12602  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12603  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12604  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
12605  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12606  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12607  ret void
12608}
12609
; SGPR case, mask <7, 1, 4>: lane 0 = %vec1[3], lane 1 = %vec0[1],
; lane 2 = %vec1[0]. Both selected elements of the first result dword sit in
; high halves, so a single s_pack_hh is expected with no separate shift,
; followed by a copy of %vec1's first dword to s9.
12610define void @s_shuffle_v3i16_v4i16__7_1_4() {
12611; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_4:
12612; GFX900:       ; %bb.0:
12613; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12614; GFX900-NEXT:    ;;#ASMSTART
12615; GFX900-NEXT:    ; def s[4:5]
12616; GFX900-NEXT:    ;;#ASMEND
12617; GFX900-NEXT:    ;;#ASMSTART
12618; GFX900-NEXT:    ; def s[6:7]
12619; GFX900-NEXT:    ;;#ASMEND
12620; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
12621; GFX900-NEXT:    s_mov_b32 s9, s6
12622; GFX900-NEXT:    ;;#ASMSTART
12623; GFX900-NEXT:    ; use s[8:9]
12624; GFX900-NEXT:    ;;#ASMEND
12625; GFX900-NEXT:    s_setpc_b64 s[30:31]
12626;
12627; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_4:
12628; GFX90A:       ; %bb.0:
12629; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12630; GFX90A-NEXT:    ;;#ASMSTART
12631; GFX90A-NEXT:    ; def s[4:5]
12632; GFX90A-NEXT:    ;;#ASMEND
12633; GFX90A-NEXT:    ;;#ASMSTART
12634; GFX90A-NEXT:    ; def s[6:7]
12635; GFX90A-NEXT:    ;;#ASMEND
12636; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
12637; GFX90A-NEXT:    s_mov_b32 s9, s6
12638; GFX90A-NEXT:    ;;#ASMSTART
12639; GFX90A-NEXT:    ; use s[8:9]
12640; GFX90A-NEXT:    ;;#ASMEND
12641; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12642;
12643; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_4:
12644; GFX940:       ; %bb.0:
12645; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12646; GFX940-NEXT:    ;;#ASMSTART
12647; GFX940-NEXT:    ; def s[0:1]
12648; GFX940-NEXT:    ;;#ASMEND
12649; GFX940-NEXT:    ;;#ASMSTART
12650; GFX940-NEXT:    ; def s[2:3]
12651; GFX940-NEXT:    ;;#ASMEND
12652; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
12653; GFX940-NEXT:    s_mov_b32 s9, s2
12654; GFX940-NEXT:    ;;#ASMSTART
12655; GFX940-NEXT:    ; use s[8:9]
12656; GFX940-NEXT:    ;;#ASMEND
12657; GFX940-NEXT:    s_setpc_b64 s[30:31]
12658  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12659  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12660  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
12661  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12662  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12663  ret void
12664}
12665
; SGPR case, mask <7, 2, 4>: lane 0 = %vec1[3], lane 1 = %vec0[2],
; lane 2 = %vec1[0]. Expected code: shift %vec1's element 3 down, pack it with
; the low half of %vec0's second dword into s8, and copy %vec1's first dword
; to s9.
12666define void @s_shuffle_v3i16_v4i16__7_2_4() {
12667; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_4:
12668; GFX900:       ; %bb.0:
12669; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12670; GFX900-NEXT:    ;;#ASMSTART
12671; GFX900-NEXT:    ; def s[4:5]
12672; GFX900-NEXT:    ;;#ASMEND
12673; GFX900-NEXT:    ;;#ASMSTART
12674; GFX900-NEXT:    ; def s[6:7]
12675; GFX900-NEXT:    ;;#ASMEND
12676; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
12677; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12678; GFX900-NEXT:    s_mov_b32 s9, s6
12679; GFX900-NEXT:    ;;#ASMSTART
12680; GFX900-NEXT:    ; use s[8:9]
12681; GFX900-NEXT:    ;;#ASMEND
12682; GFX900-NEXT:    s_setpc_b64 s[30:31]
12683;
12684; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_4:
12685; GFX90A:       ; %bb.0:
12686; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12687; GFX90A-NEXT:    ;;#ASMSTART
12688; GFX90A-NEXT:    ; def s[4:5]
12689; GFX90A-NEXT:    ;;#ASMEND
12690; GFX90A-NEXT:    ;;#ASMSTART
12691; GFX90A-NEXT:    ; def s[6:7]
12692; GFX90A-NEXT:    ;;#ASMEND
12693; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
12694; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12695; GFX90A-NEXT:    s_mov_b32 s9, s6
12696; GFX90A-NEXT:    ;;#ASMSTART
12697; GFX90A-NEXT:    ; use s[8:9]
12698; GFX90A-NEXT:    ;;#ASMEND
12699; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12700;
12701; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_4:
12702; GFX940:       ; %bb.0:
12703; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12704; GFX940-NEXT:    ;;#ASMSTART
12705; GFX940-NEXT:    ; def s[0:1]
12706; GFX940-NEXT:    ;;#ASMEND
12707; GFX940-NEXT:    ;;#ASMSTART
12708; GFX940-NEXT:    ; def s[2:3]
12709; GFX940-NEXT:    ;;#ASMEND
12710; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
12711; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
12712; GFX940-NEXT:    s_mov_b32 s9, s2
12713; GFX940-NEXT:    ;;#ASMSTART
12714; GFX940-NEXT:    ; use s[8:9]
12715; GFX940-NEXT:    ;;#ASMEND
12716; GFX940-NEXT:    s_setpc_b64 s[30:31]
12717  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12718  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12719  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
12720  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12721  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12722  ret void
12723}
12724
; SGPR case, mask <7, 3, 4>: lane 0 = %vec1[3], lane 1 = %vec0[3],
; lane 2 = %vec1[0]. Both elements of the first result dword are the high
; halves of the inputs' second dwords, so a single s_pack_hh is expected,
; followed by a copy of %vec1's first dword to s9.
12725define void @s_shuffle_v3i16_v4i16__7_3_4() {
12726; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_4:
12727; GFX900:       ; %bb.0:
12728; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12729; GFX900-NEXT:    ;;#ASMSTART
12730; GFX900-NEXT:    ; def s[4:5]
12731; GFX900-NEXT:    ;;#ASMEND
12732; GFX900-NEXT:    ;;#ASMSTART
12733; GFX900-NEXT:    ; def s[6:7]
12734; GFX900-NEXT:    ;;#ASMEND
12735; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
12736; GFX900-NEXT:    s_mov_b32 s9, s6
12737; GFX900-NEXT:    ;;#ASMSTART
12738; GFX900-NEXT:    ; use s[8:9]
12739; GFX900-NEXT:    ;;#ASMEND
12740; GFX900-NEXT:    s_setpc_b64 s[30:31]
12741;
12742; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_4:
12743; GFX90A:       ; %bb.0:
12744; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12745; GFX90A-NEXT:    ;;#ASMSTART
12746; GFX90A-NEXT:    ; def s[4:5]
12747; GFX90A-NEXT:    ;;#ASMEND
12748; GFX90A-NEXT:    ;;#ASMSTART
12749; GFX90A-NEXT:    ; def s[6:7]
12750; GFX90A-NEXT:    ;;#ASMEND
12751; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
12752; GFX90A-NEXT:    s_mov_b32 s9, s6
12753; GFX90A-NEXT:    ;;#ASMSTART
12754; GFX90A-NEXT:    ; use s[8:9]
12755; GFX90A-NEXT:    ;;#ASMEND
12756; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12757;
12758; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_4:
12759; GFX940:       ; %bb.0:
12760; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12761; GFX940-NEXT:    ;;#ASMSTART
12762; GFX940-NEXT:    ; def s[0:1]
12763; GFX940-NEXT:    ;;#ASMEND
12764; GFX940-NEXT:    ;;#ASMSTART
12765; GFX940-NEXT:    ; def s[2:3]
12766; GFX940-NEXT:    ;;#ASMEND
12767; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
12768; GFX940-NEXT:    s_mov_b32 s9, s2
12769; GFX940-NEXT:    ;;#ASMSTART
12770; GFX940-NEXT:    ; use s[8:9]
12771; GFX940-NEXT:    ;;#ASMEND
12772; GFX940-NEXT:    s_setpc_b64 s[30:31]
12773  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12774  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12775  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
12776  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12777  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12778  ret void
12779}
12780
; SGPR case, mask <7, 5, 4>: all lanes come from %vec1 (elements 3, 1, 0).
; %vec0 is never selected, and only a single asm def appears in the expected
; output. Elements 3 and 1 are both high halves, so one s_pack_hh forms s8;
; the first input dword is then copied to s9 for lane 2.
12781define void @s_shuffle_v3i16_v4i16__7_5_4() {
12782; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_4:
12783; GFX900:       ; %bb.0:
12784; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12785; GFX900-NEXT:    ;;#ASMSTART
12786; GFX900-NEXT:    ; def s[4:5]
12787; GFX900-NEXT:    ;;#ASMEND
12788; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
12789; GFX900-NEXT:    s_mov_b32 s9, s4
12790; GFX900-NEXT:    ;;#ASMSTART
12791; GFX900-NEXT:    ; use s[8:9]
12792; GFX900-NEXT:    ;;#ASMEND
12793; GFX900-NEXT:    s_setpc_b64 s[30:31]
12794;
12795; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_4:
12796; GFX90A:       ; %bb.0:
12797; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12798; GFX90A-NEXT:    ;;#ASMSTART
12799; GFX90A-NEXT:    ; def s[4:5]
12800; GFX90A-NEXT:    ;;#ASMEND
12801; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
12802; GFX90A-NEXT:    s_mov_b32 s9, s4
12803; GFX90A-NEXT:    ;;#ASMSTART
12804; GFX90A-NEXT:    ; use s[8:9]
12805; GFX90A-NEXT:    ;;#ASMEND
12806; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12807;
12808; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_4:
12809; GFX940:       ; %bb.0:
12810; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12811; GFX940-NEXT:    ;;#ASMSTART
12812; GFX940-NEXT:    ; def s[0:1]
12813; GFX940-NEXT:    ;;#ASMEND
12814; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
12815; GFX940-NEXT:    s_mov_b32 s9, s0
12816; GFX940-NEXT:    ;;#ASMSTART
12817; GFX940-NEXT:    ; use s[8:9]
12818; GFX940-NEXT:    ;;#ASMEND
12819; GFX940-NEXT:    s_setpc_b64 s[30:31]
12820  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12821  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12822  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
12823  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12824  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12825  ret void
12826}
12827
; SGPR case, mask <7, 6, 4>: all lanes come from %vec1 (elements 3, 2, 0).
; %vec0 is never selected, and only a single asm def appears in the expected
; output. Lanes 0 and 1 are the two halves of the second input dword swapped:
; the expected code shifts out the high half into a scratch register, packs it
; below the low half to form s8, then copies the first input dword to s9.
12828define void @s_shuffle_v3i16_v4i16__7_6_4() {
12829; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_4:
12830; GFX900:       ; %bb.0:
12831; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12832; GFX900-NEXT:    ;;#ASMSTART
12833; GFX900-NEXT:    ; def s[4:5]
12834; GFX900-NEXT:    ;;#ASMEND
12835; GFX900-NEXT:    s_lshr_b32 s6, s5, 16
12836; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
12837; GFX900-NEXT:    s_mov_b32 s9, s4
12838; GFX900-NEXT:    ;;#ASMSTART
12839; GFX900-NEXT:    ; use s[8:9]
12840; GFX900-NEXT:    ;;#ASMEND
12841; GFX900-NEXT:    s_setpc_b64 s[30:31]
12842;
12843; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_4:
12844; GFX90A:       ; %bb.0:
12845; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12846; GFX90A-NEXT:    ;;#ASMSTART
12847; GFX90A-NEXT:    ; def s[4:5]
12848; GFX90A-NEXT:    ;;#ASMEND
12849; GFX90A-NEXT:    s_lshr_b32 s6, s5, 16
12850; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
12851; GFX90A-NEXT:    s_mov_b32 s9, s4
12852; GFX90A-NEXT:    ;;#ASMSTART
12853; GFX90A-NEXT:    ; use s[8:9]
12854; GFX90A-NEXT:    ;;#ASMEND
12855; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12856;
12857; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_4:
12858; GFX940:       ; %bb.0:
12859; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12860; GFX940-NEXT:    ;;#ASMSTART
12861; GFX940-NEXT:    ; def s[0:1]
12862; GFX940-NEXT:    ;;#ASMEND
12863; GFX940-NEXT:    s_lshr_b32 s2, s1, 16
12864; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
12865; GFX940-NEXT:    s_mov_b32 s9, s0
12866; GFX940-NEXT:    ;;#ASMSTART
12867; GFX940-NEXT:    ; use s[8:9]
12868; GFX940-NEXT:    ;;#ASMEND
12869; GFX940-NEXT:    s_setpc_b64 s[30:31]
12870  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12871  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12872  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
12873  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12874  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12875  ret void
12876}
12877
; SGPR case, mask <poison, 5, 5>: lanes 1 and 2 both take element 1 of %vec1;
; lane 0 is poison and %vec0 is never selected. The expected output, shared by
; all three run lines, defines the input directly in s[8:9] (so the high half
; of s8 already holds lane 1) and shifts s8 right by 16 into s9 for lane 2.
12878define void @s_shuffle_v3i16_v4i16__u_5_5() {
12879; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_5_5:
12880; GFX9:       ; %bb.0:
12881; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12882; GFX9-NEXT:    ;;#ASMSTART
12883; GFX9-NEXT:    ; def s[8:9]
12884; GFX9-NEXT:    ;;#ASMEND
12885; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
12886; GFX9-NEXT:    ;;#ASMSTART
12887; GFX9-NEXT:    ; use s[8:9]
12888; GFX9-NEXT:    ;;#ASMEND
12889; GFX9-NEXT:    s_setpc_b64 s[30:31]
12890  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12891  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12892  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
12893  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12894  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12895  ret void
12896}
12897
; SGPR case, mask <0, 5, 5>: lane 0 = %vec0[0], lanes 1 and 2 = %vec1[1].
; Expected code: one s_pack_lh combines the low half of %vec0's first dword
; with the high half of %vec1's first dword into s8; a 16-bit right shift of
; %vec1's first dword then places element 1 in the low half of s9.
12898define void @s_shuffle_v3i16_v4i16__0_5_5() {
12899; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_5_5:
12900; GFX900:       ; %bb.0:
12901; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12902; GFX900-NEXT:    ;;#ASMSTART
12903; GFX900-NEXT:    ; def s[4:5]
12904; GFX900-NEXT:    ;;#ASMEND
12905; GFX900-NEXT:    ;;#ASMSTART
12906; GFX900-NEXT:    ; def s[6:7]
12907; GFX900-NEXT:    ;;#ASMEND
12908; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s6
12909; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
12910; GFX900-NEXT:    ;;#ASMSTART
12911; GFX900-NEXT:    ; use s[8:9]
12912; GFX900-NEXT:    ;;#ASMEND
12913; GFX900-NEXT:    s_setpc_b64 s[30:31]
12914;
12915; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_5_5:
12916; GFX90A:       ; %bb.0:
12917; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12918; GFX90A-NEXT:    ;;#ASMSTART
12919; GFX90A-NEXT:    ; def s[4:5]
12920; GFX90A-NEXT:    ;;#ASMEND
12921; GFX90A-NEXT:    ;;#ASMSTART
12922; GFX90A-NEXT:    ; def s[6:7]
12923; GFX90A-NEXT:    ;;#ASMEND
12924; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s6
12925; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
12926; GFX90A-NEXT:    ;;#ASMSTART
12927; GFX90A-NEXT:    ; use s[8:9]
12928; GFX90A-NEXT:    ;;#ASMEND
12929; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12930;
12931; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_5_5:
12932; GFX940:       ; %bb.0:
12933; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12934; GFX940-NEXT:    ;;#ASMSTART
12935; GFX940-NEXT:    ; def s[0:1]
12936; GFX940-NEXT:    ;;#ASMEND
12937; GFX940-NEXT:    ;;#ASMSTART
12938; GFX940-NEXT:    ; def s[2:3]
12939; GFX940-NEXT:    ;;#ASMEND
12940; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s2
12941; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
12942; GFX940-NEXT:    ;;#ASMSTART
12943; GFX940-NEXT:    ; use s[8:9]
12944; GFX940-NEXT:    ;;#ASMEND
12945; GFX940-NEXT:    s_setpc_b64 s[30:31]
12946  %vec0 = call <4 x i16> asm "; def $0", "=s"()
12947  %vec1 = call <4 x i16> asm "; def $0", "=s"()
12948  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
12949  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12950  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
12951  ret void
12952}
12953
; SGPR case, mask <1, 5, 5>: lane 0 = %vec0[1], lanes 1 and 2 = %vec1[1].
; Both elements of the first result dword are high halves of the inputs' first
; dwords, so one s_pack_hh forms s8; a 16-bit right shift of %vec1's first
; dword then places element 1 in the low half of s9.
12954define void @s_shuffle_v3i16_v4i16__1_5_5() {
12955; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_5_5:
12956; GFX900:       ; %bb.0:
12957; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12958; GFX900-NEXT:    ;;#ASMSTART
12959; GFX900-NEXT:    ; def s[4:5]
12960; GFX900-NEXT:    ;;#ASMEND
12961; GFX900-NEXT:    ;;#ASMSTART
12962; GFX900-NEXT:    ; def s[6:7]
12963; GFX900-NEXT:    ;;#ASMEND
12964; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s6
12965; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
12966; GFX900-NEXT:    ;;#ASMSTART
12967; GFX900-NEXT:    ; use s[8:9]
12968; GFX900-NEXT:    ;;#ASMEND
12969; GFX900-NEXT:    s_setpc_b64 s[30:31]
12970;
12971; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_5_5:
12972; GFX90A:       ; %bb.0:
12973; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12974; GFX90A-NEXT:    ;;#ASMSTART
12975; GFX90A-NEXT:    ; def s[4:5]
12976; GFX90A-NEXT:    ;;#ASMEND
12977; GFX90A-NEXT:    ;;#ASMSTART
12978; GFX90A-NEXT:    ; def s[6:7]
12979; GFX90A-NEXT:    ;;#ASMEND
12980; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s6
12981; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
12982; GFX90A-NEXT:    ;;#ASMSTART
12983; GFX90A-NEXT:    ; use s[8:9]
12984; GFX90A-NEXT:    ;;#ASMEND
12985; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12986;
12987; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_5_5:
12988; GFX940:       ; %bb.0:
12989; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12990; GFX940-NEXT:    ;;#ASMSTART
12991; GFX940-NEXT:    ; def s[0:1]
12992; GFX940-NEXT:    ;;#ASMEND
12993; GFX940-NEXT:    ;;#ASMSTART
12994; GFX940-NEXT:    ; def s[2:3]
12995; GFX940-NEXT:    ;;#ASMEND
12996; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s2
12997; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
12998; GFX940-NEXT:    ;;#ASMSTART
12999; GFX940-NEXT:    ; use s[8:9]
13000; GFX940-NEXT:    ;;#ASMEND
13001; GFX940-NEXT:    s_setpc_b64 s[30:31]
13002  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13003  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13004  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
13005  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13006  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13007  ret void
13008}
13009
; SGPR case, mask <2, 5, 5>: lane 0 = %vec0[2], lanes 1 and 2 = %vec1[1].
; Expected code: one s_pack_lh combines the low half of %vec0's second dword
; with the high half of %vec1's first dword into s8; a 16-bit right shift of
; %vec1's first dword then places element 1 in the low half of s9.
13010define void @s_shuffle_v3i16_v4i16__2_5_5() {
13011; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_5_5:
13012; GFX900:       ; %bb.0:
13013; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13014; GFX900-NEXT:    ;;#ASMSTART
13015; GFX900-NEXT:    ; def s[4:5]
13016; GFX900-NEXT:    ;;#ASMEND
13017; GFX900-NEXT:    ;;#ASMSTART
13018; GFX900-NEXT:    ; def s[6:7]
13019; GFX900-NEXT:    ;;#ASMEND
13020; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s6
13021; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13022; GFX900-NEXT:    ;;#ASMSTART
13023; GFX900-NEXT:    ; use s[8:9]
13024; GFX900-NEXT:    ;;#ASMEND
13025; GFX900-NEXT:    s_setpc_b64 s[30:31]
13026;
13027; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_5_5:
13028; GFX90A:       ; %bb.0:
13029; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13030; GFX90A-NEXT:    ;;#ASMSTART
13031; GFX90A-NEXT:    ; def s[4:5]
13032; GFX90A-NEXT:    ;;#ASMEND
13033; GFX90A-NEXT:    ;;#ASMSTART
13034; GFX90A-NEXT:    ; def s[6:7]
13035; GFX90A-NEXT:    ;;#ASMEND
13036; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s6
13037; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13038; GFX90A-NEXT:    ;;#ASMSTART
13039; GFX90A-NEXT:    ; use s[8:9]
13040; GFX90A-NEXT:    ;;#ASMEND
13041; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13042;
13043; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_5_5:
13044; GFX940:       ; %bb.0:
13045; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13046; GFX940-NEXT:    ;;#ASMSTART
13047; GFX940-NEXT:    ; def s[0:1]
13048; GFX940-NEXT:    ;;#ASMEND
13049; GFX940-NEXT:    ;;#ASMSTART
13050; GFX940-NEXT:    ; def s[2:3]
13051; GFX940-NEXT:    ;;#ASMEND
13052; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s2
13053; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13054; GFX940-NEXT:    ;;#ASMSTART
13055; GFX940-NEXT:    ; use s[8:9]
13056; GFX940-NEXT:    ;;#ASMEND
13057; GFX940-NEXT:    s_setpc_b64 s[30:31]
13058  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13059  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13060  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
13061  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13062  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13063  ret void
13064}
13065
; Mask <3, 5, 5>, both inputs produced by "=s" inline asm:
;   lane 0 <- %vec0[3]; lanes 1 and 2 <- %vec1[1] (mask indices 4-7 address %vec1).
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; The autogenerated checks expect one s_pack_hh_b32_b16 and one s_lshr_b32 per target.
13066define void @s_shuffle_v3i16_v4i16__3_5_5() {
13067; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_5_5:
13068; GFX900:       ; %bb.0:
13069; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13070; GFX900-NEXT:    ;;#ASMSTART
13071; GFX900-NEXT:    ; def s[4:5]
13072; GFX900-NEXT:    ;;#ASMEND
13073; GFX900-NEXT:    ;;#ASMSTART
13074; GFX900-NEXT:    ; def s[6:7]
13075; GFX900-NEXT:    ;;#ASMEND
13076; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s6
13077; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13078; GFX900-NEXT:    ;;#ASMSTART
13079; GFX900-NEXT:    ; use s[8:9]
13080; GFX900-NEXT:    ;;#ASMEND
13081; GFX900-NEXT:    s_setpc_b64 s[30:31]
13082;
13083; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_5_5:
13084; GFX90A:       ; %bb.0:
13085; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13086; GFX90A-NEXT:    ;;#ASMSTART
13087; GFX90A-NEXT:    ; def s[4:5]
13088; GFX90A-NEXT:    ;;#ASMEND
13089; GFX90A-NEXT:    ;;#ASMSTART
13090; GFX90A-NEXT:    ; def s[6:7]
13091; GFX90A-NEXT:    ;;#ASMEND
13092; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s6
13093; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13094; GFX90A-NEXT:    ;;#ASMSTART
13095; GFX90A-NEXT:    ; use s[8:9]
13096; GFX90A-NEXT:    ;;#ASMEND
13097; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13098;
13099; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_5_5:
13100; GFX940:       ; %bb.0:
13101; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13102; GFX940-NEXT:    ;;#ASMSTART
13103; GFX940-NEXT:    ; def s[0:1]
13104; GFX940-NEXT:    ;;#ASMEND
13105; GFX940-NEXT:    ;;#ASMSTART
13106; GFX940-NEXT:    ; def s[2:3]
13107; GFX940-NEXT:    ;;#ASMEND
13108; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s2
13109; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13110; GFX940-NEXT:    ;;#ASMSTART
13111; GFX940-NEXT:    ; use s[8:9]
13112; GFX940-NEXT:    ;;#ASMEND
13113; GFX940-NEXT:    s_setpc_b64 s[30:31]
13114  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13115  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13116  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
13117  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13118  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13119  ret void
13120}
13121
; Mask <4, 5, 5>: every lane selects from %vec1 (mask indices 4-7 address %vec1):
;   lane 0 <- %vec1[0]; lanes 1 and 2 <- %vec1[1]. %vec0 is not referenced by the mask.
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; All three RUN lines share the GFX9 prefix here; the autogenerated checks show a
; single "; def" landing directly in s[8:9] followed by one s_lshr_b32.
13122define void @s_shuffle_v3i16_v4i16__4_5_5() {
13123; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_5_5:
13124; GFX9:       ; %bb.0:
13125; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13126; GFX9-NEXT:    ;;#ASMSTART
13127; GFX9-NEXT:    ; def s[8:9]
13128; GFX9-NEXT:    ;;#ASMEND
13129; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
13130; GFX9-NEXT:    ;;#ASMSTART
13131; GFX9-NEXT:    ; use s[8:9]
13132; GFX9-NEXT:    ;;#ASMEND
13133; GFX9-NEXT:    s_setpc_b64 s[30:31]
13134  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13135  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13136  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
13137  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13138  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13139  ret void
13140}
13141
; Mask <5, 5, 5>: all three lanes are %vec1[1] (mask indices 4-7 address %vec1).
; %vec0 is not referenced by the mask; the autogenerated checks show only one "; def".
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: one s_lshr_b32 and one s_pack_hh_b32_b16 with both
; sources equal to the first register of the def pair.
13142define void @s_shuffle_v3i16_v4i16__5_5_5() {
13143; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_5_5:
13144; GFX900:       ; %bb.0:
13145; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13146; GFX900-NEXT:    ;;#ASMSTART
13147; GFX900-NEXT:    ; def s[4:5]
13148; GFX900-NEXT:    ;;#ASMEND
13149; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13150; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
13151; GFX900-NEXT:    ;;#ASMSTART
13152; GFX900-NEXT:    ; use s[8:9]
13153; GFX900-NEXT:    ;;#ASMEND
13154; GFX900-NEXT:    s_setpc_b64 s[30:31]
13155;
13156; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_5_5:
13157; GFX90A:       ; %bb.0:
13158; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13159; GFX90A-NEXT:    ;;#ASMSTART
13160; GFX90A-NEXT:    ; def s[4:5]
13161; GFX90A-NEXT:    ;;#ASMEND
13162; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13163; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
13164; GFX90A-NEXT:    ;;#ASMSTART
13165; GFX90A-NEXT:    ; use s[8:9]
13166; GFX90A-NEXT:    ;;#ASMEND
13167; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13168;
13169; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_5_5:
13170; GFX940:       ; %bb.0:
13171; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13172; GFX940-NEXT:    ;;#ASMSTART
13173; GFX940-NEXT:    ; def s[0:1]
13174; GFX940-NEXT:    ;;#ASMEND
13175; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13176; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s0
13177; GFX940-NEXT:    ;;#ASMSTART
13178; GFX940-NEXT:    ; use s[8:9]
13179; GFX940-NEXT:    ;;#ASMEND
13180; GFX940-NEXT:    s_setpc_b64 s[30:31]
13181  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13182  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13183  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
13184  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13185  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13186  ret void
13187}
13188
; Mask <6, 5, 5>: every lane selects from %vec1 (mask indices 4-7 address %vec1):
;   lane 0 <- %vec1[2]; lanes 1 and 2 <- %vec1[1]. %vec0 is not referenced by the mask,
;   and the autogenerated checks show only one "; def".
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: one s_pack_lh_b32_b16 and one s_lshr_b32.
13189define void @s_shuffle_v3i16_v4i16__6_5_5() {
13190; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_5_5:
13191; GFX900:       ; %bb.0:
13192; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13193; GFX900-NEXT:    ;;#ASMSTART
13194; GFX900-NEXT:    ; def s[4:5]
13195; GFX900-NEXT:    ;;#ASMEND
13196; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
13197; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13198; GFX900-NEXT:    ;;#ASMSTART
13199; GFX900-NEXT:    ; use s[8:9]
13200; GFX900-NEXT:    ;;#ASMEND
13201; GFX900-NEXT:    s_setpc_b64 s[30:31]
13202;
13203; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_5_5:
13204; GFX90A:       ; %bb.0:
13205; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13206; GFX90A-NEXT:    ;;#ASMSTART
13207; GFX90A-NEXT:    ; def s[4:5]
13208; GFX90A-NEXT:    ;;#ASMEND
13209; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
13210; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13211; GFX90A-NEXT:    ;;#ASMSTART
13212; GFX90A-NEXT:    ; use s[8:9]
13213; GFX90A-NEXT:    ;;#ASMEND
13214; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13215;
13216; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_5_5:
13217; GFX940:       ; %bb.0:
13218; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13219; GFX940-NEXT:    ;;#ASMSTART
13220; GFX940-NEXT:    ; def s[0:1]
13221; GFX940-NEXT:    ;;#ASMEND
13222; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
13223; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13224; GFX940-NEXT:    ;;#ASMSTART
13225; GFX940-NEXT:    ; use s[8:9]
13226; GFX940-NEXT:    ;;#ASMEND
13227; GFX940-NEXT:    s_setpc_b64 s[30:31]
13228  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13229  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13230  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
13231  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13232  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13233  ret void
13234}
13235
; Mask <7, 5, 5>: every lane selects from %vec1 (mask indices 4-7 address %vec1):
;   lane 0 <- %vec1[3]; lanes 1 and 2 <- %vec1[1]. %vec0 is not referenced by the mask,
;   and the autogenerated checks show only one "; def".
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: one s_pack_hh_b32_b16 and one s_lshr_b32.
13236define void @s_shuffle_v3i16_v4i16__7_5_5() {
13237; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_5:
13238; GFX900:       ; %bb.0:
13239; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13240; GFX900-NEXT:    ;;#ASMSTART
13241; GFX900-NEXT:    ; def s[4:5]
13242; GFX900-NEXT:    ;;#ASMEND
13243; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
13244; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13245; GFX900-NEXT:    ;;#ASMSTART
13246; GFX900-NEXT:    ; use s[8:9]
13247; GFX900-NEXT:    ;;#ASMEND
13248; GFX900-NEXT:    s_setpc_b64 s[30:31]
13249;
13250; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_5:
13251; GFX90A:       ; %bb.0:
13252; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13253; GFX90A-NEXT:    ;;#ASMSTART
13254; GFX90A-NEXT:    ; def s[4:5]
13255; GFX90A-NEXT:    ;;#ASMEND
13256; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
13257; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13258; GFX90A-NEXT:    ;;#ASMSTART
13259; GFX90A-NEXT:    ; use s[8:9]
13260; GFX90A-NEXT:    ;;#ASMEND
13261; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13262;
13263; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_5:
13264; GFX940:       ; %bb.0:
13265; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13266; GFX940-NEXT:    ;;#ASMSTART
13267; GFX940-NEXT:    ; def s[0:1]
13268; GFX940-NEXT:    ;;#ASMEND
13269; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
13270; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13271; GFX940-NEXT:    ;;#ASMSTART
13272; GFX940-NEXT:    ; use s[8:9]
13273; GFX940-NEXT:    ;;#ASMEND
13274; GFX940-NEXT:    s_setpc_b64 s[30:31]
13275  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13276  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13277  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
13278  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13279  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13280  ret void
13281}
13282
; Mask <7, poison, 5> (the "u" in the name is the poison lane):
;   lane 0 <- %vec1[3]; lane 1 is poison; lane 2 <- %vec1[1].
; %vec0 is not referenced by the mask; the autogenerated checks show only one "; def".
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: two s_lshr_b32 by 16 and no pack instruction.
13283define void @s_shuffle_v3i16_v4i16__7_u_5() {
13284; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_5:
13285; GFX900:       ; %bb.0:
13286; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13287; GFX900-NEXT:    ;;#ASMSTART
13288; GFX900-NEXT:    ; def s[4:5]
13289; GFX900-NEXT:    ;;#ASMEND
13290; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13291; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
13292; GFX900-NEXT:    ;;#ASMSTART
13293; GFX900-NEXT:    ; use s[8:9]
13294; GFX900-NEXT:    ;;#ASMEND
13295; GFX900-NEXT:    s_setpc_b64 s[30:31]
13296;
13297; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_5:
13298; GFX90A:       ; %bb.0:
13299; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13300; GFX90A-NEXT:    ;;#ASMSTART
13301; GFX90A-NEXT:    ; def s[4:5]
13302; GFX90A-NEXT:    ;;#ASMEND
13303; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13304; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
13305; GFX90A-NEXT:    ;;#ASMSTART
13306; GFX90A-NEXT:    ; use s[8:9]
13307; GFX90A-NEXT:    ;;#ASMEND
13308; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13309;
13310; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_5:
13311; GFX940:       ; %bb.0:
13312; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13313; GFX940-NEXT:    ;;#ASMSTART
13314; GFX940-NEXT:    ; def s[0:1]
13315; GFX940-NEXT:    ;;#ASMEND
13316; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13317; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
13318; GFX940-NEXT:    ;;#ASMSTART
13319; GFX940-NEXT:    ; use s[8:9]
13320; GFX940-NEXT:    ;;#ASMEND
13321; GFX940-NEXT:    s_setpc_b64 s[30:31]
13322  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13323  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13324  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
13325  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13326  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13327  ret void
13328}
13329
; Mask <7, 0, 5>, both inputs produced by "=s" inline asm:
;   lane 0 <- %vec1[3]; lane 1 <- %vec0[0]; lane 2 <- %vec1[1]
;   (mask indices 0-3 address %vec0, 4-7 address %vec1).
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: s_lshr_b32, s_pack_ll_b32_b16, s_lshr_b32.
13330define void @s_shuffle_v3i16_v4i16__7_0_5() {
13331; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_5:
13332; GFX900:       ; %bb.0:
13333; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13334; GFX900-NEXT:    ;;#ASMSTART
13335; GFX900-NEXT:    ; def s[4:5]
13336; GFX900-NEXT:    ;;#ASMEND
13337; GFX900-NEXT:    ;;#ASMSTART
13338; GFX900-NEXT:    ; def s[6:7]
13339; GFX900-NEXT:    ;;#ASMEND
13340; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
13341; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13342; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13343; GFX900-NEXT:    ;;#ASMSTART
13344; GFX900-NEXT:    ; use s[8:9]
13345; GFX900-NEXT:    ;;#ASMEND
13346; GFX900-NEXT:    s_setpc_b64 s[30:31]
13347;
13348; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_5:
13349; GFX90A:       ; %bb.0:
13350; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13351; GFX90A-NEXT:    ;;#ASMSTART
13352; GFX90A-NEXT:    ; def s[4:5]
13353; GFX90A-NEXT:    ;;#ASMEND
13354; GFX90A-NEXT:    ;;#ASMSTART
13355; GFX90A-NEXT:    ; def s[6:7]
13356; GFX90A-NEXT:    ;;#ASMEND
13357; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
13358; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13359; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13360; GFX90A-NEXT:    ;;#ASMSTART
13361; GFX90A-NEXT:    ; use s[8:9]
13362; GFX90A-NEXT:    ;;#ASMEND
13363; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13364;
13365; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_5:
13366; GFX940:       ; %bb.0:
13367; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13368; GFX940-NEXT:    ;;#ASMSTART
13369; GFX940-NEXT:    ; def s[0:1]
13370; GFX940-NEXT:    ;;#ASMEND
13371; GFX940-NEXT:    ;;#ASMSTART
13372; GFX940-NEXT:    ; def s[2:3]
13373; GFX940-NEXT:    ;;#ASMEND
13374; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
13375; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13376; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13377; GFX940-NEXT:    ;;#ASMSTART
13378; GFX940-NEXT:    ; use s[8:9]
13379; GFX940-NEXT:    ;;#ASMEND
13380; GFX940-NEXT:    s_setpc_b64 s[30:31]
13381  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13382  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13383  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
13384  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13385  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13386  ret void
13387}
13388
; Mask <7, 1, 5>, both inputs produced by "=s" inline asm:
;   lane 0 <- %vec1[3]; lane 1 <- %vec0[1]; lane 2 <- %vec1[1]
;   (mask indices 0-3 address %vec0, 4-7 address %vec1).
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: one s_pack_hh_b32_b16 and one s_lshr_b32.
13389define void @s_shuffle_v3i16_v4i16__7_1_5() {
13390; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_5:
13391; GFX900:       ; %bb.0:
13392; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13393; GFX900-NEXT:    ;;#ASMSTART
13394; GFX900-NEXT:    ; def s[4:5]
13395; GFX900-NEXT:    ;;#ASMEND
13396; GFX900-NEXT:    ;;#ASMSTART
13397; GFX900-NEXT:    ; def s[6:7]
13398; GFX900-NEXT:    ;;#ASMEND
13399; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
13400; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13401; GFX900-NEXT:    ;;#ASMSTART
13402; GFX900-NEXT:    ; use s[8:9]
13403; GFX900-NEXT:    ;;#ASMEND
13404; GFX900-NEXT:    s_setpc_b64 s[30:31]
13405;
13406; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_5:
13407; GFX90A:       ; %bb.0:
13408; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13409; GFX90A-NEXT:    ;;#ASMSTART
13410; GFX90A-NEXT:    ; def s[4:5]
13411; GFX90A-NEXT:    ;;#ASMEND
13412; GFX90A-NEXT:    ;;#ASMSTART
13413; GFX90A-NEXT:    ; def s[6:7]
13414; GFX90A-NEXT:    ;;#ASMEND
13415; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
13416; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13417; GFX90A-NEXT:    ;;#ASMSTART
13418; GFX90A-NEXT:    ; use s[8:9]
13419; GFX90A-NEXT:    ;;#ASMEND
13420; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13421;
13422; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_5:
13423; GFX940:       ; %bb.0:
13424; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13425; GFX940-NEXT:    ;;#ASMSTART
13426; GFX940-NEXT:    ; def s[0:1]
13427; GFX940-NEXT:    ;;#ASMEND
13428; GFX940-NEXT:    ;;#ASMSTART
13429; GFX940-NEXT:    ; def s[2:3]
13430; GFX940-NEXT:    ;;#ASMEND
13431; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
13432; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13433; GFX940-NEXT:    ;;#ASMSTART
13434; GFX940-NEXT:    ; use s[8:9]
13435; GFX940-NEXT:    ;;#ASMEND
13436; GFX940-NEXT:    s_setpc_b64 s[30:31]
13437  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13438  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13439  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
13440  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13441  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13442  ret void
13443}
13444
; Mask <7, 2, 5>, both inputs produced by "=s" inline asm:
;   lane 0 <- %vec1[3]; lane 1 <- %vec0[2]; lane 2 <- %vec1[1]
;   (mask indices 0-3 address %vec0, 4-7 address %vec1).
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: s_lshr_b32, s_pack_ll_b32_b16, s_lshr_b32.
13445define void @s_shuffle_v3i16_v4i16__7_2_5() {
13446; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_5:
13447; GFX900:       ; %bb.0:
13448; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13449; GFX900-NEXT:    ;;#ASMSTART
13450; GFX900-NEXT:    ; def s[4:5]
13451; GFX900-NEXT:    ;;#ASMEND
13452; GFX900-NEXT:    ;;#ASMSTART
13453; GFX900-NEXT:    ; def s[6:7]
13454; GFX900-NEXT:    ;;#ASMEND
13455; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
13456; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
13457; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13458; GFX900-NEXT:    ;;#ASMSTART
13459; GFX900-NEXT:    ; use s[8:9]
13460; GFX900-NEXT:    ;;#ASMEND
13461; GFX900-NEXT:    s_setpc_b64 s[30:31]
13462;
13463; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_5:
13464; GFX90A:       ; %bb.0:
13465; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13466; GFX90A-NEXT:    ;;#ASMSTART
13467; GFX90A-NEXT:    ; def s[4:5]
13468; GFX90A-NEXT:    ;;#ASMEND
13469; GFX90A-NEXT:    ;;#ASMSTART
13470; GFX90A-NEXT:    ; def s[6:7]
13471; GFX90A-NEXT:    ;;#ASMEND
13472; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
13473; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
13474; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13475; GFX90A-NEXT:    ;;#ASMSTART
13476; GFX90A-NEXT:    ; use s[8:9]
13477; GFX90A-NEXT:    ;;#ASMEND
13478; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13479;
13480; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_5:
13481; GFX940:       ; %bb.0:
13482; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13483; GFX940-NEXT:    ;;#ASMSTART
13484; GFX940-NEXT:    ; def s[0:1]
13485; GFX940-NEXT:    ;;#ASMEND
13486; GFX940-NEXT:    ;;#ASMSTART
13487; GFX940-NEXT:    ; def s[2:3]
13488; GFX940-NEXT:    ;;#ASMEND
13489; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
13490; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
13491; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13492; GFX940-NEXT:    ;;#ASMSTART
13493; GFX940-NEXT:    ; use s[8:9]
13494; GFX940-NEXT:    ;;#ASMEND
13495; GFX940-NEXT:    s_setpc_b64 s[30:31]
13496  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13497  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13498  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
13499  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13500  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13501  ret void
13502}
13503
; Mask <7, 3, 5>, both inputs produced by "=s" inline asm:
;   lane 0 <- %vec1[3]; lane 1 <- %vec0[3]; lane 2 <- %vec1[1]
;   (mask indices 0-3 address %vec0, 4-7 address %vec1).
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: one s_pack_hh_b32_b16 and one s_lshr_b32.
13504define void @s_shuffle_v3i16_v4i16__7_3_5() {
13505; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_5:
13506; GFX900:       ; %bb.0:
13507; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13508; GFX900-NEXT:    ;;#ASMSTART
13509; GFX900-NEXT:    ; def s[4:5]
13510; GFX900-NEXT:    ;;#ASMEND
13511; GFX900-NEXT:    ;;#ASMSTART
13512; GFX900-NEXT:    ; def s[6:7]
13513; GFX900-NEXT:    ;;#ASMEND
13514; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
13515; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13516; GFX900-NEXT:    ;;#ASMSTART
13517; GFX900-NEXT:    ; use s[8:9]
13518; GFX900-NEXT:    ;;#ASMEND
13519; GFX900-NEXT:    s_setpc_b64 s[30:31]
13520;
13521; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_5:
13522; GFX90A:       ; %bb.0:
13523; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13524; GFX90A-NEXT:    ;;#ASMSTART
13525; GFX90A-NEXT:    ; def s[4:5]
13526; GFX90A-NEXT:    ;;#ASMEND
13527; GFX90A-NEXT:    ;;#ASMSTART
13528; GFX90A-NEXT:    ; def s[6:7]
13529; GFX90A-NEXT:    ;;#ASMEND
13530; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
13531; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13532; GFX90A-NEXT:    ;;#ASMSTART
13533; GFX90A-NEXT:    ; use s[8:9]
13534; GFX90A-NEXT:    ;;#ASMEND
13535; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13536;
13537; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_5:
13538; GFX940:       ; %bb.0:
13539; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13540; GFX940-NEXT:    ;;#ASMSTART
13541; GFX940-NEXT:    ; def s[0:1]
13542; GFX940-NEXT:    ;;#ASMEND
13543; GFX940-NEXT:    ;;#ASMSTART
13544; GFX940-NEXT:    ; def s[2:3]
13545; GFX940-NEXT:    ;;#ASMEND
13546; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
13547; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13548; GFX940-NEXT:    ;;#ASMSTART
13549; GFX940-NEXT:    ; use s[8:9]
13550; GFX940-NEXT:    ;;#ASMEND
13551; GFX940-NEXT:    s_setpc_b64 s[30:31]
13552  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13553  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13554  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
13555  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13556  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13557  ret void
13558}
13559
; Mask <7, 4, 5>: every lane selects from %vec1 (mask indices 4-7 address %vec1):
;   lane 0 <- %vec1[3]; lane 1 <- %vec1[0]; lane 2 <- %vec1[1].
; %vec0 is not referenced by the mask; the autogenerated checks show only one "; def".
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: s_lshr_b32, s_pack_ll_b32_b16, s_lshr_b32.
13560define void @s_shuffle_v3i16_v4i16__7_4_5() {
13561; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_5:
13562; GFX900:       ; %bb.0:
13563; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13564; GFX900-NEXT:    ;;#ASMSTART
13565; GFX900-NEXT:    ; def s[4:5]
13566; GFX900-NEXT:    ;;#ASMEND
13567; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
13568; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13569; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13570; GFX900-NEXT:    ;;#ASMSTART
13571; GFX900-NEXT:    ; use s[8:9]
13572; GFX900-NEXT:    ;;#ASMEND
13573; GFX900-NEXT:    s_setpc_b64 s[30:31]
13574;
13575; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_5:
13576; GFX90A:       ; %bb.0:
13577; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13578; GFX90A-NEXT:    ;;#ASMSTART
13579; GFX90A-NEXT:    ; def s[4:5]
13580; GFX90A-NEXT:    ;;#ASMEND
13581; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
13582; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13583; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13584; GFX90A-NEXT:    ;;#ASMSTART
13585; GFX90A-NEXT:    ; use s[8:9]
13586; GFX90A-NEXT:    ;;#ASMEND
13587; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13588;
13589; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_5:
13590; GFX940:       ; %bb.0:
13591; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13592; GFX940-NEXT:    ;;#ASMSTART
13593; GFX940-NEXT:    ; def s[0:1]
13594; GFX940-NEXT:    ;;#ASMEND
13595; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
13596; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13597; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13598; GFX940-NEXT:    ;;#ASMSTART
13599; GFX940-NEXT:    ; use s[8:9]
13600; GFX940-NEXT:    ;;#ASMEND
13601; GFX940-NEXT:    s_setpc_b64 s[30:31]
13602  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13603  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13604  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
13605  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13606  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13607  ret void
13608}
13609
; Mask <7, 6, 5>: every lane selects from %vec1 (mask indices 4-7 address %vec1):
;   lane 0 <- %vec1[3]; lane 1 <- %vec1[2]; lane 2 <- %vec1[1].
; %vec0 is not referenced by the mask; the autogenerated checks show only one "; def".
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: s_lshr_b32 into a scratch register, s_pack_ll_b32_b16,
; then s_lshr_b32 into s9.
13610define void @s_shuffle_v3i16_v4i16__7_6_5() {
13611; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_5:
13612; GFX900:       ; %bb.0:
13613; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13614; GFX900-NEXT:    ;;#ASMSTART
13615; GFX900-NEXT:    ; def s[4:5]
13616; GFX900-NEXT:    ;;#ASMEND
13617; GFX900-NEXT:    s_lshr_b32 s6, s5, 16
13618; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
13619; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13620; GFX900-NEXT:    ;;#ASMSTART
13621; GFX900-NEXT:    ; use s[8:9]
13622; GFX900-NEXT:    ;;#ASMEND
13623; GFX900-NEXT:    s_setpc_b64 s[30:31]
13624;
13625; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_5:
13626; GFX90A:       ; %bb.0:
13627; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13628; GFX90A-NEXT:    ;;#ASMSTART
13629; GFX90A-NEXT:    ; def s[4:5]
13630; GFX90A-NEXT:    ;;#ASMEND
13631; GFX90A-NEXT:    s_lshr_b32 s6, s5, 16
13632; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
13633; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13634; GFX90A-NEXT:    ;;#ASMSTART
13635; GFX90A-NEXT:    ; use s[8:9]
13636; GFX90A-NEXT:    ;;#ASMEND
13637; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13638;
13639; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_5:
13640; GFX940:       ; %bb.0:
13641; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13642; GFX940-NEXT:    ;;#ASMSTART
13643; GFX940-NEXT:    ; def s[0:1]
13644; GFX940-NEXT:    ;;#ASMEND
13645; GFX940-NEXT:    s_lshr_b32 s2, s1, 16
13646; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
13647; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13648; GFX940-NEXT:    ;;#ASMSTART
13649; GFX940-NEXT:    ; use s[8:9]
13650; GFX940-NEXT:    ;;#ASMEND
13651; GFX940-NEXT:    s_setpc_b64 s[30:31]
13652  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13653  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13654  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
13655  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13656  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13657  ret void
13658}
13659
; Mask <poison, 6, 6> (the "u" in the name is the poison lane):
;   lane 0 is poison; lanes 1 and 2 <- %vec1[2] (mask indices 4-7 address %vec1).
; %vec0 is not referenced by the mask.
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; All three RUN lines share the GFX9 prefix here; the autogenerated checks show a
; single "; def" landing directly in s[8:9] followed by one s_lshl_b32 into s8.
13660define void @s_shuffle_v3i16_v4i16__u_6_6() {
13661; GFX9-LABEL: s_shuffle_v3i16_v4i16__u_6_6:
13662; GFX9:       ; %bb.0:
13663; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13664; GFX9-NEXT:    ;;#ASMSTART
13665; GFX9-NEXT:    ; def s[8:9]
13666; GFX9-NEXT:    ;;#ASMEND
13667; GFX9-NEXT:    s_lshl_b32 s8, s9, 16
13668; GFX9-NEXT:    ;;#ASMSTART
13669; GFX9-NEXT:    ; use s[8:9]
13670; GFX9-NEXT:    ;;#ASMEND
13671; GFX9-NEXT:    s_setpc_b64 s[30:31]
13672  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13673  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13674  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
13675  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13676  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13677  ret void
13678}
13679
; Mask <0, 6, 6>, both inputs produced by "=s" inline asm:
;   lane 0 <- %vec0[0]; lanes 1 and 2 <- %vec1[2]
;   (mask indices 0-3 address %vec0, 4-7 address %vec1).
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: one def lands directly in s[8:9] and a single
; s_pack_ll_b32_b16 rewrites s8; no instruction writes s9 after its def.
13680define void @s_shuffle_v3i16_v4i16__0_6_6() {
13681; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_6_6:
13682; GFX900:       ; %bb.0:
13683; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13684; GFX900-NEXT:    ;;#ASMSTART
13685; GFX900-NEXT:    ; def s[8:9]
13686; GFX900-NEXT:    ;;#ASMEND
13687; GFX900-NEXT:    ;;#ASMSTART
13688; GFX900-NEXT:    ; def s[4:5]
13689; GFX900-NEXT:    ;;#ASMEND
13690; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13691; GFX900-NEXT:    ;;#ASMSTART
13692; GFX900-NEXT:    ; use s[8:9]
13693; GFX900-NEXT:    ;;#ASMEND
13694; GFX900-NEXT:    s_setpc_b64 s[30:31]
13695;
13696; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_6_6:
13697; GFX90A:       ; %bb.0:
13698; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13699; GFX90A-NEXT:    ;;#ASMSTART
13700; GFX90A-NEXT:    ; def s[8:9]
13701; GFX90A-NEXT:    ;;#ASMEND
13702; GFX90A-NEXT:    ;;#ASMSTART
13703; GFX90A-NEXT:    ; def s[4:5]
13704; GFX90A-NEXT:    ;;#ASMEND
13705; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13706; GFX90A-NEXT:    ;;#ASMSTART
13707; GFX90A-NEXT:    ; use s[8:9]
13708; GFX90A-NEXT:    ;;#ASMEND
13709; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13710;
13711; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_6_6:
13712; GFX940:       ; %bb.0:
13713; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13714; GFX940-NEXT:    ;;#ASMSTART
13715; GFX940-NEXT:    ; def s[8:9]
13716; GFX940-NEXT:    ;;#ASMEND
13717; GFX940-NEXT:    ;;#ASMSTART
13718; GFX940-NEXT:    ; def s[0:1]
13719; GFX940-NEXT:    ;;#ASMEND
13720; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
13721; GFX940-NEXT:    ;;#ASMSTART
13722; GFX940-NEXT:    ; use s[8:9]
13723; GFX940-NEXT:    ;;#ASMEND
13724; GFX940-NEXT:    s_setpc_b64 s[30:31]
13725  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13726  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13727  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
13728  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13729  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13730  ret void
13731}
13732
; Mask <1, 6, 6>, both inputs produced by "=s" inline asm:
;   lane 0 <- %vec0[1]; lanes 1 and 2 <- %vec1[2]
;   (mask indices 0-3 address %vec0, 4-7 address %vec1).
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: one def lands directly in s[8:9], then s_lshr_b32 and
; s_pack_ll_b32_b16 rewrite s8; no instruction writes s9 after its def.
13733define void @s_shuffle_v3i16_v4i16__1_6_6() {
13734; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_6_6:
13735; GFX900:       ; %bb.0:
13736; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13737; GFX900-NEXT:    ;;#ASMSTART
13738; GFX900-NEXT:    ; def s[4:5]
13739; GFX900-NEXT:    ;;#ASMEND
13740; GFX900-NEXT:    ;;#ASMSTART
13741; GFX900-NEXT:    ; def s[8:9]
13742; GFX900-NEXT:    ;;#ASMEND
13743; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
13744; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13745; GFX900-NEXT:    ;;#ASMSTART
13746; GFX900-NEXT:    ; use s[8:9]
13747; GFX900-NEXT:    ;;#ASMEND
13748; GFX900-NEXT:    s_setpc_b64 s[30:31]
13749;
13750; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_6_6:
13751; GFX90A:       ; %bb.0:
13752; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13753; GFX90A-NEXT:    ;;#ASMSTART
13754; GFX90A-NEXT:    ; def s[4:5]
13755; GFX90A-NEXT:    ;;#ASMEND
13756; GFX90A-NEXT:    ;;#ASMSTART
13757; GFX90A-NEXT:    ; def s[8:9]
13758; GFX90A-NEXT:    ;;#ASMEND
13759; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
13760; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13761; GFX90A-NEXT:    ;;#ASMSTART
13762; GFX90A-NEXT:    ; use s[8:9]
13763; GFX90A-NEXT:    ;;#ASMEND
13764; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13765;
13766; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_6_6:
13767; GFX940:       ; %bb.0:
13768; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13769; GFX940-NEXT:    ;;#ASMSTART
13770; GFX940-NEXT:    ; def s[0:1]
13771; GFX940-NEXT:    ;;#ASMEND
13772; GFX940-NEXT:    ;;#ASMSTART
13773; GFX940-NEXT:    ; def s[8:9]
13774; GFX940-NEXT:    ;;#ASMEND
13775; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
13776; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
13777; GFX940-NEXT:    ;;#ASMSTART
13778; GFX940-NEXT:    ; use s[8:9]
13779; GFX940-NEXT:    ;;#ASMEND
13780; GFX940-NEXT:    s_setpc_b64 s[30:31]
13781  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13782  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13783  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
13784  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13785  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13786  ret void
13787}
13788
; Mask <2, 6, 6>, both inputs produced by "=s" inline asm:
;   lane 0 <- %vec0[2]; lanes 1 and 2 <- %vec1[2]
;   (mask indices 0-3 address %vec0, 4-7 address %vec1).
; %extend3 pads the result to <4 x i16> (lane 3 poison) for the asm use pinned to s[8:9].
; Expected output per target: one def lands directly in s[8:9] and a single
; s_pack_ll_b32_b16 rewrites s8; no instruction writes s9 after its def.
13789define void @s_shuffle_v3i16_v4i16__2_6_6() {
13790; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_6_6:
13791; GFX900:       ; %bb.0:
13792; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13793; GFX900-NEXT:    ;;#ASMSTART
13794; GFX900-NEXT:    ; def s[8:9]
13795; GFX900-NEXT:    ;;#ASMEND
13796; GFX900-NEXT:    ;;#ASMSTART
13797; GFX900-NEXT:    ; def s[4:5]
13798; GFX900-NEXT:    ;;#ASMEND
13799; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
13800; GFX900-NEXT:    ;;#ASMSTART
13801; GFX900-NEXT:    ; use s[8:9]
13802; GFX900-NEXT:    ;;#ASMEND
13803; GFX900-NEXT:    s_setpc_b64 s[30:31]
13804;
13805; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_6_6:
13806; GFX90A:       ; %bb.0:
13807; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13808; GFX90A-NEXT:    ;;#ASMSTART
13809; GFX90A-NEXT:    ; def s[8:9]
13810; GFX90A-NEXT:    ;;#ASMEND
13811; GFX90A-NEXT:    ;;#ASMSTART
13812; GFX90A-NEXT:    ; def s[4:5]
13813; GFX90A-NEXT:    ;;#ASMEND
13814; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
13815; GFX90A-NEXT:    ;;#ASMSTART
13816; GFX90A-NEXT:    ; use s[8:9]
13817; GFX90A-NEXT:    ;;#ASMEND
13818; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13819;
13820; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_6_6:
13821; GFX940:       ; %bb.0:
13822; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13823; GFX940-NEXT:    ;;#ASMSTART
13824; GFX940-NEXT:    ; def s[8:9]
13825; GFX940-NEXT:    ;;#ASMEND
13826; GFX940-NEXT:    ;;#ASMSTART
13827; GFX940-NEXT:    ; def s[0:1]
13828; GFX940-NEXT:    ;;#ASMEND
13829; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
13830; GFX940-NEXT:    ;;#ASMSTART
13831; GFX940-NEXT:    ; use s[8:9]
13832; GFX940-NEXT:    ;;#ASMEND
13833; GFX940-NEXT:    s_setpc_b64 s[30:31]
13834  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13835  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13836  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
13837  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13838  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13839  ret void
13840}
13841
; SGPR shuffle with mask <3, 6, 6>: result lane 0 is %vec0[3]; result lanes 1
; and 2 are both %vec1[2] (mask values 4-7 index the second operand).
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect an s_lshr_b32 by 16 followed by s_pack_ll_b32_b16 into s8;
; gfx940 differs only in the registers used for the other def and the
; temporary (s[0:1]/s0 rather than s[4:5]/s4).
13842define void @s_shuffle_v3i16_v4i16__3_6_6() {
13843; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_6_6:
13844; GFX900:       ; %bb.0:
13845; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13846; GFX900-NEXT:    ;;#ASMSTART
13847; GFX900-NEXT:    ; def s[4:5]
13848; GFX900-NEXT:    ;;#ASMEND
13849; GFX900-NEXT:    ;;#ASMSTART
13850; GFX900-NEXT:    ; def s[8:9]
13851; GFX900-NEXT:    ;;#ASMEND
13852; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
13853; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13854; GFX900-NEXT:    ;;#ASMSTART
13855; GFX900-NEXT:    ; use s[8:9]
13856; GFX900-NEXT:    ;;#ASMEND
13857; GFX900-NEXT:    s_setpc_b64 s[30:31]
13858;
13859; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_6_6:
13860; GFX90A:       ; %bb.0:
13861; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13862; GFX90A-NEXT:    ;;#ASMSTART
13863; GFX90A-NEXT:    ; def s[4:5]
13864; GFX90A-NEXT:    ;;#ASMEND
13865; GFX90A-NEXT:    ;;#ASMSTART
13866; GFX90A-NEXT:    ; def s[8:9]
13867; GFX90A-NEXT:    ;;#ASMEND
13868; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
13869; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13870; GFX90A-NEXT:    ;;#ASMSTART
13871; GFX90A-NEXT:    ; use s[8:9]
13872; GFX90A-NEXT:    ;;#ASMEND
13873; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13874;
13875; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_6_6:
13876; GFX940:       ; %bb.0:
13877; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13878; GFX940-NEXT:    ;;#ASMSTART
13879; GFX940-NEXT:    ; def s[0:1]
13880; GFX940-NEXT:    ;;#ASMEND
13881; GFX940-NEXT:    ;;#ASMSTART
13882; GFX940-NEXT:    ; def s[8:9]
13883; GFX940-NEXT:    ;;#ASMEND
13884; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
13885; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
13886; GFX940-NEXT:    ;;#ASMSTART
13887; GFX940-NEXT:    ; use s[8:9]
13888; GFX940-NEXT:    ;;#ASMEND
13889; GFX940-NEXT:    s_setpc_b64 s[30:31]
13890  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13891  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13892  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
13893  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13894  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13895  ret void
13896}
13897
; SGPR shuffle with mask <4, 6, 6>: result lane 0 is %vec1[0]; result lanes 1
; and 2 are both %vec1[2] (mask values 4-7 index the second operand).
; No lane of %vec0 is selected and its asm call is not marked sideeffect, so
; the checks contain only one "def".
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The expected code (one s_pack_ll_b32_b16 into s8) is the same for all three
; targets, so one shared check prefix is used.
13898define void @s_shuffle_v3i16_v4i16__4_6_6() {
13899; GFX9-LABEL: s_shuffle_v3i16_v4i16__4_6_6:
13900; GFX9:       ; %bb.0:
13901; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13902; GFX9-NEXT:    ;;#ASMSTART
13903; GFX9-NEXT:    ; def s[8:9]
13904; GFX9-NEXT:    ;;#ASMEND
13905; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s8, s9
13906; GFX9-NEXT:    ;;#ASMSTART
13907; GFX9-NEXT:    ; use s[8:9]
13908; GFX9-NEXT:    ;;#ASMEND
13909; GFX9-NEXT:    s_setpc_b64 s[30:31]
13910  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13911  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13912  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
13913  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13914  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13915  ret void
13916}
13917
; SGPR shuffle with mask <5, 6, 6>: result lane 0 is %vec1[1]; result lanes 1
; and 2 are both %vec1[2] (mask values 4-7 index the second operand).
; No lane of %vec0 is selected and its asm call is not marked sideeffect, so
; the checks contain only one "def".
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect an s_lshr_b32 by 16 followed by s_pack_ll_b32_b16 into s8;
; gfx940 uses s0 for the temporary where the other two targets use s4.
13918define void @s_shuffle_v3i16_v4i16__5_6_6() {
13919; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_6_6:
13920; GFX900:       ; %bb.0:
13921; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13922; GFX900-NEXT:    ;;#ASMSTART
13923; GFX900-NEXT:    ; def s[8:9]
13924; GFX900-NEXT:    ;;#ASMEND
13925; GFX900-NEXT:    s_lshr_b32 s4, s8, 16
13926; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13927; GFX900-NEXT:    ;;#ASMSTART
13928; GFX900-NEXT:    ; use s[8:9]
13929; GFX900-NEXT:    ;;#ASMEND
13930; GFX900-NEXT:    s_setpc_b64 s[30:31]
13931;
13932; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_6_6:
13933; GFX90A:       ; %bb.0:
13934; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13935; GFX90A-NEXT:    ;;#ASMSTART
13936; GFX90A-NEXT:    ; def s[8:9]
13937; GFX90A-NEXT:    ;;#ASMEND
13938; GFX90A-NEXT:    s_lshr_b32 s4, s8, 16
13939; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13940; GFX90A-NEXT:    ;;#ASMSTART
13941; GFX90A-NEXT:    ; use s[8:9]
13942; GFX90A-NEXT:    ;;#ASMEND
13943; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13944;
13945; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_6_6:
13946; GFX940:       ; %bb.0:
13947; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13948; GFX940-NEXT:    ;;#ASMSTART
13949; GFX940-NEXT:    ; def s[8:9]
13950; GFX940-NEXT:    ;;#ASMEND
13951; GFX940-NEXT:    s_lshr_b32 s0, s8, 16
13952; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
13953; GFX940-NEXT:    ;;#ASMSTART
13954; GFX940-NEXT:    ; use s[8:9]
13955; GFX940-NEXT:    ;;#ASMEND
13956; GFX940-NEXT:    s_setpc_b64 s[30:31]
13957  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13958  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13959  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
13960  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13961  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13962  ret void
13963}
13964
; SGPR shuffle with mask <6, 6, 6>: all three result lanes are %vec1[2]
; (mask values 4-7 index the second operand), i.e. a splat of that lane.
; No lane of %vec0 is selected and its asm call is not marked sideeffect, so
; the checks contain only one "def".
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The expected code (one s_pack_ll_b32_b16 with s9 as both sources) is the
; same for all three targets, so one shared check prefix is used.
13965define void @s_shuffle_v3i16_v4i16__6_6_6() {
13966; GFX9-LABEL: s_shuffle_v3i16_v4i16__6_6_6:
13967; GFX9:       ; %bb.0:
13968; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13969; GFX9-NEXT:    ;;#ASMSTART
13970; GFX9-NEXT:    ; def s[8:9]
13971; GFX9-NEXT:    ;;#ASMEND
13972; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
13973; GFX9-NEXT:    ;;#ASMSTART
13974; GFX9-NEXT:    ; use s[8:9]
13975; GFX9-NEXT:    ;;#ASMEND
13976; GFX9-NEXT:    s_setpc_b64 s[30:31]
13977  %vec0 = call <4 x i16> asm "; def $0", "=s"()
13978  %vec1 = call <4 x i16> asm "; def $0", "=s"()
13979  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
13980  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13981  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
13982  ret void
13983}
13984
; SGPR shuffle with mask <7, 6, 6>: result lane 0 is %vec1[3]; result lanes 1
; and 2 are both %vec1[2] (mask values 4-7 index the second operand).
; No lane of %vec0 is selected and its asm call is not marked sideeffect, so
; the checks contain only one "def".
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect an s_lshr_b32 of s9 by 16 followed by s_pack_ll_b32_b16
; into s8; gfx940 uses s0 for the temporary where the others use s4.
13985define void @s_shuffle_v3i16_v4i16__7_6_6() {
13986; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_6:
13987; GFX900:       ; %bb.0:
13988; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13989; GFX900-NEXT:    ;;#ASMSTART
13990; GFX900-NEXT:    ; def s[8:9]
13991; GFX900-NEXT:    ;;#ASMEND
13992; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
13993; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13994; GFX900-NEXT:    ;;#ASMSTART
13995; GFX900-NEXT:    ; use s[8:9]
13996; GFX900-NEXT:    ;;#ASMEND
13997; GFX900-NEXT:    s_setpc_b64 s[30:31]
13998;
13999; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_6:
14000; GFX90A:       ; %bb.0:
14001; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14002; GFX90A-NEXT:    ;;#ASMSTART
14003; GFX90A-NEXT:    ; def s[8:9]
14004; GFX90A-NEXT:    ;;#ASMEND
14005; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
14006; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14007; GFX90A-NEXT:    ;;#ASMSTART
14008; GFX90A-NEXT:    ; use s[8:9]
14009; GFX90A-NEXT:    ;;#ASMEND
14010; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14011;
14012; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_6:
14013; GFX940:       ; %bb.0:
14014; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14015; GFX940-NEXT:    ;;#ASMSTART
14016; GFX940-NEXT:    ; def s[8:9]
14017; GFX940-NEXT:    ;;#ASMEND
14018; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
14019; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14020; GFX940-NEXT:    ;;#ASMSTART
14021; GFX940-NEXT:    ; use s[8:9]
14022; GFX940-NEXT:    ;;#ASMEND
14023; GFX940-NEXT:    s_setpc_b64 s[30:31]
14024  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14025  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14026  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
14027  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14028  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14029  ret void
14030}
14031
; SGPR shuffle with mask <7, poison, 6>: result lane 0 is %vec1[3], result
; lane 1 is unconstrained, and result lane 2 is %vec1[2] (mask values 4-7
; index the second operand).
; No lane of %vec0 is selected and its asm call is not marked sideeffect, so
; the checks contain only one "def".
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The expected code is just an s_lshr_b32 of s9 by 16 into s8 (no pack
; instruction), identical on all three targets, hence one shared check prefix.
14032define void @s_shuffle_v3i16_v4i16__7_u_6() {
14033; GFX9-LABEL: s_shuffle_v3i16_v4i16__7_u_6:
14034; GFX9:       ; %bb.0:
14035; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14036; GFX9-NEXT:    ;;#ASMSTART
14037; GFX9-NEXT:    ; def s[8:9]
14038; GFX9-NEXT:    ;;#ASMEND
14039; GFX9-NEXT:    s_lshr_b32 s8, s9, 16
14040; GFX9-NEXT:    ;;#ASMSTART
14041; GFX9-NEXT:    ; use s[8:9]
14042; GFX9-NEXT:    ;;#ASMEND
14043; GFX9-NEXT:    s_setpc_b64 s[30:31]
14044  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14045  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14046  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
14047  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14048  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14049  ret void
14050}
14051
; SGPR shuffle with mask <7, 0, 6>: result lane 0 is %vec1[3], result lane 1
; is %vec0[0], and result lane 2 is %vec1[2] (mask values 0-3 index the first
; operand, 4-7 the second).
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect an s_lshr_b32 of s9 by 16 followed by s_pack_ll_b32_b16
; into s8; gfx940 differs only in using s[0:1] where the others use s[4:5].
14052define void @s_shuffle_v3i16_v4i16__7_0_6() {
14053; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_6:
14054; GFX900:       ; %bb.0:
14055; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14056; GFX900-NEXT:    ;;#ASMSTART
14057; GFX900-NEXT:    ; def s[4:5]
14058; GFX900-NEXT:    ;;#ASMEND
14059; GFX900-NEXT:    ;;#ASMSTART
14060; GFX900-NEXT:    ; def s[8:9]
14061; GFX900-NEXT:    ;;#ASMEND
14062; GFX900-NEXT:    s_lshr_b32 s5, s9, 16
14063; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14064; GFX900-NEXT:    ;;#ASMSTART
14065; GFX900-NEXT:    ; use s[8:9]
14066; GFX900-NEXT:    ;;#ASMEND
14067; GFX900-NEXT:    s_setpc_b64 s[30:31]
14068;
14069; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_6:
14070; GFX90A:       ; %bb.0:
14071; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14072; GFX90A-NEXT:    ;;#ASMSTART
14073; GFX90A-NEXT:    ; def s[4:5]
14074; GFX90A-NEXT:    ;;#ASMEND
14075; GFX90A-NEXT:    ;;#ASMSTART
14076; GFX90A-NEXT:    ; def s[8:9]
14077; GFX90A-NEXT:    ;;#ASMEND
14078; GFX90A-NEXT:    s_lshr_b32 s5, s9, 16
14079; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14080; GFX90A-NEXT:    ;;#ASMSTART
14081; GFX90A-NEXT:    ; use s[8:9]
14082; GFX90A-NEXT:    ;;#ASMEND
14083; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14084;
14085; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_6:
14086; GFX940:       ; %bb.0:
14087; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14088; GFX940-NEXT:    ;;#ASMSTART
14089; GFX940-NEXT:    ; def s[0:1]
14090; GFX940-NEXT:    ;;#ASMEND
14091; GFX940-NEXT:    ;;#ASMSTART
14092; GFX940-NEXT:    ; def s[8:9]
14093; GFX940-NEXT:    ;;#ASMEND
14094; GFX940-NEXT:    s_lshr_b32 s1, s9, 16
14095; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
14096; GFX940-NEXT:    ;;#ASMSTART
14097; GFX940-NEXT:    ; use s[8:9]
14098; GFX940-NEXT:    ;;#ASMEND
14099; GFX940-NEXT:    s_setpc_b64 s[30:31]
14100  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14101  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14102  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 6>
14103  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14104  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14105  ret void
14106}
14107
; SGPR shuffle with mask <7, 1, 6>: result lane 0 is %vec1[3], result lane 1
; is %vec0[1], and result lane 2 is %vec1[2] (mask values 0-3 index the first
; operand, 4-7 the second).
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect a single s_pack_hh_b32_b16 into s8 with no separate
; shift; gfx940 differs only in using s[0:1] where the others use s[4:5].
14108define void @s_shuffle_v3i16_v4i16__7_1_6() {
14109; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_6:
14110; GFX900:       ; %bb.0:
14111; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14112; GFX900-NEXT:    ;;#ASMSTART
14113; GFX900-NEXT:    ; def s[8:9]
14114; GFX900-NEXT:    ;;#ASMEND
14115; GFX900-NEXT:    ;;#ASMSTART
14116; GFX900-NEXT:    ; def s[4:5]
14117; GFX900-NEXT:    ;;#ASMEND
14118; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s9, s4
14119; GFX900-NEXT:    ;;#ASMSTART
14120; GFX900-NEXT:    ; use s[8:9]
14121; GFX900-NEXT:    ;;#ASMEND
14122; GFX900-NEXT:    s_setpc_b64 s[30:31]
14123;
14124; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_6:
14125; GFX90A:       ; %bb.0:
14126; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14127; GFX90A-NEXT:    ;;#ASMSTART
14128; GFX90A-NEXT:    ; def s[8:9]
14129; GFX90A-NEXT:    ;;#ASMEND
14130; GFX90A-NEXT:    ;;#ASMSTART
14131; GFX90A-NEXT:    ; def s[4:5]
14132; GFX90A-NEXT:    ;;#ASMEND
14133; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s9, s4
14134; GFX90A-NEXT:    ;;#ASMSTART
14135; GFX90A-NEXT:    ; use s[8:9]
14136; GFX90A-NEXT:    ;;#ASMEND
14137; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14138;
14139; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_6:
14140; GFX940:       ; %bb.0:
14141; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14142; GFX940-NEXT:    ;;#ASMSTART
14143; GFX940-NEXT:    ; def s[8:9]
14144; GFX940-NEXT:    ;;#ASMEND
14145; GFX940-NEXT:    ;;#ASMSTART
14146; GFX940-NEXT:    ; def s[0:1]
14147; GFX940-NEXT:    ;;#ASMEND
14148; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s9, s0
14149; GFX940-NEXT:    ;;#ASMSTART
14150; GFX940-NEXT:    ; use s[8:9]
14151; GFX940-NEXT:    ;;#ASMEND
14152; GFX940-NEXT:    s_setpc_b64 s[30:31]
14153  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14154  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14155  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 6>
14156  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14157  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14158  ret void
14159}
14160
; SGPR shuffle with mask <7, 2, 6>: result lane 0 is %vec1[3], result lane 1
; is %vec0[2], and result lane 2 is %vec1[2] (mask values 0-3 index the first
; operand, 4-7 the second).
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect an s_lshr_b32 of s9 by 16 followed by s_pack_ll_b32_b16
; into s8; gfx940 differs only in using s[0:1] where the others use s[4:5].
14161define void @s_shuffle_v3i16_v4i16__7_2_6() {
14162; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_6:
14163; GFX900:       ; %bb.0:
14164; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14165; GFX900-NEXT:    ;;#ASMSTART
14166; GFX900-NEXT:    ; def s[4:5]
14167; GFX900-NEXT:    ;;#ASMEND
14168; GFX900-NEXT:    ;;#ASMSTART
14169; GFX900-NEXT:    ; def s[8:9]
14170; GFX900-NEXT:    ;;#ASMEND
14171; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
14172; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
14173; GFX900-NEXT:    ;;#ASMSTART
14174; GFX900-NEXT:    ; use s[8:9]
14175; GFX900-NEXT:    ;;#ASMEND
14176; GFX900-NEXT:    s_setpc_b64 s[30:31]
14177;
14178; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_6:
14179; GFX90A:       ; %bb.0:
14180; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14181; GFX90A-NEXT:    ;;#ASMSTART
14182; GFX90A-NEXT:    ; def s[4:5]
14183; GFX90A-NEXT:    ;;#ASMEND
14184; GFX90A-NEXT:    ;;#ASMSTART
14185; GFX90A-NEXT:    ; def s[8:9]
14186; GFX90A-NEXT:    ;;#ASMEND
14187; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
14188; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
14189; GFX90A-NEXT:    ;;#ASMSTART
14190; GFX90A-NEXT:    ; use s[8:9]
14191; GFX90A-NEXT:    ;;#ASMEND
14192; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14193;
14194; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_6:
14195; GFX940:       ; %bb.0:
14196; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14197; GFX940-NEXT:    ;;#ASMSTART
14198; GFX940-NEXT:    ; def s[0:1]
14199; GFX940-NEXT:    ;;#ASMEND
14200; GFX940-NEXT:    ;;#ASMSTART
14201; GFX940-NEXT:    ; def s[8:9]
14202; GFX940-NEXT:    ;;#ASMEND
14203; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
14204; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
14205; GFX940-NEXT:    ;;#ASMSTART
14206; GFX940-NEXT:    ; use s[8:9]
14207; GFX940-NEXT:    ;;#ASMEND
14208; GFX940-NEXT:    s_setpc_b64 s[30:31]
14209  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14210  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14211  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 6>
14212  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14213  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14214  ret void
14215}
14216
; SGPR shuffle with mask <7, 3, 6>: result lane 0 is %vec1[3], result lane 1
; is %vec0[3], and result lane 2 is %vec1[2] (mask values 0-3 index the first
; operand, 4-7 the second).
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect a single s_pack_hh_b32_b16 into s8 with no separate
; shift; gfx940 differs only in using s[0:1] where the others use s[4:5].
14217define void @s_shuffle_v3i16_v4i16__7_3_6() {
14218; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_6:
14219; GFX900:       ; %bb.0:
14220; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14221; GFX900-NEXT:    ;;#ASMSTART
14222; GFX900-NEXT:    ; def s[8:9]
14223; GFX900-NEXT:    ;;#ASMEND
14224; GFX900-NEXT:    ;;#ASMSTART
14225; GFX900-NEXT:    ; def s[4:5]
14226; GFX900-NEXT:    ;;#ASMEND
14227; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s9, s5
14228; GFX900-NEXT:    ;;#ASMSTART
14229; GFX900-NEXT:    ; use s[8:9]
14230; GFX900-NEXT:    ;;#ASMEND
14231; GFX900-NEXT:    s_setpc_b64 s[30:31]
14232;
14233; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_6:
14234; GFX90A:       ; %bb.0:
14235; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14236; GFX90A-NEXT:    ;;#ASMSTART
14237; GFX90A-NEXT:    ; def s[8:9]
14238; GFX90A-NEXT:    ;;#ASMEND
14239; GFX90A-NEXT:    ;;#ASMSTART
14240; GFX90A-NEXT:    ; def s[4:5]
14241; GFX90A-NEXT:    ;;#ASMEND
14242; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s9, s5
14243; GFX90A-NEXT:    ;;#ASMSTART
14244; GFX90A-NEXT:    ; use s[8:9]
14245; GFX90A-NEXT:    ;;#ASMEND
14246; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14247;
14248; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_6:
14249; GFX940:       ; %bb.0:
14250; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14251; GFX940-NEXT:    ;;#ASMSTART
14252; GFX940-NEXT:    ; def s[8:9]
14253; GFX940-NEXT:    ;;#ASMEND
14254; GFX940-NEXT:    ;;#ASMSTART
14255; GFX940-NEXT:    ; def s[0:1]
14256; GFX940-NEXT:    ;;#ASMEND
14257; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s9, s1
14258; GFX940-NEXT:    ;;#ASMSTART
14259; GFX940-NEXT:    ; use s[8:9]
14260; GFX940-NEXT:    ;;#ASMEND
14261; GFX940-NEXT:    s_setpc_b64 s[30:31]
14262  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14263  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14264  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 6>
14265  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14266  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14267  ret void
14268}
14269
; SGPR shuffle with mask <7, 4, 6>: result lane 0 is %vec1[3], result lane 1
; is %vec1[0], and result lane 2 is %vec1[2] (mask values 4-7 index the
; second operand).
; No lane of %vec0 is selected and its asm call is not marked sideeffect, so
; the checks contain only one "def".
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect an s_lshr_b32 of s9 by 16 followed by s_pack_ll_b32_b16
; into s8; gfx940 uses s0 for the temporary where the others use s4.
14270define void @s_shuffle_v3i16_v4i16__7_4_6() {
14271; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_6:
14272; GFX900:       ; %bb.0:
14273; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14274; GFX900-NEXT:    ;;#ASMSTART
14275; GFX900-NEXT:    ; def s[8:9]
14276; GFX900-NEXT:    ;;#ASMEND
14277; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
14278; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s8
14279; GFX900-NEXT:    ;;#ASMSTART
14280; GFX900-NEXT:    ; use s[8:9]
14281; GFX900-NEXT:    ;;#ASMEND
14282; GFX900-NEXT:    s_setpc_b64 s[30:31]
14283;
14284; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_6:
14285; GFX90A:       ; %bb.0:
14286; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14287; GFX90A-NEXT:    ;;#ASMSTART
14288; GFX90A-NEXT:    ; def s[8:9]
14289; GFX90A-NEXT:    ;;#ASMEND
14290; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
14291; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s8
14292; GFX90A-NEXT:    ;;#ASMSTART
14293; GFX90A-NEXT:    ; use s[8:9]
14294; GFX90A-NEXT:    ;;#ASMEND
14295; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14296;
14297; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_6:
14298; GFX940:       ; %bb.0:
14299; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14300; GFX940-NEXT:    ;;#ASMSTART
14301; GFX940-NEXT:    ; def s[8:9]
14302; GFX940-NEXT:    ;;#ASMEND
14303; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
14304; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s8
14305; GFX940-NEXT:    ;;#ASMSTART
14306; GFX940-NEXT:    ; use s[8:9]
14307; GFX940-NEXT:    ;;#ASMEND
14308; GFX940-NEXT:    s_setpc_b64 s[30:31]
14309  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14310  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14311  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
14312  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14313  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14314  ret void
14315}
14316
; SGPR shuffle with mask <7, 5, 6>: result lane 0 is %vec1[3], result lane 1
; is %vec1[1], and result lane 2 is %vec1[2] (mask values 4-7 index the
; second operand).
; No lane of %vec0 is selected and its asm call is not marked sideeffect, so
; the checks contain only one "def".
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The expected code (one s_pack_hh_b32_b16 into s8) is the same for all three
; targets, so one shared check prefix is used.
14317define void @s_shuffle_v3i16_v4i16__7_5_6() {
14318; GFX9-LABEL: s_shuffle_v3i16_v4i16__7_5_6:
14319; GFX9:       ; %bb.0:
14320; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14321; GFX9-NEXT:    ;;#ASMSTART
14322; GFX9-NEXT:    ; def s[8:9]
14323; GFX9-NEXT:    ;;#ASMEND
14324; GFX9-NEXT:    s_pack_hh_b32_b16 s8, s9, s8
14325; GFX9-NEXT:    ;;#ASMSTART
14326; GFX9-NEXT:    ; use s[8:9]
14327; GFX9-NEXT:    ;;#ASMEND
14328; GFX9-NEXT:    s_setpc_b64 s[30:31]
14329  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14330  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14331  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
14332  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14333  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14334  ret void
14335}
14336
; SGPR shuffle with mask <poison, 7, 7>: result lane 0 is unconstrained and
; result lanes 1 and 2 are both %vec1[3] (mask values 4-7 index the second
; operand).
; No lane of %vec0 is selected and its asm call is not marked sideeffect, so
; the checks contain only one "def".
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect an s_lshr_b32 by 16 into s9 plus an s_mov_b32 into s8 and
; no pack instruction; gfx940 defines the input in s[0:1], the others in
; s[4:5].
14337define void @s_shuffle_v3i16_v4i16__u_7_7() {
14338; GFX900-LABEL: s_shuffle_v3i16_v4i16__u_7_7:
14339; GFX900:       ; %bb.0:
14340; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14341; GFX900-NEXT:    ;;#ASMSTART
14342; GFX900-NEXT:    ; def s[4:5]
14343; GFX900-NEXT:    ;;#ASMEND
14344; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
14345; GFX900-NEXT:    s_mov_b32 s8, s5
14346; GFX900-NEXT:    ;;#ASMSTART
14347; GFX900-NEXT:    ; use s[8:9]
14348; GFX900-NEXT:    ;;#ASMEND
14349; GFX900-NEXT:    s_setpc_b64 s[30:31]
14350;
14351; GFX90A-LABEL: s_shuffle_v3i16_v4i16__u_7_7:
14352; GFX90A:       ; %bb.0:
14353; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14354; GFX90A-NEXT:    ;;#ASMSTART
14355; GFX90A-NEXT:    ; def s[4:5]
14356; GFX90A-NEXT:    ;;#ASMEND
14357; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
14358; GFX90A-NEXT:    s_mov_b32 s8, s5
14359; GFX90A-NEXT:    ;;#ASMSTART
14360; GFX90A-NEXT:    ; use s[8:9]
14361; GFX90A-NEXT:    ;;#ASMEND
14362; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14363;
14364; GFX940-LABEL: s_shuffle_v3i16_v4i16__u_7_7:
14365; GFX940:       ; %bb.0:
14366; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14367; GFX940-NEXT:    ;;#ASMSTART
14368; GFX940-NEXT:    ; def s[0:1]
14369; GFX940-NEXT:    ;;#ASMEND
14370; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
14371; GFX940-NEXT:    s_mov_b32 s8, s1
14372; GFX940-NEXT:    ;;#ASMSTART
14373; GFX940-NEXT:    ; use s[8:9]
14374; GFX940-NEXT:    ;;#ASMEND
14375; GFX940-NEXT:    s_setpc_b64 s[30:31]
14376  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14377  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14378  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 poison, i32 7, i32 7>
14379  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14380  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14381  ret void
14382}
14383
; SGPR shuffle with mask <0, 7, 7>: result lane 0 is %vec0[0]; result lanes 1
; and 2 are both %vec1[3] (mask values 0-3 index the first operand, 4-7 the
; second).
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect s_pack_lh_b32_b16 into s8 and an s_lshr_b32 by 16 into
; s9; gfx940 defines the inputs in s[0:1]/s[2:3], the others in
; s[4:5]/s[6:7].
14384define void @s_shuffle_v3i16_v4i16__0_7_7() {
14385; GFX900-LABEL: s_shuffle_v3i16_v4i16__0_7_7:
14386; GFX900:       ; %bb.0:
14387; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14388; GFX900-NEXT:    ;;#ASMSTART
14389; GFX900-NEXT:    ; def s[4:5]
14390; GFX900-NEXT:    ;;#ASMEND
14391; GFX900-NEXT:    ;;#ASMSTART
14392; GFX900-NEXT:    ; def s[6:7]
14393; GFX900-NEXT:    ;;#ASMEND
14394; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s7
14395; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14396; GFX900-NEXT:    ;;#ASMSTART
14397; GFX900-NEXT:    ; use s[8:9]
14398; GFX900-NEXT:    ;;#ASMEND
14399; GFX900-NEXT:    s_setpc_b64 s[30:31]
14400;
14401; GFX90A-LABEL: s_shuffle_v3i16_v4i16__0_7_7:
14402; GFX90A:       ; %bb.0:
14403; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14404; GFX90A-NEXT:    ;;#ASMSTART
14405; GFX90A-NEXT:    ; def s[4:5]
14406; GFX90A-NEXT:    ;;#ASMEND
14407; GFX90A-NEXT:    ;;#ASMSTART
14408; GFX90A-NEXT:    ; def s[6:7]
14409; GFX90A-NEXT:    ;;#ASMEND
14410; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s7
14411; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14412; GFX90A-NEXT:    ;;#ASMSTART
14413; GFX90A-NEXT:    ; use s[8:9]
14414; GFX90A-NEXT:    ;;#ASMEND
14415; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14416;
14417; GFX940-LABEL: s_shuffle_v3i16_v4i16__0_7_7:
14418; GFX940:       ; %bb.0:
14419; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14420; GFX940-NEXT:    ;;#ASMSTART
14421; GFX940-NEXT:    ; def s[0:1]
14422; GFX940-NEXT:    ;;#ASMEND
14423; GFX940-NEXT:    ;;#ASMSTART
14424; GFX940-NEXT:    ; def s[2:3]
14425; GFX940-NEXT:    ;;#ASMEND
14426; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s3
14427; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14428; GFX940-NEXT:    ;;#ASMSTART
14429; GFX940-NEXT:    ; use s[8:9]
14430; GFX940-NEXT:    ;;#ASMEND
14431; GFX940-NEXT:    s_setpc_b64 s[30:31]
14432  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14433  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14434  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 0, i32 7, i32 7>
14435  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14436  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14437  ret void
14438}
14439
; SGPR shuffle with mask <1, 7, 7>: result lane 0 is %vec0[1]; result lanes 1
; and 2 are both %vec1[3] (mask values 0-3 index the first operand, 4-7 the
; second).
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect s_pack_hh_b32_b16 into s8 and an s_lshr_b32 by 16 into
; s9; gfx940 defines the inputs in s[0:1]/s[2:3], the others in
; s[4:5]/s[6:7].
14440define void @s_shuffle_v3i16_v4i16__1_7_7() {
14441; GFX900-LABEL: s_shuffle_v3i16_v4i16__1_7_7:
14442; GFX900:       ; %bb.0:
14443; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14444; GFX900-NEXT:    ;;#ASMSTART
14445; GFX900-NEXT:    ; def s[4:5]
14446; GFX900-NEXT:    ;;#ASMEND
14447; GFX900-NEXT:    ;;#ASMSTART
14448; GFX900-NEXT:    ; def s[6:7]
14449; GFX900-NEXT:    ;;#ASMEND
14450; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s7
14451; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14452; GFX900-NEXT:    ;;#ASMSTART
14453; GFX900-NEXT:    ; use s[8:9]
14454; GFX900-NEXT:    ;;#ASMEND
14455; GFX900-NEXT:    s_setpc_b64 s[30:31]
14456;
14457; GFX90A-LABEL: s_shuffle_v3i16_v4i16__1_7_7:
14458; GFX90A:       ; %bb.0:
14459; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14460; GFX90A-NEXT:    ;;#ASMSTART
14461; GFX90A-NEXT:    ; def s[4:5]
14462; GFX90A-NEXT:    ;;#ASMEND
14463; GFX90A-NEXT:    ;;#ASMSTART
14464; GFX90A-NEXT:    ; def s[6:7]
14465; GFX90A-NEXT:    ;;#ASMEND
14466; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s7
14467; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14468; GFX90A-NEXT:    ;;#ASMSTART
14469; GFX90A-NEXT:    ; use s[8:9]
14470; GFX90A-NEXT:    ;;#ASMEND
14471; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14472;
14473; GFX940-LABEL: s_shuffle_v3i16_v4i16__1_7_7:
14474; GFX940:       ; %bb.0:
14475; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14476; GFX940-NEXT:    ;;#ASMSTART
14477; GFX940-NEXT:    ; def s[0:1]
14478; GFX940-NEXT:    ;;#ASMEND
14479; GFX940-NEXT:    ;;#ASMSTART
14480; GFX940-NEXT:    ; def s[2:3]
14481; GFX940-NEXT:    ;;#ASMEND
14482; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s3
14483; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14484; GFX940-NEXT:    ;;#ASMSTART
14485; GFX940-NEXT:    ; use s[8:9]
14486; GFX940-NEXT:    ;;#ASMEND
14487; GFX940-NEXT:    s_setpc_b64 s[30:31]
14488  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14489  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14490  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 1, i32 7, i32 7>
14491  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14492  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14493  ret void
14494}
14495
; SGPR shuffle with mask <2, 7, 7>: result lane 0 is %vec0[2]; result lanes 1
; and 2 are both %vec1[3] (mask values 0-3 index the first operand, 4-7 the
; second).
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect s_pack_lh_b32_b16 into s8 and an s_lshr_b32 by 16 into
; s9; gfx940 defines the inputs in s[0:1]/s[2:3], the others in
; s[4:5]/s[6:7].
14496define void @s_shuffle_v3i16_v4i16__2_7_7() {
14497; GFX900-LABEL: s_shuffle_v3i16_v4i16__2_7_7:
14498; GFX900:       ; %bb.0:
14499; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14500; GFX900-NEXT:    ;;#ASMSTART
14501; GFX900-NEXT:    ; def s[4:5]
14502; GFX900-NEXT:    ;;#ASMEND
14503; GFX900-NEXT:    ;;#ASMSTART
14504; GFX900-NEXT:    ; def s[6:7]
14505; GFX900-NEXT:    ;;#ASMEND
14506; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s7
14507; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14508; GFX900-NEXT:    ;;#ASMSTART
14509; GFX900-NEXT:    ; use s[8:9]
14510; GFX900-NEXT:    ;;#ASMEND
14511; GFX900-NEXT:    s_setpc_b64 s[30:31]
14512;
14513; GFX90A-LABEL: s_shuffle_v3i16_v4i16__2_7_7:
14514; GFX90A:       ; %bb.0:
14515; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14516; GFX90A-NEXT:    ;;#ASMSTART
14517; GFX90A-NEXT:    ; def s[4:5]
14518; GFX90A-NEXT:    ;;#ASMEND
14519; GFX90A-NEXT:    ;;#ASMSTART
14520; GFX90A-NEXT:    ; def s[6:7]
14521; GFX90A-NEXT:    ;;#ASMEND
14522; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s7
14523; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14524; GFX90A-NEXT:    ;;#ASMSTART
14525; GFX90A-NEXT:    ; use s[8:9]
14526; GFX90A-NEXT:    ;;#ASMEND
14527; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14528;
14529; GFX940-LABEL: s_shuffle_v3i16_v4i16__2_7_7:
14530; GFX940:       ; %bb.0:
14531; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14532; GFX940-NEXT:    ;;#ASMSTART
14533; GFX940-NEXT:    ; def s[0:1]
14534; GFX940-NEXT:    ;;#ASMEND
14535; GFX940-NEXT:    ;;#ASMSTART
14536; GFX940-NEXT:    ; def s[2:3]
14537; GFX940-NEXT:    ;;#ASMEND
14538; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s3
14539; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14540; GFX940-NEXT:    ;;#ASMSTART
14541; GFX940-NEXT:    ; use s[8:9]
14542; GFX940-NEXT:    ;;#ASMEND
14543; GFX940-NEXT:    s_setpc_b64 s[30:31]
14544  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14545  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14546  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 2, i32 7, i32 7>
14547  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14548  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14549  ret void
14550}
14551
; SGPR shuffle with mask <3, 7, 7>: result lane 0 is %vec0[3]; result lanes 1
; and 2 are both %vec1[3] (mask values 0-3 index the first operand, 4-7 the
; second).
; %extend3 pads the 3-element result to <4 x i16> with a poison last lane so
; it can feed the "use" asm, whose operand is pinned to s[8:9].
; The checks expect s_pack_hh_b32_b16 into s8 and an s_lshr_b32 by 16 into
; s9; gfx940 defines the inputs in s[0:1]/s[2:3], the others in
; s[4:5]/s[6:7].
14552define void @s_shuffle_v3i16_v4i16__3_7_7() {
14553; GFX900-LABEL: s_shuffle_v3i16_v4i16__3_7_7:
14554; GFX900:       ; %bb.0:
14555; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14556; GFX900-NEXT:    ;;#ASMSTART
14557; GFX900-NEXT:    ; def s[4:5]
14558; GFX900-NEXT:    ;;#ASMEND
14559; GFX900-NEXT:    ;;#ASMSTART
14560; GFX900-NEXT:    ; def s[6:7]
14561; GFX900-NEXT:    ;;#ASMEND
14562; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s7
14563; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14564; GFX900-NEXT:    ;;#ASMSTART
14565; GFX900-NEXT:    ; use s[8:9]
14566; GFX900-NEXT:    ;;#ASMEND
14567; GFX900-NEXT:    s_setpc_b64 s[30:31]
14568;
14569; GFX90A-LABEL: s_shuffle_v3i16_v4i16__3_7_7:
14570; GFX90A:       ; %bb.0:
14571; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14572; GFX90A-NEXT:    ;;#ASMSTART
14573; GFX90A-NEXT:    ; def s[4:5]
14574; GFX90A-NEXT:    ;;#ASMEND
14575; GFX90A-NEXT:    ;;#ASMSTART
14576; GFX90A-NEXT:    ; def s[6:7]
14577; GFX90A-NEXT:    ;;#ASMEND
14578; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s7
14579; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14580; GFX90A-NEXT:    ;;#ASMSTART
14581; GFX90A-NEXT:    ; use s[8:9]
14582; GFX90A-NEXT:    ;;#ASMEND
14583; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14584;
14585; GFX940-LABEL: s_shuffle_v3i16_v4i16__3_7_7:
14586; GFX940:       ; %bb.0:
14587; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14588; GFX940-NEXT:    ;;#ASMSTART
14589; GFX940-NEXT:    ; def s[0:1]
14590; GFX940-NEXT:    ;;#ASMEND
14591; GFX940-NEXT:    ;;#ASMSTART
14592; GFX940-NEXT:    ; def s[2:3]
14593; GFX940-NEXT:    ;;#ASMEND
14594; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s3
14595; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14596; GFX940-NEXT:    ;;#ASMSTART
14597; GFX940-NEXT:    ; use s[8:9]
14598; GFX940-NEXT:    ;;#ASMEND
14599; GFX940-NEXT:    s_setpc_b64 s[30:31]
14600  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14601  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14602  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 3, i32 7, i32 7>
14603  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14604  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14605  ret void
14606}
14607
; Mask <4, 7, 7>: every index addresses %vec1 (element 0, then element 3 twice),
; so %vec0 is never read and the check blocks contain only a single asm def.
; The <3 x i16> result is widened with a poison lane for the "{s[8:9]}" use.
; Every check block expects s8 to come from one s_pack_lh_b32_b16 and s9 from
; one s_lshr_b32 by 16.
14608define void @s_shuffle_v3i16_v4i16__4_7_7() {
14609; GFX900-LABEL: s_shuffle_v3i16_v4i16__4_7_7:
14610; GFX900:       ; %bb.0:
14611; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14612; GFX900-NEXT:    ;;#ASMSTART
14613; GFX900-NEXT:    ; def s[4:5]
14614; GFX900-NEXT:    ;;#ASMEND
14615; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
14616; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
14617; GFX900-NEXT:    ;;#ASMSTART
14618; GFX900-NEXT:    ; use s[8:9]
14619; GFX900-NEXT:    ;;#ASMEND
14620; GFX900-NEXT:    s_setpc_b64 s[30:31]
14621;
14622; GFX90A-LABEL: s_shuffle_v3i16_v4i16__4_7_7:
14623; GFX90A:       ; %bb.0:
14624; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14625; GFX90A-NEXT:    ;;#ASMSTART
14626; GFX90A-NEXT:    ; def s[4:5]
14627; GFX90A-NEXT:    ;;#ASMEND
14628; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s5
14629; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
14630; GFX90A-NEXT:    ;;#ASMSTART
14631; GFX90A-NEXT:    ; use s[8:9]
14632; GFX90A-NEXT:    ;;#ASMEND
14633; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14634;
14635; GFX940-LABEL: s_shuffle_v3i16_v4i16__4_7_7:
14636; GFX940:       ; %bb.0:
14637; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14638; GFX940-NEXT:    ;;#ASMSTART
14639; GFX940-NEXT:    ; def s[0:1]
14640; GFX940-NEXT:    ;;#ASMEND
14641; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s1
14642; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
14643; GFX940-NEXT:    ;;#ASMSTART
14644; GFX940-NEXT:    ; use s[8:9]
14645; GFX940-NEXT:    ;;#ASMEND
14646; GFX940-NEXT:    s_setpc_b64 s[30:31]
14647  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14648  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14649  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
14650  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14651  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14652  ret void
14653}
14654
; Mask <5, 7, 7>: every index addresses %vec1 (element 1, then element 3 twice),
; so %vec0 is never read and the check blocks contain only a single asm def.
; The <3 x i16> result is widened with a poison lane for the "{s[8:9]}" use.
; Every check block expects s8 to come from one s_pack_hh_b32_b16 and s9 from
; one s_lshr_b32 by 16.
14655define void @s_shuffle_v3i16_v4i16__5_7_7() {
14656; GFX900-LABEL: s_shuffle_v3i16_v4i16__5_7_7:
14657; GFX900:       ; %bb.0:
14658; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14659; GFX900-NEXT:    ;;#ASMSTART
14660; GFX900-NEXT:    ; def s[4:5]
14661; GFX900-NEXT:    ;;#ASMEND
14662; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
14663; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
14664; GFX900-NEXT:    ;;#ASMSTART
14665; GFX900-NEXT:    ; use s[8:9]
14666; GFX900-NEXT:    ;;#ASMEND
14667; GFX900-NEXT:    s_setpc_b64 s[30:31]
14668;
14669; GFX90A-LABEL: s_shuffle_v3i16_v4i16__5_7_7:
14670; GFX90A:       ; %bb.0:
14671; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14672; GFX90A-NEXT:    ;;#ASMSTART
14673; GFX90A-NEXT:    ; def s[4:5]
14674; GFX90A-NEXT:    ;;#ASMEND
14675; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s5
14676; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
14677; GFX90A-NEXT:    ;;#ASMSTART
14678; GFX90A-NEXT:    ; use s[8:9]
14679; GFX90A-NEXT:    ;;#ASMEND
14680; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14681;
14682; GFX940-LABEL: s_shuffle_v3i16_v4i16__5_7_7:
14683; GFX940:       ; %bb.0:
14684; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14685; GFX940-NEXT:    ;;#ASMSTART
14686; GFX940-NEXT:    ; def s[0:1]
14687; GFX940-NEXT:    ;;#ASMEND
14688; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s1
14689; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
14690; GFX940-NEXT:    ;;#ASMSTART
14691; GFX940-NEXT:    ; use s[8:9]
14692; GFX940-NEXT:    ;;#ASMEND
14693; GFX940-NEXT:    s_setpc_b64 s[30:31]
14694  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14695  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14696  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 5, i32 7, i32 7>
14697  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14698  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14699  ret void
14700}
14701
; Mask <6, 7, 7>: lanes 0 and 1 are elements 2 and 3 of %vec1 in their original
; order and lane 2 repeats element 3; %vec0 is never read, so the check blocks
; contain only a single asm def. The <3 x i16> result is widened with a poison
; lane for the "{s[8:9]}" use. Every check block expects s8 to be a plain
; s_mov_b32 copy and s9 to come from one s_lshr_b32 by 16.
14702define void @s_shuffle_v3i16_v4i16__6_7_7() {
14703; GFX900-LABEL: s_shuffle_v3i16_v4i16__6_7_7:
14704; GFX900:       ; %bb.0:
14705; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14706; GFX900-NEXT:    ;;#ASMSTART
14707; GFX900-NEXT:    ; def s[4:5]
14708; GFX900-NEXT:    ;;#ASMEND
14709; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
14710; GFX900-NEXT:    s_mov_b32 s8, s5
14711; GFX900-NEXT:    ;;#ASMSTART
14712; GFX900-NEXT:    ; use s[8:9]
14713; GFX900-NEXT:    ;;#ASMEND
14714; GFX900-NEXT:    s_setpc_b64 s[30:31]
14715;
14716; GFX90A-LABEL: s_shuffle_v3i16_v4i16__6_7_7:
14717; GFX90A:       ; %bb.0:
14718; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14719; GFX90A-NEXT:    ;;#ASMSTART
14720; GFX90A-NEXT:    ; def s[4:5]
14721; GFX90A-NEXT:    ;;#ASMEND
14722; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
14723; GFX90A-NEXT:    s_mov_b32 s8, s5
14724; GFX90A-NEXT:    ;;#ASMSTART
14725; GFX90A-NEXT:    ; use s[8:9]
14726; GFX90A-NEXT:    ;;#ASMEND
14727; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14728;
14729; GFX940-LABEL: s_shuffle_v3i16_v4i16__6_7_7:
14730; GFX940:       ; %bb.0:
14731; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14732; GFX940-NEXT:    ;;#ASMSTART
14733; GFX940-NEXT:    ; def s[0:1]
14734; GFX940-NEXT:    ;;#ASMEND
14735; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
14736; GFX940-NEXT:    s_mov_b32 s8, s1
14737; GFX940-NEXT:    ;;#ASMSTART
14738; GFX940-NEXT:    ; use s[8:9]
14739; GFX940-NEXT:    ;;#ASMEND
14740; GFX940-NEXT:    s_setpc_b64 s[30:31]
14741  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14742  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14743  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 6, i32 7, i32 7>
14744  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14745  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14746  ret void
14747}
14748
; Mask <7, poison, 7>: lanes 0 and 2 are element 3 of %vec1 and lane 1 is left
; unspecified; %vec0 is never read, so the check blocks contain only a single
; asm def. The <3 x i16> result is widened with a poison lane for the
; "{s[8:9]}" use. Every check block expects one s_lshr_b32 by 16 into s8
; followed by an s_mov_b32 copying s8 into s9.
14749define void @s_shuffle_v3i16_v4i16__7_u_7() {
14750; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_u_7:
14751; GFX900:       ; %bb.0:
14752; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14753; GFX900-NEXT:    ;;#ASMSTART
14754; GFX900-NEXT:    ; def s[4:5]
14755; GFX900-NEXT:    ;;#ASMEND
14756; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
14757; GFX900-NEXT:    s_mov_b32 s9, s8
14758; GFX900-NEXT:    ;;#ASMSTART
14759; GFX900-NEXT:    ; use s[8:9]
14760; GFX900-NEXT:    ;;#ASMEND
14761; GFX900-NEXT:    s_setpc_b64 s[30:31]
14762;
14763; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_u_7:
14764; GFX90A:       ; %bb.0:
14765; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14766; GFX90A-NEXT:    ;;#ASMSTART
14767; GFX90A-NEXT:    ; def s[4:5]
14768; GFX90A-NEXT:    ;;#ASMEND
14769; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
14770; GFX90A-NEXT:    s_mov_b32 s9, s8
14771; GFX90A-NEXT:    ;;#ASMSTART
14772; GFX90A-NEXT:    ; use s[8:9]
14773; GFX90A-NEXT:    ;;#ASMEND
14774; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14775;
14776; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_u_7:
14777; GFX940:       ; %bb.0:
14778; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14779; GFX940-NEXT:    ;;#ASMSTART
14780; GFX940-NEXT:    ; def s[0:1]
14781; GFX940-NEXT:    ;;#ASMEND
14782; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
14783; GFX940-NEXT:    s_mov_b32 s9, s8
14784; GFX940-NEXT:    ;;#ASMSTART
14785; GFX940-NEXT:    ; use s[8:9]
14786; GFX940-NEXT:    ;;#ASMEND
14787; GFX940-NEXT:    s_setpc_b64 s[30:31]
14788  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14789  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14790  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 poison, i32 7>
14791  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14792  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14793  ret void
14794}
14795
; Mask <7, 0, 7>: lanes 0 and 2 are element 3 of %vec1 and lane 1 is element 0
; of %vec0, so both inputs are live. The <3 x i16> result is widened with a
; poison lane for the "{s[8:9]}" use. Every check block expects the s_lshr_b32
; by 16 that produces s9 to sit between the two asm defs, followed by one
; s_pack_ll_b32_b16 that builds s8 from s9 and the lower register of the
; second-defined pair.
14796define void @s_shuffle_v3i16_v4i16__7_0_7() {
14797; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_0_7:
14798; GFX900:       ; %bb.0:
14799; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14800; GFX900-NEXT:    ;;#ASMSTART
14801; GFX900-NEXT:    ; def s[6:7]
14802; GFX900-NEXT:    ;;#ASMEND
14803; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14804; GFX900-NEXT:    ;;#ASMSTART
14805; GFX900-NEXT:    ; def s[4:5]
14806; GFX900-NEXT:    ;;#ASMEND
14807; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
14808; GFX900-NEXT:    ;;#ASMSTART
14809; GFX900-NEXT:    ; use s[8:9]
14810; GFX900-NEXT:    ;;#ASMEND
14811; GFX900-NEXT:    s_setpc_b64 s[30:31]
14812;
14813; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_0_7:
14814; GFX90A:       ; %bb.0:
14815; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14816; GFX90A-NEXT:    ;;#ASMSTART
14817; GFX90A-NEXT:    ; def s[6:7]
14818; GFX90A-NEXT:    ;;#ASMEND
14819; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14820; GFX90A-NEXT:    ;;#ASMSTART
14821; GFX90A-NEXT:    ; def s[4:5]
14822; GFX90A-NEXT:    ;;#ASMEND
14823; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
14824; GFX90A-NEXT:    ;;#ASMSTART
14825; GFX90A-NEXT:    ; use s[8:9]
14826; GFX90A-NEXT:    ;;#ASMEND
14827; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14828;
14829; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_0_7:
14830; GFX940:       ; %bb.0:
14831; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14832; GFX940-NEXT:    ;;#ASMSTART
14833; GFX940-NEXT:    ; def s[2:3]
14834; GFX940-NEXT:    ;;#ASMEND
14835; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14836; GFX940-NEXT:    ;;#ASMSTART
14837; GFX940-NEXT:    ; def s[0:1]
14838; GFX940-NEXT:    ;;#ASMEND
14839; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
14840; GFX940-NEXT:    ;;#ASMSTART
14841; GFX940-NEXT:    ; use s[8:9]
14842; GFX940-NEXT:    ;;#ASMEND
14843; GFX940-NEXT:    s_setpc_b64 s[30:31]
14844  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14845  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14846  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 0, i32 7>
14847  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14848  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14849  ret void
14850}
14851
; Mask <7, 1, 7>: lanes 0 and 2 are element 3 of %vec1 and lane 1 is element 1
; of %vec0, so both inputs are live and both asm defs appear in the checks.
; The <3 x i16> result is widened with a poison lane for the "{s[8:9]}" use.
; Every check block expects s8 to come from one s_pack_hh_b32_b16 and s9 from
; one s_lshr_b32 by 16.
14852define void @s_shuffle_v3i16_v4i16__7_1_7() {
14853; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_1_7:
14854; GFX900:       ; %bb.0:
14855; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14856; GFX900-NEXT:    ;;#ASMSTART
14857; GFX900-NEXT:    ; def s[4:5]
14858; GFX900-NEXT:    ;;#ASMEND
14859; GFX900-NEXT:    ;;#ASMSTART
14860; GFX900-NEXT:    ; def s[6:7]
14861; GFX900-NEXT:    ;;#ASMEND
14862; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
14863; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14864; GFX900-NEXT:    ;;#ASMSTART
14865; GFX900-NEXT:    ; use s[8:9]
14866; GFX900-NEXT:    ;;#ASMEND
14867; GFX900-NEXT:    s_setpc_b64 s[30:31]
14868;
14869; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_1_7:
14870; GFX90A:       ; %bb.0:
14871; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14872; GFX90A-NEXT:    ;;#ASMSTART
14873; GFX90A-NEXT:    ; def s[4:5]
14874; GFX90A-NEXT:    ;;#ASMEND
14875; GFX90A-NEXT:    ;;#ASMSTART
14876; GFX90A-NEXT:    ; def s[6:7]
14877; GFX90A-NEXT:    ;;#ASMEND
14878; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s4
14879; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14880; GFX90A-NEXT:    ;;#ASMSTART
14881; GFX90A-NEXT:    ; use s[8:9]
14882; GFX90A-NEXT:    ;;#ASMEND
14883; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14884;
14885; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_1_7:
14886; GFX940:       ; %bb.0:
14887; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14888; GFX940-NEXT:    ;;#ASMSTART
14889; GFX940-NEXT:    ; def s[0:1]
14890; GFX940-NEXT:    ;;#ASMEND
14891; GFX940-NEXT:    ;;#ASMSTART
14892; GFX940-NEXT:    ; def s[2:3]
14893; GFX940-NEXT:    ;;#ASMEND
14894; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s0
14895; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14896; GFX940-NEXT:    ;;#ASMSTART
14897; GFX940-NEXT:    ; use s[8:9]
14898; GFX940-NEXT:    ;;#ASMEND
14899; GFX940-NEXT:    s_setpc_b64 s[30:31]
14900  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14901  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14902  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 1, i32 7>
14903  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14904  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14905  ret void
14906}
14907
; Mask <7, 2, 7>: lanes 0 and 2 are element 3 of %vec1 and lane 1 is element 2
; of %vec0, so both inputs are live. The <3 x i16> result is widened with a
; poison lane for the "{s[8:9]}" use. Every check block expects the s_lshr_b32
; by 16 that produces s9 to sit between the two asm defs, followed by one
; s_pack_ll_b32_b16 that builds s8 from s9 and the upper register of the
; second-defined pair.
14908define void @s_shuffle_v3i16_v4i16__7_2_7() {
14909; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_2_7:
14910; GFX900:       ; %bb.0:
14911; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14912; GFX900-NEXT:    ;;#ASMSTART
14913; GFX900-NEXT:    ; def s[6:7]
14914; GFX900-NEXT:    ;;#ASMEND
14915; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14916; GFX900-NEXT:    ;;#ASMSTART
14917; GFX900-NEXT:    ; def s[4:5]
14918; GFX900-NEXT:    ;;#ASMEND
14919; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
14920; GFX900-NEXT:    ;;#ASMSTART
14921; GFX900-NEXT:    ; use s[8:9]
14922; GFX900-NEXT:    ;;#ASMEND
14923; GFX900-NEXT:    s_setpc_b64 s[30:31]
14924;
14925; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_2_7:
14926; GFX90A:       ; %bb.0:
14927; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14928; GFX90A-NEXT:    ;;#ASMSTART
14929; GFX90A-NEXT:    ; def s[6:7]
14930; GFX90A-NEXT:    ;;#ASMEND
14931; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14932; GFX90A-NEXT:    ;;#ASMSTART
14933; GFX90A-NEXT:    ; def s[4:5]
14934; GFX90A-NEXT:    ;;#ASMEND
14935; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
14936; GFX90A-NEXT:    ;;#ASMSTART
14937; GFX90A-NEXT:    ; use s[8:9]
14938; GFX90A-NEXT:    ;;#ASMEND
14939; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14940;
14941; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_2_7:
14942; GFX940:       ; %bb.0:
14943; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14944; GFX940-NEXT:    ;;#ASMSTART
14945; GFX940-NEXT:    ; def s[2:3]
14946; GFX940-NEXT:    ;;#ASMEND
14947; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14948; GFX940-NEXT:    ;;#ASMSTART
14949; GFX940-NEXT:    ; def s[0:1]
14950; GFX940-NEXT:    ;;#ASMEND
14951; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s1
14952; GFX940-NEXT:    ;;#ASMSTART
14953; GFX940-NEXT:    ; use s[8:9]
14954; GFX940-NEXT:    ;;#ASMEND
14955; GFX940-NEXT:    s_setpc_b64 s[30:31]
14956  %vec0 = call <4 x i16> asm "; def $0", "=s"()
14957  %vec1 = call <4 x i16> asm "; def $0", "=s"()
14958  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 2, i32 7>
14959  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14960  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
14961  ret void
14962}
14963
; Mask <7, 3, 7>: lanes 0 and 2 are element 3 of %vec1 and lane 1 is element 3
; of %vec0, so both inputs are live and both asm defs appear in the checks.
; The <3 x i16> result is widened with a poison lane for the "{s[8:9]}" use.
; Every check block expects s8 to come from one s_pack_hh_b32_b16 of the two
; upper registers and s9 from one s_lshr_b32 by 16.
14964define void @s_shuffle_v3i16_v4i16__7_3_7() {
14965; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_3_7:
14966; GFX900:       ; %bb.0:
14967; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14968; GFX900-NEXT:    ;;#ASMSTART
14969; GFX900-NEXT:    ; def s[4:5]
14970; GFX900-NEXT:    ;;#ASMEND
14971; GFX900-NEXT:    ;;#ASMSTART
14972; GFX900-NEXT:    ; def s[6:7]
14973; GFX900-NEXT:    ;;#ASMEND
14974; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
14975; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14976; GFX900-NEXT:    ;;#ASMSTART
14977; GFX900-NEXT:    ; use s[8:9]
14978; GFX900-NEXT:    ;;#ASMEND
14979; GFX900-NEXT:    s_setpc_b64 s[30:31]
14980;
14981; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_3_7:
14982; GFX90A:       ; %bb.0:
14983; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14984; GFX90A-NEXT:    ;;#ASMSTART
14985; GFX90A-NEXT:    ; def s[4:5]
14986; GFX90A-NEXT:    ;;#ASMEND
14987; GFX90A-NEXT:    ;;#ASMSTART
14988; GFX90A-NEXT:    ; def s[6:7]
14989; GFX90A-NEXT:    ;;#ASMEND
14990; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s7, s5
14991; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14992; GFX90A-NEXT:    ;;#ASMSTART
14993; GFX90A-NEXT:    ; use s[8:9]
14994; GFX90A-NEXT:    ;;#ASMEND
14995; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14996;
14997; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_3_7:
14998; GFX940:       ; %bb.0:
14999; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15000; GFX940-NEXT:    ;;#ASMSTART
15001; GFX940-NEXT:    ; def s[0:1]
15002; GFX940-NEXT:    ;;#ASMEND
15003; GFX940-NEXT:    ;;#ASMSTART
15004; GFX940-NEXT:    ; def s[2:3]
15005; GFX940-NEXT:    ;;#ASMEND
15006; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s3, s1
15007; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
15008; GFX940-NEXT:    ;;#ASMSTART
15009; GFX940-NEXT:    ; use s[8:9]
15010; GFX940-NEXT:    ;;#ASMEND
15011; GFX940-NEXT:    s_setpc_b64 s[30:31]
15012  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15013  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15014  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 3, i32 7>
15015  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15016  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
15017  ret void
15018}
15019
; Mask <7, 4, 7>: every index addresses %vec1 (element 3, element 0, element 3),
; so %vec0 is never read and the check blocks contain only a single asm def.
; The <3 x i16> result is widened with a poison lane for the "{s[8:9]}" use.
; Every check block expects one s_lshr_b32 by 16 into s9, then one
; s_pack_ll_b32_b16 that builds s8 from s9 and the lower register of the pair.
15020define void @s_shuffle_v3i16_v4i16__7_4_7() {
15021; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_4_7:
15022; GFX900:       ; %bb.0:
15023; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15024; GFX900-NEXT:    ;;#ASMSTART
15025; GFX900-NEXT:    ; def s[4:5]
15026; GFX900-NEXT:    ;;#ASMEND
15027; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
15028; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15029; GFX900-NEXT:    ;;#ASMSTART
15030; GFX900-NEXT:    ; use s[8:9]
15031; GFX900-NEXT:    ;;#ASMEND
15032; GFX900-NEXT:    s_setpc_b64 s[30:31]
15033;
15034; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_4_7:
15035; GFX90A:       ; %bb.0:
15036; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15037; GFX90A-NEXT:    ;;#ASMSTART
15038; GFX90A-NEXT:    ; def s[4:5]
15039; GFX90A-NEXT:    ;;#ASMEND
15040; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
15041; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15042; GFX90A-NEXT:    ;;#ASMSTART
15043; GFX90A-NEXT:    ; use s[8:9]
15044; GFX90A-NEXT:    ;;#ASMEND
15045; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15046;
15047; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_4_7:
15048; GFX940:       ; %bb.0:
15049; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15050; GFX940-NEXT:    ;;#ASMSTART
15051; GFX940-NEXT:    ; def s[0:1]
15052; GFX940-NEXT:    ;;#ASMEND
15053; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
15054; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
15055; GFX940-NEXT:    ;;#ASMSTART
15056; GFX940-NEXT:    ; use s[8:9]
15057; GFX940-NEXT:    ;;#ASMEND
15058; GFX940-NEXT:    s_setpc_b64 s[30:31]
15059  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15060  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15061  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 4, i32 7>
15062  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15063  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
15064  ret void
15065}
15066
; Mask <7, 5, 7>: every index addresses %vec1 (element 3, element 1, element 3),
; so %vec0 is never read and the check blocks contain only a single asm def.
; The <3 x i16> result is widened with a poison lane for the "{s[8:9]}" use.
; Every check block expects s8 to come from one s_pack_hh_b32_b16 (upper
; register first, lower register second) and s9 from one s_lshr_b32 by 16.
15067define void @s_shuffle_v3i16_v4i16__7_5_7() {
15068; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_5_7:
15069; GFX900:       ; %bb.0:
15070; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15071; GFX900-NEXT:    ;;#ASMSTART
15072; GFX900-NEXT:    ; def s[4:5]
15073; GFX900-NEXT:    ;;#ASMEND
15074; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
15075; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
15076; GFX900-NEXT:    ;;#ASMSTART
15077; GFX900-NEXT:    ; use s[8:9]
15078; GFX900-NEXT:    ;;#ASMEND
15079; GFX900-NEXT:    s_setpc_b64 s[30:31]
15080;
15081; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_5_7:
15082; GFX90A:       ; %bb.0:
15083; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15084; GFX90A-NEXT:    ;;#ASMSTART
15085; GFX90A-NEXT:    ; def s[4:5]
15086; GFX90A-NEXT:    ;;#ASMEND
15087; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s5, s4
15088; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
15089; GFX90A-NEXT:    ;;#ASMSTART
15090; GFX90A-NEXT:    ; use s[8:9]
15091; GFX90A-NEXT:    ;;#ASMEND
15092; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15093;
15094; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_5_7:
15095; GFX940:       ; %bb.0:
15096; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15097; GFX940-NEXT:    ;;#ASMSTART
15098; GFX940-NEXT:    ; def s[0:1]
15099; GFX940-NEXT:    ;;#ASMEND
15100; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s1, s0
15101; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
15102; GFX940-NEXT:    ;;#ASMSTART
15103; GFX940-NEXT:    ; use s[8:9]
15104; GFX940-NEXT:    ;;#ASMEND
15105; GFX940-NEXT:    s_setpc_b64 s[30:31]
15106  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15107  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15108  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
15109  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15110  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
15111  ret void
15112}
15113
; Mask <7, 6, 7>: every index addresses %vec1 (element 3, element 2, element 3),
; so %vec0 is never read and the check blocks contain only a single asm def.
; The <3 x i16> result is widened with a poison lane for the "{s[8:9]}" use.
; Every check block expects one s_lshr_b32 by 16 into s9, then one
; s_pack_ll_b32_b16 that builds s8 from s9 and the same upper register.
15114define void @s_shuffle_v3i16_v4i16__7_6_7() {
15115; GFX900-LABEL: s_shuffle_v3i16_v4i16__7_6_7:
15116; GFX900:       ; %bb.0:
15117; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15118; GFX900-NEXT:    ;;#ASMSTART
15119; GFX900-NEXT:    ; def s[4:5]
15120; GFX900-NEXT:    ;;#ASMEND
15121; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
15122; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
15123; GFX900-NEXT:    ;;#ASMSTART
15124; GFX900-NEXT:    ; use s[8:9]
15125; GFX900-NEXT:    ;;#ASMEND
15126; GFX900-NEXT:    s_setpc_b64 s[30:31]
15127;
15128; GFX90A-LABEL: s_shuffle_v3i16_v4i16__7_6_7:
15129; GFX90A:       ; %bb.0:
15130; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15131; GFX90A-NEXT:    ;;#ASMSTART
15132; GFX90A-NEXT:    ; def s[4:5]
15133; GFX90A-NEXT:    ;;#ASMEND
15134; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
15135; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
15136; GFX90A-NEXT:    ;;#ASMSTART
15137; GFX90A-NEXT:    ; use s[8:9]
15138; GFX90A-NEXT:    ;;#ASMEND
15139; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15140;
15141; GFX940-LABEL: s_shuffle_v3i16_v4i16__7_6_7:
15142; GFX940:       ; %bb.0:
15143; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15144; GFX940-NEXT:    ;;#ASMSTART
15145; GFX940-NEXT:    ; def s[0:1]
15146; GFX940-NEXT:    ;;#ASMEND
15147; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
15148; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s1
15149; GFX940-NEXT:    ;;#ASMSTART
15150; GFX940-NEXT:    ; use s[8:9]
15151; GFX940-NEXT:    ;;#ASMEND
15152; GFX940-NEXT:    s_setpc_b64 s[30:31]
15153  %vec0 = call <4 x i16> asm "; def $0", "=s"()
15154  %vec1 = call <4 x i16> asm "; def $0", "=s"()
15155  %shuf = shufflevector <4 x i16> %vec0, <4 x i16> %vec1, <3 x i32> <i32 7, i32 6, i32 7>
15156  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15157  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
15158  ret void
15159}
15160;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
15161; GFX90APLUS: {{.*}}
15162