xref: /llvm-project/llvm/test/CodeGen/AMDGPU/shufflevector.v3i16.v3i16.ll (revision 585858aeb6247b3892218edb9d353c63f1c33186)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
5
6
; Shuffle mask <poison, poison, poison>: every lane of the stored <3 x i16>
; is unspecified, so nothing from %vec0 has to reach memory.
; The expected output, shared by all three run lines, is only the entry wait
; and the return -- no inline-asm def and no store.
7define void @v_shuffle_v3i16_v3i16__u_u_u(ptr addrspace(1) inreg %ptr) {
8; GFX9-LABEL: v_shuffle_v3i16_v3i16__u_u_u:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    s_setpc_b64 s[30:31]
12  %vec0 = call <4 x i16> asm "; def $0", "=v"()
13  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
14  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> poison
15  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
16  ret void
17}
18
; Shuffle mask <0, poison, poison>: result lane 0 is lane 0 of %extract3 (the
; low three lanes of the asm-defined <4 x i16>); lanes 1 and 2 are unspecified.
; Expected output: the asm-defined register pair is stored unmodified as one
; dword plus one short at offset 4. The gfx940 variant expects the pointer in
; s[0:1] instead of s[16:17] and "sc0 sc1" on the stores.
19define void @v_shuffle_v3i16_v3i16__0_u_u(ptr addrspace(1) inreg %ptr) {
20; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_u_u:
21; GFX900:       ; %bb.0:
22; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GFX900-NEXT:    v_mov_b32_e32 v2, 0
24; GFX900-NEXT:    ;;#ASMSTART
25; GFX900-NEXT:    ; def v[0:1]
26; GFX900-NEXT:    ;;#ASMEND
27; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
28; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
29; GFX900-NEXT:    s_waitcnt vmcnt(0)
30; GFX900-NEXT:    s_setpc_b64 s[30:31]
31;
32; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_u_u:
33; GFX90A:       ; %bb.0:
34; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
36; GFX90A-NEXT:    ;;#ASMSTART
37; GFX90A-NEXT:    ; def v[0:1]
38; GFX90A-NEXT:    ;;#ASMEND
39; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
40; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
41; GFX90A-NEXT:    s_waitcnt vmcnt(0)
42; GFX90A-NEXT:    s_setpc_b64 s[30:31]
43;
44; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_u_u:
45; GFX940:       ; %bb.0:
46; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47; GFX940-NEXT:    v_mov_b32_e32 v2, 0
48; GFX940-NEXT:    ;;#ASMSTART
49; GFX940-NEXT:    ; def v[0:1]
50; GFX940-NEXT:    ;;#ASMEND
51; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
52; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
53; GFX940-NEXT:    s_waitcnt vmcnt(0)
54; GFX940-NEXT:    s_setpc_b64 s[30:31]
55  %vec0 = call <4 x i16> asm "; def $0", "=v"()
56  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
57  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
58  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
59  ret void
60}
61
; Shuffle mask <1, poison, poison>: result lane 0 is lane 1 of %extract3;
; lanes 1 and 2 are unspecified.
; Expected output: a v_alignbit_b32 by 16 on the first asm-defined register,
; followed by a single dword store (no separate short store for lane 2).
62define void @v_shuffle_v3i16_v3i16__1_u_u(ptr addrspace(1) inreg %ptr) {
63; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_u_u:
64; GFX900:       ; %bb.0:
65; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX900-NEXT:    ;;#ASMSTART
67; GFX900-NEXT:    ; def v[0:1]
68; GFX900-NEXT:    ;;#ASMEND
69; GFX900-NEXT:    v_mov_b32_e32 v2, 0
70; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
71; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
72; GFX900-NEXT:    s_waitcnt vmcnt(0)
73; GFX900-NEXT:    s_setpc_b64 s[30:31]
74;
75; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_u_u:
76; GFX90A:       ; %bb.0:
77; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; GFX90A-NEXT:    ;;#ASMSTART
79; GFX90A-NEXT:    ; def v[0:1]
80; GFX90A-NEXT:    ;;#ASMEND
81; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
82; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
83; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
84; GFX90A-NEXT:    s_waitcnt vmcnt(0)
85; GFX90A-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_u_u:
88; GFX940:       ; %bb.0:
89; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX940-NEXT:    ;;#ASMSTART
91; GFX940-NEXT:    ; def v[0:1]
92; GFX940-NEXT:    ;;#ASMEND
93; GFX940-NEXT:    v_mov_b32_e32 v2, 0
94; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
95; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
96; GFX940-NEXT:    s_waitcnt vmcnt(0)
97; GFX940-NEXT:    s_setpc_b64 s[30:31]
98  %vec0 = call <4 x i16> asm "; def $0", "=v"()
99  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
100  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
101  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
102  ret void
103}
104
; Shuffle mask <2, poison, poison>: result lane 0 is lane 2 of %extract3;
; lanes 1 and 2 are unspecified.
; Expected output: the second register of the asm-defined pair (v1) is stored
; directly as a single dword, with no extra ALU instruction.
105define void @v_shuffle_v3i16_v3i16__2_u_u(ptr addrspace(1) inreg %ptr) {
106; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_u_u:
107; GFX900:       ; %bb.0:
108; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; GFX900-NEXT:    v_mov_b32_e32 v2, 0
110; GFX900-NEXT:    ;;#ASMSTART
111; GFX900-NEXT:    ; def v[0:1]
112; GFX900-NEXT:    ;;#ASMEND
113; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
114; GFX900-NEXT:    s_waitcnt vmcnt(0)
115; GFX900-NEXT:    s_setpc_b64 s[30:31]
116;
117; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_u_u:
118; GFX90A:       ; %bb.0:
119; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
121; GFX90A-NEXT:    ;;#ASMSTART
122; GFX90A-NEXT:    ; def v[0:1]
123; GFX90A-NEXT:    ;;#ASMEND
124; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
125; GFX90A-NEXT:    s_waitcnt vmcnt(0)
126; GFX90A-NEXT:    s_setpc_b64 s[30:31]
127;
128; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_u_u:
129; GFX940:       ; %bb.0:
130; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
131; GFX940-NEXT:    v_mov_b32_e32 v2, 0
132; GFX940-NEXT:    ;;#ASMSTART
133; GFX940-NEXT:    ; def v[0:1]
134; GFX940-NEXT:    ;;#ASMEND
135; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
136; GFX940-NEXT:    s_waitcnt vmcnt(0)
137; GFX940-NEXT:    s_setpc_b64 s[30:31]
138  %vec0 = call <4 x i16> asm "; def $0", "=v"()
139  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
140  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
141  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
142  ret void
143}
144
; Shuffle mask <3, poison, poison>: index 3 addresses lane 0 of the second
; shuffle operand, which is poison in this function (only %vec0 is defined),
; so every lane of the stored value is unspecified.
; The expected output, shared by all three run lines, is only the entry wait
; and the return -- no inline-asm def and no store.
145define void @v_shuffle_v3i16_v3i16__3_u_u(ptr addrspace(1) inreg %ptr) {
146; GFX9-LABEL: v_shuffle_v3i16_v3i16__3_u_u:
147; GFX9:       ; %bb.0:
148; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149; GFX9-NEXT:    s_setpc_b64 s[30:31]
150  %vec0 = call <4 x i16> asm "; def $0", "=v"()
151  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
152  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
153  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
154  ret void
155}
156
; Two-source shuffle, mask <4, poison, poison>: index 4 addresses lane 1 of
; the second operand (%extract31); lanes 1 and 2 are unspecified.
; Nothing from %vec0 is selected, and the expected output contains only one
; asm def, followed by a v_alignbit_b32 by 16 and a single dword store.
157define void @v_shuffle_v3i16_v3i16__4_u_u(ptr addrspace(1) inreg %ptr) {
158; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_u_u:
159; GFX900:       ; %bb.0:
160; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161; GFX900-NEXT:    ;;#ASMSTART
162; GFX900-NEXT:    ; def v[0:1]
163; GFX900-NEXT:    ;;#ASMEND
164; GFX900-NEXT:    v_mov_b32_e32 v2, 0
165; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
166; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
167; GFX900-NEXT:    s_waitcnt vmcnt(0)
168; GFX900-NEXT:    s_setpc_b64 s[30:31]
169;
170; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_u_u:
171; GFX90A:       ; %bb.0:
172; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX90A-NEXT:    ;;#ASMSTART
174; GFX90A-NEXT:    ; def v[0:1]
175; GFX90A-NEXT:    ;;#ASMEND
176; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
177; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
178; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
179; GFX90A-NEXT:    s_waitcnt vmcnt(0)
180; GFX90A-NEXT:    s_setpc_b64 s[30:31]
181;
182; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_u_u:
183; GFX940:       ; %bb.0:
184; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
185; GFX940-NEXT:    ;;#ASMSTART
186; GFX940-NEXT:    ; def v[0:1]
187; GFX940-NEXT:    ;;#ASMEND
188; GFX940-NEXT:    v_mov_b32_e32 v2, 0
189; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
190; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
191; GFX940-NEXT:    s_waitcnt vmcnt(0)
192; GFX940-NEXT:    s_setpc_b64 s[30:31]
193  %vec0 = call <4 x i16> asm "; def $0", "=v"()
194  %vec1 = call <4 x i16> asm "; def $0", "=v"()
195  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
196  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
197  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 poison, i32 poison>
198  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
199  ret void
200}
201
; Two-source shuffle, mask <5, poison, poison>: index 5 addresses lane 2 of
; the second operand (%extract31); lanes 1 and 2 are unspecified.
; Nothing from %vec0 is selected, and the expected output contains only one
; asm def whose second register (v1) is stored directly as a single dword.
202define void @v_shuffle_v3i16_v3i16__5_u_u(ptr addrspace(1) inreg %ptr) {
203; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_u:
204; GFX900:       ; %bb.0:
205; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX900-NEXT:    v_mov_b32_e32 v2, 0
207; GFX900-NEXT:    ;;#ASMSTART
208; GFX900-NEXT:    ; def v[0:1]
209; GFX900-NEXT:    ;;#ASMEND
210; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
211; GFX900-NEXT:    s_waitcnt vmcnt(0)
212; GFX900-NEXT:    s_setpc_b64 s[30:31]
213;
214; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_u:
215; GFX90A:       ; %bb.0:
216; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
218; GFX90A-NEXT:    ;;#ASMSTART
219; GFX90A-NEXT:    ; def v[0:1]
220; GFX90A-NEXT:    ;;#ASMEND
221; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
222; GFX90A-NEXT:    s_waitcnt vmcnt(0)
223; GFX90A-NEXT:    s_setpc_b64 s[30:31]
224;
225; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_u:
226; GFX940:       ; %bb.0:
227; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; GFX940-NEXT:    v_mov_b32_e32 v2, 0
229; GFX940-NEXT:    ;;#ASMSTART
230; GFX940-NEXT:    ; def v[0:1]
231; GFX940-NEXT:    ;;#ASMEND
232; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
233; GFX940-NEXT:    s_waitcnt vmcnt(0)
234; GFX940-NEXT:    s_setpc_b64 s[30:31]
235  %vec0 = call <4 x i16> asm "; def $0", "=v"()
236  %vec1 = call <4 x i16> asm "; def $0", "=v"()
237  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
238  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
239  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 poison>
240  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
241  ret void
242}
243
; Two-source shuffle, mask <5, 0, poison>: result lane 0 is lane 2 of
; %extract31, result lane 1 is lane 0 of %extract3; lane 2 is unspecified.
; Expected output: both asm defs are kept and the two halves are packed with
; v_perm_b32 (selector 0x5040100) before a single dword store. The gfx940
; variant additionally expects an "s_nop 0" ahead of the v_perm_b32.
244define void @v_shuffle_v3i16_v3i16__5_0_u(ptr addrspace(1) inreg %ptr) {
245; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_u:
246; GFX900:       ; %bb.0:
247; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GFX900-NEXT:    ;;#ASMSTART
249; GFX900-NEXT:    ; def v[0:1]
250; GFX900-NEXT:    ;;#ASMEND
251; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
252; GFX900-NEXT:    v_mov_b32_e32 v3, 0
253; GFX900-NEXT:    ;;#ASMSTART
254; GFX900-NEXT:    ; def v[1:2]
255; GFX900-NEXT:    ;;#ASMEND
256; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
257; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
258; GFX900-NEXT:    s_waitcnt vmcnt(0)
259; GFX900-NEXT:    s_setpc_b64 s[30:31]
260;
261; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_u:
262; GFX90A:       ; %bb.0:
263; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264; GFX90A-NEXT:    ;;#ASMSTART
265; GFX90A-NEXT:    ; def v[0:1]
266; GFX90A-NEXT:    ;;#ASMEND
267; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
268; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
269; GFX90A-NEXT:    ;;#ASMSTART
270; GFX90A-NEXT:    ; def v[2:3]
271; GFX90A-NEXT:    ;;#ASMEND
272; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
273; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
274; GFX90A-NEXT:    s_waitcnt vmcnt(0)
275; GFX90A-NEXT:    s_setpc_b64 s[30:31]
276;
277; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_u:
278; GFX940:       ; %bb.0:
279; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280; GFX940-NEXT:    ;;#ASMSTART
281; GFX940-NEXT:    ; def v[0:1]
282; GFX940-NEXT:    ;;#ASMEND
283; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
284; GFX940-NEXT:    v_mov_b32_e32 v4, 0
285; GFX940-NEXT:    ;;#ASMSTART
286; GFX940-NEXT:    ; def v[2:3]
287; GFX940-NEXT:    ;;#ASMEND
288; GFX940-NEXT:    s_nop 0
289; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
290; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
291; GFX940-NEXT:    s_waitcnt vmcnt(0)
292; GFX940-NEXT:    s_setpc_b64 s[30:31]
293  %vec0 = call <4 x i16> asm "; def $0", "=v"()
294  %vec1 = call <4 x i16> asm "; def $0", "=v"()
295  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
296  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
297  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 poison>
298  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
299  ret void
300}
301
; Two-source shuffle, mask <5, 1, poison>: result lane 0 is lane 2 of
; %extract31, result lane 1 is lane 1 of %extract3; lane 2 is unspecified.
; Expected output: both asm defs are kept and the halves are merged with
; v_bfi_b32 under the mask 0xffff before a single dword store. The gfx940
; variant additionally expects an "s_nop 0" ahead of the v_bfi_b32.
302define void @v_shuffle_v3i16_v3i16__5_1_u(ptr addrspace(1) inreg %ptr) {
303; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_u:
304; GFX900:       ; %bb.0:
305; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
306; GFX900-NEXT:    ;;#ASMSTART
307; GFX900-NEXT:    ; def v[0:1]
308; GFX900-NEXT:    ;;#ASMEND
309; GFX900-NEXT:    s_mov_b32 s4, 0xffff
310; GFX900-NEXT:    v_mov_b32_e32 v3, 0
311; GFX900-NEXT:    ;;#ASMSTART
312; GFX900-NEXT:    ; def v[1:2]
313; GFX900-NEXT:    ;;#ASMEND
314; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
315; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
316; GFX900-NEXT:    s_waitcnt vmcnt(0)
317; GFX900-NEXT:    s_setpc_b64 s[30:31]
318;
319; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_u:
320; GFX90A:       ; %bb.0:
321; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX90A-NEXT:    ;;#ASMSTART
323; GFX90A-NEXT:    ; def v[0:1]
324; GFX90A-NEXT:    ;;#ASMEND
325; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
326; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
327; GFX90A-NEXT:    ;;#ASMSTART
328; GFX90A-NEXT:    ; def v[2:3]
329; GFX90A-NEXT:    ;;#ASMEND
330; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
331; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
332; GFX90A-NEXT:    s_waitcnt vmcnt(0)
333; GFX90A-NEXT:    s_setpc_b64 s[30:31]
334;
335; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_u:
336; GFX940:       ; %bb.0:
337; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
338; GFX940-NEXT:    ;;#ASMSTART
339; GFX940-NEXT:    ; def v[0:1]
340; GFX940-NEXT:    ;;#ASMEND
341; GFX940-NEXT:    s_mov_b32 s2, 0xffff
342; GFX940-NEXT:    v_mov_b32_e32 v4, 0
343; GFX940-NEXT:    ;;#ASMSTART
344; GFX940-NEXT:    ; def v[2:3]
345; GFX940-NEXT:    ;;#ASMEND
346; GFX940-NEXT:    s_nop 0
347; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
348; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
349; GFX940-NEXT:    s_waitcnt vmcnt(0)
350; GFX940-NEXT:    s_setpc_b64 s[30:31]
351  %vec0 = call <4 x i16> asm "; def $0", "=v"()
352  %vec1 = call <4 x i16> asm "; def $0", "=v"()
353  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
354  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
355  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 poison>
356  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
357  ret void
358}
359
; Two-source shuffle, mask <5, 2, poison>: result lane 0 is lane 2 of
; %extract31, result lane 1 is lane 2 of %extract3; lane 2 is unspecified.
; Expected output: both asm defs are kept and the second register of each
; pair (v1, v3) is packed with v_perm_b32 (selector 0x5040100) before a single
; dword store. The gfx940 variant additionally expects an "s_nop 0" first.
360define void @v_shuffle_v3i16_v3i16__5_2_u(ptr addrspace(1) inreg %ptr) {
361; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_u:
362; GFX900:       ; %bb.0:
363; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; GFX900-NEXT:    ;;#ASMSTART
365; GFX900-NEXT:    ; def v[0:1]
366; GFX900-NEXT:    ;;#ASMEND
367; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
368; GFX900-NEXT:    v_mov_b32_e32 v4, 0
369; GFX900-NEXT:    ;;#ASMSTART
370; GFX900-NEXT:    ; def v[2:3]
371; GFX900-NEXT:    ;;#ASMEND
372; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
373; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
374; GFX900-NEXT:    s_waitcnt vmcnt(0)
375; GFX900-NEXT:    s_setpc_b64 s[30:31]
376;
377; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_u:
378; GFX90A:       ; %bb.0:
379; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
380; GFX90A-NEXT:    ;;#ASMSTART
381; GFX90A-NEXT:    ; def v[0:1]
382; GFX90A-NEXT:    ;;#ASMEND
383; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
384; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
385; GFX90A-NEXT:    ;;#ASMSTART
386; GFX90A-NEXT:    ; def v[2:3]
387; GFX90A-NEXT:    ;;#ASMEND
388; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
389; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
390; GFX90A-NEXT:    s_waitcnt vmcnt(0)
391; GFX90A-NEXT:    s_setpc_b64 s[30:31]
392;
393; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_u:
394; GFX940:       ; %bb.0:
395; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396; GFX940-NEXT:    ;;#ASMSTART
397; GFX940-NEXT:    ; def v[0:1]
398; GFX940-NEXT:    ;;#ASMEND
399; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
400; GFX940-NEXT:    v_mov_b32_e32 v4, 0
401; GFX940-NEXT:    ;;#ASMSTART
402; GFX940-NEXT:    ; def v[2:3]
403; GFX940-NEXT:    ;;#ASMEND
404; GFX940-NEXT:    s_nop 0
405; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
406; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
407; GFX940-NEXT:    s_waitcnt vmcnt(0)
408; GFX940-NEXT:    s_setpc_b64 s[30:31]
409  %vec0 = call <4 x i16> asm "; def $0", "=v"()
410  %vec1 = call <4 x i16> asm "; def $0", "=v"()
411  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
412  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
413  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 poison>
414  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
415  ret void
416}
417
; Two-source shuffle, mask <5, 3, poison>: result lane 0 is lane 2 and result
; lane 1 is lane 0, both of the second operand (%extract31); lane 2 is
; unspecified.
; Nothing from %vec0 is selected, and the expected output contains only one
; asm def, a v_perm_b32 (selector 0x5040100) combining its two registers, and
; a single dword store.
418define void @v_shuffle_v3i16_v3i16__5_3_u(ptr addrspace(1) inreg %ptr) {
419; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_u:
420; GFX900:       ; %bb.0:
421; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422; GFX900-NEXT:    ;;#ASMSTART
423; GFX900-NEXT:    ; def v[0:1]
424; GFX900-NEXT:    ;;#ASMEND
425; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
426; GFX900-NEXT:    v_mov_b32_e32 v2, 0
427; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
428; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
429; GFX900-NEXT:    s_waitcnt vmcnt(0)
430; GFX900-NEXT:    s_setpc_b64 s[30:31]
431;
432; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_u:
433; GFX90A:       ; %bb.0:
434; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435; GFX90A-NEXT:    ;;#ASMSTART
436; GFX90A-NEXT:    ; def v[0:1]
437; GFX90A-NEXT:    ;;#ASMEND
438; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
439; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
440; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
441; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
442; GFX90A-NEXT:    s_waitcnt vmcnt(0)
443; GFX90A-NEXT:    s_setpc_b64 s[30:31]
444;
445; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_u:
446; GFX940:       ; %bb.0:
447; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448; GFX940-NEXT:    ;;#ASMSTART
449; GFX940-NEXT:    ; def v[0:1]
450; GFX940-NEXT:    ;;#ASMEND
451; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
452; GFX940-NEXT:    v_mov_b32_e32 v2, 0
453; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
454; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
455; GFX940-NEXT:    s_waitcnt vmcnt(0)
456; GFX940-NEXT:    s_setpc_b64 s[30:31]
457  %vec0 = call <4 x i16> asm "; def $0", "=v"()
458  %vec1 = call <4 x i16> asm "; def $0", "=v"()
459  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
460  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
461  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 poison>
462  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
463  ret void
464}
465
; Two-source shuffle, mask <5, 4, poison>: result lane 0 is lane 2 and result
; lane 1 is lane 1, both of the second operand (%extract31); lane 2 is
; unspecified.
; Nothing from %vec0 is selected, and the expected output contains only one
; asm def, a v_bfi_b32 under the mask 0xffff merging its two registers, and a
; single dword store.
466define void @v_shuffle_v3i16_v3i16__5_4_u(ptr addrspace(1) inreg %ptr) {
467; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_u:
468; GFX900:       ; %bb.0:
469; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
470; GFX900-NEXT:    ;;#ASMSTART
471; GFX900-NEXT:    ; def v[0:1]
472; GFX900-NEXT:    ;;#ASMEND
473; GFX900-NEXT:    s_mov_b32 s4, 0xffff
474; GFX900-NEXT:    v_mov_b32_e32 v2, 0
475; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v0
476; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
477; GFX900-NEXT:    s_waitcnt vmcnt(0)
478; GFX900-NEXT:    s_setpc_b64 s[30:31]
479;
480; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_u:
481; GFX90A:       ; %bb.0:
482; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483; GFX90A-NEXT:    ;;#ASMSTART
484; GFX90A-NEXT:    ; def v[0:1]
485; GFX90A-NEXT:    ;;#ASMEND
486; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
487; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
488; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v0
489; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
490; GFX90A-NEXT:    s_waitcnt vmcnt(0)
491; GFX90A-NEXT:    s_setpc_b64 s[30:31]
492;
493; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_u:
494; GFX940:       ; %bb.0:
495; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
496; GFX940-NEXT:    ;;#ASMSTART
497; GFX940-NEXT:    ; def v[0:1]
498; GFX940-NEXT:    ;;#ASMEND
499; GFX940-NEXT:    s_mov_b32 s2, 0xffff
500; GFX940-NEXT:    v_mov_b32_e32 v2, 0
501; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v0
502; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
503; GFX940-NEXT:    s_waitcnt vmcnt(0)
504; GFX940-NEXT:    s_setpc_b64 s[30:31]
505  %vec0 = call <4 x i16> asm "; def $0", "=v"()
506  %vec1 = call <4 x i16> asm "; def $0", "=v"()
507  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
508  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
509  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 poison>
510  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
511  ret void
512}
513
; Two-source shuffle, mask <5, 5, poison>: result lanes 0 and 1 both take
; lane 2 of the second operand (%extract31); lane 2 is unspecified.
; Nothing from %vec0 is selected, and the expected output contains only one
; asm def, a v_perm_b32 (selector 0x5040100) with v1 as both data inputs, and
; a single dword store.
514define void @v_shuffle_v3i16_v3i16__5_5_u(ptr addrspace(1) inreg %ptr) {
515; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_u:
516; GFX900:       ; %bb.0:
517; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518; GFX900-NEXT:    ;;#ASMSTART
519; GFX900-NEXT:    ; def v[0:1]
520; GFX900-NEXT:    ;;#ASMEND
521; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
522; GFX900-NEXT:    v_mov_b32_e32 v2, 0
523; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
524; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
525; GFX900-NEXT:    s_waitcnt vmcnt(0)
526; GFX900-NEXT:    s_setpc_b64 s[30:31]
527;
528; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_u:
529; GFX90A:       ; %bb.0:
530; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531; GFX90A-NEXT:    ;;#ASMSTART
532; GFX90A-NEXT:    ; def v[0:1]
533; GFX90A-NEXT:    ;;#ASMEND
534; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
535; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
536; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
537; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
538; GFX90A-NEXT:    s_waitcnt vmcnt(0)
539; GFX90A-NEXT:    s_setpc_b64 s[30:31]
540;
541; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_u:
542; GFX940:       ; %bb.0:
543; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544; GFX940-NEXT:    ;;#ASMSTART
545; GFX940-NEXT:    ; def v[0:1]
546; GFX940-NEXT:    ;;#ASMEND
547; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
548; GFX940-NEXT:    v_mov_b32_e32 v2, 0
549; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
550; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
551; GFX940-NEXT:    s_waitcnt vmcnt(0)
552; GFX940-NEXT:    s_setpc_b64 s[30:31]
553  %vec0 = call <4 x i16> asm "; def $0", "=v"()
554  %vec1 = call <4 x i16> asm "; def $0", "=v"()
555  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
556  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
557  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 poison>
558  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
559  ret void
560}
561
; Two-source shuffle, mask <5, 5, 0>: result lanes 0 and 1 both take lane 2 of
; %extract31, and result lane 2 takes lane 0 of %extract3. All three lanes are
; defined, so both asm defs are kept.
; Expected output: a v_perm_b32 (selector 0x5040100) builds the low dword from
; the second asm def, and v0 of the first def is written with
; global_store_short at offset 4. The gfx940 variant additionally expects an
; "s_nop 0" ahead of the v_perm_b32.
562define void @v_shuffle_v3i16_v3i16__5_5_0(ptr addrspace(1) inreg %ptr) {
563; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_0:
564; GFX900:       ; %bb.0:
565; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566; GFX900-NEXT:    ;;#ASMSTART
567; GFX900-NEXT:    ; def v[0:1]
568; GFX900-NEXT:    ;;#ASMEND
569; GFX900-NEXT:    v_mov_b32_e32 v3, 0
570; GFX900-NEXT:    ;;#ASMSTART
571; GFX900-NEXT:    ; def v[1:2]
572; GFX900-NEXT:    ;;#ASMEND
573; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
574; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
575; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
576; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
577; GFX900-NEXT:    s_waitcnt vmcnt(0)
578; GFX900-NEXT:    s_setpc_b64 s[30:31]
579;
580; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_0:
581; GFX90A:       ; %bb.0:
582; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
583; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
584; GFX90A-NEXT:    ;;#ASMSTART
585; GFX90A-NEXT:    ; def v[0:1]
586; GFX90A-NEXT:    ;;#ASMEND
587; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
588; GFX90A-NEXT:    ;;#ASMSTART
589; GFX90A-NEXT:    ; def v[2:3]
590; GFX90A-NEXT:    ;;#ASMEND
591; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
592; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
593; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
594; GFX90A-NEXT:    s_waitcnt vmcnt(0)
595; GFX90A-NEXT:    s_setpc_b64 s[30:31]
596;
597; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_0:
598; GFX940:       ; %bb.0:
599; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
600; GFX940-NEXT:    v_mov_b32_e32 v4, 0
601; GFX940-NEXT:    ;;#ASMSTART
602; GFX940-NEXT:    ; def v[0:1]
603; GFX940-NEXT:    ;;#ASMEND
604; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
605; GFX940-NEXT:    ;;#ASMSTART
606; GFX940-NEXT:    ; def v[2:3]
607; GFX940-NEXT:    ;;#ASMEND
608; GFX940-NEXT:    s_nop 0
609; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
610; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
611; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
612; GFX940-NEXT:    s_waitcnt vmcnt(0)
613; GFX940-NEXT:    s_setpc_b64 s[30:31]
614  %vec0 = call <4 x i16> asm "; def $0", "=v"()
615  %vec1 = call <4 x i16> asm "; def $0", "=v"()
616  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
617  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
618  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 0>
619  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
620  ret void
621}
622
; Two-source shuffle, mask <5, 5, 1>: result lanes 0 and 1 both take lane 2 of
; %extract31, and result lane 2 takes lane 1 of %extract3. All three lanes are
; defined, so both asm defs are kept.
; Expected output: a v_perm_b32 (selector 0x5040100) builds the low dword from
; the second asm def, and v0 of the first def is written with
; global_store_short_d16_hi at offset 4. The gfx940 variant additionally
; expects an "s_nop 0" ahead of the v_perm_b32.
623define void @v_shuffle_v3i16_v3i16__5_5_1(ptr addrspace(1) inreg %ptr) {
624; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_1:
625; GFX900:       ; %bb.0:
626; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
627; GFX900-NEXT:    ;;#ASMSTART
628; GFX900-NEXT:    ; def v[0:1]
629; GFX900-NEXT:    ;;#ASMEND
630; GFX900-NEXT:    v_mov_b32_e32 v3, 0
631; GFX900-NEXT:    ;;#ASMSTART
632; GFX900-NEXT:    ; def v[1:2]
633; GFX900-NEXT:    ;;#ASMEND
634; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
635; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
636; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
637; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
638; GFX900-NEXT:    s_waitcnt vmcnt(0)
639; GFX900-NEXT:    s_setpc_b64 s[30:31]
640;
641; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_1:
642; GFX90A:       ; %bb.0:
643; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
645; GFX90A-NEXT:    ;;#ASMSTART
646; GFX90A-NEXT:    ; def v[0:1]
647; GFX90A-NEXT:    ;;#ASMEND
648; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
649; GFX90A-NEXT:    ;;#ASMSTART
650; GFX90A-NEXT:    ; def v[2:3]
651; GFX90A-NEXT:    ;;#ASMEND
652; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
653; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
654; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
655; GFX90A-NEXT:    s_waitcnt vmcnt(0)
656; GFX90A-NEXT:    s_setpc_b64 s[30:31]
657;
658; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_1:
659; GFX940:       ; %bb.0:
660; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
661; GFX940-NEXT:    v_mov_b32_e32 v4, 0
662; GFX940-NEXT:    ;;#ASMSTART
663; GFX940-NEXT:    ; def v[0:1]
664; GFX940-NEXT:    ;;#ASMEND
665; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
666; GFX940-NEXT:    ;;#ASMSTART
667; GFX940-NEXT:    ; def v[2:3]
668; GFX940-NEXT:    ;;#ASMEND
669; GFX940-NEXT:    s_nop 0
670; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
671; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
672; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
673; GFX940-NEXT:    s_waitcnt vmcnt(0)
674; GFX940-NEXT:    s_setpc_b64 s[30:31]
675  %vec0 = call <4 x i16> asm "; def $0", "=v"()
676  %vec1 = call <4 x i16> asm "; def $0", "=v"()
677  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
678  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
679  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 1>
680  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
681  ret void
682}
683
; Two-source shuffle, mask <5, 5, 2>: result lanes 0 and 1 both take lane 2 of
; %extract31, and result lane 2 takes lane 2 of %extract3. All three lanes are
; defined, so both asm defs are kept.
; Expected output: a v_perm_b32 (selector 0x5040100) builds the low dword from
; v3 of the second asm def, and v1 of the first def is written with
; global_store_short at offset 4. The gfx940 variant additionally expects an
; "s_nop 0" ahead of the v_perm_b32.
684define void @v_shuffle_v3i16_v3i16__5_5_2(ptr addrspace(1) inreg %ptr) {
685; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_2:
686; GFX900:       ; %bb.0:
687; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688; GFX900-NEXT:    v_mov_b32_e32 v4, 0
689; GFX900-NEXT:    ;;#ASMSTART
690; GFX900-NEXT:    ; def v[0:1]
691; GFX900-NEXT:    ;;#ASMEND
692; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
693; GFX900-NEXT:    ;;#ASMSTART
694; GFX900-NEXT:    ; def v[2:3]
695; GFX900-NEXT:    ;;#ASMEND
696; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
697; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
698; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
699; GFX900-NEXT:    s_waitcnt vmcnt(0)
700; GFX900-NEXT:    s_setpc_b64 s[30:31]
701;
702; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_2:
703; GFX90A:       ; %bb.0:
704; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
705; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
706; GFX90A-NEXT:    ;;#ASMSTART
707; GFX90A-NEXT:    ; def v[0:1]
708; GFX90A-NEXT:    ;;#ASMEND
709; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
710; GFX90A-NEXT:    ;;#ASMSTART
711; GFX90A-NEXT:    ; def v[2:3]
712; GFX90A-NEXT:    ;;#ASMEND
713; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
714; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
715; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
716; GFX90A-NEXT:    s_waitcnt vmcnt(0)
717; GFX90A-NEXT:    s_setpc_b64 s[30:31]
718;
719; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_2:
720; GFX940:       ; %bb.0:
721; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722; GFX940-NEXT:    v_mov_b32_e32 v4, 0
723; GFX940-NEXT:    ;;#ASMSTART
724; GFX940-NEXT:    ; def v[0:1]
725; GFX940-NEXT:    ;;#ASMEND
726; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
727; GFX940-NEXT:    ;;#ASMSTART
728; GFX940-NEXT:    ; def v[2:3]
729; GFX940-NEXT:    ;;#ASMEND
730; GFX940-NEXT:    s_nop 0
731; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
732; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
733; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
734; GFX940-NEXT:    s_waitcnt vmcnt(0)
735; GFX940-NEXT:    s_setpc_b64 s[30:31]
736  %vec0 = call <4 x i16> asm "; def $0", "=v"()
737  %vec1 = call <4 x i16> asm "; def $0", "=v"()
738  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
739  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
740  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 2>
741  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
742  ret void
743}
744
; Two-source <3 x i16> shuffle with mask <5, 5, 3>. Mask indices 3..5 address
; the second operand, so lanes 0 and 1 both read element 2 of %extract31 and
; lane 2 reads its element 0; %extract3 (from %vec0) is never selected.
; Each source is an inline-asm <4 x i16> def narrowed to its first three
; elements. The expected output for every target shows a single "; def" block,
; one v_perm_b32 (selector 0x5040100) and a dword + short store pair.
745define void @v_shuffle_v3i16_v3i16__5_5_3(ptr addrspace(1) inreg %ptr) {
746; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_3:
747; GFX900:       ; %bb.0:
748; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
749; GFX900-NEXT:    v_mov_b32_e32 v2, 0
750; GFX900-NEXT:    ;;#ASMSTART
751; GFX900-NEXT:    ; def v[0:1]
752; GFX900-NEXT:    ;;#ASMEND
753; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
754; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
755; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
756; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
757; GFX900-NEXT:    s_waitcnt vmcnt(0)
758; GFX900-NEXT:    s_setpc_b64 s[30:31]
759;
760; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_3:
761; GFX90A:       ; %bb.0:
762; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
763; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
764; GFX90A-NEXT:    ;;#ASMSTART
765; GFX90A-NEXT:    ; def v[0:1]
766; GFX90A-NEXT:    ;;#ASMEND
767; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
768; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
769; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
770; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
771; GFX90A-NEXT:    s_waitcnt vmcnt(0)
772; GFX90A-NEXT:    s_setpc_b64 s[30:31]
773;
774; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_3:
775; GFX940:       ; %bb.0:
776; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777; GFX940-NEXT:    v_mov_b32_e32 v2, 0
778; GFX940-NEXT:    ;;#ASMSTART
779; GFX940-NEXT:    ; def v[0:1]
780; GFX940-NEXT:    ;;#ASMEND
781; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
782; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
783; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
784; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
785; GFX940-NEXT:    s_waitcnt vmcnt(0)
786; GFX940-NEXT:    s_setpc_b64 s[30:31]
787  %vec0 = call <4 x i16> asm "; def $0", "=v"()
788  %vec1 = call <4 x i16> asm "; def $0", "=v"()
789  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
790  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
791  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 3>
792  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
793  ret void
794}
795
; Two-source <3 x i16> shuffle with mask <5, 5, 4>. Mask indices 3..5 address
; the second operand, so lanes 0 and 1 both read element 2 of %extract31 and
; lane 2 reads its element 1; %extract3 (from %vec0) is never selected.
; The expected output for every target shows a single "; def" block and stores
; lane 2 with global_store_short_d16_hi (the upper half of v0).
796define void @v_shuffle_v3i16_v3i16__5_5_4(ptr addrspace(1) inreg %ptr) {
797; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_4:
798; GFX900:       ; %bb.0:
799; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
800; GFX900-NEXT:    v_mov_b32_e32 v2, 0
801; GFX900-NEXT:    ;;#ASMSTART
802; GFX900-NEXT:    ; def v[0:1]
803; GFX900-NEXT:    ;;#ASMEND
804; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
805; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
806; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
807; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
808; GFX900-NEXT:    s_waitcnt vmcnt(0)
809; GFX900-NEXT:    s_setpc_b64 s[30:31]
810;
811; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_4:
812; GFX90A:       ; %bb.0:
813; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
815; GFX90A-NEXT:    ;;#ASMSTART
816; GFX90A-NEXT:    ; def v[0:1]
817; GFX90A-NEXT:    ;;#ASMEND
818; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
819; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
820; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
821; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
822; GFX90A-NEXT:    s_waitcnt vmcnt(0)
823; GFX90A-NEXT:    s_setpc_b64 s[30:31]
824;
825; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_4:
826; GFX940:       ; %bb.0:
827; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
828; GFX940-NEXT:    v_mov_b32_e32 v2, 0
829; GFX940-NEXT:    ;;#ASMSTART
830; GFX940-NEXT:    ; def v[0:1]
831; GFX940-NEXT:    ;;#ASMEND
832; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
833; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
834; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
835; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
836; GFX940-NEXT:    s_waitcnt vmcnt(0)
837; GFX940-NEXT:    s_setpc_b64 s[30:31]
838  %vec0 = call <4 x i16> asm "; def $0", "=v"()
839  %vec1 = call <4 x i16> asm "; def $0", "=v"()
840  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
841  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
842  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 4>
843  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
844  ret void
845}
846
; Two-source <3 x i16> shuffle with mask <5, 5, 5>: every result lane reads
; element 2 of the second operand (%extract31); %extract3 (from %vec0) is never
; selected. The expected output for every target shows a single "; def" block,
; a v_perm_b32 that duplicates the low half of v1 into both halves of the dword,
; and a short store of v1 for lane 2.
847define void @v_shuffle_v3i16_v3i16__5_5_5(ptr addrspace(1) inreg %ptr) {
848; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_5_5:
849; GFX900:       ; %bb.0:
850; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
851; GFX900-NEXT:    v_mov_b32_e32 v2, 0
852; GFX900-NEXT:    ;;#ASMSTART
853; GFX900-NEXT:    ; def v[0:1]
854; GFX900-NEXT:    ;;#ASMEND
855; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
856; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
857; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
858; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
859; GFX900-NEXT:    s_waitcnt vmcnt(0)
860; GFX900-NEXT:    s_setpc_b64 s[30:31]
861;
862; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_5_5:
863; GFX90A:       ; %bb.0:
864; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
865; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
866; GFX90A-NEXT:    ;;#ASMSTART
867; GFX90A-NEXT:    ; def v[0:1]
868; GFX90A-NEXT:    ;;#ASMEND
869; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
870; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
871; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
872; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
873; GFX90A-NEXT:    s_waitcnt vmcnt(0)
874; GFX90A-NEXT:    s_setpc_b64 s[30:31]
875;
876; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_5_5:
877; GFX940:       ; %bb.0:
878; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
879; GFX940-NEXT:    v_mov_b32_e32 v2, 0
880; GFX940-NEXT:    ;;#ASMSTART
881; GFX940-NEXT:    ; def v[0:1]
882; GFX940-NEXT:    ;;#ASMEND
883; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
884; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
885; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
886; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
887; GFX940-NEXT:    s_waitcnt vmcnt(0)
888; GFX940-NEXT:    s_setpc_b64 s[30:31]
889  %vec0 = call <4 x i16> asm "; def $0", "=v"()
890  %vec1 = call <4 x i16> asm "; def $0", "=v"()
891  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
892  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
893  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 5>
894  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
895  ret void
896}
897
; Single-source <3 x i16> shuffle (second operand is poison) with mask
; <poison, 0, 0>: lane 0 is unconstrained, lanes 1 and 2 both read element 0 of
; %extract3. The source is an inline-asm <4 x i16> def narrowed to its first
; three elements. The expected output builds the stored dword with a 16-bit
; left shift of v0 (v_lshlrev_b32) and stores v0 as the trailing short.
898define void @v_shuffle_v3i16_v3i16__u_0_0(ptr addrspace(1) inreg %ptr) {
899; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_0_0:
900; GFX900:       ; %bb.0:
901; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
902; GFX900-NEXT:    v_mov_b32_e32 v2, 0
903; GFX900-NEXT:    ;;#ASMSTART
904; GFX900-NEXT:    ; def v[0:1]
905; GFX900-NEXT:    ;;#ASMEND
906; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
907; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
908; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
909; GFX900-NEXT:    s_waitcnt vmcnt(0)
910; GFX900-NEXT:    s_setpc_b64 s[30:31]
911;
912; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_0_0:
913; GFX90A:       ; %bb.0:
914; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
915; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
916; GFX90A-NEXT:    ;;#ASMSTART
917; GFX90A-NEXT:    ; def v[0:1]
918; GFX90A-NEXT:    ;;#ASMEND
919; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
920; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
921; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
922; GFX90A-NEXT:    s_waitcnt vmcnt(0)
923; GFX90A-NEXT:    s_setpc_b64 s[30:31]
924;
925; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_0_0:
926; GFX940:       ; %bb.0:
927; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
928; GFX940-NEXT:    v_mov_b32_e32 v2, 0
929; GFX940-NEXT:    ;;#ASMSTART
930; GFX940-NEXT:    ; def v[0:1]
931; GFX940-NEXT:    ;;#ASMEND
932; GFX940-NEXT:    s_nop 0
933; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
934; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
935; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
936; GFX940-NEXT:    s_waitcnt vmcnt(0)
937; GFX940-NEXT:    s_setpc_b64 s[30:31]
938  %vec0 = call <4 x i16> asm "; def $0", "=v"()
939  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
940  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 0, i32 0>
941  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
942  ret void
943}
944
; Single-source <3 x i16> shuffle (second operand is poison) with a
; zeroinitializer mask, i.e. <0, 0, 0>: every result lane reads element 0 of
; %extract3. The expected output duplicates the low half of v0 into both halves
; of the stored dword with v_perm_b32 (selector 0x5040100) and stores v0 as the
; trailing short.
945define void @v_shuffle_v3i16_v3i16__0_0_0(ptr addrspace(1) inreg %ptr) {
946; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_0_0:
947; GFX900:       ; %bb.0:
948; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
949; GFX900-NEXT:    v_mov_b32_e32 v2, 0
950; GFX900-NEXT:    ;;#ASMSTART
951; GFX900-NEXT:    ; def v[0:1]
952; GFX900-NEXT:    ;;#ASMEND
953; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
954; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
955; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
956; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
957; GFX900-NEXT:    s_waitcnt vmcnt(0)
958; GFX900-NEXT:    s_setpc_b64 s[30:31]
959;
960; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_0_0:
961; GFX90A:       ; %bb.0:
962; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
963; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
964; GFX90A-NEXT:    ;;#ASMSTART
965; GFX90A-NEXT:    ; def v[0:1]
966; GFX90A-NEXT:    ;;#ASMEND
967; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
968; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
969; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
970; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
971; GFX90A-NEXT:    s_waitcnt vmcnt(0)
972; GFX90A-NEXT:    s_setpc_b64 s[30:31]
973;
974; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_0_0:
975; GFX940:       ; %bb.0:
976; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977; GFX940-NEXT:    v_mov_b32_e32 v2, 0
978; GFX940-NEXT:    ;;#ASMSTART
979; GFX940-NEXT:    ; def v[0:1]
980; GFX940-NEXT:    ;;#ASMEND
981; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
982; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
983; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
984; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
985; GFX940-NEXT:    s_waitcnt vmcnt(0)
986; GFX940-NEXT:    s_setpc_b64 s[30:31]
987  %vec0 = call <4 x i16> asm "; def $0", "=v"()
988  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
989  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> zeroinitializer
990  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
991  ret void
992}
993
; Single-source <3 x i16> shuffle (second operand is poison) with mask
; <1, 0, 0>: lane 0 reads element 1 of %extract3, lanes 1 and 2 read element 0.
; The expected output forms the stored dword with "v_alignbit_b32 v1, v0, v0, 16"
; (both data operands are v0, shift amount 16) and stores v0 as the trailing
; short.
994define void @v_shuffle_v3i16_v3i16__1_0_0(ptr addrspace(1) inreg %ptr) {
995; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_0_0:
996; GFX900:       ; %bb.0:
997; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
998; GFX900-NEXT:    v_mov_b32_e32 v2, 0
999; GFX900-NEXT:    ;;#ASMSTART
1000; GFX900-NEXT:    ; def v[0:1]
1001; GFX900-NEXT:    ;;#ASMEND
1002; GFX900-NEXT:    v_alignbit_b32 v1, v0, v0, 16
1003; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1004; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1005; GFX900-NEXT:    s_waitcnt vmcnt(0)
1006; GFX900-NEXT:    s_setpc_b64 s[30:31]
1007;
1008; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_0_0:
1009; GFX90A:       ; %bb.0:
1010; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1012; GFX90A-NEXT:    ;;#ASMSTART
1013; GFX90A-NEXT:    ; def v[0:1]
1014; GFX90A-NEXT:    ;;#ASMEND
1015; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v0, 16
1016; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1017; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1018; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1019; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1020;
1021; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_0_0:
1022; GFX940:       ; %bb.0:
1023; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1025; GFX940-NEXT:    ;;#ASMSTART
1026; GFX940-NEXT:    ; def v[0:1]
1027; GFX940-NEXT:    ;;#ASMEND
1028; GFX940-NEXT:    s_nop 0
1029; GFX940-NEXT:    v_alignbit_b32 v1, v0, v0, 16
1030; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1031; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1032; GFX940-NEXT:    s_waitcnt vmcnt(0)
1033; GFX940-NEXT:    s_setpc_b64 s[30:31]
1034  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1035  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1036  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 0, i32 0>
1037  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1038  ret void
1039}
1040
; Single-source <3 x i16> shuffle (second operand is poison) with mask
; <2, 0, 0>: lane 0 reads element 2 of %extract3, lanes 1 and 2 read element 0.
; The expected output combines v0 and v1 of the asm-defined pair into the stored
; dword with v_perm_b32 (selector 0x5040100) and stores v0 as the trailing
; short.
1041define void @v_shuffle_v3i16_v3i16__2_0_0(ptr addrspace(1) inreg %ptr) {
1042; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_0_0:
1043; GFX900:       ; %bb.0:
1044; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1045; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1046; GFX900-NEXT:    ;;#ASMSTART
1047; GFX900-NEXT:    ; def v[0:1]
1048; GFX900-NEXT:    ;;#ASMEND
1049; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1050; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
1051; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1052; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1053; GFX900-NEXT:    s_waitcnt vmcnt(0)
1054; GFX900-NEXT:    s_setpc_b64 s[30:31]
1055;
1056; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_0_0:
1057; GFX90A:       ; %bb.0:
1058; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1059; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1060; GFX90A-NEXT:    ;;#ASMSTART
1061; GFX90A-NEXT:    ; def v[0:1]
1062; GFX90A-NEXT:    ;;#ASMEND
1063; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1064; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
1065; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1066; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1067; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1068; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1069;
1070; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_0_0:
1071; GFX940:       ; %bb.0:
1072; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1073; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1074; GFX940-NEXT:    ;;#ASMSTART
1075; GFX940-NEXT:    ; def v[0:1]
1076; GFX940-NEXT:    ;;#ASMEND
1077; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1078; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
1079; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1080; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1081; GFX940-NEXT:    s_waitcnt vmcnt(0)
1082; GFX940-NEXT:    s_setpc_b64 s[30:31]
1083  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1084  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1085  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 0, i32 0>
1086  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1087  ret void
1088}
1089
; Single-source <3 x i16> shuffle with mask <3, 0, 0>. Index 3 addresses
; element 0 of the second operand, which is poison here, so only lanes 1 and 2
; (element 0 of %extract3) carry defined data. The expected output builds the
; stored dword with a 16-bit left shift of v0 (v_lshlrev_b32) and stores v0 as
; the trailing short.
1090define void @v_shuffle_v3i16_v3i16__3_0_0(ptr addrspace(1) inreg %ptr) {
1091; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_0_0:
1092; GFX900:       ; %bb.0:
1093; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1094; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1095; GFX900-NEXT:    ;;#ASMSTART
1096; GFX900-NEXT:    ; def v[0:1]
1097; GFX900-NEXT:    ;;#ASMEND
1098; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1099; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1100; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1101; GFX900-NEXT:    s_waitcnt vmcnt(0)
1102; GFX900-NEXT:    s_setpc_b64 s[30:31]
1103;
1104; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_0_0:
1105; GFX90A:       ; %bb.0:
1106; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1107; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1108; GFX90A-NEXT:    ;;#ASMSTART
1109; GFX90A-NEXT:    ; def v[0:1]
1110; GFX90A-NEXT:    ;;#ASMEND
1111; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1112; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1113; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1114; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1115; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1116;
1117; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_0_0:
1118; GFX940:       ; %bb.0:
1119; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1120; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1121; GFX940-NEXT:    ;;#ASMSTART
1122; GFX940-NEXT:    ; def v[0:1]
1123; GFX940-NEXT:    ;;#ASMEND
1124; GFX940-NEXT:    s_nop 0
1125; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1126; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1127; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1128; GFX940-NEXT:    s_waitcnt vmcnt(0)
1129; GFX940-NEXT:    s_setpc_b64 s[30:31]
1130  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1131  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1132  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 0, i32 0>
1133  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1134  ret void
1135}
1136
; Two-source <3 x i16> shuffle with mask <4, 0, 0>. Mask indices 3..5 address
; the second operand, so lane 0 reads element 1 of %extract31 while lanes 1 and
; 2 read element 0 of %extract3. Both inline-asm defs appear in the expected
; output; on gfx900 the second def is placed in v[1:2], overlapping v1 of the
; first pair, whereas gfx90a/gfx940 use v[2:3]. The dword is assembled with
; v_alignbit_b32 (shift amount 16) and v0 is stored as the trailing short.
1137define void @v_shuffle_v3i16_v3i16__4_0_0(ptr addrspace(1) inreg %ptr) {
1138; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_0_0:
1139; GFX900:       ; %bb.0:
1140; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1141; GFX900-NEXT:    ;;#ASMSTART
1142; GFX900-NEXT:    ; def v[0:1]
1143; GFX900-NEXT:    ;;#ASMEND
1144; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1145; GFX900-NEXT:    ;;#ASMSTART
1146; GFX900-NEXT:    ; def v[1:2]
1147; GFX900-NEXT:    ;;#ASMEND
1148; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1149; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1150; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1151; GFX900-NEXT:    s_waitcnt vmcnt(0)
1152; GFX900-NEXT:    s_setpc_b64 s[30:31]
1153;
1154; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_0_0:
1155; GFX90A:       ; %bb.0:
1156; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1157; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1158; GFX90A-NEXT:    ;;#ASMSTART
1159; GFX90A-NEXT:    ; def v[0:1]
1160; GFX90A-NEXT:    ;;#ASMEND
1161; GFX90A-NEXT:    ;;#ASMSTART
1162; GFX90A-NEXT:    ; def v[2:3]
1163; GFX90A-NEXT:    ;;#ASMEND
1164; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v2, 16
1165; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1166; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1167; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1168; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1169;
1170; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_0_0:
1171; GFX940:       ; %bb.0:
1172; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1173; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1174; GFX940-NEXT:    ;;#ASMSTART
1175; GFX940-NEXT:    ; def v[0:1]
1176; GFX940-NEXT:    ;;#ASMEND
1177; GFX940-NEXT:    ;;#ASMSTART
1178; GFX940-NEXT:    ; def v[2:3]
1179; GFX940-NEXT:    ;;#ASMEND
1180; GFX940-NEXT:    s_nop 0
1181; GFX940-NEXT:    v_alignbit_b32 v1, v0, v2, 16
1182; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1183; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1184; GFX940-NEXT:    s_waitcnt vmcnt(0)
1185; GFX940-NEXT:    s_setpc_b64 s[30:31]
1186  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1187  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1188  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1189  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1190  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 0, i32 0>
1191  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1192  ret void
1193}
1194
; Two-source <3 x i16> shuffle with mask <5, 0, 0>. Mask indices 3..5 address
; the second operand, so lane 0 reads element 2 of %extract31 while lanes 1 and
; 2 read element 0 of %extract3. Both inline-asm defs appear in the expected
; output; on gfx900 the second def is placed in v[1:2], on gfx90a/gfx940 in
; v[2:3]. The dword is assembled with v_perm_b32 (selector 0x5040100) from v0
; and the upper register of the second pair; v0 is stored as the trailing short.
1195define void @v_shuffle_v3i16_v3i16__5_0_0(ptr addrspace(1) inreg %ptr) {
1196; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_0:
1197; GFX900:       ; %bb.0:
1198; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1199; GFX900-NEXT:    ;;#ASMSTART
1200; GFX900-NEXT:    ; def v[0:1]
1201; GFX900-NEXT:    ;;#ASMEND
1202; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1203; GFX900-NEXT:    ;;#ASMSTART
1204; GFX900-NEXT:    ; def v[1:2]
1205; GFX900-NEXT:    ;;#ASMEND
1206; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1207; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
1208; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1209; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1210; GFX900-NEXT:    s_waitcnt vmcnt(0)
1211; GFX900-NEXT:    s_setpc_b64 s[30:31]
1212;
1213; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_0:
1214; GFX90A:       ; %bb.0:
1215; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1216; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1217; GFX90A-NEXT:    ;;#ASMSTART
1218; GFX90A-NEXT:    ; def v[0:1]
1219; GFX90A-NEXT:    ;;#ASMEND
1220; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1221; GFX90A-NEXT:    ;;#ASMSTART
1222; GFX90A-NEXT:    ; def v[2:3]
1223; GFX90A-NEXT:    ;;#ASMEND
1224; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
1225; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1226; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1227; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1228; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1229;
1230; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_0:
1231; GFX940:       ; %bb.0:
1232; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1234; GFX940-NEXT:    ;;#ASMSTART
1235; GFX940-NEXT:    ; def v[0:1]
1236; GFX940-NEXT:    ;;#ASMEND
1237; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1238; GFX940-NEXT:    ;;#ASMSTART
1239; GFX940-NEXT:    ; def v[2:3]
1240; GFX940-NEXT:    ;;#ASMEND
1241; GFX940-NEXT:    s_nop 0
1242; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
1243; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1244; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1245; GFX940-NEXT:    s_waitcnt vmcnt(0)
1246; GFX940-NEXT:    s_setpc_b64 s[30:31]
1247  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1248  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1249  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1250  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1251  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 0>
1252  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1253  ret void
1254}
1255
; Two-source <3 x i16> shuffle with mask <5, poison, 0>. Lane 0 reads element 2
; of the second operand (%extract31), lane 1 is unconstrained, and lane 2 reads
; element 0 of %extract3. Because lane 1 is free, the expected output contains
; no combining instruction: the dword store takes the upper register of the
; second asm-defined pair directly and the short store takes v0.
1256define void @v_shuffle_v3i16_v3i16__5_u_0(ptr addrspace(1) inreg %ptr) {
1257; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_0:
1258; GFX900:       ; %bb.0:
1259; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1260; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1261; GFX900-NEXT:    ;;#ASMSTART
1262; GFX900-NEXT:    ; def v[0:1]
1263; GFX900-NEXT:    ;;#ASMEND
1264; GFX900-NEXT:    ;;#ASMSTART
1265; GFX900-NEXT:    ; def v[1:2]
1266; GFX900-NEXT:    ;;#ASMEND
1267; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1268; GFX900-NEXT:    global_store_dword v3, v2, s[16:17]
1269; GFX900-NEXT:    s_waitcnt vmcnt(0)
1270; GFX900-NEXT:    s_setpc_b64 s[30:31]
1271;
1272; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_0:
1273; GFX90A:       ; %bb.0:
1274; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1276; GFX90A-NEXT:    ;;#ASMSTART
1277; GFX90A-NEXT:    ; def v[0:1]
1278; GFX90A-NEXT:    ;;#ASMEND
1279; GFX90A-NEXT:    ;;#ASMSTART
1280; GFX90A-NEXT:    ; def v[2:3]
1281; GFX90A-NEXT:    ;;#ASMEND
1282; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1283; GFX90A-NEXT:    global_store_dword v4, v3, s[16:17]
1284; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1285; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1286;
1287; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_0:
1288; GFX940:       ; %bb.0:
1289; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1290; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1291; GFX940-NEXT:    ;;#ASMSTART
1292; GFX940-NEXT:    ; def v[0:1]
1293; GFX940-NEXT:    ;;#ASMEND
1294; GFX940-NEXT:    ;;#ASMSTART
1295; GFX940-NEXT:    ; def v[2:3]
1296; GFX940-NEXT:    ;;#ASMEND
1297; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1298; GFX940-NEXT:    global_store_dword v4, v3, s[0:1] sc0 sc1
1299; GFX940-NEXT:    s_waitcnt vmcnt(0)
1300; GFX940-NEXT:    s_setpc_b64 s[30:31]
1301  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1302  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1303  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1304  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1305  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 0>
1306  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1307  ret void
1308}
1309
; Two-source <3 x i16> shuffle with mask <5, 1, 0>. Lane 0 reads element 2 of
; the second operand (%extract31); lanes 1 and 2 read elements 1 and 0 of
; %extract3. Element 1 of %extract3 already sits in the upper half of v0, so
; the expected output merges the halves with v_bfi_b32 under mask 0xffff
; instead of a v_perm_b32, then stores v0 as the trailing short.
1310define void @v_shuffle_v3i16_v3i16__5_1_0(ptr addrspace(1) inreg %ptr) {
1311; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_0:
1312; GFX900:       ; %bb.0:
1313; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1314; GFX900-NEXT:    ;;#ASMSTART
1315; GFX900-NEXT:    ; def v[0:1]
1316; GFX900-NEXT:    ;;#ASMEND
1317; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1318; GFX900-NEXT:    ;;#ASMSTART
1319; GFX900-NEXT:    ; def v[1:2]
1320; GFX900-NEXT:    ;;#ASMEND
1321; GFX900-NEXT:    s_mov_b32 s4, 0xffff
1322; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v0
1323; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1324; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1325; GFX900-NEXT:    s_waitcnt vmcnt(0)
1326; GFX900-NEXT:    s_setpc_b64 s[30:31]
1327;
1328; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_0:
1329; GFX90A:       ; %bb.0:
1330; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1332; GFX90A-NEXT:    ;;#ASMSTART
1333; GFX90A-NEXT:    ; def v[0:1]
1334; GFX90A-NEXT:    ;;#ASMEND
1335; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
1336; GFX90A-NEXT:    ;;#ASMSTART
1337; GFX90A-NEXT:    ; def v[2:3]
1338; GFX90A-NEXT:    ;;#ASMEND
1339; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v0
1340; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1341; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1342; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1343; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1344;
1345; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_0:
1346; GFX940:       ; %bb.0:
1347; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1348; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1349; GFX940-NEXT:    ;;#ASMSTART
1350; GFX940-NEXT:    ; def v[0:1]
1351; GFX940-NEXT:    ;;#ASMEND
1352; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1353; GFX940-NEXT:    ;;#ASMSTART
1354; GFX940-NEXT:    ; def v[2:3]
1355; GFX940-NEXT:    ;;#ASMEND
1356; GFX940-NEXT:    s_nop 0
1357; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v0
1358; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1359; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1360; GFX940-NEXT:    s_waitcnt vmcnt(0)
1361; GFX940-NEXT:    s_setpc_b64 s[30:31]
1362  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1363  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1364  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1365  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1366  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 0>
1367  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1368  ret void
1369}
1370
; Two-source <3 x i16> shuffle with mask <5, 2, 0>. Lane 0 reads element 2 of
; the second operand (%extract31); lanes 1 and 2 read elements 2 and 0 of
; %extract3. Both registers of the first asm-defined pair are needed, and all
; three targets place the defs in v[0:1] and v[2:3]. The dword is assembled
; with v_perm_b32 (selector 0x5040100) from v1 and v3; v0 is stored as the
; trailing short.
1371define void @v_shuffle_v3i16_v3i16__5_2_0(ptr addrspace(1) inreg %ptr) {
1372; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_0:
1373; GFX900:       ; %bb.0:
1374; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1375; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1376; GFX900-NEXT:    ;;#ASMSTART
1377; GFX900-NEXT:    ; def v[0:1]
1378; GFX900-NEXT:    ;;#ASMEND
1379; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1380; GFX900-NEXT:    ;;#ASMSTART
1381; GFX900-NEXT:    ; def v[2:3]
1382; GFX900-NEXT:    ;;#ASMEND
1383; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
1384; GFX900-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1385; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
1386; GFX900-NEXT:    s_waitcnt vmcnt(0)
1387; GFX900-NEXT:    s_setpc_b64 s[30:31]
1388;
1389; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_0:
1390; GFX90A:       ; %bb.0:
1391; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1393; GFX90A-NEXT:    ;;#ASMSTART
1394; GFX90A-NEXT:    ; def v[0:1]
1395; GFX90A-NEXT:    ;;#ASMEND
1396; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1397; GFX90A-NEXT:    ;;#ASMSTART
1398; GFX90A-NEXT:    ; def v[2:3]
1399; GFX90A-NEXT:    ;;#ASMEND
1400; GFX90A-NEXT:    v_perm_b32 v1, v1, v3, s4
1401; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1402; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1403; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1404; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1405;
1406; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_0:
1407; GFX940:       ; %bb.0:
1408; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1409; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1410; GFX940-NEXT:    ;;#ASMSTART
1411; GFX940-NEXT:    ; def v[0:1]
1412; GFX940-NEXT:    ;;#ASMEND
1413; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1414; GFX940-NEXT:    ;;#ASMSTART
1415; GFX940-NEXT:    ; def v[2:3]
1416; GFX940-NEXT:    ;;#ASMEND
1417; GFX940-NEXT:    s_nop 0
1418; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s2
1419; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1420; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1421; GFX940-NEXT:    s_waitcnt vmcnt(0)
1422; GFX940-NEXT:    s_setpc_b64 s[30:31]
1423  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1424  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1425  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1426  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1427  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 0>
1428  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1429  ret void
1430}
1431
; Two-source <3 x i16> shuffle with mask <5, 3, 0>. Lanes 0 and 1 read elements
; 2 and 0 of the second operand (%extract31); lane 2 reads element 0 of
; %extract3. The stored dword is therefore built only from the second
; asm-defined pair with v_perm_b32 (selector 0x5040100): v[1:2] on gfx900,
; v[2:3] on gfx90a/gfx940. v0 from the first pair is stored as the trailing
; short.
1432define void @v_shuffle_v3i16_v3i16__5_3_0(ptr addrspace(1) inreg %ptr) {
1433; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_0:
1434; GFX900:       ; %bb.0:
1435; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1436; GFX900-NEXT:    ;;#ASMSTART
1437; GFX900-NEXT:    ; def v[0:1]
1438; GFX900-NEXT:    ;;#ASMEND
1439; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1440; GFX900-NEXT:    ;;#ASMSTART
1441; GFX900-NEXT:    ; def v[1:2]
1442; GFX900-NEXT:    ;;#ASMEND
1443; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1444; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
1445; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1446; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1447; GFX900-NEXT:    s_waitcnt vmcnt(0)
1448; GFX900-NEXT:    s_setpc_b64 s[30:31]
1449;
1450; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_0:
1451; GFX90A:       ; %bb.0:
1452; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1453; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1454; GFX90A-NEXT:    ;;#ASMSTART
1455; GFX90A-NEXT:    ; def v[0:1]
1456; GFX90A-NEXT:    ;;#ASMEND
1457; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1458; GFX90A-NEXT:    ;;#ASMSTART
1459; GFX90A-NEXT:    ; def v[2:3]
1460; GFX90A-NEXT:    ;;#ASMEND
1461; GFX90A-NEXT:    v_perm_b32 v1, v2, v3, s4
1462; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1463; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1464; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1465; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1466;
1467; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_0:
1468; GFX940:       ; %bb.0:
1469; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1470; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1471; GFX940-NEXT:    ;;#ASMSTART
1472; GFX940-NEXT:    ; def v[0:1]
1473; GFX940-NEXT:    ;;#ASMEND
1474; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1475; GFX940-NEXT:    ;;#ASMSTART
1476; GFX940-NEXT:    ; def v[2:3]
1477; GFX940-NEXT:    ;;#ASMEND
1478; GFX940-NEXT:    s_nop 0
1479; GFX940-NEXT:    v_perm_b32 v1, v2, v3, s2
1480; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1481; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1482; GFX940-NEXT:    s_waitcnt vmcnt(0)
1483; GFX940-NEXT:    s_setpc_b64 s[30:31]
1484  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1485  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1486  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1487  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1488  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 0>
1489  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1490  ret void
1491}
1492
; Shuffle of two <3 x i16> inputs with mask <5, 4, 0>.
; Mask indices 0-2 select lanes of %extract3 and 3-5 select lanes of %extract31,
; so the stored value is { %extract31[2], %extract31[1], %extract3[0] }.
; Each input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1493define void @v_shuffle_v3i16_v3i16__5_4_0(ptr addrspace(1) inreg %ptr) {
1494; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_0:
1495; GFX900:       ; %bb.0:
1496; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1497; GFX900-NEXT:    ;;#ASMSTART
1498; GFX900-NEXT:    ; def v[0:1]
1499; GFX900-NEXT:    ;;#ASMEND
1500; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1501; GFX900-NEXT:    ;;#ASMSTART
1502; GFX900-NEXT:    ; def v[1:2]
1503; GFX900-NEXT:    ;;#ASMEND
1504; GFX900-NEXT:    s_mov_b32 s4, 0xffff
1505; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v1
1506; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1507; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1508; GFX900-NEXT:    s_waitcnt vmcnt(0)
1509; GFX900-NEXT:    s_setpc_b64 s[30:31]
1510;
1511; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_0:
1512; GFX90A:       ; %bb.0:
1513; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1514; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1515; GFX90A-NEXT:    ;;#ASMSTART
1516; GFX90A-NEXT:    ; def v[0:1]
1517; GFX90A-NEXT:    ;;#ASMEND
1518; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
1519; GFX90A-NEXT:    ;;#ASMSTART
1520; GFX90A-NEXT:    ; def v[2:3]
1521; GFX90A-NEXT:    ;;#ASMEND
1522; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v2
1523; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1524; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1525; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1526; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1527;
1528; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_0:
1529; GFX940:       ; %bb.0:
1530; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1531; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1532; GFX940-NEXT:    ;;#ASMSTART
1533; GFX940-NEXT:    ; def v[0:1]
1534; GFX940-NEXT:    ;;#ASMEND
1535; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1536; GFX940-NEXT:    ;;#ASMSTART
1537; GFX940-NEXT:    ; def v[2:3]
1538; GFX940-NEXT:    ;;#ASMEND
1539; GFX940-NEXT:    s_nop 0
1540; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v2
1541; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1542; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1543; GFX940-NEXT:    s_waitcnt vmcnt(0)
1544; GFX940-NEXT:    s_setpc_b64 s[30:31]
1545  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1546  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1547  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1548  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1549  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 0>
1550  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1551  ret void
1552}
1553
; Shuffle of a single <3 x i16> input (second operand is poison) with mask
; <poison, 1, 1>: result lane 0 is unspecified, lanes 1 and 2 both take
; %extract3[1].
; The input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1554define void @v_shuffle_v3i16_v3i16__u_1_1(ptr addrspace(1) inreg %ptr) {
1555; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_1_1:
1556; GFX900:       ; %bb.0:
1557; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1558; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1559; GFX900-NEXT:    ;;#ASMSTART
1560; GFX900-NEXT:    ; def v[0:1]
1561; GFX900-NEXT:    ;;#ASMEND
1562; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1563; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1564; GFX900-NEXT:    s_waitcnt vmcnt(0)
1565; GFX900-NEXT:    s_setpc_b64 s[30:31]
1566;
1567; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_1_1:
1568; GFX90A:       ; %bb.0:
1569; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1570; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1571; GFX90A-NEXT:    ;;#ASMSTART
1572; GFX90A-NEXT:    ; def v[0:1]
1573; GFX90A-NEXT:    ;;#ASMEND
1574; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1575; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1576; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1577; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1578;
1579; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_1_1:
1580; GFX940:       ; %bb.0:
1581; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1582; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1583; GFX940-NEXT:    ;;#ASMSTART
1584; GFX940-NEXT:    ; def v[0:1]
1585; GFX940-NEXT:    ;;#ASMEND
1586; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
1587; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1588; GFX940-NEXT:    s_waitcnt vmcnt(0)
1589; GFX940-NEXT:    s_setpc_b64 s[30:31]
1590  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1591  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1592  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 1, i32 1>
1593  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1594  ret void
1595}
1596
; Shuffle of a single <3 x i16> input (second operand is poison) with mask
; <0, 1, 1>: the stored value is { %extract3[0], %extract3[1], %extract3[1] }.
; The input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1597define void @v_shuffle_v3i16_v3i16__0_1_1(ptr addrspace(1) inreg %ptr) {
1598; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_1_1:
1599; GFX900:       ; %bb.0:
1600; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1601; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1602; GFX900-NEXT:    ;;#ASMSTART
1603; GFX900-NEXT:    ; def v[0:1]
1604; GFX900-NEXT:    ;;#ASMEND
1605; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1606; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1607; GFX900-NEXT:    s_waitcnt vmcnt(0)
1608; GFX900-NEXT:    s_setpc_b64 s[30:31]
1609;
1610; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_1_1:
1611; GFX90A:       ; %bb.0:
1612; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1613; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1614; GFX90A-NEXT:    ;;#ASMSTART
1615; GFX90A-NEXT:    ; def v[0:1]
1616; GFX90A-NEXT:    ;;#ASMEND
1617; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1618; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1619; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1620; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1621;
1622; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_1_1:
1623; GFX940:       ; %bb.0:
1624; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1625; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1626; GFX940-NEXT:    ;;#ASMSTART
1627; GFX940-NEXT:    ; def v[0:1]
1628; GFX940-NEXT:    ;;#ASMEND
1629; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
1630; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1631; GFX940-NEXT:    s_waitcnt vmcnt(0)
1632; GFX940-NEXT:    s_setpc_b64 s[30:31]
1633  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1634  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1635  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 1, i32 1>
1636  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1637  ret void
1638}
1639
; Shuffle of a single <3 x i16> input (second operand is poison) with mask
; <1, 1, 1>: all three result lanes take %extract3[1] (a splat of lane 1).
; The input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1640define void @v_shuffle_v3i16_v3i16__1_1_1(ptr addrspace(1) inreg %ptr) {
1641; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_1_1:
1642; GFX900:       ; %bb.0:
1643; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644; GFX900-NEXT:    ;;#ASMSTART
1645; GFX900-NEXT:    ; def v[0:1]
1646; GFX900-NEXT:    ;;#ASMEND
1647; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1648; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1649; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1650; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1651; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1652; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1653; GFX900-NEXT:    s_waitcnt vmcnt(0)
1654; GFX900-NEXT:    s_setpc_b64 s[30:31]
1655;
1656; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_1_1:
1657; GFX90A:       ; %bb.0:
1658; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1659; GFX90A-NEXT:    ;;#ASMSTART
1660; GFX90A-NEXT:    ; def v[0:1]
1661; GFX90A-NEXT:    ;;#ASMEND
1662; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1663; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1664; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1665; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1666; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1667; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1668; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1669; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1670;
1671; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_1_1:
1672; GFX940:       ; %bb.0:
1673; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1674; GFX940-NEXT:    ;;#ASMSTART
1675; GFX940-NEXT:    ; def v[0:1]
1676; GFX940-NEXT:    ;;#ASMEND
1677; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1678; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1679; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1680; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1681; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1682; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1683; GFX940-NEXT:    s_waitcnt vmcnt(0)
1684; GFX940-NEXT:    s_setpc_b64 s[30:31]
1685  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1686  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1687  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 1, i32 1>
1688  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1689  ret void
1690}
1691
; Shuffle of a single <3 x i16> input (second operand is poison) with mask
; <2, 1, 1>: the stored value is { %extract3[2], %extract3[1], %extract3[1] }.
; The input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1692define void @v_shuffle_v3i16_v3i16__2_1_1(ptr addrspace(1) inreg %ptr) {
1693; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_1_1:
1694; GFX900:       ; %bb.0:
1695; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1696; GFX900-NEXT:    ;;#ASMSTART
1697; GFX900-NEXT:    ; def v[0:1]
1698; GFX900-NEXT:    ;;#ASMEND
1699; GFX900-NEXT:    s_mov_b32 s4, 0xffff
1700; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1701; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
1702; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1703; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1704; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1705; GFX900-NEXT:    s_waitcnt vmcnt(0)
1706; GFX900-NEXT:    s_setpc_b64 s[30:31]
1707;
1708; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_1_1:
1709; GFX90A:       ; %bb.0:
1710; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1711; GFX90A-NEXT:    ;;#ASMSTART
1712; GFX90A-NEXT:    ; def v[0:1]
1713; GFX90A-NEXT:    ;;#ASMEND
1714; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
1715; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1716; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v0
1717; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1718; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1719; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1720; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1721; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1722;
1723; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_1_1:
1724; GFX940:       ; %bb.0:
1725; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1726; GFX940-NEXT:    ;;#ASMSTART
1727; GFX940-NEXT:    ; def v[0:1]
1728; GFX940-NEXT:    ;;#ASMEND
1729; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1730; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1731; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v0
1732; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1733; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1734; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1735; GFX940-NEXT:    s_waitcnt vmcnt(0)
1736; GFX940-NEXT:    s_setpc_b64 s[30:31]
1737  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1738  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1739  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 1, i32 1>
1740  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1741  ret void
1742}
1743
; Shuffle of a single <3 x i16> input with mask <3, 1, 1>. Mask index 3
; addresses lane 0 of the second operand, which is poison here, so result
; lane 0 is unspecified; lanes 1 and 2 both take %extract3[1].
; The input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1744define void @v_shuffle_v3i16_v3i16__3_1_1(ptr addrspace(1) inreg %ptr) {
1745; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_1_1:
1746; GFX900:       ; %bb.0:
1747; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1748; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1749; GFX900-NEXT:    ;;#ASMSTART
1750; GFX900-NEXT:    ; def v[0:1]
1751; GFX900-NEXT:    ;;#ASMEND
1752; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1753; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1754; GFX900-NEXT:    s_waitcnt vmcnt(0)
1755; GFX900-NEXT:    s_setpc_b64 s[30:31]
1756;
1757; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_1_1:
1758; GFX90A:       ; %bb.0:
1759; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1760; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1761; GFX90A-NEXT:    ;;#ASMSTART
1762; GFX90A-NEXT:    ; def v[0:1]
1763; GFX90A-NEXT:    ;;#ASMEND
1764; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1765; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1766; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1767; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1768;
1769; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_1_1:
1770; GFX940:       ; %bb.0:
1771; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1772; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1773; GFX940-NEXT:    ;;#ASMSTART
1774; GFX940-NEXT:    ; def v[0:1]
1775; GFX940-NEXT:    ;;#ASMEND
1776; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
1777; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1778; GFX940-NEXT:    s_waitcnt vmcnt(0)
1779; GFX940-NEXT:    s_setpc_b64 s[30:31]
1780  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1781  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1782  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 1, i32 1>
1783  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1784  ret void
1785}
1786
; Shuffle of two <3 x i16> inputs with mask <4, 1, 1>.
; Mask indices 0-2 select lanes of %extract3 and 3-5 select lanes of %extract31,
; so the stored value is { %extract31[1], %extract3[1], %extract3[1] }.
; Each input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1787define void @v_shuffle_v3i16_v3i16__4_1_1(ptr addrspace(1) inreg %ptr) {
1788; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_1_1:
1789; GFX900:       ; %bb.0:
1790; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1791; GFX900-NEXT:    ;;#ASMSTART
1792; GFX900-NEXT:    ; def v[0:1]
1793; GFX900-NEXT:    ;;#ASMEND
1794; GFX900-NEXT:    ;;#ASMSTART
1795; GFX900-NEXT:    ; def v[1:2]
1796; GFX900-NEXT:    ;;#ASMEND
1797; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1798; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1799; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
1800; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1801; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1802; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1803; GFX900-NEXT:    s_waitcnt vmcnt(0)
1804; GFX900-NEXT:    s_setpc_b64 s[30:31]
1805;
1806; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_1_1:
1807; GFX90A:       ; %bb.0:
1808; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1809; GFX90A-NEXT:    ;;#ASMSTART
1810; GFX90A-NEXT:    ; def v[0:1]
1811; GFX90A-NEXT:    ;;#ASMEND
1812; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1813; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1814; GFX90A-NEXT:    ;;#ASMSTART
1815; GFX90A-NEXT:    ; def v[2:3]
1816; GFX90A-NEXT:    ;;#ASMEND
1817; GFX90A-NEXT:    v_perm_b32 v1, v0, v2, s4
1818; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1819; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1820; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1821; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1822; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1823;
1824; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_1_1:
1825; GFX940:       ; %bb.0:
1826; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1827; GFX940-NEXT:    ;;#ASMSTART
1828; GFX940-NEXT:    ; def v[0:1]
1829; GFX940-NEXT:    ;;#ASMEND
1830; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1831; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1832; GFX940-NEXT:    ;;#ASMSTART
1833; GFX940-NEXT:    ; def v[2:3]
1834; GFX940-NEXT:    ;;#ASMEND
1835; GFX940-NEXT:    s_nop 0
1836; GFX940-NEXT:    v_perm_b32 v1, v0, v2, s2
1837; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1838; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1839; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1840; GFX940-NEXT:    s_waitcnt vmcnt(0)
1841; GFX940-NEXT:    s_setpc_b64 s[30:31]
1842  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1843  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1844  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1845  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1846  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 1, i32 1>
1847  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1848  ret void
1849}
1850
; Shuffle of two <3 x i16> inputs with mask <5, 1, 1>.
; Mask indices 0-2 select lanes of %extract3 and 3-5 select lanes of %extract31,
; so the stored value is { %extract31[2], %extract3[1], %extract3[1] }.
; Each input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1851define void @v_shuffle_v3i16_v3i16__5_1_1(ptr addrspace(1) inreg %ptr) {
1852; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_1:
1853; GFX900:       ; %bb.0:
1854; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1855; GFX900-NEXT:    ;;#ASMSTART
1856; GFX900-NEXT:    ; def v[0:1]
1857; GFX900-NEXT:    ;;#ASMEND
1858; GFX900-NEXT:    ;;#ASMSTART
1859; GFX900-NEXT:    ; def v[1:2]
1860; GFX900-NEXT:    ;;#ASMEND
1861; GFX900-NEXT:    s_mov_b32 s4, 0xffff
1862; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1863; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v0
1864; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1865; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1866; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1867; GFX900-NEXT:    s_waitcnt vmcnt(0)
1868; GFX900-NEXT:    s_setpc_b64 s[30:31]
1869;
1870; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_1:
1871; GFX90A:       ; %bb.0:
1872; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1873; GFX90A-NEXT:    ;;#ASMSTART
1874; GFX90A-NEXT:    ; def v[0:1]
1875; GFX90A-NEXT:    ;;#ASMEND
1876; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
1877; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1878; GFX90A-NEXT:    ;;#ASMSTART
1879; GFX90A-NEXT:    ; def v[2:3]
1880; GFX90A-NEXT:    ;;#ASMEND
1881; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v0
1882; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1883; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1884; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1885; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1886; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1887;
1888; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_1:
1889; GFX940:       ; %bb.0:
1890; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1891; GFX940-NEXT:    ;;#ASMSTART
1892; GFX940-NEXT:    ; def v[0:1]
1893; GFX940-NEXT:    ;;#ASMEND
1894; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1895; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1896; GFX940-NEXT:    ;;#ASMSTART
1897; GFX940-NEXT:    ; def v[2:3]
1898; GFX940-NEXT:    ;;#ASMEND
1899; GFX940-NEXT:    s_nop 0
1900; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v0
1901; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
1902; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1903; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1904; GFX940-NEXT:    s_waitcnt vmcnt(0)
1905; GFX940-NEXT:    s_setpc_b64 s[30:31]
1906  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1907  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1908  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1909  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1910  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 1>
1911  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1912  ret void
1913}
1914
; Shuffle of two <3 x i16> inputs with mask <5, poison, 1>.
; Mask indices 0-2 select lanes of %extract3 and 3-5 select lanes of %extract31,
; so result lane 0 is %extract31[2], lane 1 is unspecified, and lane 2 is
; %extract3[1].
; Each input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1915define void @v_shuffle_v3i16_v3i16__5_u_1(ptr addrspace(1) inreg %ptr) {
1916; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_1:
1917; GFX900:       ; %bb.0:
1918; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1919; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1920; GFX900-NEXT:    ;;#ASMSTART
1921; GFX900-NEXT:    ; def v[0:1]
1922; GFX900-NEXT:    ;;#ASMEND
1923; GFX900-NEXT:    ;;#ASMSTART
1924; GFX900-NEXT:    ; def v[1:2]
1925; GFX900-NEXT:    ;;#ASMEND
1926; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
1927; GFX900-NEXT:    global_store_dword v3, v2, s[16:17]
1928; GFX900-NEXT:    s_waitcnt vmcnt(0)
1929; GFX900-NEXT:    s_setpc_b64 s[30:31]
1930;
1931; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_1:
1932; GFX90A:       ; %bb.0:
1933; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1934; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1935; GFX90A-NEXT:    ;;#ASMSTART
1936; GFX90A-NEXT:    ; def v[0:1]
1937; GFX90A-NEXT:    ;;#ASMEND
1938; GFX90A-NEXT:    ;;#ASMSTART
1939; GFX90A-NEXT:    ; def v[2:3]
1940; GFX90A-NEXT:    ;;#ASMEND
1941; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
1942; GFX90A-NEXT:    global_store_dword v4, v3, s[16:17]
1943; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1944; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1945;
1946; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_1:
1947; GFX940:       ; %bb.0:
1948; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1949; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1950; GFX940-NEXT:    ;;#ASMSTART
1951; GFX940-NEXT:    ; def v[0:1]
1952; GFX940-NEXT:    ;;#ASMEND
1953; GFX940-NEXT:    ;;#ASMSTART
1954; GFX940-NEXT:    ; def v[2:3]
1955; GFX940-NEXT:    ;;#ASMEND
1956; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
1957; GFX940-NEXT:    global_store_dword v4, v3, s[0:1] sc0 sc1
1958; GFX940-NEXT:    s_waitcnt vmcnt(0)
1959; GFX940-NEXT:    s_setpc_b64 s[30:31]
1960  %vec0 = call <4 x i16> asm "; def $0", "=v"()
1961  %vec1 = call <4 x i16> asm "; def $0", "=v"()
1962  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1963  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
1964  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 1>
1965  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
1966  ret void
1967}
1968
; Shuffle of two <3 x i16> inputs with mask <5, 0, 1>.
; Mask indices 0-2 select lanes of %extract3 and 3-5 select lanes of %extract31,
; so the stored value is { %extract31[2], %extract3[0], %extract3[1] }.
; Each input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
1969define void @v_shuffle_v3i16_v3i16__5_0_1(ptr addrspace(1) inreg %ptr) {
1970; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_1:
1971; GFX900:       ; %bb.0:
1972; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1973; GFX900-NEXT:    ;;#ASMSTART
1974; GFX900-NEXT:    ; def v[0:1]
1975; GFX900-NEXT:    ;;#ASMEND
1976; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1977; GFX900-NEXT:    ;;#ASMSTART
1978; GFX900-NEXT:    ; def v[1:2]
1979; GFX900-NEXT:    ;;#ASMEND
1980; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1981; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
1982; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
1983; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1984; GFX900-NEXT:    s_waitcnt vmcnt(0)
1985; GFX900-NEXT:    s_setpc_b64 s[30:31]
1986;
1987; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_1:
1988; GFX90A:       ; %bb.0:
1989; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1990; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1991; GFX90A-NEXT:    ;;#ASMSTART
1992; GFX90A-NEXT:    ; def v[0:1]
1993; GFX90A-NEXT:    ;;#ASMEND
1994; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1995; GFX90A-NEXT:    ;;#ASMSTART
1996; GFX90A-NEXT:    ; def v[2:3]
1997; GFX90A-NEXT:    ;;#ASMEND
1998; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
1999; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2000; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2001; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2002; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2003;
2004; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_1:
2005; GFX940:       ; %bb.0:
2006; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2008; GFX940-NEXT:    ;;#ASMSTART
2009; GFX940-NEXT:    ; def v[0:1]
2010; GFX940-NEXT:    ;;#ASMEND
2011; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2012; GFX940-NEXT:    ;;#ASMSTART
2013; GFX940-NEXT:    ; def v[2:3]
2014; GFX940-NEXT:    ;;#ASMEND
2015; GFX940-NEXT:    s_nop 0
2016; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
2017; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2018; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2019; GFX940-NEXT:    s_waitcnt vmcnt(0)
2020; GFX940-NEXT:    s_setpc_b64 s[30:31]
2021  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2022  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2023  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2024  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2025  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 1>
2026  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2027  ret void
2028}
2029
; Shuffle of two <3 x i16> inputs with mask <5, 2, 1>.
; Mask indices 0-2 select lanes of %extract3 and 3-5 select lanes of %extract31,
; so the stored value is { %extract31[2], %extract3[2], %extract3[1] }.
; Each input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
2030define void @v_shuffle_v3i16_v3i16__5_2_1(ptr addrspace(1) inreg %ptr) {
2031; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_1:
2032; GFX900:       ; %bb.0:
2033; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2034; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2035; GFX900-NEXT:    ;;#ASMSTART
2036; GFX900-NEXT:    ; def v[0:1]
2037; GFX900-NEXT:    ;;#ASMEND
2038; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2039; GFX900-NEXT:    ;;#ASMSTART
2040; GFX900-NEXT:    ; def v[2:3]
2041; GFX900-NEXT:    ;;#ASMEND
2042; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
2043; GFX900-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2044; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
2045; GFX900-NEXT:    s_waitcnt vmcnt(0)
2046; GFX900-NEXT:    s_setpc_b64 s[30:31]
2047;
2048; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_1:
2049; GFX90A:       ; %bb.0:
2050; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2051; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2052; GFX90A-NEXT:    ;;#ASMSTART
2053; GFX90A-NEXT:    ; def v[0:1]
2054; GFX90A-NEXT:    ;;#ASMEND
2055; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2056; GFX90A-NEXT:    ;;#ASMSTART
2057; GFX90A-NEXT:    ; def v[2:3]
2058; GFX90A-NEXT:    ;;#ASMEND
2059; GFX90A-NEXT:    v_perm_b32 v1, v1, v3, s4
2060; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2061; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2062; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2063; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2064;
2065; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_1:
2066; GFX940:       ; %bb.0:
2067; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2068; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2069; GFX940-NEXT:    ;;#ASMSTART
2070; GFX940-NEXT:    ; def v[0:1]
2071; GFX940-NEXT:    ;;#ASMEND
2072; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2073; GFX940-NEXT:    ;;#ASMSTART
2074; GFX940-NEXT:    ; def v[2:3]
2075; GFX940-NEXT:    ;;#ASMEND
2076; GFX940-NEXT:    s_nop 0
2077; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s2
2078; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2079; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2080; GFX940-NEXT:    s_waitcnt vmcnt(0)
2081; GFX940-NEXT:    s_setpc_b64 s[30:31]
2082  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2083  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2084  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2085  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2086  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 1>
2087  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2088  ret void
2089}
2090
; Shuffle of two <3 x i16> inputs with mask <5, 3, 1>.
; Mask indices 0-2 select lanes of %extract3 and 3-5 select lanes of %extract31,
; so the stored value is { %extract31[2], %extract31[0], %extract3[1] }.
; Each input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
2091define void @v_shuffle_v3i16_v3i16__5_3_1(ptr addrspace(1) inreg %ptr) {
2092; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_1:
2093; GFX900:       ; %bb.0:
2094; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2095; GFX900-NEXT:    ;;#ASMSTART
2096; GFX900-NEXT:    ; def v[0:1]
2097; GFX900-NEXT:    ;;#ASMEND
2098; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2099; GFX900-NEXT:    ;;#ASMSTART
2100; GFX900-NEXT:    ; def v[1:2]
2101; GFX900-NEXT:    ;;#ASMEND
2102; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2103; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
2104; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2105; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2106; GFX900-NEXT:    s_waitcnt vmcnt(0)
2107; GFX900-NEXT:    s_setpc_b64 s[30:31]
2108;
2109; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_1:
2110; GFX90A:       ; %bb.0:
2111; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2112; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2113; GFX90A-NEXT:    ;;#ASMSTART
2114; GFX90A-NEXT:    ; def v[0:1]
2115; GFX90A-NEXT:    ;;#ASMEND
2116; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2117; GFX90A-NEXT:    ;;#ASMSTART
2118; GFX90A-NEXT:    ; def v[2:3]
2119; GFX90A-NEXT:    ;;#ASMEND
2120; GFX90A-NEXT:    v_perm_b32 v1, v2, v3, s4
2121; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2122; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2123; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2124; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2125;
2126; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_1:
2127; GFX940:       ; %bb.0:
2128; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2129; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2130; GFX940-NEXT:    ;;#ASMSTART
2131; GFX940-NEXT:    ; def v[0:1]
2132; GFX940-NEXT:    ;;#ASMEND
2133; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2134; GFX940-NEXT:    ;;#ASMSTART
2135; GFX940-NEXT:    ; def v[2:3]
2136; GFX940-NEXT:    ;;#ASMEND
2137; GFX940-NEXT:    s_nop 0
2138; GFX940-NEXT:    v_perm_b32 v1, v2, v3, s2
2139; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2140; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2141; GFX940-NEXT:    s_waitcnt vmcnt(0)
2142; GFX940-NEXT:    s_setpc_b64 s[30:31]
2143  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2144  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2145  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2146  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2147  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 1>
2148  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2149  ret void
2150}
2151
; Shuffle of two <3 x i16> inputs with mask <5, 4, 1>.
; Mask indices 0-2 select lanes of %extract3 and 3-5 select lanes of %extract31,
; so the stored value is { %extract31[2], %extract31[1], %extract3[1] }.
; Each input is the low three lanes of a <4 x i16> defined by inline asm ("=v").
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with update_llc_test_checks.py rather than editing by hand.
2152define void @v_shuffle_v3i16_v3i16__5_4_1(ptr addrspace(1) inreg %ptr) {
2153; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_1:
2154; GFX900:       ; %bb.0:
2155; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2156; GFX900-NEXT:    ;;#ASMSTART
2157; GFX900-NEXT:    ; def v[0:1]
2158; GFX900-NEXT:    ;;#ASMEND
2159; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2160; GFX900-NEXT:    ;;#ASMSTART
2161; GFX900-NEXT:    ; def v[1:2]
2162; GFX900-NEXT:    ;;#ASMEND
2163; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2164; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v1
2165; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2166; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2167; GFX900-NEXT:    s_waitcnt vmcnt(0)
2168; GFX900-NEXT:    s_setpc_b64 s[30:31]
2169;
2170; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_1:
2171; GFX90A:       ; %bb.0:
2172; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2173; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2174; GFX90A-NEXT:    ;;#ASMSTART
2175; GFX90A-NEXT:    ; def v[0:1]
2176; GFX90A-NEXT:    ;;#ASMEND
2177; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2178; GFX90A-NEXT:    ;;#ASMSTART
2179; GFX90A-NEXT:    ; def v[2:3]
2180; GFX90A-NEXT:    ;;#ASMEND
2181; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v2
2182; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2183; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2184; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2185; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2186;
2187; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_1:
2188; GFX940:       ; %bb.0:
2189; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2190; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2191; GFX940-NEXT:    ;;#ASMSTART
2192; GFX940-NEXT:    ; def v[0:1]
2193; GFX940-NEXT:    ;;#ASMEND
2194; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2195; GFX940-NEXT:    ;;#ASMSTART
2196; GFX940-NEXT:    ; def v[2:3]
2197; GFX940-NEXT:    ;;#ASMEND
2198; GFX940-NEXT:    s_nop 0
2199; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v2
2200; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2201; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2202; GFX940-NEXT:    s_waitcnt vmcnt(0)
2203; GFX940-NEXT:    s_setpc_b64 s[30:31]
2204  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2205  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2206  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2207  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2208  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 1>
2209  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2210  ret void
2211}
2212
; Single-input <3 x i16> shuffle with mask <poison, 2, 2>. The second shuffle
; operand is poison, so only %extract3 (the low three lanes of %vec0)
; contributes and the stored value is { poison, vec0[2], vec0[2] }.
; The expected code currently places vec0[2] in the high half of the first
; dword with a 16-bit left shift and stores the third lane with a separate
; short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2213define void @v_shuffle_v3i16_v3i16__u_2_2(ptr addrspace(1) inreg %ptr) {
2214; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_2_2:
2215; GFX900:       ; %bb.0:
2216; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2217; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2218; GFX900-NEXT:    ;;#ASMSTART
2219; GFX900-NEXT:    ; def v[0:1]
2220; GFX900-NEXT:    ;;#ASMEND
2221; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2222; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2223; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2224; GFX900-NEXT:    s_waitcnt vmcnt(0)
2225; GFX900-NEXT:    s_setpc_b64 s[30:31]
2226;
2227; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_2_2:
2228; GFX90A:       ; %bb.0:
2229; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2230; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2231; GFX90A-NEXT:    ;;#ASMSTART
2232; GFX90A-NEXT:    ; def v[0:1]
2233; GFX90A-NEXT:    ;;#ASMEND
2234; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2235; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2236; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2237; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2238; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2239;
2240; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_2_2:
2241; GFX940:       ; %bb.0:
2242; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2243; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2244; GFX940-NEXT:    ;;#ASMSTART
2245; GFX940-NEXT:    ; def v[0:1]
2246; GFX940-NEXT:    ;;#ASMEND
2247; GFX940-NEXT:    s_nop 0
2248; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2249; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2250; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2251; GFX940-NEXT:    s_waitcnt vmcnt(0)
2252; GFX940-NEXT:    s_setpc_b64 s[30:31]
2253  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2254  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2255  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 2, i32 2>
2256  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2257  ret void
2258}
2259
; Single-input <3 x i16> shuffle with mask <0, 2, 2>. The second shuffle
; operand is poison, so only %extract3 (the low three lanes of %vec0)
; contributes and the stored value is { vec0[0], vec0[2], vec0[2] }.
; The expected code currently builds the first dword with v_perm_b32
; (selector 0x5040100 takes the low halves of both sources) and stores the
; third lane with a separate short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2260define void @v_shuffle_v3i16_v3i16__0_2_2(ptr addrspace(1) inreg %ptr) {
2261; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_2_2:
2262; GFX900:       ; %bb.0:
2263; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2264; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2265; GFX900-NEXT:    ;;#ASMSTART
2266; GFX900-NEXT:    ; def v[0:1]
2267; GFX900-NEXT:    ;;#ASMEND
2268; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2269; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
2270; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2271; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2272; GFX900-NEXT:    s_waitcnt vmcnt(0)
2273; GFX900-NEXT:    s_setpc_b64 s[30:31]
2274;
2275; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_2_2:
2276; GFX90A:       ; %bb.0:
2277; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2278; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2279; GFX90A-NEXT:    ;;#ASMSTART
2280; GFX90A-NEXT:    ; def v[0:1]
2281; GFX90A-NEXT:    ;;#ASMEND
2282; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2283; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
2284; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2285; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2286; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2287; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2288;
2289; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_2_2:
2290; GFX940:       ; %bb.0:
2291; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2292; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2293; GFX940-NEXT:    ;;#ASMSTART
2294; GFX940-NEXT:    ; def v[0:1]
2295; GFX940-NEXT:    ;;#ASMEND
2296; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2297; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
2298; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2299; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2300; GFX940-NEXT:    s_waitcnt vmcnt(0)
2301; GFX940-NEXT:    s_setpc_b64 s[30:31]
2302  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2303  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2304  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 2, i32 2>
2305  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2306  ret void
2307}
2308
; Single-input <3 x i16> shuffle with mask <1, 2, 2>. The second shuffle
; operand is poison, so only %extract3 (the low three lanes of %vec0)
; contributes and the stored value is { vec0[1], vec0[2], vec0[2] }.
; The expected code currently forms the first dword with v_alignbit_b32 by 16
; bits across the asm-defined register pair and stores the third lane with a
; separate short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2309define void @v_shuffle_v3i16_v3i16__1_2_2(ptr addrspace(1) inreg %ptr) {
2310; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_2_2:
2311; GFX900:       ; %bb.0:
2312; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2314; GFX900-NEXT:    ;;#ASMSTART
2315; GFX900-NEXT:    ; def v[0:1]
2316; GFX900-NEXT:    ;;#ASMEND
2317; GFX900-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2318; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2319; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2320; GFX900-NEXT:    s_waitcnt vmcnt(0)
2321; GFX900-NEXT:    s_setpc_b64 s[30:31]
2322;
2323; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_2_2:
2324; GFX90A:       ; %bb.0:
2325; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2326; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2327; GFX90A-NEXT:    ;;#ASMSTART
2328; GFX90A-NEXT:    ; def v[0:1]
2329; GFX90A-NEXT:    ;;#ASMEND
2330; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2331; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2332; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2333; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2334; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2335;
2336; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_2_2:
2337; GFX940:       ; %bb.0:
2338; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2339; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2340; GFX940-NEXT:    ;;#ASMSTART
2341; GFX940-NEXT:    ; def v[0:1]
2342; GFX940-NEXT:    ;;#ASMEND
2343; GFX940-NEXT:    s_nop 0
2344; GFX940-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2345; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2346; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2347; GFX940-NEXT:    s_waitcnt vmcnt(0)
2348; GFX940-NEXT:    s_setpc_b64 s[30:31]
2349  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2350  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2351  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 2, i32 2>
2352  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2353  ret void
2354}
2355
; Single-input <3 x i16> shuffle with mask <2, 2, 2>, i.e. a splat of lane 2.
; The second shuffle operand is poison, so only %extract3 (the low three lanes
; of %vec0) contributes and the stored value is { vec0[2], vec0[2], vec0[2] }.
; The expected code currently duplicates the low half of the second asm
; register into both halves of the first dword with v_perm_b32 and stores the
; third lane with a separate short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2356define void @v_shuffle_v3i16_v3i16__2_2_2(ptr addrspace(1) inreg %ptr) {
2357; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_2_2:
2358; GFX900:       ; %bb.0:
2359; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2360; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2361; GFX900-NEXT:    ;;#ASMSTART
2362; GFX900-NEXT:    ; def v[0:1]
2363; GFX900-NEXT:    ;;#ASMEND
2364; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2365; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
2366; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2367; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2368; GFX900-NEXT:    s_waitcnt vmcnt(0)
2369; GFX900-NEXT:    s_setpc_b64 s[30:31]
2370;
2371; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_2_2:
2372; GFX90A:       ; %bb.0:
2373; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2374; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2375; GFX90A-NEXT:    ;;#ASMSTART
2376; GFX90A-NEXT:    ; def v[0:1]
2377; GFX90A-NEXT:    ;;#ASMEND
2378; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2379; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
2380; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2381; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2382; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2383; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2384;
2385; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_2_2:
2386; GFX940:       ; %bb.0:
2387; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2388; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2389; GFX940-NEXT:    ;;#ASMSTART
2390; GFX940-NEXT:    ; def v[0:1]
2391; GFX940-NEXT:    ;;#ASMEND
2392; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2393; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
2394; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2395; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2396; GFX940-NEXT:    s_waitcnt vmcnt(0)
2397; GFX940-NEXT:    s_setpc_b64 s[30:31]
2398  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2399  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2400  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 2, i32 2>
2401  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2402  ret void
2403}
2404
; Single-input <3 x i16> shuffle with mask <3, 2, 2>. Index 3 names lane 0 of
; the second shuffle operand, which is poison here, so the stored value is
; { poison, vec0[2], vec0[2] } where vec0[2] comes through %extract3.
; The expected code is the same sequence that is checked for the
; <poison, 2, 2> mask in v_shuffle_v3i16_v3i16__u_2_2, namely a 16-bit left
; shift for the first dword plus a short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2405define void @v_shuffle_v3i16_v3i16__3_2_2(ptr addrspace(1) inreg %ptr) {
2406; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_2_2:
2407; GFX900:       ; %bb.0:
2408; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2409; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2410; GFX900-NEXT:    ;;#ASMSTART
2411; GFX900-NEXT:    ; def v[0:1]
2412; GFX900-NEXT:    ;;#ASMEND
2413; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2414; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2415; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2416; GFX900-NEXT:    s_waitcnt vmcnt(0)
2417; GFX900-NEXT:    s_setpc_b64 s[30:31]
2418;
2419; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_2_2:
2420; GFX90A:       ; %bb.0:
2421; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2422; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2423; GFX90A-NEXT:    ;;#ASMSTART
2424; GFX90A-NEXT:    ; def v[0:1]
2425; GFX90A-NEXT:    ;;#ASMEND
2426; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2427; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2428; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2429; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2430; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2431;
2432; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_2_2:
2433; GFX940:       ; %bb.0:
2434; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2435; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2436; GFX940-NEXT:    ;;#ASMSTART
2437; GFX940-NEXT:    ; def v[0:1]
2438; GFX940-NEXT:    ;;#ASMEND
2439; GFX940-NEXT:    s_nop 0
2440; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2441; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2442; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2443; GFX940-NEXT:    s_waitcnt vmcnt(0)
2444; GFX940-NEXT:    s_setpc_b64 s[30:31]
2445  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2446  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2447  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 2, i32 2>
2448  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2449  ret void
2450}
2451
; Two-input <3 x i16> shuffle with mask <4, 2, 2>. Mask indices 0-2 select
; lanes of %extract3 (the low three lanes of %vec0) and indices 3-5 select
; lanes of %extract31 (the low three lanes of %vec1), so the stored value is
; { vec1[1], vec0[2], vec0[2] }.
; The expected code currently forms the first dword with v_alignbit_b32 by 16
; bits across one register from each asm def and stores the third lane with a
; separate short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2452define void @v_shuffle_v3i16_v3i16__4_2_2(ptr addrspace(1) inreg %ptr) {
2453; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_2_2:
2454; GFX900:       ; %bb.0:
2455; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2456; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2457; GFX900-NEXT:    ;;#ASMSTART
2458; GFX900-NEXT:    ; def v[0:1]
2459; GFX900-NEXT:    ;;#ASMEND
2460; GFX900-NEXT:    ;;#ASMSTART
2461; GFX900-NEXT:    ; def v[2:3]
2462; GFX900-NEXT:    ;;#ASMEND
2463; GFX900-NEXT:    v_alignbit_b32 v0, v1, v2, 16
2464; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2465; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
2466; GFX900-NEXT:    s_waitcnt vmcnt(0)
2467; GFX900-NEXT:    s_setpc_b64 s[30:31]
2468;
2469; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_2_2:
2470; GFX90A:       ; %bb.0:
2471; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2472; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2473; GFX90A-NEXT:    ;;#ASMSTART
2474; GFX90A-NEXT:    ; def v[0:1]
2475; GFX90A-NEXT:    ;;#ASMEND
2476; GFX90A-NEXT:    ;;#ASMSTART
2477; GFX90A-NEXT:    ; def v[2:3]
2478; GFX90A-NEXT:    ;;#ASMEND
2479; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v2, 16
2480; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2481; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
2482; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2483; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2484;
2485; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_2_2:
2486; GFX940:       ; %bb.0:
2487; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2488; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2489; GFX940-NEXT:    ;;#ASMSTART
2490; GFX940-NEXT:    ; def v[0:1]
2491; GFX940-NEXT:    ;;#ASMEND
2492; GFX940-NEXT:    ;;#ASMSTART
2493; GFX940-NEXT:    ; def v[2:3]
2494; GFX940-NEXT:    ;;#ASMEND
2495; GFX940-NEXT:    s_nop 0
2496; GFX940-NEXT:    v_alignbit_b32 v0, v1, v2, 16
2497; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2498; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
2499; GFX940-NEXT:    s_waitcnt vmcnt(0)
2500; GFX940-NEXT:    s_setpc_b64 s[30:31]
2501  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2502  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2503  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2504  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2505  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 2, i32 2>
2506  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2507  ret void
2508}
2509
; Two-input <3 x i16> shuffle with mask <5, 2, 2>. Mask indices 0-2 select
; lanes of %extract3 (the low three lanes of %vec0) and indices 3-5 select
; lanes of %extract31 (the low three lanes of %vec1), so the stored value is
; { vec1[2], vec0[2], vec0[2] }.
; The expected code currently builds the first dword with v_perm_b32
; (selector 0x5040100 takes the low halves of both sources) and stores the
; third lane with a separate short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2510define void @v_shuffle_v3i16_v3i16__5_2_2(ptr addrspace(1) inreg %ptr) {
2511; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_2:
2512; GFX900:       ; %bb.0:
2513; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2514; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2515; GFX900-NEXT:    ;;#ASMSTART
2516; GFX900-NEXT:    ; def v[0:1]
2517; GFX900-NEXT:    ;;#ASMEND
2518; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2519; GFX900-NEXT:    ;;#ASMSTART
2520; GFX900-NEXT:    ; def v[2:3]
2521; GFX900-NEXT:    ;;#ASMEND
2522; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
2523; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2524; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
2525; GFX900-NEXT:    s_waitcnt vmcnt(0)
2526; GFX900-NEXT:    s_setpc_b64 s[30:31]
2527;
2528; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_2:
2529; GFX90A:       ; %bb.0:
2530; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2531; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2532; GFX90A-NEXT:    ;;#ASMSTART
2533; GFX90A-NEXT:    ; def v[0:1]
2534; GFX90A-NEXT:    ;;#ASMEND
2535; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2536; GFX90A-NEXT:    ;;#ASMSTART
2537; GFX90A-NEXT:    ; def v[2:3]
2538; GFX90A-NEXT:    ;;#ASMEND
2539; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
2540; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2541; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
2542; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2543; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2544;
2545; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_2:
2546; GFX940:       ; %bb.0:
2547; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2548; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2549; GFX940-NEXT:    ;;#ASMSTART
2550; GFX940-NEXT:    ; def v[0:1]
2551; GFX940-NEXT:    ;;#ASMEND
2552; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2553; GFX940-NEXT:    ;;#ASMSTART
2554; GFX940-NEXT:    ; def v[2:3]
2555; GFX940-NEXT:    ;;#ASMEND
2556; GFX940-NEXT:    s_nop 0
2557; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
2558; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2559; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
2560; GFX940-NEXT:    s_waitcnt vmcnt(0)
2561; GFX940-NEXT:    s_setpc_b64 s[30:31]
2562  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2563  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2564  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2565  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2566  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 2>
2567  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2568  ret void
2569}
2570
; Two-input <3 x i16> shuffle with mask <5, poison, 2>. Mask indices 0-2
; select lanes of %extract3 (the low three lanes of %vec0) and indices 3-5
; select lanes of %extract31 (the low three lanes of %vec1), so the stored
; value is { vec1[2], poison, vec0[2] }.
; Because the middle lane is poison, the expected code currently needs no
; combining instruction. It stores the second register of the second asm def
; as the first dword and the second register of the first asm def as the
; short at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2571define void @v_shuffle_v3i16_v3i16__5_u_2(ptr addrspace(1) inreg %ptr) {
2572; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_2:
2573; GFX900:       ; %bb.0:
2574; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2575; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2576; GFX900-NEXT:    ;;#ASMSTART
2577; GFX900-NEXT:    ; def v[0:1]
2578; GFX900-NEXT:    ;;#ASMEND
2579; GFX900-NEXT:    ;;#ASMSTART
2580; GFX900-NEXT:    ; def v[2:3]
2581; GFX900-NEXT:    ;;#ASMEND
2582; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2583; GFX900-NEXT:    global_store_dword v4, v3, s[16:17]
2584; GFX900-NEXT:    s_waitcnt vmcnt(0)
2585; GFX900-NEXT:    s_setpc_b64 s[30:31]
2586;
2587; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_2:
2588; GFX90A:       ; %bb.0:
2589; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2590; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2591; GFX90A-NEXT:    ;;#ASMSTART
2592; GFX90A-NEXT:    ; def v[0:1]
2593; GFX90A-NEXT:    ;;#ASMEND
2594; GFX90A-NEXT:    ;;#ASMSTART
2595; GFX90A-NEXT:    ; def v[2:3]
2596; GFX90A-NEXT:    ;;#ASMEND
2597; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2598; GFX90A-NEXT:    global_store_dword v4, v3, s[16:17]
2599; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2600; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2601;
2602; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_2:
2603; GFX940:       ; %bb.0:
2604; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2605; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2606; GFX940-NEXT:    ;;#ASMSTART
2607; GFX940-NEXT:    ; def v[0:1]
2608; GFX940-NEXT:    ;;#ASMEND
2609; GFX940-NEXT:    ;;#ASMSTART
2610; GFX940-NEXT:    ; def v[2:3]
2611; GFX940-NEXT:    ;;#ASMEND
2612; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2613; GFX940-NEXT:    global_store_dword v4, v3, s[0:1] sc0 sc1
2614; GFX940-NEXT:    s_waitcnt vmcnt(0)
2615; GFX940-NEXT:    s_setpc_b64 s[30:31]
2616  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2617  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2618  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2619  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2620  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 2>
2621  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2622  ret void
2623}
2624
; Two-input <3 x i16> shuffle with mask <5, 0, 2>. Mask indices 0-2 select
; lanes of %extract3 (the low three lanes of %vec0) and indices 3-5 select
; lanes of %extract31 (the low three lanes of %vec1), so the stored value is
; { vec1[2], vec0[0], vec0[2] }.
; The expected code currently builds the first dword with v_perm_b32
; (selector 0x5040100 takes the low halves of both sources) and stores the
; third lane with a separate short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2625define void @v_shuffle_v3i16_v3i16__5_0_2(ptr addrspace(1) inreg %ptr) {
2626; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_2:
2627; GFX900:       ; %bb.0:
2628; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2629; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2630; GFX900-NEXT:    ;;#ASMSTART
2631; GFX900-NEXT:    ; def v[0:1]
2632; GFX900-NEXT:    ;;#ASMEND
2633; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2634; GFX900-NEXT:    ;;#ASMSTART
2635; GFX900-NEXT:    ; def v[2:3]
2636; GFX900-NEXT:    ;;#ASMEND
2637; GFX900-NEXT:    v_perm_b32 v0, v0, v3, s4
2638; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2639; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
2640; GFX900-NEXT:    s_waitcnt vmcnt(0)
2641; GFX900-NEXT:    s_setpc_b64 s[30:31]
2642;
2643; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_2:
2644; GFX90A:       ; %bb.0:
2645; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2646; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2647; GFX90A-NEXT:    ;;#ASMSTART
2648; GFX90A-NEXT:    ; def v[0:1]
2649; GFX90A-NEXT:    ;;#ASMEND
2650; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2651; GFX90A-NEXT:    ;;#ASMSTART
2652; GFX90A-NEXT:    ; def v[2:3]
2653; GFX90A-NEXT:    ;;#ASMEND
2654; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
2655; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2656; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
2657; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2658; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2659;
2660; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_2:
2661; GFX940:       ; %bb.0:
2662; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2663; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2664; GFX940-NEXT:    ;;#ASMSTART
2665; GFX940-NEXT:    ; def v[0:1]
2666; GFX940-NEXT:    ;;#ASMEND
2667; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2668; GFX940-NEXT:    ;;#ASMSTART
2669; GFX940-NEXT:    ; def v[2:3]
2670; GFX940-NEXT:    ;;#ASMEND
2671; GFX940-NEXT:    s_nop 0
2672; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
2673; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2674; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
2675; GFX940-NEXT:    s_waitcnt vmcnt(0)
2676; GFX940-NEXT:    s_setpc_b64 s[30:31]
2677  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2678  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2679  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2680  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2681  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 2>
2682  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2683  ret void
2684}
2685
; Two-input <3 x i16> shuffle with mask <5, 1, 2>. Mask indices 0-2 select
; lanes of %extract3 (the low three lanes of %vec0) and indices 3-5 select
; lanes of %extract31 (the low three lanes of %vec1), so the stored value is
; { vec1[2], vec0[1], vec0[2] }.
; The expected code currently merges the first two lanes with v_bfi_b32 under
; a 0xffff mask (low half from the second asm def, high half kept from the
; first) and stores the third lane with a separate short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2686define void @v_shuffle_v3i16_v3i16__5_1_2(ptr addrspace(1) inreg %ptr) {
2687; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_2:
2688; GFX900:       ; %bb.0:
2689; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2690; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2691; GFX900-NEXT:    ;;#ASMSTART
2692; GFX900-NEXT:    ; def v[0:1]
2693; GFX900-NEXT:    ;;#ASMEND
2694; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2695; GFX900-NEXT:    ;;#ASMSTART
2696; GFX900-NEXT:    ; def v[2:3]
2697; GFX900-NEXT:    ;;#ASMEND
2698; GFX900-NEXT:    v_bfi_b32 v0, s4, v3, v0
2699; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2700; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
2701; GFX900-NEXT:    s_waitcnt vmcnt(0)
2702; GFX900-NEXT:    s_setpc_b64 s[30:31]
2703;
2704; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_2:
2705; GFX90A:       ; %bb.0:
2706; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2707; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2708; GFX90A-NEXT:    ;;#ASMSTART
2709; GFX90A-NEXT:    ; def v[0:1]
2710; GFX90A-NEXT:    ;;#ASMEND
2711; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2712; GFX90A-NEXT:    ;;#ASMSTART
2713; GFX90A-NEXT:    ; def v[2:3]
2714; GFX90A-NEXT:    ;;#ASMEND
2715; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
2716; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2717; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
2718; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2719; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2720;
2721; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_2:
2722; GFX940:       ; %bb.0:
2723; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2724; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2725; GFX940-NEXT:    ;;#ASMSTART
2726; GFX940-NEXT:    ; def v[0:1]
2727; GFX940-NEXT:    ;;#ASMEND
2728; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2729; GFX940-NEXT:    ;;#ASMSTART
2730; GFX940-NEXT:    ; def v[2:3]
2731; GFX940-NEXT:    ;;#ASMEND
2732; GFX940-NEXT:    s_nop 0
2733; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
2734; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2735; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
2736; GFX940-NEXT:    s_waitcnt vmcnt(0)
2737; GFX940-NEXT:    s_setpc_b64 s[30:31]
2738  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2739  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2740  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2741  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2742  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 2>
2743  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2744  ret void
2745}
2746
; Two-input <3 x i16> shuffle with mask <5, 3, 2>. Mask indices 0-2 select
; lanes of %extract3 (the low three lanes of %vec0) and indices 3-5 select
; lanes of %extract31 (the low three lanes of %vec1), so the stored value is
; { vec1[2], vec1[0], vec0[2] }.
; The expected code currently builds the first dword with v_perm_b32 over the
; two registers of the second asm def (selector 0x5040100 takes the low half
; of each) and stores the third lane with a separate short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2747define void @v_shuffle_v3i16_v3i16__5_3_2(ptr addrspace(1) inreg %ptr) {
2748; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_2:
2749; GFX900:       ; %bb.0:
2750; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2751; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2752; GFX900-NEXT:    ;;#ASMSTART
2753; GFX900-NEXT:    ; def v[0:1]
2754; GFX900-NEXT:    ;;#ASMEND
2755; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2756; GFX900-NEXT:    ;;#ASMSTART
2757; GFX900-NEXT:    ; def v[2:3]
2758; GFX900-NEXT:    ;;#ASMEND
2759; GFX900-NEXT:    v_perm_b32 v0, v2, v3, s4
2760; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2761; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
2762; GFX900-NEXT:    s_waitcnt vmcnt(0)
2763; GFX900-NEXT:    s_setpc_b64 s[30:31]
2764;
2765; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_2:
2766; GFX90A:       ; %bb.0:
2767; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2768; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2769; GFX90A-NEXT:    ;;#ASMSTART
2770; GFX90A-NEXT:    ; def v[0:1]
2771; GFX90A-NEXT:    ;;#ASMEND
2772; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2773; GFX90A-NEXT:    ;;#ASMSTART
2774; GFX90A-NEXT:    ; def v[2:3]
2775; GFX90A-NEXT:    ;;#ASMEND
2776; GFX90A-NEXT:    v_perm_b32 v0, v2, v3, s4
2777; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2778; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
2779; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2780; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2781;
2782; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_2:
2783; GFX940:       ; %bb.0:
2784; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2785; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2786; GFX940-NEXT:    ;;#ASMSTART
2787; GFX940-NEXT:    ; def v[0:1]
2788; GFX940-NEXT:    ;;#ASMEND
2789; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2790; GFX940-NEXT:    ;;#ASMSTART
2791; GFX940-NEXT:    ; def v[2:3]
2792; GFX940-NEXT:    ;;#ASMEND
2793; GFX940-NEXT:    s_nop 0
2794; GFX940-NEXT:    v_perm_b32 v0, v2, v3, s2
2795; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2796; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
2797; GFX940-NEXT:    s_waitcnt vmcnt(0)
2798; GFX940-NEXT:    s_setpc_b64 s[30:31]
2799  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2800  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2801  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2802  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2803  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 2>
2804  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2805  ret void
2806}
2807
; Two-input <3 x i16> shuffle with mask <5, 4, 2>. Mask indices 0-2 select
; lanes of %extract3 (the low three lanes of %vec0) and indices 3-5 select
; lanes of %extract31 (the low three lanes of %vec1), so the stored value is
; { vec1[2], vec1[1], vec0[2] }.
; The expected code currently merges the first two lanes with v_bfi_b32 under
; a 0xffff mask over the two registers of the second asm def and stores the
; third lane with a separate short store at offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2808define void @v_shuffle_v3i16_v3i16__5_4_2(ptr addrspace(1) inreg %ptr) {
2809; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_2:
2810; GFX900:       ; %bb.0:
2811; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2812; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2813; GFX900-NEXT:    ;;#ASMSTART
2814; GFX900-NEXT:    ; def v[0:1]
2815; GFX900-NEXT:    ;;#ASMEND
2816; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2817; GFX900-NEXT:    ;;#ASMSTART
2818; GFX900-NEXT:    ; def v[2:3]
2819; GFX900-NEXT:    ;;#ASMEND
2820; GFX900-NEXT:    v_bfi_b32 v0, s4, v3, v2
2821; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2822; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
2823; GFX900-NEXT:    s_waitcnt vmcnt(0)
2824; GFX900-NEXT:    s_setpc_b64 s[30:31]
2825;
2826; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_2:
2827; GFX90A:       ; %bb.0:
2828; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2829; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2830; GFX90A-NEXT:    ;;#ASMSTART
2831; GFX90A-NEXT:    ; def v[0:1]
2832; GFX90A-NEXT:    ;;#ASMEND
2833; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2834; GFX90A-NEXT:    ;;#ASMSTART
2835; GFX90A-NEXT:    ; def v[2:3]
2836; GFX90A-NEXT:    ;;#ASMEND
2837; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v2
2838; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
2839; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
2840; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2841; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2842;
2843; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_2:
2844; GFX940:       ; %bb.0:
2845; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2846; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2847; GFX940-NEXT:    ;;#ASMSTART
2848; GFX940-NEXT:    ; def v[0:1]
2849; GFX940-NEXT:    ;;#ASMEND
2850; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2851; GFX940-NEXT:    ;;#ASMSTART
2852; GFX940-NEXT:    ; def v[2:3]
2853; GFX940-NEXT:    ;;#ASMEND
2854; GFX940-NEXT:    s_nop 0
2855; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v2
2856; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
2857; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
2858; GFX940-NEXT:    s_waitcnt vmcnt(0)
2859; GFX940-NEXT:    s_setpc_b64 s[30:31]
2860  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2861  %vec1 = call <4 x i16> asm "; def $0", "=v"()
2862  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2863  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2864  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 2>
2865  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2866  ret void
2867}
2868
; Single-input <3 x i16> shuffle with mask <poison, 3, 3>. Index 3 names lane
; 0 of the second shuffle operand, which is poison here, so every lane of the
; stored value is poison and nothing from %vec0 is used.
; The expected code therefore contains only the entry wait and the return,
; with no asm def and no store, and all three RUN lines share the common GFX9
; prefix for it.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2869define void @v_shuffle_v3i16_v3i16__u_3_3(ptr addrspace(1) inreg %ptr) {
2870; GFX9-LABEL: v_shuffle_v3i16_v3i16__u_3_3:
2871; GFX9:       ; %bb.0:
2872; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2873; GFX9-NEXT:    s_setpc_b64 s[30:31]
2874  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2875  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2876  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 3, i32 3>
2877  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2878  ret void
2879}
2880
; Single-input <3 x i16> shuffle with mask <0, 3, 3>. Index 3 names lane 0 of
; the second shuffle operand, which is poison here, so the stored value is
; { vec0[0], poison, poison } where vec0[0] comes through %extract3.
; The expected code currently emits no shuffle instruction at all. It stores
; the asm-defined register pair unchanged as one dword plus one short at
; offset 4.
; The assertion comments inside the body are autogenerated by
; utils/update_llc_test_checks.py, so regenerate them instead of hand-editing.
2881define void @v_shuffle_v3i16_v3i16__0_3_3(ptr addrspace(1) inreg %ptr) {
2882; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_3_3:
2883; GFX900:       ; %bb.0:
2884; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2885; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2886; GFX900-NEXT:    ;;#ASMSTART
2887; GFX900-NEXT:    ; def v[0:1]
2888; GFX900-NEXT:    ;;#ASMEND
2889; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2890; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2891; GFX900-NEXT:    s_waitcnt vmcnt(0)
2892; GFX900-NEXT:    s_setpc_b64 s[30:31]
2893;
2894; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_3_3:
2895; GFX90A:       ; %bb.0:
2896; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2897; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2898; GFX90A-NEXT:    ;;#ASMSTART
2899; GFX90A-NEXT:    ; def v[0:1]
2900; GFX90A-NEXT:    ;;#ASMEND
2901; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2902; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2903; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2904; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2905;
2906; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_3_3:
2907; GFX940:       ; %bb.0:
2908; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2909; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2910; GFX940-NEXT:    ;;#ASMSTART
2911; GFX940-NEXT:    ; def v[0:1]
2912; GFX940-NEXT:    ;;#ASMEND
2913; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2914; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2915; GFX940-NEXT:    s_waitcnt vmcnt(0)
2916; GFX940-NEXT:    s_setpc_b64 s[30:31]
2917  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2918  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2919  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 3, i32 3>
2920  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2921  ret void
2922}
2923
; Shuffle mask <1, 3, 3> applied to %extract3 with a poison second operand, so
; only lane 1 of the inline-asm value is a defined input. All three targets
; expect one asm def of v[0:1], a v_alignbit_b32 of v0 by 16 whose other source
; is an SGPR (s4 on gfx900 and gfx90a, s0 on gfx940), and a single dword store.
; No separate short store at offset 4 is expected.
; NOTE(review): the SGPR source is never written in the checked sequence,
; presumably because it only feeds lanes that are poison; confirm. Also confirm
; that the poison second operand is what the test generator intended.
2924define void @v_shuffle_v3i16_v3i16__1_3_3(ptr addrspace(1) inreg %ptr) {
2925; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_3_3:
2926; GFX900:       ; %bb.0:
2927; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2928; GFX900-NEXT:    ;;#ASMSTART
2929; GFX900-NEXT:    ; def v[0:1]
2930; GFX900-NEXT:    ;;#ASMEND
2931; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2932; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
2933; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2934; GFX900-NEXT:    s_waitcnt vmcnt(0)
2935; GFX900-NEXT:    s_setpc_b64 s[30:31]
2936;
2937; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_3_3:
2938; GFX90A:       ; %bb.0:
2939; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2940; GFX90A-NEXT:    ;;#ASMSTART
2941; GFX90A-NEXT:    ; def v[0:1]
2942; GFX90A-NEXT:    ;;#ASMEND
2943; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2944; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
2945; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2946; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2947; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2948;
2949; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_3_3:
2950; GFX940:       ; %bb.0:
2951; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2952; GFX940-NEXT:    ;;#ASMSTART
2953; GFX940-NEXT:    ; def v[0:1]
2954; GFX940-NEXT:    ;;#ASMEND
2955; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2956; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
2957; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2958; GFX940-NEXT:    s_waitcnt vmcnt(0)
2959; GFX940-NEXT:    s_setpc_b64 s[30:31]
2960  %vec0 = call <4 x i16> asm "; def $0", "=v"()
2961  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
2962  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 3, i32 3>
2963  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
2964  ret void
2965}
2966
; Shuffle mask <2, 3, 3> applied to %extract3 with a poison second operand, so
; only lane 2 of the inline-asm value is a defined input. All three targets
; expect one asm def of v[0:1] followed by a single dword store of v1, with no
; ALU shuffle instruction and no short store at offset 4. The gfx940 checks
; address through s[0:1] and carry sc0 sc1 on the store; the others address
; through s[16:17].
; NOTE(review): the second operand is poison although the name mentions lane
; 3; confirm this is what the test generator intended.
2967define void @v_shuffle_v3i16_v3i16__2_3_3(ptr addrspace(1) inreg %ptr) {
2968; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_3_3:
2969; GFX900:       ; %bb.0:
2970; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2971; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2972; GFX900-NEXT:    ;;#ASMSTART
2973; GFX900-NEXT:    ; def v[0:1]
2974; GFX900-NEXT:    ;;#ASMEND
2975; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
2976; GFX900-NEXT:    s_waitcnt vmcnt(0)
2977; GFX900-NEXT:    s_setpc_b64 s[30:31]
2978;
2979; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_3_3:
2980; GFX90A:       ; %bb.0:
2981; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2982; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2983; GFX90A-NEXT:    ;;#ASMSTART
2984; GFX90A-NEXT:    ; def v[0:1]
2985; GFX90A-NEXT:    ;;#ASMEND
2986; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
2987; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2988; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2989;
2990; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_3_3:
2991; GFX940:       ; %bb.0:
2992; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2993; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2994; GFX940-NEXT:    ;;#ASMSTART
2995; GFX940-NEXT:    ; def v[0:1]
2996; GFX940-NEXT:    ;;#ASMEND
2997; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
2998; GFX940-NEXT:    s_waitcnt vmcnt(0)
2999; GFX940-NEXT:    s_setpc_b64 s[30:31]
3000  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3001  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3002  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 3, i32 3>
3003  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3004  ret void
3005}
3006
; Shuffle mask <3, 3, 3> applied to %extract3 with a poison second operand.
; Every mask index names lane 0 of the second operand, which is poison here,
; so no lane of the inline-asm value is selected. The shared GFX9 checks expect
; only the entry s_waitcnt and the return, with no inline-asm def and no store.
; NOTE(review): as written this test expects the same output as an all-poison
; shuffle; confirm the generator did not mean to pass a second inline-asm
; vector as the second operand.
3007define void @v_shuffle_v3i16_v3i16__3_3_3(ptr addrspace(1) inreg %ptr) {
3008; GFX9-LABEL: v_shuffle_v3i16_v3i16__3_3_3:
3009; GFX9:       ; %bb.0:
3010; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3011; GFX9-NEXT:    s_setpc_b64 s[30:31]
3012  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3013  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3014  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 3, i32 3>
3015  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3016  ret void
3017}
3018
; Shuffle mask <4, 3, 3> applied to %extract3 and %extract31. Indices 3 and 4
; name lanes 0 and 1 of the second operand, so no lane of %extract3 is
; selected. The IR contains two inline-asm calls but every target expects a
; single asm def of v[0:1] (presumably the unused %vec0 def is dropped; TODO
; confirm). The expected sequence is v_alignbit_b32 v1, v0, v0, 16, then a
; short store of v0 at offset 4 and a dword store of v1. The gfx940 checks
; additionally expect an s_nop 0 before the v_alignbit_b32 and sc0 sc1 on both
; stores.
3019define void @v_shuffle_v3i16_v3i16__4_3_3(ptr addrspace(1) inreg %ptr) {
3020; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_3_3:
3021; GFX900:       ; %bb.0:
3022; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3023; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3024; GFX900-NEXT:    ;;#ASMSTART
3025; GFX900-NEXT:    ; def v[0:1]
3026; GFX900-NEXT:    ;;#ASMEND
3027; GFX900-NEXT:    v_alignbit_b32 v1, v0, v0, 16
3028; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3029; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3030; GFX900-NEXT:    s_waitcnt vmcnt(0)
3031; GFX900-NEXT:    s_setpc_b64 s[30:31]
3032;
3033; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_3_3:
3034; GFX90A:       ; %bb.0:
3035; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3036; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3037; GFX90A-NEXT:    ;;#ASMSTART
3038; GFX90A-NEXT:    ; def v[0:1]
3039; GFX90A-NEXT:    ;;#ASMEND
3040; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v0, 16
3041; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3042; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3043; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3044; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3045;
3046; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_3_3:
3047; GFX940:       ; %bb.0:
3048; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3049; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3050; GFX940-NEXT:    ;;#ASMSTART
3051; GFX940-NEXT:    ; def v[0:1]
3052; GFX940-NEXT:    ;;#ASMEND
3053; GFX940-NEXT:    s_nop 0
3054; GFX940-NEXT:    v_alignbit_b32 v1, v0, v0, 16
3055; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3056; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3057; GFX940-NEXT:    s_waitcnt vmcnt(0)
3058; GFX940-NEXT:    s_setpc_b64 s[30:31]
3059  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3060  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3061  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3062  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3063  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 3, i32 3>
3064  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3065  ret void
3066}
3067
; Shuffle mask <5, 3, 3> applied to %extract3 and %extract31. Indices 3 and 5
; name lanes 0 and 2 of the second operand, so no lane of %extract3 is
; selected. The IR contains two inline-asm calls but every target expects a
; single asm def of v[0:1] (presumably the unused %vec0 def is dropped; TODO
; confirm). The expected sequence loads the selector 0x5040100 into an SGPR
; (s4 on gfx900 and gfx90a, s2 on gfx940), issues v_perm_b32 v1, v0, v1 with
; that selector, then a short store of v0 at offset 4 and a dword store of v1.
; The gfx940 stores carry sc0 sc1.
3068define void @v_shuffle_v3i16_v3i16__5_3_3(ptr addrspace(1) inreg %ptr) {
3069; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_3:
3070; GFX900:       ; %bb.0:
3071; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3072; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3073; GFX900-NEXT:    ;;#ASMSTART
3074; GFX900-NEXT:    ; def v[0:1]
3075; GFX900-NEXT:    ;;#ASMEND
3076; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3077; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
3078; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3079; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3080; GFX900-NEXT:    s_waitcnt vmcnt(0)
3081; GFX900-NEXT:    s_setpc_b64 s[30:31]
3082;
3083; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_3:
3084; GFX90A:       ; %bb.0:
3085; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3086; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3087; GFX90A-NEXT:    ;;#ASMSTART
3088; GFX90A-NEXT:    ; def v[0:1]
3089; GFX90A-NEXT:    ;;#ASMEND
3090; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3091; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
3092; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3093; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3094; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3095; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3096;
3097; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_3:
3098; GFX940:       ; %bb.0:
3099; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3100; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3101; GFX940-NEXT:    ;;#ASMSTART
3102; GFX940-NEXT:    ; def v[0:1]
3103; GFX940-NEXT:    ;;#ASMEND
3104; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3105; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
3106; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3107; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3108; GFX940-NEXT:    s_waitcnt vmcnt(0)
3109; GFX940-NEXT:    s_setpc_b64 s[30:31]
3110  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3111  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3112  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3113  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3114  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 3>
3115  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3116  ret void
3117}
3118
; Shuffle mask <5, poison, 3> applied to %extract3 and %extract31. Indices 3
; and 5 name lanes 0 and 2 of the second operand; the middle lane is poison and
; no lane of %extract3 is selected. The IR contains two inline-asm calls but
; every target expects a single asm def of v[0:1] (presumably the unused %vec0
; def is dropped; TODO confirm). No ALU shuffle instruction is expected, only
; a short store of v0 at offset 4 and a dword store of v1. The gfx940 stores
; address through s[0:1] and carry sc0 sc1; the others address through
; s[16:17].
3119define void @v_shuffle_v3i16_v3i16__5_u_3(ptr addrspace(1) inreg %ptr) {
3120; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_3:
3121; GFX900:       ; %bb.0:
3122; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3123; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3124; GFX900-NEXT:    ;;#ASMSTART
3125; GFX900-NEXT:    ; def v[0:1]
3126; GFX900-NEXT:    ;;#ASMEND
3127; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3128; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3129; GFX900-NEXT:    s_waitcnt vmcnt(0)
3130; GFX900-NEXT:    s_setpc_b64 s[30:31]
3131;
3132; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_3:
3133; GFX90A:       ; %bb.0:
3134; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3135; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3136; GFX90A-NEXT:    ;;#ASMSTART
3137; GFX90A-NEXT:    ; def v[0:1]
3138; GFX90A-NEXT:    ;;#ASMEND
3139; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3140; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3141; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3142; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3143;
3144; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_3:
3145; GFX940:       ; %bb.0:
3146; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3147; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3148; GFX940-NEXT:    ;;#ASMSTART
3149; GFX940-NEXT:    ; def v[0:1]
3150; GFX940-NEXT:    ;;#ASMEND
3151; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3152; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3153; GFX940-NEXT:    s_waitcnt vmcnt(0)
3154; GFX940-NEXT:    s_setpc_b64 s[30:31]
3155  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3156  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3157  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3158  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3159  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 3>
3160  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3161  ret void
3162}
3163
; Shuffle mask <5, 0, 3> applied to %extract3 and %extract31. Index 0 is lane
; 0 of the first operand; indices 3 and 5 are lanes 0 and 2 of the second
; operand. Both inline-asm defs are expected on every target. The gfx900
; checks place the second def in v[1:2], reusing v1 from the first def, and
; use v3 as the zero offset register; the gfx90a and gfx940 checks place it in
; v[2:3] and use v4. The expected sequence loads the selector 0x5040100 into
; an SGPR, issues a v_perm_b32 of v0 with the second dword of the second def
; into v0, stores the first dword of the second def as a short at offset 4,
; and stores v0 as a dword at offset 0. The gfx940 stores carry sc0 sc1.
3164define void @v_shuffle_v3i16_v3i16__5_0_3(ptr addrspace(1) inreg %ptr) {
3165; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_3:
3166; GFX900:       ; %bb.0:
3167; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3168; GFX900-NEXT:    ;;#ASMSTART
3169; GFX900-NEXT:    ; def v[0:1]
3170; GFX900-NEXT:    ;;#ASMEND
3171; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3172; GFX900-NEXT:    ;;#ASMSTART
3173; GFX900-NEXT:    ; def v[1:2]
3174; GFX900-NEXT:    ;;#ASMEND
3175; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3176; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
3177; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
3178; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
3179; GFX900-NEXT:    s_waitcnt vmcnt(0)
3180; GFX900-NEXT:    s_setpc_b64 s[30:31]
3181;
3182; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_3:
3183; GFX90A:       ; %bb.0:
3184; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3185; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3186; GFX90A-NEXT:    ;;#ASMSTART
3187; GFX90A-NEXT:    ; def v[0:1]
3188; GFX90A-NEXT:    ;;#ASMEND
3189; GFX90A-NEXT:    ;;#ASMSTART
3190; GFX90A-NEXT:    ; def v[2:3]
3191; GFX90A-NEXT:    ;;#ASMEND
3192; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3193; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
3194; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
3195; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3196; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3197; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3198;
3199; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_3:
3200; GFX940:       ; %bb.0:
3201; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3202; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3203; GFX940-NEXT:    ;;#ASMSTART
3204; GFX940-NEXT:    ; def v[0:1]
3205; GFX940-NEXT:    ;;#ASMEND
3206; GFX940-NEXT:    ;;#ASMSTART
3207; GFX940-NEXT:    ; def v[2:3]
3208; GFX940-NEXT:    ;;#ASMEND
3209; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3210; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
3211; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
3212; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3213; GFX940-NEXT:    s_waitcnt vmcnt(0)
3214; GFX940-NEXT:    s_setpc_b64 s[30:31]
3215  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3216  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3217  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3218  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3219  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 3>
3220  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3221  ret void
3222}
3223
; Shuffle mask <5, 1, 3> applied to %extract3 and %extract31. Index 1 is lane
; 1 of the first operand; indices 3 and 5 are lanes 0 and 2 of the second
; operand. Both inline-asm defs are expected on every target. The gfx900
; checks place the second def in v[1:2], reusing v1 from the first def, and
; use v3 as the zero offset register; the gfx90a and gfx940 checks place it in
; v[2:3] and use v4. The expected sequence loads the mask 0xffff into an SGPR,
; issues a v_bfi_b32 that merges the second dword of the second def with v0
; into v0, stores the first dword of the second def as a short at offset 4,
; and stores v0 as a dword at offset 0. The gfx940 stores carry sc0 sc1.
3224define void @v_shuffle_v3i16_v3i16__5_1_3(ptr addrspace(1) inreg %ptr) {
3225; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_3:
3226; GFX900:       ; %bb.0:
3227; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3228; GFX900-NEXT:    ;;#ASMSTART
3229; GFX900-NEXT:    ; def v[0:1]
3230; GFX900-NEXT:    ;;#ASMEND
3231; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3232; GFX900-NEXT:    ;;#ASMSTART
3233; GFX900-NEXT:    ; def v[1:2]
3234; GFX900-NEXT:    ;;#ASMEND
3235; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3236; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
3237; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
3238; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
3239; GFX900-NEXT:    s_waitcnt vmcnt(0)
3240; GFX900-NEXT:    s_setpc_b64 s[30:31]
3241;
3242; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_3:
3243; GFX90A:       ; %bb.0:
3244; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3245; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3246; GFX90A-NEXT:    ;;#ASMSTART
3247; GFX90A-NEXT:    ; def v[0:1]
3248; GFX90A-NEXT:    ;;#ASMEND
3249; GFX90A-NEXT:    ;;#ASMSTART
3250; GFX90A-NEXT:    ; def v[2:3]
3251; GFX90A-NEXT:    ;;#ASMEND
3252; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3253; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
3254; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
3255; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3256; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3257; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3258;
3259; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_3:
3260; GFX940:       ; %bb.0:
3261; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3262; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3263; GFX940-NEXT:    ;;#ASMSTART
3264; GFX940-NEXT:    ; def v[0:1]
3265; GFX940-NEXT:    ;;#ASMEND
3266; GFX940-NEXT:    ;;#ASMSTART
3267; GFX940-NEXT:    ; def v[2:3]
3268; GFX940-NEXT:    ;;#ASMEND
3269; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3270; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
3271; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
3272; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3273; GFX940-NEXT:    s_waitcnt vmcnt(0)
3274; GFX940-NEXT:    s_setpc_b64 s[30:31]
3275  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3276  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3277  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3278  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3279  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 3>
3280  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3281  ret void
3282}
3283
; Shuffle mask <5, 2, 3> applied to %extract3 and %extract31. Index 2 is lane
; 2 of the first operand; indices 3 and 5 are lanes 0 and 2 of the second
; operand. All three targets expect both inline-asm defs, in v[0:1] and
; v[2:3], with v4 as the zero offset register. The expected sequence loads the
; selector 0x5040100 into an SGPR (s4 on gfx900 and gfx90a, s2 on gfx940),
; issues v_perm_b32 v0, v1, v3 with that selector, stores v2 as a short at
; offset 4, and stores v0 as a dword at offset 0. The gfx940 stores carry
; sc0 sc1.
3284define void @v_shuffle_v3i16_v3i16__5_2_3(ptr addrspace(1) inreg %ptr) {
3285; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_3:
3286; GFX900:       ; %bb.0:
3287; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3288; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3289; GFX900-NEXT:    ;;#ASMSTART
3290; GFX900-NEXT:    ; def v[0:1]
3291; GFX900-NEXT:    ;;#ASMEND
3292; GFX900-NEXT:    ;;#ASMSTART
3293; GFX900-NEXT:    ; def v[2:3]
3294; GFX900-NEXT:    ;;#ASMEND
3295; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3296; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
3297; GFX900-NEXT:    global_store_short v4, v2, s[16:17] offset:4
3298; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3299; GFX900-NEXT:    s_waitcnt vmcnt(0)
3300; GFX900-NEXT:    s_setpc_b64 s[30:31]
3301;
3302; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_3:
3303; GFX90A:       ; %bb.0:
3304; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3305; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3306; GFX90A-NEXT:    ;;#ASMSTART
3307; GFX90A-NEXT:    ; def v[0:1]
3308; GFX90A-NEXT:    ;;#ASMEND
3309; GFX90A-NEXT:    ;;#ASMSTART
3310; GFX90A-NEXT:    ; def v[2:3]
3311; GFX90A-NEXT:    ;;#ASMEND
3312; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3313; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
3314; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
3315; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3316; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3317; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3318;
3319; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_3:
3320; GFX940:       ; %bb.0:
3321; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3322; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3323; GFX940-NEXT:    ;;#ASMSTART
3324; GFX940-NEXT:    ; def v[0:1]
3325; GFX940-NEXT:    ;;#ASMEND
3326; GFX940-NEXT:    ;;#ASMSTART
3327; GFX940-NEXT:    ; def v[2:3]
3328; GFX940-NEXT:    ;;#ASMEND
3329; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3330; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
3331; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
3332; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3333; GFX940-NEXT:    s_waitcnt vmcnt(0)
3334; GFX940-NEXT:    s_setpc_b64 s[30:31]
3335  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3336  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3337  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3338  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3339  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 3>
3340  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3341  ret void
3342}
3343
; Shuffle mask <5, 4, 3> applied to %extract3 and %extract31. Indices 3, 4 and
; 5 name lanes 0, 1 and 2 of the second operand, so the result is that
; operand's three lanes in reverse order and no lane of %extract3 is selected.
; The IR contains two inline-asm calls but every target expects a single asm
; def of v[0:1] (presumably the unused %vec0 def is dropped; TODO confirm).
; The expected sequence loads the mask 0xffff into an SGPR (s4 on gfx900 and
; gfx90a, s2 on gfx940), issues v_bfi_b32 v1, <sgpr>, v1, v0, then a short
; store of v0 at offset 4 and a dword store of v1. The gfx940 stores carry
; sc0 sc1.
3344define void @v_shuffle_v3i16_v3i16__5_4_3(ptr addrspace(1) inreg %ptr) {
3345; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_3:
3346; GFX900:       ; %bb.0:
3347; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3348; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3349; GFX900-NEXT:    ;;#ASMSTART
3350; GFX900-NEXT:    ; def v[0:1]
3351; GFX900-NEXT:    ;;#ASMEND
3352; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3353; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
3354; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3355; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3356; GFX900-NEXT:    s_waitcnt vmcnt(0)
3357; GFX900-NEXT:    s_setpc_b64 s[30:31]
3358;
3359; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_3:
3360; GFX90A:       ; %bb.0:
3361; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3362; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3363; GFX90A-NEXT:    ;;#ASMSTART
3364; GFX90A-NEXT:    ; def v[0:1]
3365; GFX90A-NEXT:    ;;#ASMEND
3366; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3367; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v0
3368; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3369; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3370; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3371; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3372;
3373; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_3:
3374; GFX940:       ; %bb.0:
3375; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3376; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3377; GFX940-NEXT:    ;;#ASMSTART
3378; GFX940-NEXT:    ; def v[0:1]
3379; GFX940-NEXT:    ;;#ASMEND
3380; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3381; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v0
3382; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3383; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3384; GFX940-NEXT:    s_waitcnt vmcnt(0)
3385; GFX940-NEXT:    s_setpc_b64 s[30:31]
3386  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3387  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3388  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3389  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3390  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 3>
3391  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3392  ret void
3393}
3394
; Shuffle mask <poison, 4, 4> applied to %extract3 and %extract31. Index 4
; names lane 1 of the second operand; the first lane is poison and no lane of
; %extract3 is selected. The IR contains two inline-asm calls but every target
; expects a single asm def of v[0:1] (presumably the unused %vec0 def is
; dropped; TODO confirm). No ALU shuffle instruction is expected, only a
; global_store_short_d16_hi of v0 at offset 4 and a dword store of v0 at
; offset 0. The gfx940 stores address through s[0:1] and carry sc0 sc1; the
; others address through s[16:17].
3395define void @v_shuffle_v3i16_v3i16__u_4_4(ptr addrspace(1) inreg %ptr) {
3396; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_4_4:
3397; GFX900:       ; %bb.0:
3398; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3399; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3400; GFX900-NEXT:    ;;#ASMSTART
3401; GFX900-NEXT:    ; def v[0:1]
3402; GFX900-NEXT:    ;;#ASMEND
3403; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
3404; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3405; GFX900-NEXT:    s_waitcnt vmcnt(0)
3406; GFX900-NEXT:    s_setpc_b64 s[30:31]
3407;
3408; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_4_4:
3409; GFX90A:       ; %bb.0:
3410; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3411; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3412; GFX90A-NEXT:    ;;#ASMSTART
3413; GFX90A-NEXT:    ; def v[0:1]
3414; GFX90A-NEXT:    ;;#ASMEND
3415; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
3416; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3417; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3418; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3419;
3420; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_4_4:
3421; GFX940:       ; %bb.0:
3422; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3423; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3424; GFX940-NEXT:    ;;#ASMSTART
3425; GFX940-NEXT:    ; def v[0:1]
3426; GFX940-NEXT:    ;;#ASMEND
3427; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
3428; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3429; GFX940-NEXT:    s_waitcnt vmcnt(0)
3430; GFX940-NEXT:    s_setpc_b64 s[30:31]
3431  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3432  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3433  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3434  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3435  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 poison, i32 4, i32 4>
3436  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3437  ret void
3438}
3439
; Shuffle mask <0, 4, 4> applied to %extract3 and %extract31. Index 0 is lane
; 0 of the first operand and index 4 is lane 1 of the second operand. Both
; inline-asm defs are expected on every target. The gfx900 checks place the
; second def in v[1:2], reusing v1 from the first def, and use v3 as the zero
; offset register; the gfx90a and gfx940 checks place it in v[2:3] and use v4.
; The expected sequence loads the mask 0xffff into an SGPR, issues a v_bfi_b32
; merging v0 with the first dword of the second def, stores the result as a
; dword, then shifts the first dword of the second def right by 16 and stores
; it as a short at offset 4. The gfx940 checks additionally expect an s_nop 0
; before the v_bfi_b32 and sc0 sc1 on both stores.
3440define void @v_shuffle_v3i16_v3i16__0_4_4(ptr addrspace(1) inreg %ptr) {
3441; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_4_4:
3442; GFX900:       ; %bb.0:
3443; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3444; GFX900-NEXT:    ;;#ASMSTART
3445; GFX900-NEXT:    ; def v[0:1]
3446; GFX900-NEXT:    ;;#ASMEND
3447; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3448; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3449; GFX900-NEXT:    ;;#ASMSTART
3450; GFX900-NEXT:    ; def v[1:2]
3451; GFX900-NEXT:    ;;#ASMEND
3452; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
3453; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
3454; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
3455; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
3456; GFX900-NEXT:    s_waitcnt vmcnt(0)
3457; GFX900-NEXT:    s_setpc_b64 s[30:31]
3458;
3459; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_4_4:
3460; GFX90A:       ; %bb.0:
3461; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3462; GFX90A-NEXT:    ;;#ASMSTART
3463; GFX90A-NEXT:    ; def v[0:1]
3464; GFX90A-NEXT:    ;;#ASMEND
3465; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3466; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3467; GFX90A-NEXT:    ;;#ASMSTART
3468; GFX90A-NEXT:    ; def v[2:3]
3469; GFX90A-NEXT:    ;;#ASMEND
3470; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v2
3471; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3472; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
3473; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
3474; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3475; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3476;
3477; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_4_4:
3478; GFX940:       ; %bb.0:
3479; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3480; GFX940-NEXT:    ;;#ASMSTART
3481; GFX940-NEXT:    ; def v[0:1]
3482; GFX940-NEXT:    ;;#ASMEND
3483; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3484; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3485; GFX940-NEXT:    ;;#ASMSTART
3486; GFX940-NEXT:    ; def v[2:3]
3487; GFX940-NEXT:    ;;#ASMEND
3488; GFX940-NEXT:    s_nop 0
3489; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v2
3490; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3491; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
3492; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
3493; GFX940-NEXT:    s_waitcnt vmcnt(0)
3494; GFX940-NEXT:    s_setpc_b64 s[30:31]
3495  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3496  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3497  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3498  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3499  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 0, i32 4, i32 4>
3500  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3501  ret void
3502}
3503
; Shuffle mask <1, 4, 4> applied to %extract3 and %extract31. Index 1 is lane
; 1 of the first operand and index 4 is lane 1 of the second operand. Both
; inline-asm defs are expected on every target. The gfx900 checks place the
; second def in v[1:2], reusing v1 from the first def, and use v3 as the zero
; offset register; the gfx90a and gfx940 checks place it in v[2:3] and use v4.
; The expected sequence loads the selector 0x7060302 into an SGPR, issues a
; v_perm_b32 of the first dword of the second def with v0, stores the result
; as a dword, then shifts the first dword of the second def right by 16 and
; stores it as a short at offset 4. The gfx940 checks additionally expect an
; s_nop 0 before the v_perm_b32 and sc0 sc1 on both stores.
3504define void @v_shuffle_v3i16_v3i16__1_4_4(ptr addrspace(1) inreg %ptr) {
3505; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_4_4:
3506; GFX900:       ; %bb.0:
3507; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3508; GFX900-NEXT:    ;;#ASMSTART
3509; GFX900-NEXT:    ; def v[0:1]
3510; GFX900-NEXT:    ;;#ASMEND
3511; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3512; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3513; GFX900-NEXT:    ;;#ASMSTART
3514; GFX900-NEXT:    ; def v[1:2]
3515; GFX900-NEXT:    ;;#ASMEND
3516; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
3517; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
3518; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
3519; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
3520; GFX900-NEXT:    s_waitcnt vmcnt(0)
3521; GFX900-NEXT:    s_setpc_b64 s[30:31]
3522;
3523; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_4_4:
3524; GFX90A:       ; %bb.0:
3525; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3526; GFX90A-NEXT:    ;;#ASMSTART
3527; GFX90A-NEXT:    ; def v[0:1]
3528; GFX90A-NEXT:    ;;#ASMEND
3529; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3530; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3531; GFX90A-NEXT:    ;;#ASMSTART
3532; GFX90A-NEXT:    ; def v[2:3]
3533; GFX90A-NEXT:    ;;#ASMEND
3534; GFX90A-NEXT:    v_perm_b32 v0, v2, v0, s4
3535; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3536; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
3537; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
3538; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3539; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3540;
3541; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_4_4:
3542; GFX940:       ; %bb.0:
3543; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3544; GFX940-NEXT:    ;;#ASMSTART
3545; GFX940-NEXT:    ; def v[0:1]
3546; GFX940-NEXT:    ;;#ASMEND
3547; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3548; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3549; GFX940-NEXT:    ;;#ASMSTART
3550; GFX940-NEXT:    ; def v[2:3]
3551; GFX940-NEXT:    ;;#ASMEND
3552; GFX940-NEXT:    s_nop 0
3553; GFX940-NEXT:    v_perm_b32 v0, v2, v0, s2
3554; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3555; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
3556; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
3557; GFX940-NEXT:    s_waitcnt vmcnt(0)
3558; GFX940-NEXT:    s_setpc_b64 s[30:31]
3559  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3560  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3561  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3562  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3563  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 1, i32 4, i32 4>
3564  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3565  ret void
3566}
3567
; Shuffle mask <2, 4, 4> applied to %extract3 and %extract31. Index 2 is lane
; 2 of the first operand and index 4 is lane 1 of the second operand. All
; three targets expect both inline-asm defs, in v[0:1] and v[2:3], with v4 as
; the zero offset register. The expected sequence loads the mask 0xffff into
; an SGPR (s4 on gfx900 and gfx90a, s2 on gfx940), issues v_bfi_b32 v0,
; <sgpr>, v1, v2, shifts v2 right by 16 into v1, stores v1 as a short at
; offset 4, and then stores v0 as a dword at offset 0. The gfx940 checks
; additionally expect an s_nop 0 before the v_bfi_b32 and sc0 sc1 on both
; stores.
3568define void @v_shuffle_v3i16_v3i16__2_4_4(ptr addrspace(1) inreg %ptr) {
3569; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_4_4:
3570; GFX900:       ; %bb.0:
3571; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3572; GFX900-NEXT:    ;;#ASMSTART
3573; GFX900-NEXT:    ; def v[0:1]
3574; GFX900-NEXT:    ;;#ASMEND
3575; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3576; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3577; GFX900-NEXT:    ;;#ASMSTART
3578; GFX900-NEXT:    ; def v[2:3]
3579; GFX900-NEXT:    ;;#ASMEND
3580; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v2
3581; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
3582; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3583; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3584; GFX900-NEXT:    s_waitcnt vmcnt(0)
3585; GFX900-NEXT:    s_setpc_b64 s[30:31]
3586;
3587; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_4_4:
3588; GFX90A:       ; %bb.0:
3589; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3590; GFX90A-NEXT:    ;;#ASMSTART
3591; GFX90A-NEXT:    ; def v[0:1]
3592; GFX90A-NEXT:    ;;#ASMEND
3593; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3594; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3595; GFX90A-NEXT:    ;;#ASMSTART
3596; GFX90A-NEXT:    ; def v[2:3]
3597; GFX90A-NEXT:    ;;#ASMEND
3598; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v2
3599; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
3600; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3601; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3602; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3603; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3604;
3605; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_4_4:
3606; GFX940:       ; %bb.0:
3607; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3608; GFX940-NEXT:    ;;#ASMSTART
3609; GFX940-NEXT:    ; def v[0:1]
3610; GFX940-NEXT:    ;;#ASMEND
3611; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3612; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3613; GFX940-NEXT:    ;;#ASMSTART
3614; GFX940-NEXT:    ; def v[2:3]
3615; GFX940-NEXT:    ;;#ASMEND
3616; GFX940-NEXT:    s_nop 0
3617; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v2
3618; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
3619; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3620; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3621; GFX940-NEXT:    s_waitcnt vmcnt(0)
3622; GFX940-NEXT:    s_setpc_b64 s[30:31]
3623  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3624  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3625  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3626  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3627  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 2, i32 4, i32 4>
3628  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3629  ret void
3630}
3631
; Shuffle of two <3 x i16> operands with mask <3, 4, 4>.
; Mask indices 3-5 address the second operand, so lane 0 is element 0 of
; %extract31 and lanes 1-2 are both element 1 of %extract31; %extract3 is not
; referenced by the mask, and the expected code contains a single inline-asm def.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3632define void @v_shuffle_v3i16_v3i16__3_4_4(ptr addrspace(1) inreg %ptr) {
3633; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_4_4:
3634; GFX900:       ; %bb.0:
3635; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3636; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3637; GFX900-NEXT:    ;;#ASMSTART
3638; GFX900-NEXT:    ; def v[0:1]
3639; GFX900-NEXT:    ;;#ASMEND
3640; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
3641; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3642; GFX900-NEXT:    s_waitcnt vmcnt(0)
3643; GFX900-NEXT:    s_setpc_b64 s[30:31]
3644;
3645; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_4_4:
3646; GFX90A:       ; %bb.0:
3647; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3648; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3649; GFX90A-NEXT:    ;;#ASMSTART
3650; GFX90A-NEXT:    ; def v[0:1]
3651; GFX90A-NEXT:    ;;#ASMEND
3652; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
3653; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3654; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3655; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3656;
3657; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_4_4:
3658; GFX940:       ; %bb.0:
3659; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3660; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3661; GFX940-NEXT:    ;;#ASMSTART
3662; GFX940-NEXT:    ; def v[0:1]
3663; GFX940-NEXT:    ;;#ASMEND
3664; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
3665; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3666; GFX940-NEXT:    s_waitcnt vmcnt(0)
3667; GFX940-NEXT:    s_setpc_b64 s[30:31]
3668  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3669  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3670  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3671  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3672  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 3, i32 4, i32 4>
3673  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3674  ret void
3675}
3676
; Shuffle of two <3 x i16> operands with mask <4, 4, 4>.
; Mask indices 3-5 address the second operand, so all three lanes are element 1
; of %extract31 (a splat); %extract3 is not referenced by the mask, and the
; expected code contains a single inline-asm def.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3677define void @v_shuffle_v3i16_v3i16__4_4_4(ptr addrspace(1) inreg %ptr) {
3678; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_4_4:
3679; GFX900:       ; %bb.0:
3680; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3681; GFX900-NEXT:    ;;#ASMSTART
3682; GFX900-NEXT:    ; def v[0:1]
3683; GFX900-NEXT:    ;;#ASMEND
3684; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3685; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3686; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
3687; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3688; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3689; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3690; GFX900-NEXT:    s_waitcnt vmcnt(0)
3691; GFX900-NEXT:    s_setpc_b64 s[30:31]
3692;
3693; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_4_4:
3694; GFX90A:       ; %bb.0:
3695; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3696; GFX90A-NEXT:    ;;#ASMSTART
3697; GFX90A-NEXT:    ; def v[0:1]
3698; GFX90A-NEXT:    ;;#ASMEND
3699; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3700; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3701; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
3702; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3703; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3704; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3705; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3706; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3707;
3708; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_4_4:
3709; GFX940:       ; %bb.0:
3710; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3711; GFX940-NEXT:    ;;#ASMSTART
3712; GFX940-NEXT:    ; def v[0:1]
3713; GFX940-NEXT:    ;;#ASMEND
3714; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3715; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3716; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
3717; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3718; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3719; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3720; GFX940-NEXT:    s_waitcnt vmcnt(0)
3721; GFX940-NEXT:    s_setpc_b64 s[30:31]
3722  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3723  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3724  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3725  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3726  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 4, i32 4>
3727  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3728  ret void
3729}
3730
; Shuffle of two <3 x i16> operands with mask <5, 4, 4>.
; Mask indices 3-5 address the second operand, so lane 0 is element 2 of
; %extract31 and lanes 1-2 are both element 1 of %extract31; %extract3 is not
; referenced by the mask, and the expected code contains a single inline-asm def.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3731define void @v_shuffle_v3i16_v3i16__5_4_4(ptr addrspace(1) inreg %ptr) {
3732; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_4:
3733; GFX900:       ; %bb.0:
3734; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3735; GFX900-NEXT:    ;;#ASMSTART
3736; GFX900-NEXT:    ; def v[0:1]
3737; GFX900-NEXT:    ;;#ASMEND
3738; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3739; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3740; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
3741; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3742; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3743; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3744; GFX900-NEXT:    s_waitcnt vmcnt(0)
3745; GFX900-NEXT:    s_setpc_b64 s[30:31]
3746;
3747; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_4:
3748; GFX90A:       ; %bb.0:
3749; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3750; GFX90A-NEXT:    ;;#ASMSTART
3751; GFX90A-NEXT:    ; def v[0:1]
3752; GFX90A-NEXT:    ;;#ASMEND
3753; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3754; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3755; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v0
3756; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3757; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3758; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3759; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3760; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3761;
3762; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_4:
3763; GFX940:       ; %bb.0:
3764; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3765; GFX940-NEXT:    ;;#ASMSTART
3766; GFX940-NEXT:    ; def v[0:1]
3767; GFX940-NEXT:    ;;#ASMEND
3768; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3769; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3770; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v0
3771; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
3772; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3773; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3774; GFX940-NEXT:    s_waitcnt vmcnt(0)
3775; GFX940-NEXT:    s_setpc_b64 s[30:31]
3776  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3777  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3778  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3779  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3780  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 4>
3781  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3782  ret void
3783}
3784
; Shuffle of two <3 x i16> operands with mask <5, poison, 4>.
; Mask indices 3-5 address the second operand, so lane 0 is element 2 of
; %extract31, lane 1 is left unspecified (poison mask element), and lane 2 is
; element 1 of %extract31; %extract3 is not referenced by the mask, and the
; expected code contains a single inline-asm def.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3785define void @v_shuffle_v3i16_v3i16__5_u_4(ptr addrspace(1) inreg %ptr) {
3786; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_4:
3787; GFX900:       ; %bb.0:
3788; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3789; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3790; GFX900-NEXT:    ;;#ASMSTART
3791; GFX900-NEXT:    ; def v[0:1]
3792; GFX900-NEXT:    ;;#ASMEND
3793; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
3794; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3795; GFX900-NEXT:    s_waitcnt vmcnt(0)
3796; GFX900-NEXT:    s_setpc_b64 s[30:31]
3797;
3798; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_4:
3799; GFX90A:       ; %bb.0:
3800; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3801; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3802; GFX90A-NEXT:    ;;#ASMSTART
3803; GFX90A-NEXT:    ; def v[0:1]
3804; GFX90A-NEXT:    ;;#ASMEND
3805; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
3806; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3807; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3808; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3809;
3810; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_4:
3811; GFX940:       ; %bb.0:
3812; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3813; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3814; GFX940-NEXT:    ;;#ASMSTART
3815; GFX940-NEXT:    ; def v[0:1]
3816; GFX940-NEXT:    ;;#ASMEND
3817; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
3818; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3819; GFX940-NEXT:    s_waitcnt vmcnt(0)
3820; GFX940-NEXT:    s_setpc_b64 s[30:31]
3821  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3822  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3823  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3824  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3825  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 4>
3826  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3827  ret void
3828}
3829
; Shuffle of two <3 x i16> operands with mask <5, 0, 4>.
; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so lane 0 is
; element 2 of %extract31, lane 1 is element 0 of %extract3, and lane 2 is
; element 1 of %extract31. Both inline-asm defs appear in the expected code.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3830define void @v_shuffle_v3i16_v3i16__5_0_4(ptr addrspace(1) inreg %ptr) {
3831; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_4:
3832; GFX900:       ; %bb.0:
3833; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3834; GFX900-NEXT:    ;;#ASMSTART
3835; GFX900-NEXT:    ; def v[0:1]
3836; GFX900-NEXT:    ;;#ASMEND
3837; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3838; GFX900-NEXT:    ;;#ASMSTART
3839; GFX900-NEXT:    ; def v[1:2]
3840; GFX900-NEXT:    ;;#ASMEND
3841; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3842; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
3843; GFX900-NEXT:    global_store_short_d16_hi v3, v1, s[16:17] offset:4
3844; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
3845; GFX900-NEXT:    s_waitcnt vmcnt(0)
3846; GFX900-NEXT:    s_setpc_b64 s[30:31]
3847;
3848; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_4:
3849; GFX90A:       ; %bb.0:
3850; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3851; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3852; GFX90A-NEXT:    ;;#ASMSTART
3853; GFX90A-NEXT:    ; def v[0:1]
3854; GFX90A-NEXT:    ;;#ASMEND
3855; GFX90A-NEXT:    ;;#ASMSTART
3856; GFX90A-NEXT:    ; def v[2:3]
3857; GFX90A-NEXT:    ;;#ASMEND
3858; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3859; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
3860; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
3861; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3862; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3863; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3864;
3865; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_4:
3866; GFX940:       ; %bb.0:
3867; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3868; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3869; GFX940-NEXT:    ;;#ASMSTART
3870; GFX940-NEXT:    ; def v[0:1]
3871; GFX940-NEXT:    ;;#ASMEND
3872; GFX940-NEXT:    ;;#ASMSTART
3873; GFX940-NEXT:    ; def v[2:3]
3874; GFX940-NEXT:    ;;#ASMEND
3875; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3876; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
3877; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
3878; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3879; GFX940-NEXT:    s_waitcnt vmcnt(0)
3880; GFX940-NEXT:    s_setpc_b64 s[30:31]
3881  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3882  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3883  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3884  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3885  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 4>
3886  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3887  ret void
3888}
3889
; Shuffle of two <3 x i16> operands with mask <5, 1, 4>.
; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so lane 0 is
; element 2 of %extract31, lane 1 is element 1 of %extract3, and lane 2 is
; element 1 of %extract31. Both inline-asm defs appear in the expected code.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3890define void @v_shuffle_v3i16_v3i16__5_1_4(ptr addrspace(1) inreg %ptr) {
3891; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_4:
3892; GFX900:       ; %bb.0:
3893; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3894; GFX900-NEXT:    ;;#ASMSTART
3895; GFX900-NEXT:    ; def v[0:1]
3896; GFX900-NEXT:    ;;#ASMEND
3897; GFX900-NEXT:    v_mov_b32_e32 v3, 0
3898; GFX900-NEXT:    ;;#ASMSTART
3899; GFX900-NEXT:    ; def v[1:2]
3900; GFX900-NEXT:    ;;#ASMEND
3901; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3902; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
3903; GFX900-NEXT:    global_store_short_d16_hi v3, v1, s[16:17] offset:4
3904; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
3905; GFX900-NEXT:    s_waitcnt vmcnt(0)
3906; GFX900-NEXT:    s_setpc_b64 s[30:31]
3907;
3908; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_4:
3909; GFX90A:       ; %bb.0:
3910; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3911; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3912; GFX90A-NEXT:    ;;#ASMSTART
3913; GFX90A-NEXT:    ; def v[0:1]
3914; GFX90A-NEXT:    ;;#ASMEND
3915; GFX90A-NEXT:    ;;#ASMSTART
3916; GFX90A-NEXT:    ; def v[2:3]
3917; GFX90A-NEXT:    ;;#ASMEND
3918; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3919; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
3920; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
3921; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3922; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3923; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3924;
3925; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_4:
3926; GFX940:       ; %bb.0:
3927; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3928; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3929; GFX940-NEXT:    ;;#ASMSTART
3930; GFX940-NEXT:    ; def v[0:1]
3931; GFX940-NEXT:    ;;#ASMEND
3932; GFX940-NEXT:    ;;#ASMSTART
3933; GFX940-NEXT:    ; def v[2:3]
3934; GFX940-NEXT:    ;;#ASMEND
3935; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3936; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
3937; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
3938; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3939; GFX940-NEXT:    s_waitcnt vmcnt(0)
3940; GFX940-NEXT:    s_setpc_b64 s[30:31]
3941  %vec0 = call <4 x i16> asm "; def $0", "=v"()
3942  %vec1 = call <4 x i16> asm "; def $0", "=v"()
3943  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3944  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
3945  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 4>
3946  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
3947  ret void
3948}
3949
; Shuffle of two <3 x i16> operands with mask <5, 2, 4>.
; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so lane 0 is
; element 2 of %extract31, lane 1 is element 2 of %extract3, and lane 2 is
; element 1 of %extract31. Both inline-asm defs appear in the expected code.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
3950define void @v_shuffle_v3i16_v3i16__5_2_4(ptr addrspace(1) inreg %ptr) {
3951; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_4:
3952; GFX900:       ; %bb.0:
3953; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3954; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3955; GFX900-NEXT:    ;;#ASMSTART
3956; GFX900-NEXT:    ; def v[0:1]
3957; GFX900-NEXT:    ;;#ASMEND
3958; GFX900-NEXT:    ;;#ASMSTART
3959; GFX900-NEXT:    ; def v[2:3]
3960; GFX900-NEXT:    ;;#ASMEND
3961; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3962; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
3963; GFX900-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
3964; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3965; GFX900-NEXT:    s_waitcnt vmcnt(0)
3966; GFX900-NEXT:    s_setpc_b64 s[30:31]
3967;
3968; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_4:
3969; GFX90A:       ; %bb.0:
3970; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3971; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3972; GFX90A-NEXT:    ;;#ASMSTART
3973; GFX90A-NEXT:    ; def v[0:1]
3974; GFX90A-NEXT:    ;;#ASMEND
3975; GFX90A-NEXT:    ;;#ASMSTART
3976; GFX90A-NEXT:    ; def v[2:3]
3977; GFX90A-NEXT:    ;;#ASMEND
3978; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3979; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
3980; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
3981; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3982; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3983; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3984;
3985; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_4:
3986; GFX940:       ; %bb.0:
3987; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3988; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3989; GFX940-NEXT:    ;;#ASMSTART
3990; GFX940-NEXT:    ; def v[0:1]
3991; GFX940-NEXT:    ;;#ASMEND
3992; GFX940-NEXT:    ;;#ASMSTART
3993; GFX940-NEXT:    ; def v[2:3]
3994; GFX940-NEXT:    ;;#ASMEND
3995; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3996; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
3997; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
3998; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3999; GFX940-NEXT:    s_waitcnt vmcnt(0)
4000; GFX940-NEXT:    s_setpc_b64 s[30:31]
4001  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4002  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4003  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4004  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4005  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 4>
4006  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4007  ret void
4008}
4009
; Shuffle of two <3 x i16> operands with mask <5, 3, 4>.
; Mask indices 3-5 address the second operand, so the lanes are elements 2, 0
; and 1 of %extract31 in that order; %extract3 is not referenced by the mask,
; and the expected code contains a single inline-asm def.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4010define void @v_shuffle_v3i16_v3i16__5_3_4(ptr addrspace(1) inreg %ptr) {
4011; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_4:
4012; GFX900:       ; %bb.0:
4013; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4014; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4015; GFX900-NEXT:    ;;#ASMSTART
4016; GFX900-NEXT:    ; def v[0:1]
4017; GFX900-NEXT:    ;;#ASMEND
4018; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4019; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
4020; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
4021; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4022; GFX900-NEXT:    s_waitcnt vmcnt(0)
4023; GFX900-NEXT:    s_setpc_b64 s[30:31]
4024;
4025; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_4:
4026; GFX90A:       ; %bb.0:
4027; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4028; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4029; GFX90A-NEXT:    ;;#ASMSTART
4030; GFX90A-NEXT:    ; def v[0:1]
4031; GFX90A-NEXT:    ;;#ASMEND
4032; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4033; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
4034; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
4035; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4036; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4037; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4038;
4039; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_4:
4040; GFX940:       ; %bb.0:
4041; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4042; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4043; GFX940-NEXT:    ;;#ASMSTART
4044; GFX940-NEXT:    ; def v[0:1]
4045; GFX940-NEXT:    ;;#ASMEND
4046; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4047; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
4048; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
4049; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4050; GFX940-NEXT:    s_waitcnt vmcnt(0)
4051; GFX940-NEXT:    s_setpc_b64 s[30:31]
4052  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4053  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4054  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4055  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4056  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 4>
4057  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4058  ret void
4059}
4060
; Shuffle of two <3 x i16> operands with mask <poison, 5, 5>.
; Lane 0 is left unspecified (poison mask element); mask indices 3-5 address the
; second operand, so lanes 1-2 are both element 2 of %extract31. %extract3 is
; not referenced by the mask, and the expected code contains a single
; inline-asm def.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4061define void @v_shuffle_v3i16_v3i16__u_5_5(ptr addrspace(1) inreg %ptr) {
4062; GFX900-LABEL: v_shuffle_v3i16_v3i16__u_5_5:
4063; GFX900:       ; %bb.0:
4064; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4065; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4066; GFX900-NEXT:    ;;#ASMSTART
4067; GFX900-NEXT:    ; def v[0:1]
4068; GFX900-NEXT:    ;;#ASMEND
4069; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
4070; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4071; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4072; GFX900-NEXT:    s_waitcnt vmcnt(0)
4073; GFX900-NEXT:    s_setpc_b64 s[30:31]
4074;
4075; GFX90A-LABEL: v_shuffle_v3i16_v3i16__u_5_5:
4076; GFX90A:       ; %bb.0:
4077; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4078; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4079; GFX90A-NEXT:    ;;#ASMSTART
4080; GFX90A-NEXT:    ; def v[0:1]
4081; GFX90A-NEXT:    ;;#ASMEND
4082; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
4083; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4084; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4085; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4086; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4087;
4088; GFX940-LABEL: v_shuffle_v3i16_v3i16__u_5_5:
4089; GFX940:       ; %bb.0:
4090; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4091; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4092; GFX940-NEXT:    ;;#ASMSTART
4093; GFX940-NEXT:    ; def v[0:1]
4094; GFX940-NEXT:    ;;#ASMEND
4095; GFX940-NEXT:    s_nop 0
4096; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
4097; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4098; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4099; GFX940-NEXT:    s_waitcnt vmcnt(0)
4100; GFX940-NEXT:    s_setpc_b64 s[30:31]
4101  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4102  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4103  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4104  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4105  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 poison, i32 5, i32 5>
4106  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4107  ret void
4108}
4109
; Shuffle of two <3 x i16> operands with mask <0, 5, 5>.
; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so lane 0 is
; element 0 of %extract3 and lanes 1-2 are both element 2 of %extract31.
; Both inline-asm defs appear in the expected code.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4110define void @v_shuffle_v3i16_v3i16__0_5_5(ptr addrspace(1) inreg %ptr) {
4111; GFX900-LABEL: v_shuffle_v3i16_v3i16__0_5_5:
4112; GFX900:       ; %bb.0:
4113; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4114; GFX900-NEXT:    ;;#ASMSTART
4115; GFX900-NEXT:    ; def v[0:1]
4116; GFX900-NEXT:    ;;#ASMEND
4117; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4118; GFX900-NEXT:    ;;#ASMSTART
4119; GFX900-NEXT:    ; def v[1:2]
4120; GFX900-NEXT:    ;;#ASMEND
4121; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4122; GFX900-NEXT:    v_perm_b32 v0, v2, v0, s4
4123; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
4124; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
4125; GFX900-NEXT:    s_waitcnt vmcnt(0)
4126; GFX900-NEXT:    s_setpc_b64 s[30:31]
4127;
4128; GFX90A-LABEL: v_shuffle_v3i16_v3i16__0_5_5:
4129; GFX90A:       ; %bb.0:
4130; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4131; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4132; GFX90A-NEXT:    ;;#ASMSTART
4133; GFX90A-NEXT:    ; def v[0:1]
4134; GFX90A-NEXT:    ;;#ASMEND
4135; GFX90A-NEXT:    ;;#ASMSTART
4136; GFX90A-NEXT:    ; def v[2:3]
4137; GFX90A-NEXT:    ;;#ASMEND
4138; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4139; GFX90A-NEXT:    v_perm_b32 v0, v3, v0, s4
4140; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
4141; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4142; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4143; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4144;
4145; GFX940-LABEL: v_shuffle_v3i16_v3i16__0_5_5:
4146; GFX940:       ; %bb.0:
4147; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4148; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4149; GFX940-NEXT:    ;;#ASMSTART
4150; GFX940-NEXT:    ; def v[0:1]
4151; GFX940-NEXT:    ;;#ASMEND
4152; GFX940-NEXT:    ;;#ASMSTART
4153; GFX940-NEXT:    ; def v[2:3]
4154; GFX940-NEXT:    ;;#ASMEND
4155; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4156; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s2
4157; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4158; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4159; GFX940-NEXT:    s_waitcnt vmcnt(0)
4160; GFX940-NEXT:    s_setpc_b64 s[30:31]
4161  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4162  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4163  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4164  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4165  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 0, i32 5, i32 5>
4166  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4167  ret void
4168}
4169
; Shuffle of two <3 x i16> operands with mask <1, 5, 5>.
; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so lane 0 is
; element 1 of %extract3 and lanes 1-2 are both element 2 of %extract31.
; Both inline-asm defs appear in the expected code.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4170define void @v_shuffle_v3i16_v3i16__1_5_5(ptr addrspace(1) inreg %ptr) {
4171; GFX900-LABEL: v_shuffle_v3i16_v3i16__1_5_5:
4172; GFX900:       ; %bb.0:
4173; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4174; GFX900-NEXT:    ;;#ASMSTART
4175; GFX900-NEXT:    ; def v[0:1]
4176; GFX900-NEXT:    ;;#ASMEND
4177; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4178; GFX900-NEXT:    ;;#ASMSTART
4179; GFX900-NEXT:    ; def v[1:2]
4180; GFX900-NEXT:    ;;#ASMEND
4181; GFX900-NEXT:    v_alignbit_b32 v0, v2, v0, 16
4182; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
4183; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
4184; GFX900-NEXT:    s_waitcnt vmcnt(0)
4185; GFX900-NEXT:    s_setpc_b64 s[30:31]
4186;
4187; GFX90A-LABEL: v_shuffle_v3i16_v3i16__1_5_5:
4188; GFX90A:       ; %bb.0:
4189; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4190; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4191; GFX90A-NEXT:    ;;#ASMSTART
4192; GFX90A-NEXT:    ; def v[0:1]
4193; GFX90A-NEXT:    ;;#ASMEND
4194; GFX90A-NEXT:    ;;#ASMSTART
4195; GFX90A-NEXT:    ; def v[2:3]
4196; GFX90A-NEXT:    ;;#ASMEND
4197; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v0, 16
4198; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
4199; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4200; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4201; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4202;
4203; GFX940-LABEL: v_shuffle_v3i16_v3i16__1_5_5:
4204; GFX940:       ; %bb.0:
4205; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4206; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4207; GFX940-NEXT:    ;;#ASMSTART
4208; GFX940-NEXT:    ; def v[0:1]
4209; GFX940-NEXT:    ;;#ASMEND
4210; GFX940-NEXT:    ;;#ASMSTART
4211; GFX940-NEXT:    ; def v[2:3]
4212; GFX940-NEXT:    ;;#ASMEND
4213; GFX940-NEXT:    s_nop 0
4214; GFX940-NEXT:    v_alignbit_b32 v0, v3, v0, 16
4215; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4216; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4217; GFX940-NEXT:    s_waitcnt vmcnt(0)
4218; GFX940-NEXT:    s_setpc_b64 s[30:31]
4219  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4220  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4221  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4222  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4223  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 1, i32 5, i32 5>
4224  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4225  ret void
4226}
4227
; Shuffle of two <3 x i16> operands with mask <2, 5, 5>.
; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so lane 0 is
; element 2 of %extract3 and lanes 1-2 are both element 2 of %extract31.
; Both inline-asm defs appear in the expected code.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4228define void @v_shuffle_v3i16_v3i16__2_5_5(ptr addrspace(1) inreg %ptr) {
4229; GFX900-LABEL: v_shuffle_v3i16_v3i16__2_5_5:
4230; GFX900:       ; %bb.0:
4231; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4232; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4233; GFX900-NEXT:    ;;#ASMSTART
4234; GFX900-NEXT:    ; def v[0:1]
4235; GFX900-NEXT:    ;;#ASMEND
4236; GFX900-NEXT:    ;;#ASMSTART
4237; GFX900-NEXT:    ; def v[2:3]
4238; GFX900-NEXT:    ;;#ASMEND
4239; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4240; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
4241; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
4242; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4243; GFX900-NEXT:    s_waitcnt vmcnt(0)
4244; GFX900-NEXT:    s_setpc_b64 s[30:31]
4245;
4246; GFX90A-LABEL: v_shuffle_v3i16_v3i16__2_5_5:
4247; GFX90A:       ; %bb.0:
4248; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4249; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4250; GFX90A-NEXT:    ;;#ASMSTART
4251; GFX90A-NEXT:    ; def v[0:1]
4252; GFX90A-NEXT:    ;;#ASMEND
4253; GFX90A-NEXT:    ;;#ASMSTART
4254; GFX90A-NEXT:    ; def v[2:3]
4255; GFX90A-NEXT:    ;;#ASMEND
4256; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4257; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
4258; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
4259; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4260; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4261; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4262;
4263; GFX940-LABEL: v_shuffle_v3i16_v3i16__2_5_5:
4264; GFX940:       ; %bb.0:
4265; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4266; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4267; GFX940-NEXT:    ;;#ASMSTART
4268; GFX940-NEXT:    ; def v[0:1]
4269; GFX940-NEXT:    ;;#ASMEND
4270; GFX940-NEXT:    ;;#ASMSTART
4271; GFX940-NEXT:    ; def v[2:3]
4272; GFX940-NEXT:    ;;#ASMEND
4273; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4274; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
4275; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4276; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4277; GFX940-NEXT:    s_waitcnt vmcnt(0)
4278; GFX940-NEXT:    s_setpc_b64 s[30:31]
4279  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4280  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4281  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4282  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4283  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 2, i32 5, i32 5>
4284  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4285  ret void
4286}
4287
; Shuffle of two <3 x i16> operands with mask <3, 5, 5>.
; Mask indices 3-5 address the second operand, so lane 0 is element 0 of
; %extract31 and lanes 1-2 are both element 2 of %extract31; %extract3 is not
; referenced by the mask, and the expected code contains a single inline-asm def.
; The assertions below are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
4288define void @v_shuffle_v3i16_v3i16__3_5_5(ptr addrspace(1) inreg %ptr) {
4289; GFX900-LABEL: v_shuffle_v3i16_v3i16__3_5_5:
4290; GFX900:       ; %bb.0:
4291; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4292; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4293; GFX900-NEXT:    ;;#ASMSTART
4294; GFX900-NEXT:    ; def v[0:1]
4295; GFX900-NEXT:    ;;#ASMEND
4296; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4297; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
4298; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4299; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4300; GFX900-NEXT:    s_waitcnt vmcnt(0)
4301; GFX900-NEXT:    s_setpc_b64 s[30:31]
4302;
4303; GFX90A-LABEL: v_shuffle_v3i16_v3i16__3_5_5:
4304; GFX90A:       ; %bb.0:
4305; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4306; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4307; GFX90A-NEXT:    ;;#ASMSTART
4308; GFX90A-NEXT:    ; def v[0:1]
4309; GFX90A-NEXT:    ;;#ASMEND
4310; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4311; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
4312; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4313; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4314; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4315; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4316;
4317; GFX940-LABEL: v_shuffle_v3i16_v3i16__3_5_5:
4318; GFX940:       ; %bb.0:
4319; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4320; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4321; GFX940-NEXT:    ;;#ASMSTART
4322; GFX940-NEXT:    ; def v[0:1]
4323; GFX940-NEXT:    ;;#ASMEND
4324; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4325; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
4326; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4327; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4328; GFX940-NEXT:    s_waitcnt vmcnt(0)
4329; GFX940-NEXT:    s_setpc_b64 s[30:31]
4330  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4331  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4332  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4333  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4334  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 3, i32 5, i32 5>
4335  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4336  ret void
4337}
4338
; Two-source <3 x i16> shuffle with mask <4, 5, 5>, result stored to %ptr.
; All lanes read the second operand %extract31 (element 1, then element 2
; twice); %extract3 does not contribute. The expected code has a single
; inline-asm def and a v_alignbit_b32 by 16 producing the dword stored at
; offset 0. The gfx940 expectation additionally has an s_nop 0 before the
; v_alignbit_b32.
4339define void @v_shuffle_v3i16_v3i16__4_5_5(ptr addrspace(1) inreg %ptr) {
4340; GFX900-LABEL: v_shuffle_v3i16_v3i16__4_5_5:
4341; GFX900:       ; %bb.0:
4342; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4343; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4344; GFX900-NEXT:    ;;#ASMSTART
4345; GFX900-NEXT:    ; def v[0:1]
4346; GFX900-NEXT:    ;;#ASMEND
4347; GFX900-NEXT:    v_alignbit_b32 v0, v1, v0, 16
4348; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4349; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4350; GFX900-NEXT:    s_waitcnt vmcnt(0)
4351; GFX900-NEXT:    s_setpc_b64 s[30:31]
4352;
4353; GFX90A-LABEL: v_shuffle_v3i16_v3i16__4_5_5:
4354; GFX90A:       ; %bb.0:
4355; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4356; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4357; GFX90A-NEXT:    ;;#ASMSTART
4358; GFX90A-NEXT:    ; def v[0:1]
4359; GFX90A-NEXT:    ;;#ASMEND
4360; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v0, 16
4361; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4362; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4363; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4364; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4365;
4366; GFX940-LABEL: v_shuffle_v3i16_v3i16__4_5_5:
4367; GFX940:       ; %bb.0:
4368; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4369; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4370; GFX940-NEXT:    ;;#ASMSTART
4371; GFX940-NEXT:    ; def v[0:1]
4372; GFX940-NEXT:    ;;#ASMEND
4373; GFX940-NEXT:    s_nop 0
4374; GFX940-NEXT:    v_alignbit_b32 v0, v1, v0, 16
4375; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4376; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4377; GFX940-NEXT:    s_waitcnt vmcnt(0)
4378; GFX940-NEXT:    s_setpc_b64 s[30:31]
4379  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4380  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4381  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4382  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4383  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 5, i32 5>
4384  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4385  ret void
4386}
4387
; Two-source <3 x i16> shuffle with mask <5, poison, 5>, result stored to
; %ptr. Lanes 0 and 2 read element 2 of the second operand %extract31 and
; lane 1 is unconstrained. The expected code needs no combining
; instruction: the same register (v1) feeds both the dword store at
; offset 0 and the short store at offset 4.
4388define void @v_shuffle_v3i16_v3i16__5_u_5(ptr addrspace(1) inreg %ptr) {
4389; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_u_5:
4390; GFX900:       ; %bb.0:
4391; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4392; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4393; GFX900-NEXT:    ;;#ASMSTART
4394; GFX900-NEXT:    ; def v[0:1]
4395; GFX900-NEXT:    ;;#ASMEND
4396; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4397; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4398; GFX900-NEXT:    s_waitcnt vmcnt(0)
4399; GFX900-NEXT:    s_setpc_b64 s[30:31]
4400;
4401; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_u_5:
4402; GFX90A:       ; %bb.0:
4403; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4404; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4405; GFX90A-NEXT:    ;;#ASMSTART
4406; GFX90A-NEXT:    ; def v[0:1]
4407; GFX90A-NEXT:    ;;#ASMEND
4408; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4409; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4410; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4411; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4412;
4413; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_u_5:
4414; GFX940:       ; %bb.0:
4415; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4416; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4417; GFX940-NEXT:    ;;#ASMSTART
4418; GFX940-NEXT:    ; def v[0:1]
4419; GFX940-NEXT:    ;;#ASMEND
4420; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4421; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4422; GFX940-NEXT:    s_waitcnt vmcnt(0)
4423; GFX940-NEXT:    s_setpc_b64 s[30:31]
4424  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4425  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4426  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4427  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4428  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 5>
4429  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4430  ret void
4431}
4432
; Two-source <3 x i16> shuffle with mask <5, 0, 5>, result stored to %ptr.
; Lanes 0 and 2 read element 2 of the second operand %extract31; lane 1
; reads element 0 of the first operand %extract3. Both inline-asm defs
; appear in the expected code, followed by a v_perm_b32 with selector
; 0x5040100. In the gfx900 expectation the second def (v[1:2]) overlaps
; the upper register of the first (v[0:1]); the other two targets use
; v[2:3] for the second def.
4433define void @v_shuffle_v3i16_v3i16__5_0_5(ptr addrspace(1) inreg %ptr) {
4434; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_0_5:
4435; GFX900:       ; %bb.0:
4436; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4437; GFX900-NEXT:    ;;#ASMSTART
4438; GFX900-NEXT:    ; def v[0:1]
4439; GFX900-NEXT:    ;;#ASMEND
4440; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4441; GFX900-NEXT:    ;;#ASMSTART
4442; GFX900-NEXT:    ; def v[1:2]
4443; GFX900-NEXT:    ;;#ASMEND
4444; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4445; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
4446; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
4447; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
4448; GFX900-NEXT:    s_waitcnt vmcnt(0)
4449; GFX900-NEXT:    s_setpc_b64 s[30:31]
4450;
4451; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_0_5:
4452; GFX90A:       ; %bb.0:
4453; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4454; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4455; GFX90A-NEXT:    ;;#ASMSTART
4456; GFX90A-NEXT:    ; def v[0:1]
4457; GFX90A-NEXT:    ;;#ASMEND
4458; GFX90A-NEXT:    ;;#ASMSTART
4459; GFX90A-NEXT:    ; def v[2:3]
4460; GFX90A-NEXT:    ;;#ASMEND
4461; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4462; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
4463; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
4464; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4465; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4466; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4467;
4468; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_0_5:
4469; GFX940:       ; %bb.0:
4470; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4471; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4472; GFX940-NEXT:    ;;#ASMSTART
4473; GFX940-NEXT:    ; def v[0:1]
4474; GFX940-NEXT:    ;;#ASMEND
4475; GFX940-NEXT:    ;;#ASMSTART
4476; GFX940-NEXT:    ; def v[2:3]
4477; GFX940-NEXT:    ;;#ASMEND
4478; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4479; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
4480; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4481; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4482; GFX940-NEXT:    s_waitcnt vmcnt(0)
4483; GFX940-NEXT:    s_setpc_b64 s[30:31]
4484  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4485  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4486  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4487  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4488  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 5>
4489  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4490  ret void
4491}
4492
; Two-source <3 x i16> shuffle with mask <5, 1, 5>, result stored to %ptr.
; Lanes 0 and 2 read element 2 of the second operand %extract31; lane 1
; reads element 1 of the first operand %extract3. Both inline-asm defs
; appear in the expected code, and the dword stored at offset 0 is formed
; by a v_bfi_b32 whose mask operand is 0xffff.
4493define void @v_shuffle_v3i16_v3i16__5_1_5(ptr addrspace(1) inreg %ptr) {
4494; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_1_5:
4495; GFX900:       ; %bb.0:
4496; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4497; GFX900-NEXT:    ;;#ASMSTART
4498; GFX900-NEXT:    ; def v[0:1]
4499; GFX900-NEXT:    ;;#ASMEND
4500; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4501; GFX900-NEXT:    ;;#ASMSTART
4502; GFX900-NEXT:    ; def v[1:2]
4503; GFX900-NEXT:    ;;#ASMEND
4504; GFX900-NEXT:    s_mov_b32 s4, 0xffff
4505; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
4506; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
4507; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
4508; GFX900-NEXT:    s_waitcnt vmcnt(0)
4509; GFX900-NEXT:    s_setpc_b64 s[30:31]
4510;
4511; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_1_5:
4512; GFX90A:       ; %bb.0:
4513; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4514; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4515; GFX90A-NEXT:    ;;#ASMSTART
4516; GFX90A-NEXT:    ; def v[0:1]
4517; GFX90A-NEXT:    ;;#ASMEND
4518; GFX90A-NEXT:    ;;#ASMSTART
4519; GFX90A-NEXT:    ; def v[2:3]
4520; GFX90A-NEXT:    ;;#ASMEND
4521; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
4522; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
4523; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
4524; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4525; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4526; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4527;
4528; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_1_5:
4529; GFX940:       ; %bb.0:
4530; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4531; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4532; GFX940-NEXT:    ;;#ASMSTART
4533; GFX940-NEXT:    ; def v[0:1]
4534; GFX940-NEXT:    ;;#ASMEND
4535; GFX940-NEXT:    ;;#ASMSTART
4536; GFX940-NEXT:    ; def v[2:3]
4537; GFX940-NEXT:    ;;#ASMEND
4538; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4539; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
4540; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4541; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4542; GFX940-NEXT:    s_waitcnt vmcnt(0)
4543; GFX940-NEXT:    s_setpc_b64 s[30:31]
4544  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4545  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4546  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4547  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4548  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 5>
4549  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4550  ret void
4551}
4552
; Two-source <3 x i16> shuffle with mask <5, 2, 5>, result stored to %ptr.
; Lanes 0 and 2 read element 2 of the second operand %extract31; lane 1
; reads element 2 of the first operand %extract3. The expected code keeps
; both inline-asm defs in separate register pairs (v[0:1] and v[2:3]) on
; every target and combines the upper register of each pair with a
; v_perm_b32 using selector 0x5040100.
4553define void @v_shuffle_v3i16_v3i16__5_2_5(ptr addrspace(1) inreg %ptr) {
4554; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_2_5:
4555; GFX900:       ; %bb.0:
4556; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4557; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4558; GFX900-NEXT:    ;;#ASMSTART
4559; GFX900-NEXT:    ; def v[0:1]
4560; GFX900-NEXT:    ;;#ASMEND
4561; GFX900-NEXT:    ;;#ASMSTART
4562; GFX900-NEXT:    ; def v[2:3]
4563; GFX900-NEXT:    ;;#ASMEND
4564; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4565; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
4566; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
4567; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4568; GFX900-NEXT:    s_waitcnt vmcnt(0)
4569; GFX900-NEXT:    s_setpc_b64 s[30:31]
4570;
4571; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_2_5:
4572; GFX90A:       ; %bb.0:
4573; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4574; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4575; GFX90A-NEXT:    ;;#ASMSTART
4576; GFX90A-NEXT:    ; def v[0:1]
4577; GFX90A-NEXT:    ;;#ASMEND
4578; GFX90A-NEXT:    ;;#ASMSTART
4579; GFX90A-NEXT:    ; def v[2:3]
4580; GFX90A-NEXT:    ;;#ASMEND
4581; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4582; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
4583; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
4584; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4585; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4586; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4587;
4588; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_2_5:
4589; GFX940:       ; %bb.0:
4590; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4591; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4592; GFX940-NEXT:    ;;#ASMSTART
4593; GFX940-NEXT:    ; def v[0:1]
4594; GFX940-NEXT:    ;;#ASMEND
4595; GFX940-NEXT:    ;;#ASMSTART
4596; GFX940-NEXT:    ; def v[2:3]
4597; GFX940-NEXT:    ;;#ASMEND
4598; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4599; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
4600; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
4601; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4602; GFX940-NEXT:    s_waitcnt vmcnt(0)
4603; GFX940-NEXT:    s_setpc_b64 s[30:31]
4604  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4605  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4606  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4607  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4608  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 5>
4609  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4610  ret void
4611}
4612
; Two-source <3 x i16> shuffle with mask <5, 3, 5>, result stored to %ptr.
; Every lane reads the second operand %extract31 (element 2, element 0,
; element 2); %extract3 does not contribute. The expected code has a
; single inline-asm def and a v_perm_b32 with selector 0x5040100 whose
; source operands are v0 then v1.
4613define void @v_shuffle_v3i16_v3i16__5_3_5(ptr addrspace(1) inreg %ptr) {
4614; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_3_5:
4615; GFX900:       ; %bb.0:
4616; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4617; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4618; GFX900-NEXT:    ;;#ASMSTART
4619; GFX900-NEXT:    ; def v[0:1]
4620; GFX900-NEXT:    ;;#ASMEND
4621; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4622; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
4623; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4624; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4625; GFX900-NEXT:    s_waitcnt vmcnt(0)
4626; GFX900-NEXT:    s_setpc_b64 s[30:31]
4627;
4628; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_3_5:
4629; GFX90A:       ; %bb.0:
4630; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4631; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4632; GFX90A-NEXT:    ;;#ASMSTART
4633; GFX90A-NEXT:    ; def v[0:1]
4634; GFX90A-NEXT:    ;;#ASMEND
4635; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4636; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
4637; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4638; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4639; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4640; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4641;
4642; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_3_5:
4643; GFX940:       ; %bb.0:
4644; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4645; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4646; GFX940-NEXT:    ;;#ASMSTART
4647; GFX940-NEXT:    ; def v[0:1]
4648; GFX940-NEXT:    ;;#ASMEND
4649; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4650; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
4651; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4652; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4653; GFX940-NEXT:    s_waitcnt vmcnt(0)
4654; GFX940-NEXT:    s_setpc_b64 s[30:31]
4655  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4656  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4657  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4658  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4659  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 5>
4660  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4661  ret void
4662}
4663
; Two-source <3 x i16> shuffle with mask <5, 4, 5>, result stored to %ptr.
; Every lane reads the second operand %extract31 (element 2, element 1,
; element 2); %extract3 does not contribute. The expected code has a
; single inline-asm def and forms the dword stored at offset 0 with a
; v_bfi_b32 whose mask operand is 0xffff.
4664define void @v_shuffle_v3i16_v3i16__5_4_5(ptr addrspace(1) inreg %ptr) {
4665; GFX900-LABEL: v_shuffle_v3i16_v3i16__5_4_5:
4666; GFX900:       ; %bb.0:
4667; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4668; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4669; GFX900-NEXT:    ;;#ASMSTART
4670; GFX900-NEXT:    ; def v[0:1]
4671; GFX900-NEXT:    ;;#ASMEND
4672; GFX900-NEXT:    s_mov_b32 s4, 0xffff
4673; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v0
4674; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4675; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4676; GFX900-NEXT:    s_waitcnt vmcnt(0)
4677; GFX900-NEXT:    s_setpc_b64 s[30:31]
4678;
4679; GFX90A-LABEL: v_shuffle_v3i16_v3i16__5_4_5:
4680; GFX90A:       ; %bb.0:
4681; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4682; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4683; GFX90A-NEXT:    ;;#ASMSTART
4684; GFX90A-NEXT:    ; def v[0:1]
4685; GFX90A-NEXT:    ;;#ASMEND
4686; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
4687; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v0
4688; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4689; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4690; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4691; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4692;
4693; GFX940-LABEL: v_shuffle_v3i16_v3i16__5_4_5:
4694; GFX940:       ; %bb.0:
4695; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4696; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4697; GFX940-NEXT:    ;;#ASMSTART
4698; GFX940-NEXT:    ; def v[0:1]
4699; GFX940-NEXT:    ;;#ASMEND
4700; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4701; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v0
4702; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4703; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4704; GFX940-NEXT:    s_waitcnt vmcnt(0)
4705; GFX940-NEXT:    s_setpc_b64 s[30:31]
4706  %vec0 = call <4 x i16> asm "; def $0", "=v"()
4707  %vec1 = call <4 x i16> asm "; def $0", "=v"()
4708  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4709  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4710  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 5>
4711  store <3 x i16> %shuf, ptr addrspace(1) %ptr, align 8
4712  ret void
4713}
4714
; Scalar-register variant with a fully poison shuffle mask. The value is
; defined by inline asm with an "=s" constraint, shuffled, widened back to
; <4 x i16>, and handed to a side-effecting "use" asm pinned to s[8:9].
; Because no lane of the result is defined, the shared expectation for all
; targets contains only the "use" asm and no def.
4715define void @s_shuffle_v3i16_v3i16__u_u_u() {
4716; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_u_u:
4717; GFX9:       ; %bb.0:
4718; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4719; GFX9-NEXT:    ;;#ASMSTART
4720; GFX9-NEXT:    ; use s[8:9]
4721; GFX9-NEXT:    ;;#ASMEND
4722; GFX9-NEXT:    s_setpc_b64 s[30:31]
4723  %vec0 = call <4 x i16> asm "; def $0", "=s"()
4724  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4725  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> poison
4726  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4727  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
4728  ret void
4729}
4730
; Scalar-register variant with mask <0, poison, poison>: lane 0 keeps
; element 0 of %extract3 and the other lanes are unconstrained. The
; expected code defines the value directly in s[8:9] and passes it to the
; "use" asm with no intervening copy. The gfx940 expectation has an
; s_nop 0 between the def and the use.
4731define void @s_shuffle_v3i16_v3i16__0_u_u() {
4732; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_u_u:
4733; GFX900:       ; %bb.0:
4734; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4735; GFX900-NEXT:    ;;#ASMSTART
4736; GFX900-NEXT:    ; def s[8:9]
4737; GFX900-NEXT:    ;;#ASMEND
4738; GFX900-NEXT:    ;;#ASMSTART
4739; GFX900-NEXT:    ; use s[8:9]
4740; GFX900-NEXT:    ;;#ASMEND
4741; GFX900-NEXT:    s_setpc_b64 s[30:31]
4742;
4743; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_u_u:
4744; GFX90A:       ; %bb.0:
4745; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4746; GFX90A-NEXT:    ;;#ASMSTART
4747; GFX90A-NEXT:    ; def s[8:9]
4748; GFX90A-NEXT:    ;;#ASMEND
4749; GFX90A-NEXT:    ;;#ASMSTART
4750; GFX90A-NEXT:    ; use s[8:9]
4751; GFX90A-NEXT:    ;;#ASMEND
4752; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4753;
4754; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_u_u:
4755; GFX940:       ; %bb.0:
4756; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4757; GFX940-NEXT:    ;;#ASMSTART
4758; GFX940-NEXT:    ; def s[8:9]
4759; GFX940-NEXT:    ;;#ASMEND
4760; GFX940-NEXT:    s_nop 0
4761; GFX940-NEXT:    ;;#ASMSTART
4762; GFX940-NEXT:    ; use s[8:9]
4763; GFX940-NEXT:    ;;#ASMEND
4764; GFX940-NEXT:    s_setpc_b64 s[30:31]
4765  %vec0 = call <4 x i16> asm "; def $0", "=s"()
4766  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4767  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
4768  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4769  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
4770  ret void
4771}
4772
; Scalar-register variant with mask <1, poison, poison>: lane 0 takes
; element 1 of %extract3 and the other lanes are unconstrained. The
; expected code moves that element into place with an s_lshr_b32 by 16 of
; the first register of the def pair, writing s8 for the "use" asm.
4773define void @s_shuffle_v3i16_v3i16__1_u_u() {
4774; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_u_u:
4775; GFX900:       ; %bb.0:
4776; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4777; GFX900-NEXT:    ;;#ASMSTART
4778; GFX900-NEXT:    ; def s[4:5]
4779; GFX900-NEXT:    ;;#ASMEND
4780; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
4781; GFX900-NEXT:    ;;#ASMSTART
4782; GFX900-NEXT:    ; use s[8:9]
4783; GFX900-NEXT:    ;;#ASMEND
4784; GFX900-NEXT:    s_setpc_b64 s[30:31]
4785;
4786; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_u_u:
4787; GFX90A:       ; %bb.0:
4788; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4789; GFX90A-NEXT:    ;;#ASMSTART
4790; GFX90A-NEXT:    ; def s[4:5]
4791; GFX90A-NEXT:    ;;#ASMEND
4792; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
4793; GFX90A-NEXT:    ;;#ASMSTART
4794; GFX90A-NEXT:    ; use s[8:9]
4795; GFX90A-NEXT:    ;;#ASMEND
4796; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4797;
4798; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_u_u:
4799; GFX940:       ; %bb.0:
4800; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4801; GFX940-NEXT:    ;;#ASMSTART
4802; GFX940-NEXT:    ; def s[0:1]
4803; GFX940-NEXT:    ;;#ASMEND
4804; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
4805; GFX940-NEXT:    ;;#ASMSTART
4806; GFX940-NEXT:    ; use s[8:9]
4807; GFX940-NEXT:    ;;#ASMEND
4808; GFX940-NEXT:    s_setpc_b64 s[30:31]
4809  %vec0 = call <4 x i16> asm "; def $0", "=s"()
4810  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4811  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
4812  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4813  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
4814  ret void
4815}
4816
; Scalar-register variant with mask <2, poison, poison>: lane 0 takes
; element 2 of %extract3 and the other lanes are unconstrained. The
; expected code copies the second register of the def pair into s8 with a
; plain s_mov_b32 before the "use" asm.
4817define void @s_shuffle_v3i16_v3i16__2_u_u() {
4818; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_u_u:
4819; GFX900:       ; %bb.0:
4820; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4821; GFX900-NEXT:    ;;#ASMSTART
4822; GFX900-NEXT:    ; def s[4:5]
4823; GFX900-NEXT:    ;;#ASMEND
4824; GFX900-NEXT:    s_mov_b32 s8, s5
4825; GFX900-NEXT:    ;;#ASMSTART
4826; GFX900-NEXT:    ; use s[8:9]
4827; GFX900-NEXT:    ;;#ASMEND
4828; GFX900-NEXT:    s_setpc_b64 s[30:31]
4829;
4830; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_u_u:
4831; GFX90A:       ; %bb.0:
4832; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4833; GFX90A-NEXT:    ;;#ASMSTART
4834; GFX90A-NEXT:    ; def s[4:5]
4835; GFX90A-NEXT:    ;;#ASMEND
4836; GFX90A-NEXT:    s_mov_b32 s8, s5
4837; GFX90A-NEXT:    ;;#ASMSTART
4838; GFX90A-NEXT:    ; use s[8:9]
4839; GFX90A-NEXT:    ;;#ASMEND
4840; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4841;
4842; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_u_u:
4843; GFX940:       ; %bb.0:
4844; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4845; GFX940-NEXT:    ;;#ASMSTART
4846; GFX940-NEXT:    ; def s[0:1]
4847; GFX940-NEXT:    ;;#ASMEND
4848; GFX940-NEXT:    s_mov_b32 s8, s1
4849; GFX940-NEXT:    ;;#ASMSTART
4850; GFX940-NEXT:    ; use s[8:9]
4851; GFX940-NEXT:    ;;#ASMEND
4852; GFX940-NEXT:    s_setpc_b64 s[30:31]
4853  %vec0 = call <4 x i16> asm "; def $0", "=s"()
4854  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4855  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
4856  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4857  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
4858  ret void
4859}
4860
; Scalar-register variant with mask <3, poison, poison>. Index 3 addresses
; element 0 of the second shuffle operand, which is poison in this
; function, so no lane of the result depends on %vec0. The shared
; expectation for all targets therefore contains only the "use" asm and
; no def.
4861define void @s_shuffle_v3i16_v3i16__3_u_u() {
4862; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_u_u:
4863; GFX9:       ; %bb.0:
4864; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4865; GFX9-NEXT:    ;;#ASMSTART
4866; GFX9-NEXT:    ; use s[8:9]
4867; GFX9-NEXT:    ;;#ASMEND
4868; GFX9-NEXT:    s_setpc_b64 s[30:31]
4869  %vec0 = call <4 x i16> asm "; def $0", "=s"()
4870  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4871  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
4872  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4873  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
4874  ret void
4875}
4876
; Scalar-register, two-source variant with mask <4, poison, poison>:
; lane 0 takes element 1 of the second operand %extract31 and %extract3
; does not contribute. The expected code has a single inline-asm def and
; an s_lshr_b32 by 16 of the first register of that pair into s8.
4877define void @s_shuffle_v3i16_v3i16__4_u_u() {
4878; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_u_u:
4879; GFX900:       ; %bb.0:
4880; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4881; GFX900-NEXT:    ;;#ASMSTART
4882; GFX900-NEXT:    ; def s[4:5]
4883; GFX900-NEXT:    ;;#ASMEND
4884; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
4885; GFX900-NEXT:    ;;#ASMSTART
4886; GFX900-NEXT:    ; use s[8:9]
4887; GFX900-NEXT:    ;;#ASMEND
4888; GFX900-NEXT:    s_setpc_b64 s[30:31]
4889;
4890; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_u_u:
4891; GFX90A:       ; %bb.0:
4892; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4893; GFX90A-NEXT:    ;;#ASMSTART
4894; GFX90A-NEXT:    ; def s[4:5]
4895; GFX90A-NEXT:    ;;#ASMEND
4896; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
4897; GFX90A-NEXT:    ;;#ASMSTART
4898; GFX90A-NEXT:    ; use s[8:9]
4899; GFX90A-NEXT:    ;;#ASMEND
4900; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4901;
4902; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_u_u:
4903; GFX940:       ; %bb.0:
4904; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4905; GFX940-NEXT:    ;;#ASMSTART
4906; GFX940-NEXT:    ; def s[0:1]
4907; GFX940-NEXT:    ;;#ASMEND
4908; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
4909; GFX940-NEXT:    ;;#ASMSTART
4910; GFX940-NEXT:    ; use s[8:9]
4911; GFX940-NEXT:    ;;#ASMEND
4912; GFX940-NEXT:    s_setpc_b64 s[30:31]
4913  %vec0 = call <4 x i16> asm "; def $0", "=s"()
4914  %vec1 = call <4 x i16> asm "; def $0", "=s"()
4915  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4916  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4917  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 poison, i32 poison>
4918  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4919  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
4920  ret void
4921}
4922
; Scalar-register, two-source variant with mask <5, poison, poison>:
; lane 0 takes element 2 of the second operand %extract31 and %extract3
; does not contribute. The expected code has a single inline-asm def and
; copies the second register of that pair into s8 with s_mov_b32.
4923define void @s_shuffle_v3i16_v3i16__5_u_u() {
4924; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_u:
4925; GFX900:       ; %bb.0:
4926; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4927; GFX900-NEXT:    ;;#ASMSTART
4928; GFX900-NEXT:    ; def s[4:5]
4929; GFX900-NEXT:    ;;#ASMEND
4930; GFX900-NEXT:    s_mov_b32 s8, s5
4931; GFX900-NEXT:    ;;#ASMSTART
4932; GFX900-NEXT:    ; use s[8:9]
4933; GFX900-NEXT:    ;;#ASMEND
4934; GFX900-NEXT:    s_setpc_b64 s[30:31]
4935;
4936; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_u:
4937; GFX90A:       ; %bb.0:
4938; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4939; GFX90A-NEXT:    ;;#ASMSTART
4940; GFX90A-NEXT:    ; def s[4:5]
4941; GFX90A-NEXT:    ;;#ASMEND
4942; GFX90A-NEXT:    s_mov_b32 s8, s5
4943; GFX90A-NEXT:    ;;#ASMSTART
4944; GFX90A-NEXT:    ; use s[8:9]
4945; GFX90A-NEXT:    ;;#ASMEND
4946; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4947;
4948; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_u:
4949; GFX940:       ; %bb.0:
4950; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4951; GFX940-NEXT:    ;;#ASMSTART
4952; GFX940-NEXT:    ; def s[0:1]
4953; GFX940-NEXT:    ;;#ASMEND
4954; GFX940-NEXT:    s_mov_b32 s8, s1
4955; GFX940-NEXT:    ;;#ASMSTART
4956; GFX940-NEXT:    ; use s[8:9]
4957; GFX940-NEXT:    ;;#ASMEND
4958; GFX940-NEXT:    s_setpc_b64 s[30:31]
4959  %vec0 = call <4 x i16> asm "; def $0", "=s"()
4960  %vec1 = call <4 x i16> asm "; def $0", "=s"()
4961  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4962  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
4963  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 poison>
4964  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
4965  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
4966  ret void
4967}
4968
; Scalar-register, two-source variant with mask <5, 0, poison>: lane 0
; takes element 2 of the second operand %extract31 and lane 1 takes
; element 0 of the first operand %extract3. Both inline-asm defs appear in
; the expected code, and the two halves are combined into s8 with
; s_pack_ll_b32_b16 (second def's upper register, then first def's lower
; register).
4969define void @s_shuffle_v3i16_v3i16__5_0_u() {
4970; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_u:
4971; GFX900:       ; %bb.0:
4972; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4973; GFX900-NEXT:    ;;#ASMSTART
4974; GFX900-NEXT:    ; def s[4:5]
4975; GFX900-NEXT:    ;;#ASMEND
4976; GFX900-NEXT:    ;;#ASMSTART
4977; GFX900-NEXT:    ; def s[6:7]
4978; GFX900-NEXT:    ;;#ASMEND
4979; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
4980; GFX900-NEXT:    ;;#ASMSTART
4981; GFX900-NEXT:    ; use s[8:9]
4982; GFX900-NEXT:    ;;#ASMEND
4983; GFX900-NEXT:    s_setpc_b64 s[30:31]
4984;
4985; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_u:
4986; GFX90A:       ; %bb.0:
4987; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4988; GFX90A-NEXT:    ;;#ASMSTART
4989; GFX90A-NEXT:    ; def s[4:5]
4990; GFX90A-NEXT:    ;;#ASMEND
4991; GFX90A-NEXT:    ;;#ASMSTART
4992; GFX90A-NEXT:    ; def s[6:7]
4993; GFX90A-NEXT:    ;;#ASMEND
4994; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
4995; GFX90A-NEXT:    ;;#ASMSTART
4996; GFX90A-NEXT:    ; use s[8:9]
4997; GFX90A-NEXT:    ;;#ASMEND
4998; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4999;
5000; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_u:
5001; GFX940:       ; %bb.0:
5002; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5003; GFX940-NEXT:    ;;#ASMSTART
5004; GFX940-NEXT:    ; def s[0:1]
5005; GFX940-NEXT:    ;;#ASMEND
5006; GFX940-NEXT:    ;;#ASMSTART
5007; GFX940-NEXT:    ; def s[2:3]
5008; GFX940-NEXT:    ;;#ASMEND
5009; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
5010; GFX940-NEXT:    ;;#ASMSTART
5011; GFX940-NEXT:    ; use s[8:9]
5012; GFX940-NEXT:    ;;#ASMEND
5013; GFX940-NEXT:    s_setpc_b64 s[30:31]
5014  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5015  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5016  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5017  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5018  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 poison>
5019  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5020  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5021  ret void
5022}
5023
; Scalar-register, two-source variant with mask <5, 1, poison>: lane 0
; takes element 2 of the second operand %extract31 and lane 1 takes
; element 1 of the first operand %extract3. Both inline-asm defs appear in
; the expected code, and the two halves are combined into s8 with
; s_pack_lh_b32_b16 (second def's upper register, then first def's lower
; register).
5024define void @s_shuffle_v3i16_v3i16__5_1_u() {
5025; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_u:
5026; GFX900:       ; %bb.0:
5027; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5028; GFX900-NEXT:    ;;#ASMSTART
5029; GFX900-NEXT:    ; def s[4:5]
5030; GFX900-NEXT:    ;;#ASMEND
5031; GFX900-NEXT:    ;;#ASMSTART
5032; GFX900-NEXT:    ; def s[6:7]
5033; GFX900-NEXT:    ;;#ASMEND
5034; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
5035; GFX900-NEXT:    ;;#ASMSTART
5036; GFX900-NEXT:    ; use s[8:9]
5037; GFX900-NEXT:    ;;#ASMEND
5038; GFX900-NEXT:    s_setpc_b64 s[30:31]
5039;
5040; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_u:
5041; GFX90A:       ; %bb.0:
5042; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5043; GFX90A-NEXT:    ;;#ASMSTART
5044; GFX90A-NEXT:    ; def s[4:5]
5045; GFX90A-NEXT:    ;;#ASMEND
5046; GFX90A-NEXT:    ;;#ASMSTART
5047; GFX90A-NEXT:    ; def s[6:7]
5048; GFX90A-NEXT:    ;;#ASMEND
5049; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
5050; GFX90A-NEXT:    ;;#ASMSTART
5051; GFX90A-NEXT:    ; use s[8:9]
5052; GFX90A-NEXT:    ;;#ASMEND
5053; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5054;
5055; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_u:
5056; GFX940:       ; %bb.0:
5057; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5058; GFX940-NEXT:    ;;#ASMSTART
5059; GFX940-NEXT:    ; def s[0:1]
5060; GFX940-NEXT:    ;;#ASMEND
5061; GFX940-NEXT:    ;;#ASMSTART
5062; GFX940-NEXT:    ; def s[2:3]
5063; GFX940-NEXT:    ;;#ASMEND
5064; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
5065; GFX940-NEXT:    ;;#ASMSTART
5066; GFX940-NEXT:    ; use s[8:9]
5067; GFX940-NEXT:    ;;#ASMEND
5068; GFX940-NEXT:    s_setpc_b64 s[30:31]
5069  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5070  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5071  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5072  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5073  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 poison>
5074  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5075  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5076  ret void
5077}
5078
; SGPR shuffle of two <3 x i16> inputs with mask <5, 2, poison>: lane 0 takes
; element 2 of %extract31 (mask indices 3-5 address the second operand),
; lane 1 takes element 2 of %extract3, and lane 2 is unconstrained.
; Both inputs come from "=s" inline asm defs; the result is widened to
; <4 x i16> and pinned to s[8:9] by the consuming asm. The autogenerated
; assertions expect both defs plus a single s_pack_ll_b32_b16 that combines
; the second register of each input pair into s8.
5079define void @s_shuffle_v3i16_v3i16__5_2_u() {
5080; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_u:
5081; GFX900:       ; %bb.0:
5082; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5083; GFX900-NEXT:    ;;#ASMSTART
5084; GFX900-NEXT:    ; def s[4:5]
5085; GFX900-NEXT:    ;;#ASMEND
5086; GFX900-NEXT:    ;;#ASMSTART
5087; GFX900-NEXT:    ; def s[6:7]
5088; GFX900-NEXT:    ;;#ASMEND
5089; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
5090; GFX900-NEXT:    ;;#ASMSTART
5091; GFX900-NEXT:    ; use s[8:9]
5092; GFX900-NEXT:    ;;#ASMEND
5093; GFX900-NEXT:    s_setpc_b64 s[30:31]
5094;
5095; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_u:
5096; GFX90A:       ; %bb.0:
5097; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5098; GFX90A-NEXT:    ;;#ASMSTART
5099; GFX90A-NEXT:    ; def s[4:5]
5100; GFX90A-NEXT:    ;;#ASMEND
5101; GFX90A-NEXT:    ;;#ASMSTART
5102; GFX90A-NEXT:    ; def s[6:7]
5103; GFX90A-NEXT:    ;;#ASMEND
5104; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
5105; GFX90A-NEXT:    ;;#ASMSTART
5106; GFX90A-NEXT:    ; use s[8:9]
5107; GFX90A-NEXT:    ;;#ASMEND
5108; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5109;
5110; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_u:
5111; GFX940:       ; %bb.0:
5112; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5113; GFX940-NEXT:    ;;#ASMSTART
5114; GFX940-NEXT:    ; def s[0:1]
5115; GFX940-NEXT:    ;;#ASMEND
5116; GFX940-NEXT:    ;;#ASMSTART
5117; GFX940-NEXT:    ; def s[2:3]
5118; GFX940-NEXT:    ;;#ASMEND
5119; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
5120; GFX940-NEXT:    ;;#ASMSTART
5121; GFX940-NEXT:    ; use s[8:9]
5122; GFX940-NEXT:    ;;#ASMEND
5123; GFX940-NEXT:    s_setpc_b64 s[30:31]
5124  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5125  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5126  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5127  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5128  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 poison>
5129  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5130  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5131  ret void
5132}
5133
; SGPR shuffle of two <3 x i16> inputs with mask <5, 3, poison>: lane 0 takes
; element 2 and lane 1 takes element 0 of %extract31 (mask indices 3-5 address
; the second operand); lane 2 is unconstrained. No lane reads %extract3, and
; the asm producing %vec0 is not marked sideeffect, so the autogenerated
; assertions contain only one def. They expect a single s_pack_ll_b32_b16
; that packs both registers of that def into s8 before the use of s[8:9].
5134define void @s_shuffle_v3i16_v3i16__5_3_u() {
5135; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_u:
5136; GFX900:       ; %bb.0:
5137; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5138; GFX900-NEXT:    ;;#ASMSTART
5139; GFX900-NEXT:    ; def s[4:5]
5140; GFX900-NEXT:    ;;#ASMEND
5141; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
5142; GFX900-NEXT:    ;;#ASMSTART
5143; GFX900-NEXT:    ; use s[8:9]
5144; GFX900-NEXT:    ;;#ASMEND
5145; GFX900-NEXT:    s_setpc_b64 s[30:31]
5146;
5147; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_u:
5148; GFX90A:       ; %bb.0:
5149; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5150; GFX90A-NEXT:    ;;#ASMSTART
5151; GFX90A-NEXT:    ; def s[4:5]
5152; GFX90A-NEXT:    ;;#ASMEND
5153; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
5154; GFX90A-NEXT:    ;;#ASMSTART
5155; GFX90A-NEXT:    ; use s[8:9]
5156; GFX90A-NEXT:    ;;#ASMEND
5157; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5158;
5159; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_u:
5160; GFX940:       ; %bb.0:
5161; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5162; GFX940-NEXT:    ;;#ASMSTART
5163; GFX940-NEXT:    ; def s[0:1]
5164; GFX940-NEXT:    ;;#ASMEND
5165; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
5166; GFX940-NEXT:    ;;#ASMSTART
5167; GFX940-NEXT:    ; use s[8:9]
5168; GFX940-NEXT:    ;;#ASMEND
5169; GFX940-NEXT:    s_setpc_b64 s[30:31]
5170  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5171  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5172  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5173  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5174  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 poison>
5175  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5176  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5177  ret void
5178}
5179
; SGPR shuffle of two <3 x i16> inputs with mask <5, 4, poison>: lane 0 takes
; element 2 and lane 1 takes element 1 of %extract31 (mask indices 3-5 address
; the second operand); lane 2 is unconstrained. No lane reads %extract3, and
; the asm producing %vec0 is not marked sideeffect, so the autogenerated
; assertions contain only one def. They expect a single s_pack_lh_b32_b16
; writing s8 before the use of s[8:9].
5180define void @s_shuffle_v3i16_v3i16__5_4_u() {
5181; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_u:
5182; GFX900:       ; %bb.0:
5183; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5184; GFX900-NEXT:    ;;#ASMSTART
5185; GFX900-NEXT:    ; def s[4:5]
5186; GFX900-NEXT:    ;;#ASMEND
5187; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
5188; GFX900-NEXT:    ;;#ASMSTART
5189; GFX900-NEXT:    ; use s[8:9]
5190; GFX900-NEXT:    ;;#ASMEND
5191; GFX900-NEXT:    s_setpc_b64 s[30:31]
5192;
5193; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_u:
5194; GFX90A:       ; %bb.0:
5195; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5196; GFX90A-NEXT:    ;;#ASMSTART
5197; GFX90A-NEXT:    ; def s[4:5]
5198; GFX90A-NEXT:    ;;#ASMEND
5199; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
5200; GFX90A-NEXT:    ;;#ASMSTART
5201; GFX90A-NEXT:    ; use s[8:9]
5202; GFX90A-NEXT:    ;;#ASMEND
5203; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5204;
5205; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_u:
5206; GFX940:       ; %bb.0:
5207; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5208; GFX940-NEXT:    ;;#ASMSTART
5209; GFX940-NEXT:    ; def s[0:1]
5210; GFX940-NEXT:    ;;#ASMEND
5211; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
5212; GFX940-NEXT:    ;;#ASMSTART
5213; GFX940-NEXT:    ; use s[8:9]
5214; GFX940-NEXT:    ;;#ASMEND
5215; GFX940-NEXT:    s_setpc_b64 s[30:31]
5216  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5217  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5218  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5219  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5220  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 poison>
5221  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5222  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5223  ret void
5224}
5225
; SGPR shuffle of two <3 x i16> inputs with mask <5, 5, poison>: lanes 0 and 1
; both take element 2 of %extract31 (mask indices 3-5 address the second
; operand); lane 2 is unconstrained. No lane reads %extract3, and the asm
; producing %vec0 is not marked sideeffect, so the autogenerated assertions
; contain only one def. They expect a single s_pack_ll_b32_b16 that uses the
; def's second register for both source operands and writes s8.
5226define void @s_shuffle_v3i16_v3i16__5_5_u() {
5227; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_u:
5228; GFX900:       ; %bb.0:
5229; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5230; GFX900-NEXT:    ;;#ASMSTART
5231; GFX900-NEXT:    ; def s[4:5]
5232; GFX900-NEXT:    ;;#ASMEND
5233; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
5234; GFX900-NEXT:    ;;#ASMSTART
5235; GFX900-NEXT:    ; use s[8:9]
5236; GFX900-NEXT:    ;;#ASMEND
5237; GFX900-NEXT:    s_setpc_b64 s[30:31]
5238;
5239; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_u:
5240; GFX90A:       ; %bb.0:
5241; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5242; GFX90A-NEXT:    ;;#ASMSTART
5243; GFX90A-NEXT:    ; def s[4:5]
5244; GFX90A-NEXT:    ;;#ASMEND
5245; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
5246; GFX90A-NEXT:    ;;#ASMSTART
5247; GFX90A-NEXT:    ; use s[8:9]
5248; GFX90A-NEXT:    ;;#ASMEND
5249; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5250;
5251; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_u:
5252; GFX940:       ; %bb.0:
5253; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5254; GFX940-NEXT:    ;;#ASMSTART
5255; GFX940-NEXT:    ; def s[0:1]
5256; GFX940-NEXT:    ;;#ASMEND
5257; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
5258; GFX940-NEXT:    ;;#ASMSTART
5259; GFX940-NEXT:    ; use s[8:9]
5260; GFX940-NEXT:    ;;#ASMEND
5261; GFX940-NEXT:    s_setpc_b64 s[30:31]
5262  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5263  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5264  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5265  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5266  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 poison>
5267  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5268  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5269  ret void
5270}
5271
; SGPR shuffle of two <3 x i16> inputs with mask <5, 5, 0>: lanes 0 and 1 both
; take element 2 of %extract31 (mask indices 3-5 address the second operand)
; and lane 2 takes element 0 of %extract3. Both inputs come from "=s" inline
; asm defs; the result is widened to <4 x i16> and pinned to s[8:9].
; The autogenerated assertions expect s_pack_ll_b32_b16 to build s8 from the
; second input and an s_mov_b32 to copy the first input's low register to s9.
5272define void @s_shuffle_v3i16_v3i16__5_5_0() {
5273; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_0:
5274; GFX900:       ; %bb.0:
5275; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5276; GFX900-NEXT:    ;;#ASMSTART
5277; GFX900-NEXT:    ; def s[4:5]
5278; GFX900-NEXT:    ;;#ASMEND
5279; GFX900-NEXT:    ;;#ASMSTART
5280; GFX900-NEXT:    ; def s[6:7]
5281; GFX900-NEXT:    ;;#ASMEND
5282; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
5283; GFX900-NEXT:    s_mov_b32 s9, s4
5284; GFX900-NEXT:    ;;#ASMSTART
5285; GFX900-NEXT:    ; use s[8:9]
5286; GFX900-NEXT:    ;;#ASMEND
5287; GFX900-NEXT:    s_setpc_b64 s[30:31]
5288;
5289; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_0:
5290; GFX90A:       ; %bb.0:
5291; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5292; GFX90A-NEXT:    ;;#ASMSTART
5293; GFX90A-NEXT:    ; def s[4:5]
5294; GFX90A-NEXT:    ;;#ASMEND
5295; GFX90A-NEXT:    ;;#ASMSTART
5296; GFX90A-NEXT:    ; def s[6:7]
5297; GFX90A-NEXT:    ;;#ASMEND
5298; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
5299; GFX90A-NEXT:    s_mov_b32 s9, s4
5300; GFX90A-NEXT:    ;;#ASMSTART
5301; GFX90A-NEXT:    ; use s[8:9]
5302; GFX90A-NEXT:    ;;#ASMEND
5303; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5304;
5305; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_0:
5306; GFX940:       ; %bb.0:
5307; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5308; GFX940-NEXT:    ;;#ASMSTART
5309; GFX940-NEXT:    ; def s[0:1]
5310; GFX940-NEXT:    ;;#ASMEND
5311; GFX940-NEXT:    ;;#ASMSTART
5312; GFX940-NEXT:    ; def s[2:3]
5313; GFX940-NEXT:    ;;#ASMEND
5314; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
5315; GFX940-NEXT:    s_mov_b32 s9, s0
5316; GFX940-NEXT:    ;;#ASMSTART
5317; GFX940-NEXT:    ; use s[8:9]
5318; GFX940-NEXT:    ;;#ASMEND
5319; GFX940-NEXT:    s_setpc_b64 s[30:31]
5320  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5321  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5322  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5323  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5324  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 0>
5325  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5326  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5327  ret void
5328}
5329
; SGPR shuffle of two <3 x i16> inputs with mask <5, 5, 1>: lanes 0 and 1 both
; take element 2 of %extract31 (mask indices 3-5 address the second operand)
; and lane 2 takes element 1 of %extract3. Both inputs come from "=s" inline
; asm defs; the result is widened to <4 x i16> and pinned to s[8:9].
; The autogenerated assertions expect an s_lshr_b32 by 16 of the first input's
; low register into s9, followed by s_pack_ll_b32_b16 building s8 from the
; second input.
5330define void @s_shuffle_v3i16_v3i16__5_5_1() {
5331; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_1:
5332; GFX900:       ; %bb.0:
5333; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5334; GFX900-NEXT:    ;;#ASMSTART
5335; GFX900-NEXT:    ; def s[4:5]
5336; GFX900-NEXT:    ;;#ASMEND
5337; GFX900-NEXT:    ;;#ASMSTART
5338; GFX900-NEXT:    ; def s[6:7]
5339; GFX900-NEXT:    ;;#ASMEND
5340; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
5341; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
5342; GFX900-NEXT:    ;;#ASMSTART
5343; GFX900-NEXT:    ; use s[8:9]
5344; GFX900-NEXT:    ;;#ASMEND
5345; GFX900-NEXT:    s_setpc_b64 s[30:31]
5346;
5347; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_1:
5348; GFX90A:       ; %bb.0:
5349; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5350; GFX90A-NEXT:    ;;#ASMSTART
5351; GFX90A-NEXT:    ; def s[4:5]
5352; GFX90A-NEXT:    ;;#ASMEND
5353; GFX90A-NEXT:    ;;#ASMSTART
5354; GFX90A-NEXT:    ; def s[6:7]
5355; GFX90A-NEXT:    ;;#ASMEND
5356; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
5357; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s7
5358; GFX90A-NEXT:    ;;#ASMSTART
5359; GFX90A-NEXT:    ; use s[8:9]
5360; GFX90A-NEXT:    ;;#ASMEND
5361; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5362;
5363; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_1:
5364; GFX940:       ; %bb.0:
5365; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5366; GFX940-NEXT:    ;;#ASMSTART
5367; GFX940-NEXT:    ; def s[0:1]
5368; GFX940-NEXT:    ;;#ASMEND
5369; GFX940-NEXT:    ;;#ASMSTART
5370; GFX940-NEXT:    ; def s[2:3]
5371; GFX940-NEXT:    ;;#ASMEND
5372; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
5373; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s3
5374; GFX940-NEXT:    ;;#ASMSTART
5375; GFX940-NEXT:    ; use s[8:9]
5376; GFX940-NEXT:    ;;#ASMEND
5377; GFX940-NEXT:    s_setpc_b64 s[30:31]
5378  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5379  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5380  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5381  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5382  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 1>
5383  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5384  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5385  ret void
5386}
5387
; SGPR shuffle of two <3 x i16> inputs with mask <5, 5, 2>: lanes 0 and 1 both
; take element 2 of %extract31 (mask indices 3-5 address the second operand)
; and lane 2 keeps element 2 of %extract3 in place. Both inputs come from
; "=s" inline asm defs; the result is widened to <4 x i16> and pinned to
; s[8:9]. The autogenerated assertions show the first def placed directly in
; s[8:9], so only s8 is rewritten (s_pack_ll_b32_b16 from the second def) and
; no instruction writes s9 after the def.
5388define void @s_shuffle_v3i16_v3i16__5_5_2() {
5389; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_2:
5390; GFX900:       ; %bb.0:
5391; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5392; GFX900-NEXT:    ;;#ASMSTART
5393; GFX900-NEXT:    ; def s[8:9]
5394; GFX900-NEXT:    ;;#ASMEND
5395; GFX900-NEXT:    ;;#ASMSTART
5396; GFX900-NEXT:    ; def s[4:5]
5397; GFX900-NEXT:    ;;#ASMEND
5398; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
5399; GFX900-NEXT:    ;;#ASMSTART
5400; GFX900-NEXT:    ; use s[8:9]
5401; GFX900-NEXT:    ;;#ASMEND
5402; GFX900-NEXT:    s_setpc_b64 s[30:31]
5403;
5404; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_2:
5405; GFX90A:       ; %bb.0:
5406; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5407; GFX90A-NEXT:    ;;#ASMSTART
5408; GFX90A-NEXT:    ; def s[8:9]
5409; GFX90A-NEXT:    ;;#ASMEND
5410; GFX90A-NEXT:    ;;#ASMSTART
5411; GFX90A-NEXT:    ; def s[4:5]
5412; GFX90A-NEXT:    ;;#ASMEND
5413; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
5414; GFX90A-NEXT:    ;;#ASMSTART
5415; GFX90A-NEXT:    ; use s[8:9]
5416; GFX90A-NEXT:    ;;#ASMEND
5417; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5418;
5419; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_2:
5420; GFX940:       ; %bb.0:
5421; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5422; GFX940-NEXT:    ;;#ASMSTART
5423; GFX940-NEXT:    ; def s[8:9]
5424; GFX940-NEXT:    ;;#ASMEND
5425; GFX940-NEXT:    ;;#ASMSTART
5426; GFX940-NEXT:    ; def s[0:1]
5427; GFX940-NEXT:    ;;#ASMEND
5428; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
5429; GFX940-NEXT:    ;;#ASMSTART
5430; GFX940-NEXT:    ; use s[8:9]
5431; GFX940-NEXT:    ;;#ASMEND
5432; GFX940-NEXT:    s_setpc_b64 s[30:31]
5433  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5434  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5435  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5436  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5437  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 2>
5438  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5439  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5440  ret void
5441}
5442
; SGPR shuffle of two <3 x i16> inputs with mask <5, 5, 3>: every lane reads
; %extract31 (mask indices 3-5 address the second operand) - lanes 0 and 1
; take its element 2, lane 2 takes its element 0. No lane reads %extract3, and
; the asm producing %vec0 is not marked sideeffect, so the autogenerated
; assertions contain only one def. They expect s_pack_ll_b32_b16 to build s8
; and an s_mov_b32 to copy the def's low register to s9.
5443define void @s_shuffle_v3i16_v3i16__5_5_3() {
5444; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_3:
5445; GFX900:       ; %bb.0:
5446; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5447; GFX900-NEXT:    ;;#ASMSTART
5448; GFX900-NEXT:    ; def s[4:5]
5449; GFX900-NEXT:    ;;#ASMEND
5450; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
5451; GFX900-NEXT:    s_mov_b32 s9, s4
5452; GFX900-NEXT:    ;;#ASMSTART
5453; GFX900-NEXT:    ; use s[8:9]
5454; GFX900-NEXT:    ;;#ASMEND
5455; GFX900-NEXT:    s_setpc_b64 s[30:31]
5456;
5457; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_3:
5458; GFX90A:       ; %bb.0:
5459; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5460; GFX90A-NEXT:    ;;#ASMSTART
5461; GFX90A-NEXT:    ; def s[4:5]
5462; GFX90A-NEXT:    ;;#ASMEND
5463; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
5464; GFX90A-NEXT:    s_mov_b32 s9, s4
5465; GFX90A-NEXT:    ;;#ASMSTART
5466; GFX90A-NEXT:    ; use s[8:9]
5467; GFX90A-NEXT:    ;;#ASMEND
5468; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5469;
5470; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_3:
5471; GFX940:       ; %bb.0:
5472; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5473; GFX940-NEXT:    ;;#ASMSTART
5474; GFX940-NEXT:    ; def s[0:1]
5475; GFX940-NEXT:    ;;#ASMEND
5476; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
5477; GFX940-NEXT:    s_mov_b32 s9, s0
5478; GFX940-NEXT:    ;;#ASMSTART
5479; GFX940-NEXT:    ; use s[8:9]
5480; GFX940-NEXT:    ;;#ASMEND
5481; GFX940-NEXT:    s_setpc_b64 s[30:31]
5482  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5483  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5484  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5485  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5486  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 3>
5487  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5488  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5489  ret void
5490}
5491
; SGPR shuffle of two <3 x i16> inputs with mask <5, 5, 4>: every lane reads
; %extract31 (mask indices 3-5 address the second operand) - lanes 0 and 1
; take its element 2, lane 2 takes its element 1. No lane reads %extract3, and
; the asm producing %vec0 is not marked sideeffect, so the autogenerated
; assertions contain only one def. They expect an s_lshr_b32 by 16 of the
; def's low register into s9, followed by s_pack_ll_b32_b16 building s8.
5492define void @s_shuffle_v3i16_v3i16__5_5_4() {
5493; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_5_4:
5494; GFX900:       ; %bb.0:
5495; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5496; GFX900-NEXT:    ;;#ASMSTART
5497; GFX900-NEXT:    ; def s[4:5]
5498; GFX900-NEXT:    ;;#ASMEND
5499; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
5500; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
5501; GFX900-NEXT:    ;;#ASMSTART
5502; GFX900-NEXT:    ; use s[8:9]
5503; GFX900-NEXT:    ;;#ASMEND
5504; GFX900-NEXT:    s_setpc_b64 s[30:31]
5505;
5506; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_5_4:
5507; GFX90A:       ; %bb.0:
5508; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5509; GFX90A-NEXT:    ;;#ASMSTART
5510; GFX90A-NEXT:    ; def s[4:5]
5511; GFX90A-NEXT:    ;;#ASMEND
5512; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
5513; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
5514; GFX90A-NEXT:    ;;#ASMSTART
5515; GFX90A-NEXT:    ; use s[8:9]
5516; GFX90A-NEXT:    ;;#ASMEND
5517; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5518;
5519; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_5_4:
5520; GFX940:       ; %bb.0:
5521; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5522; GFX940-NEXT:    ;;#ASMSTART
5523; GFX940-NEXT:    ; def s[0:1]
5524; GFX940-NEXT:    ;;#ASMEND
5525; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
5526; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
5527; GFX940-NEXT:    ;;#ASMSTART
5528; GFX940-NEXT:    ; use s[8:9]
5529; GFX940-NEXT:    ;;#ASMEND
5530; GFX940-NEXT:    s_setpc_b64 s[30:31]
5531  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5532  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5533  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5534  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5535  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 4>
5536  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5537  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5538  ret void
5539}
5540
; SGPR shuffle of two <3 x i16> inputs with mask <5, 5, 5>: all three lanes
; take element 2 of %extract31 (mask indices 3-5 address the second operand).
; No lane reads %extract3, and the asm producing %vec0 is not marked
; sideeffect, so the autogenerated assertions contain only one def. That def
; lands directly in s[8:9]; the only extra instruction expected is
; s_pack_ll_b32_b16 s8, s9, s9. All three tested targets share one set of
; assertions here (the common check prefix), unlike the per-target blocks
; used by most neighbouring tests.
5541define void @s_shuffle_v3i16_v3i16__5_5_5() {
5542; GFX9-LABEL: s_shuffle_v3i16_v3i16__5_5_5:
5543; GFX9:       ; %bb.0:
5544; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5545; GFX9-NEXT:    ;;#ASMSTART
5546; GFX9-NEXT:    ; def s[8:9]
5547; GFX9-NEXT:    ;;#ASMEND
5548; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
5549; GFX9-NEXT:    ;;#ASMSTART
5550; GFX9-NEXT:    ; use s[8:9]
5551; GFX9-NEXT:    ;;#ASMEND
5552; GFX9-NEXT:    s_setpc_b64 s[30:31]
5553  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5554  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5555  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5556  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5557  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 5, i32 5>
5558  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5559  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5560  ret void
5561}
5562
; Single-input SGPR shuffle of a <3 x i16> with mask <poison, 0, 0>: lanes 1
; and 2 both take element 0 of %extract3 and lane 0 is unconstrained (the
; second shuffle operand is poison). The input comes from an "=s" inline asm
; def; the result is widened to <4 x i16> and pinned to s[8:9].
; The autogenerated assertions expect s_lshl_b32 by 16 of the def's low
; register into s8 and an s_mov_b32 of that same register into s9.
5563define void @s_shuffle_v3i16_v3i16__u_0_0() {
5564; GFX900-LABEL: s_shuffle_v3i16_v3i16__u_0_0:
5565; GFX900:       ; %bb.0:
5566; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5567; GFX900-NEXT:    ;;#ASMSTART
5568; GFX900-NEXT:    ; def s[4:5]
5569; GFX900-NEXT:    ;;#ASMEND
5570; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
5571; GFX900-NEXT:    s_mov_b32 s9, s4
5572; GFX900-NEXT:    ;;#ASMSTART
5573; GFX900-NEXT:    ; use s[8:9]
5574; GFX900-NEXT:    ;;#ASMEND
5575; GFX900-NEXT:    s_setpc_b64 s[30:31]
5576;
5577; GFX90A-LABEL: s_shuffle_v3i16_v3i16__u_0_0:
5578; GFX90A:       ; %bb.0:
5579; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5580; GFX90A-NEXT:    ;;#ASMSTART
5581; GFX90A-NEXT:    ; def s[4:5]
5582; GFX90A-NEXT:    ;;#ASMEND
5583; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
5584; GFX90A-NEXT:    s_mov_b32 s9, s4
5585; GFX90A-NEXT:    ;;#ASMSTART
5586; GFX90A-NEXT:    ; use s[8:9]
5587; GFX90A-NEXT:    ;;#ASMEND
5588; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5589;
5590; GFX940-LABEL: s_shuffle_v3i16_v3i16__u_0_0:
5591; GFX940:       ; %bb.0:
5592; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5593; GFX940-NEXT:    ;;#ASMSTART
5594; GFX940-NEXT:    ; def s[0:1]
5595; GFX940-NEXT:    ;;#ASMEND
5596; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
5597; GFX940-NEXT:    s_mov_b32 s9, s0
5598; GFX940-NEXT:    ;;#ASMSTART
5599; GFX940-NEXT:    ; use s[8:9]
5600; GFX940-NEXT:    ;;#ASMEND
5601; GFX940-NEXT:    s_setpc_b64 s[30:31]
5602  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5603  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5604  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 0, i32 0>
5605  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5606  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5607  ret void
5608}
5609
; Single-input SGPR shuffle of a <3 x i16> with an all-zero mask
; (zeroinitializer): every lane takes element 0 of %extract3, i.e. a splat.
; The input comes from an "=s" inline asm def; the result is widened to
; <4 x i16> and pinned to s[8:9]. The autogenerated assertions expect
; s_pack_ll_b32_b16 with the def's low register as both sources for s8, and
; an s_mov_b32 of that same register into s9.
5610define void @s_shuffle_v3i16_v3i16__0_0_0() {
5611; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_0_0:
5612; GFX900:       ; %bb.0:
5613; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5614; GFX900-NEXT:    ;;#ASMSTART
5615; GFX900-NEXT:    ; def s[4:5]
5616; GFX900-NEXT:    ;;#ASMEND
5617; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
5618; GFX900-NEXT:    s_mov_b32 s9, s4
5619; GFX900-NEXT:    ;;#ASMSTART
5620; GFX900-NEXT:    ; use s[8:9]
5621; GFX900-NEXT:    ;;#ASMEND
5622; GFX900-NEXT:    s_setpc_b64 s[30:31]
5623;
5624; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_0_0:
5625; GFX90A:       ; %bb.0:
5626; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5627; GFX90A-NEXT:    ;;#ASMSTART
5628; GFX90A-NEXT:    ; def s[4:5]
5629; GFX90A-NEXT:    ;;#ASMEND
5630; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
5631; GFX90A-NEXT:    s_mov_b32 s9, s4
5632; GFX90A-NEXT:    ;;#ASMSTART
5633; GFX90A-NEXT:    ; use s[8:9]
5634; GFX90A-NEXT:    ;;#ASMEND
5635; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5636;
5637; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_0_0:
5638; GFX940:       ; %bb.0:
5639; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5640; GFX940-NEXT:    ;;#ASMSTART
5641; GFX940-NEXT:    ; def s[0:1]
5642; GFX940-NEXT:    ;;#ASMEND
5643; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
5644; GFX940-NEXT:    s_mov_b32 s9, s0
5645; GFX940-NEXT:    ;;#ASMSTART
5646; GFX940-NEXT:    ; use s[8:9]
5647; GFX940-NEXT:    ;;#ASMEND
5648; GFX940-NEXT:    s_setpc_b64 s[30:31]
5649  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5650  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5651  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> zeroinitializer
5652  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5653  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5654  ret void
5655}
5656
; Single-input SGPR shuffle of a <3 x i16> with mask <1, 0, 0>: lane 0 takes
; element 1 of %extract3, lanes 1 and 2 take element 0. The input comes from
; an "=s" inline asm def; the result is widened to <4 x i16> and pinned to
; s[8:9]. The autogenerated assertions expect three instructions: s_lshr_b32
; by 16 of the def's low register (overwriting the def's second register as
; scratch), s_pack_ll_b32_b16 combining that with the low register into s8,
; and an s_mov_b32 of the low register into s9.
5657define void @s_shuffle_v3i16_v3i16__1_0_0() {
5658; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_0_0:
5659; GFX900:       ; %bb.0:
5660; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5661; GFX900-NEXT:    ;;#ASMSTART
5662; GFX900-NEXT:    ; def s[4:5]
5663; GFX900-NEXT:    ;;#ASMEND
5664; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
5665; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
5666; GFX900-NEXT:    s_mov_b32 s9, s4
5667; GFX900-NEXT:    ;;#ASMSTART
5668; GFX900-NEXT:    ; use s[8:9]
5669; GFX900-NEXT:    ;;#ASMEND
5670; GFX900-NEXT:    s_setpc_b64 s[30:31]
5671;
5672; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_0_0:
5673; GFX90A:       ; %bb.0:
5674; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5675; GFX90A-NEXT:    ;;#ASMSTART
5676; GFX90A-NEXT:    ; def s[4:5]
5677; GFX90A-NEXT:    ;;#ASMEND
5678; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
5679; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
5680; GFX90A-NEXT:    s_mov_b32 s9, s4
5681; GFX90A-NEXT:    ;;#ASMSTART
5682; GFX90A-NEXT:    ; use s[8:9]
5683; GFX90A-NEXT:    ;;#ASMEND
5684; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5685;
5686; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_0_0:
5687; GFX940:       ; %bb.0:
5688; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5689; GFX940-NEXT:    ;;#ASMSTART
5690; GFX940-NEXT:    ; def s[0:1]
5691; GFX940-NEXT:    ;;#ASMEND
5692; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
5693; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
5694; GFX940-NEXT:    s_mov_b32 s9, s0
5695; GFX940-NEXT:    ;;#ASMSTART
5696; GFX940-NEXT:    ; use s[8:9]
5697; GFX940-NEXT:    ;;#ASMEND
5698; GFX940-NEXT:    s_setpc_b64 s[30:31]
5699  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5700  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5701  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 0, i32 0>
5702  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5703  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5704  ret void
5705}
5706
; Single-input SGPR shuffle of a <3 x i16> with mask <2, 0, 0>: lane 0 takes
; element 2 of %extract3, lanes 1 and 2 take element 0. The input comes from
; an "=s" inline asm def; the result is widened to <4 x i16> and pinned to
; s[8:9]. The autogenerated assertions expect s_pack_ll_b32_b16 combining the
; def's second and first registers into s8, and an s_mov_b32 of the first
; register into s9.
5707define void @s_shuffle_v3i16_v3i16__2_0_0() {
5708; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_0_0:
5709; GFX900:       ; %bb.0:
5710; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5711; GFX900-NEXT:    ;;#ASMSTART
5712; GFX900-NEXT:    ; def s[4:5]
5713; GFX900-NEXT:    ;;#ASMEND
5714; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
5715; GFX900-NEXT:    s_mov_b32 s9, s4
5716; GFX900-NEXT:    ;;#ASMSTART
5717; GFX900-NEXT:    ; use s[8:9]
5718; GFX900-NEXT:    ;;#ASMEND
5719; GFX900-NEXT:    s_setpc_b64 s[30:31]
5720;
5721; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_0_0:
5722; GFX90A:       ; %bb.0:
5723; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5724; GFX90A-NEXT:    ;;#ASMSTART
5725; GFX90A-NEXT:    ; def s[4:5]
5726; GFX90A-NEXT:    ;;#ASMEND
5727; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
5728; GFX90A-NEXT:    s_mov_b32 s9, s4
5729; GFX90A-NEXT:    ;;#ASMSTART
5730; GFX90A-NEXT:    ; use s[8:9]
5731; GFX90A-NEXT:    ;;#ASMEND
5732; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5733;
5734; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_0_0:
5735; GFX940:       ; %bb.0:
5736; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5737; GFX940-NEXT:    ;;#ASMSTART
5738; GFX940-NEXT:    ; def s[0:1]
5739; GFX940-NEXT:    ;;#ASMEND
5740; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
5741; GFX940-NEXT:    s_mov_b32 s9, s0
5742; GFX940-NEXT:    ;;#ASMSTART
5743; GFX940-NEXT:    ; use s[8:9]
5744; GFX940-NEXT:    ;;#ASMEND
5745; GFX940-NEXT:    s_setpc_b64 s[30:31]
5746  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5747  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5748  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 0, i32 0>
5749  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5750  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5751  ret void
5752}
5753
; Single-input SGPR shuffle of a <3 x i16> with mask <3, 0, 0>: lanes 1 and 2
; take element 0 of %extract3, while mask index 3 addresses element 0 of the
; second operand, which is poison here, so lane 0 carries no defined value.
; The input comes from an "=s" inline asm def; the result is widened to
; <4 x i16> and pinned to s[8:9]. The autogenerated assertions expect
; s_lshl_b32 by 16 of the def's low register into s8 and an s_mov_b32 of
; that same register into s9.
5754define void @s_shuffle_v3i16_v3i16__3_0_0() {
5755; GFX900-LABEL: s_shuffle_v3i16_v3i16__3_0_0:
5756; GFX900:       ; %bb.0:
5757; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5758; GFX900-NEXT:    ;;#ASMSTART
5759; GFX900-NEXT:    ; def s[4:5]
5760; GFX900-NEXT:    ;;#ASMEND
5761; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
5762; GFX900-NEXT:    s_mov_b32 s9, s4
5763; GFX900-NEXT:    ;;#ASMSTART
5764; GFX900-NEXT:    ; use s[8:9]
5765; GFX900-NEXT:    ;;#ASMEND
5766; GFX900-NEXT:    s_setpc_b64 s[30:31]
5767;
5768; GFX90A-LABEL: s_shuffle_v3i16_v3i16__3_0_0:
5769; GFX90A:       ; %bb.0:
5770; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5771; GFX90A-NEXT:    ;;#ASMSTART
5772; GFX90A-NEXT:    ; def s[4:5]
5773; GFX90A-NEXT:    ;;#ASMEND
5774; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
5775; GFX90A-NEXT:    s_mov_b32 s9, s4
5776; GFX90A-NEXT:    ;;#ASMSTART
5777; GFX90A-NEXT:    ; use s[8:9]
5778; GFX90A-NEXT:    ;;#ASMEND
5779; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5780;
5781; GFX940-LABEL: s_shuffle_v3i16_v3i16__3_0_0:
5782; GFX940:       ; %bb.0:
5783; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5784; GFX940-NEXT:    ;;#ASMSTART
5785; GFX940-NEXT:    ; def s[0:1]
5786; GFX940-NEXT:    ;;#ASMEND
5787; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
5788; GFX940-NEXT:    s_mov_b32 s9, s0
5789; GFX940-NEXT:    ;;#ASMSTART
5790; GFX940-NEXT:    ; use s[8:9]
5791; GFX940-NEXT:    ;;#ASMEND
5792; GFX940-NEXT:    s_setpc_b64 s[30:31]
5793  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5794  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5795  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 0, i32 0>
5796  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5797  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5798  ret void
5799}
5800
; Scalar-register shuffle of two <3 x i16> values with mask <4, 0, 0>:
; lane 0 takes element 1 of the second source (%extract31), lanes 1 and 2
; both take element 0 of the first source (%extract3). The result is widened
; to <4 x i16> (last lane poison) and handed to an asm use pinned to s[8:9].
; The check lines are autogenerated (see the NOTE on the file's first line).
5801define void @s_shuffle_v3i16_v3i16__4_0_0() {
5802; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_0_0:
5803; GFX900:       ; %bb.0:
5804; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5805; GFX900-NEXT:    ;;#ASMSTART
5806; GFX900-NEXT:    ; def s[4:5]
5807; GFX900-NEXT:    ;;#ASMEND
5808; GFX900-NEXT:    ;;#ASMSTART
5809; GFX900-NEXT:    ; def s[6:7]
5810; GFX900-NEXT:    ;;#ASMEND
5811; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
5812; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
5813; GFX900-NEXT:    s_mov_b32 s9, s4
5814; GFX900-NEXT:    ;;#ASMSTART
5815; GFX900-NEXT:    ; use s[8:9]
5816; GFX900-NEXT:    ;;#ASMEND
5817; GFX900-NEXT:    s_setpc_b64 s[30:31]
5818;
5819; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_0_0:
5820; GFX90A:       ; %bb.0:
5821; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5822; GFX90A-NEXT:    ;;#ASMSTART
5823; GFX90A-NEXT:    ; def s[4:5]
5824; GFX90A-NEXT:    ;;#ASMEND
5825; GFX90A-NEXT:    ;;#ASMSTART
5826; GFX90A-NEXT:    ; def s[6:7]
5827; GFX90A-NEXT:    ;;#ASMEND
5828; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
5829; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
5830; GFX90A-NEXT:    s_mov_b32 s9, s4
5831; GFX90A-NEXT:    ;;#ASMSTART
5832; GFX90A-NEXT:    ; use s[8:9]
5833; GFX90A-NEXT:    ;;#ASMEND
5834; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5835;
5836; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_0_0:
5837; GFX940:       ; %bb.0:
5838; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5839; GFX940-NEXT:    ;;#ASMSTART
5840; GFX940-NEXT:    ; def s[0:1]
5841; GFX940-NEXT:    ;;#ASMEND
5842; GFX940-NEXT:    ;;#ASMSTART
5843; GFX940-NEXT:    ; def s[2:3]
5844; GFX940-NEXT:    ;;#ASMEND
5845; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
5846; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
5847; GFX940-NEXT:    s_mov_b32 s9, s0
5848; GFX940-NEXT:    ;;#ASMSTART
5849; GFX940-NEXT:    ; use s[8:9]
5850; GFX940-NEXT:    ;;#ASMEND
5851; GFX940-NEXT:    s_setpc_b64 s[30:31]
5852  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5853  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5854  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5855  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5856  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 0, i32 0>
5857  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5858  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5859  ret void
5860}
5861
; Scalar-register shuffle of two <3 x i16> values with mask <5, 0, 0>:
; lane 0 takes element 2 of the second source (%extract31), lanes 1 and 2
; both take element 0 of the first source (%extract3). The widened
; <4 x i16> result is consumed by an asm use pinned to s[8:9].
5862define void @s_shuffle_v3i16_v3i16__5_0_0() {
5863; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_0:
5864; GFX900:       ; %bb.0:
5865; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5866; GFX900-NEXT:    ;;#ASMSTART
5867; GFX900-NEXT:    ; def s[4:5]
5868; GFX900-NEXT:    ;;#ASMEND
5869; GFX900-NEXT:    ;;#ASMSTART
5870; GFX900-NEXT:    ; def s[6:7]
5871; GFX900-NEXT:    ;;#ASMEND
5872; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
5873; GFX900-NEXT:    s_mov_b32 s9, s4
5874; GFX900-NEXT:    ;;#ASMSTART
5875; GFX900-NEXT:    ; use s[8:9]
5876; GFX900-NEXT:    ;;#ASMEND
5877; GFX900-NEXT:    s_setpc_b64 s[30:31]
5878;
5879; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_0:
5880; GFX90A:       ; %bb.0:
5881; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5882; GFX90A-NEXT:    ;;#ASMSTART
5883; GFX90A-NEXT:    ; def s[4:5]
5884; GFX90A-NEXT:    ;;#ASMEND
5885; GFX90A-NEXT:    ;;#ASMSTART
5886; GFX90A-NEXT:    ; def s[6:7]
5887; GFX90A-NEXT:    ;;#ASMEND
5888; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
5889; GFX90A-NEXT:    s_mov_b32 s9, s4
5890; GFX90A-NEXT:    ;;#ASMSTART
5891; GFX90A-NEXT:    ; use s[8:9]
5892; GFX90A-NEXT:    ;;#ASMEND
5893; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5894;
5895; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_0:
5896; GFX940:       ; %bb.0:
5897; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5898; GFX940-NEXT:    ;;#ASMSTART
5899; GFX940-NEXT:    ; def s[0:1]
5900; GFX940-NEXT:    ;;#ASMEND
5901; GFX940-NEXT:    ;;#ASMSTART
5902; GFX940-NEXT:    ; def s[2:3]
5903; GFX940-NEXT:    ;;#ASMEND
5904; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
5905; GFX940-NEXT:    s_mov_b32 s9, s0
5906; GFX940-NEXT:    ;;#ASMSTART
5907; GFX940-NEXT:    ; use s[8:9]
5908; GFX940-NEXT:    ;;#ASMEND
5909; GFX940-NEXT:    s_setpc_b64 s[30:31]
5910  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5911  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5912  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5913  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5914  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 0>
5915  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5916  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5917  ret void
5918}
5919
; Scalar-register shuffle of two <3 x i16> values with mask <5, poison, 0>:
; lane 0 takes element 2 of the second source (%extract31), lane 1 is left
; unconstrained (poison index), and lane 2 takes element 0 of the first
; source (%extract3). The expected output is two plain s_mov_b32 copies
; into s[8:9] on every target.
5920define void @s_shuffle_v3i16_v3i16__5_u_0() {
5921; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_0:
5922; GFX900:       ; %bb.0:
5923; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5924; GFX900-NEXT:    ;;#ASMSTART
5925; GFX900-NEXT:    ; def s[4:5]
5926; GFX900-NEXT:    ;;#ASMEND
5927; GFX900-NEXT:    ;;#ASMSTART
5928; GFX900-NEXT:    ; def s[6:7]
5929; GFX900-NEXT:    ;;#ASMEND
5930; GFX900-NEXT:    s_mov_b32 s8, s7
5931; GFX900-NEXT:    s_mov_b32 s9, s4
5932; GFX900-NEXT:    ;;#ASMSTART
5933; GFX900-NEXT:    ; use s[8:9]
5934; GFX900-NEXT:    ;;#ASMEND
5935; GFX900-NEXT:    s_setpc_b64 s[30:31]
5936;
5937; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_0:
5938; GFX90A:       ; %bb.0:
5939; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5940; GFX90A-NEXT:    ;;#ASMSTART
5941; GFX90A-NEXT:    ; def s[4:5]
5942; GFX90A-NEXT:    ;;#ASMEND
5943; GFX90A-NEXT:    ;;#ASMSTART
5944; GFX90A-NEXT:    ; def s[6:7]
5945; GFX90A-NEXT:    ;;#ASMEND
5946; GFX90A-NEXT:    s_mov_b32 s8, s7
5947; GFX90A-NEXT:    s_mov_b32 s9, s4
5948; GFX90A-NEXT:    ;;#ASMSTART
5949; GFX90A-NEXT:    ; use s[8:9]
5950; GFX90A-NEXT:    ;;#ASMEND
5951; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5952;
5953; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_0:
5954; GFX940:       ; %bb.0:
5955; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5956; GFX940-NEXT:    ;;#ASMSTART
5957; GFX940-NEXT:    ; def s[0:1]
5958; GFX940-NEXT:    ;;#ASMEND
5959; GFX940-NEXT:    ;;#ASMSTART
5960; GFX940-NEXT:    ; def s[2:3]
5961; GFX940-NEXT:    ;;#ASMEND
5962; GFX940-NEXT:    s_mov_b32 s8, s3
5963; GFX940-NEXT:    s_mov_b32 s9, s0
5964; GFX940-NEXT:    ;;#ASMSTART
5965; GFX940-NEXT:    ; use s[8:9]
5966; GFX940-NEXT:    ;;#ASMEND
5967; GFX940-NEXT:    s_setpc_b64 s[30:31]
5968  %vec0 = call <4 x i16> asm "; def $0", "=s"()
5969  %vec1 = call <4 x i16> asm "; def $0", "=s"()
5970  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5971  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
5972  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 0>
5973  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
5974  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
5975  ret void
5976}
5977
; Scalar-register shuffle of two <3 x i16> values with mask <5, 1, 0>:
; lane 0 takes element 2 of the second source (%extract31), lane 1 takes
; element 1 of the first source (%extract3), and lane 2 takes element 0 of
; the first source. The widened result is consumed by an asm use of s[8:9].
5978define void @s_shuffle_v3i16_v3i16__5_1_0() {
5979; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_0:
5980; GFX900:       ; %bb.0:
5981; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5982; GFX900-NEXT:    ;;#ASMSTART
5983; GFX900-NEXT:    ; def s[4:5]
5984; GFX900-NEXT:    ;;#ASMEND
5985; GFX900-NEXT:    ;;#ASMSTART
5986; GFX900-NEXT:    ; def s[6:7]
5987; GFX900-NEXT:    ;;#ASMEND
5988; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
5989; GFX900-NEXT:    s_mov_b32 s9, s4
5990; GFX900-NEXT:    ;;#ASMSTART
5991; GFX900-NEXT:    ; use s[8:9]
5992; GFX900-NEXT:    ;;#ASMEND
5993; GFX900-NEXT:    s_setpc_b64 s[30:31]
5994;
5995; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_0:
5996; GFX90A:       ; %bb.0:
5997; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5998; GFX90A-NEXT:    ;;#ASMSTART
5999; GFX90A-NEXT:    ; def s[4:5]
6000; GFX90A-NEXT:    ;;#ASMEND
6001; GFX90A-NEXT:    ;;#ASMSTART
6002; GFX90A-NEXT:    ; def s[6:7]
6003; GFX90A-NEXT:    ;;#ASMEND
6004; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
6005; GFX90A-NEXT:    s_mov_b32 s9, s4
6006; GFX90A-NEXT:    ;;#ASMSTART
6007; GFX90A-NEXT:    ; use s[8:9]
6008; GFX90A-NEXT:    ;;#ASMEND
6009; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6010;
6011; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_0:
6012; GFX940:       ; %bb.0:
6013; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6014; GFX940-NEXT:    ;;#ASMSTART
6015; GFX940-NEXT:    ; def s[0:1]
6016; GFX940-NEXT:    ;;#ASMEND
6017; GFX940-NEXT:    ;;#ASMSTART
6018; GFX940-NEXT:    ; def s[2:3]
6019; GFX940-NEXT:    ;;#ASMEND
6020; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
6021; GFX940-NEXT:    s_mov_b32 s9, s0
6022; GFX940-NEXT:    ;;#ASMSTART
6023; GFX940-NEXT:    ; use s[8:9]
6024; GFX940-NEXT:    ;;#ASMEND
6025; GFX940-NEXT:    s_setpc_b64 s[30:31]
6026  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6027  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6028  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6029  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6030  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 0>
6031  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6032  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6033  ret void
6034}
6035
; Scalar-register shuffle of two <3 x i16> values with mask <5, 2, 0>:
; lane 0 takes element 2 of the second source (%extract31), lane 1 takes
; element 2 of the first source (%extract3), and lane 2 takes element 0 of
; the first source. The widened result is consumed by an asm use of s[8:9].
6036define void @s_shuffle_v3i16_v3i16__5_2_0() {
6037; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_0:
6038; GFX900:       ; %bb.0:
6039; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6040; GFX900-NEXT:    ;;#ASMSTART
6041; GFX900-NEXT:    ; def s[4:5]
6042; GFX900-NEXT:    ;;#ASMEND
6043; GFX900-NEXT:    ;;#ASMSTART
6044; GFX900-NEXT:    ; def s[6:7]
6045; GFX900-NEXT:    ;;#ASMEND
6046; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
6047; GFX900-NEXT:    s_mov_b32 s9, s4
6048; GFX900-NEXT:    ;;#ASMSTART
6049; GFX900-NEXT:    ; use s[8:9]
6050; GFX900-NEXT:    ;;#ASMEND
6051; GFX900-NEXT:    s_setpc_b64 s[30:31]
6052;
6053; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_0:
6054; GFX90A:       ; %bb.0:
6055; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6056; GFX90A-NEXT:    ;;#ASMSTART
6057; GFX90A-NEXT:    ; def s[4:5]
6058; GFX90A-NEXT:    ;;#ASMEND
6059; GFX90A-NEXT:    ;;#ASMSTART
6060; GFX90A-NEXT:    ; def s[6:7]
6061; GFX90A-NEXT:    ;;#ASMEND
6062; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
6063; GFX90A-NEXT:    s_mov_b32 s9, s4
6064; GFX90A-NEXT:    ;;#ASMSTART
6065; GFX90A-NEXT:    ; use s[8:9]
6066; GFX90A-NEXT:    ;;#ASMEND
6067; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6068;
6069; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_0:
6070; GFX940:       ; %bb.0:
6071; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6072; GFX940-NEXT:    ;;#ASMSTART
6073; GFX940-NEXT:    ; def s[0:1]
6074; GFX940-NEXT:    ;;#ASMEND
6075; GFX940-NEXT:    ;;#ASMSTART
6076; GFX940-NEXT:    ; def s[2:3]
6077; GFX940-NEXT:    ;;#ASMEND
6078; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
6079; GFX940-NEXT:    s_mov_b32 s9, s0
6080; GFX940-NEXT:    ;;#ASMSTART
6081; GFX940-NEXT:    ; use s[8:9]
6082; GFX940-NEXT:    ;;#ASMEND
6083; GFX940-NEXT:    s_setpc_b64 s[30:31]
6084  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6085  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6086  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6087  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6088  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 0>
6089  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6090  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6091  ret void
6092}
6093
; Scalar-register shuffle of two <3 x i16> values with mask <5, 3, 0>:
; lane 0 takes element 2 and lane 1 takes element 0 of the second source
; (%extract31); lane 2 takes element 0 of the first source (%extract3).
; The widened result is consumed by an asm use of s[8:9].
6094define void @s_shuffle_v3i16_v3i16__5_3_0() {
6095; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_0:
6096; GFX900:       ; %bb.0:
6097; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6098; GFX900-NEXT:    ;;#ASMSTART
6099; GFX900-NEXT:    ; def s[4:5]
6100; GFX900-NEXT:    ;;#ASMEND
6101; GFX900-NEXT:    ;;#ASMSTART
6102; GFX900-NEXT:    ; def s[6:7]
6103; GFX900-NEXT:    ;;#ASMEND
6104; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
6105; GFX900-NEXT:    s_mov_b32 s9, s4
6106; GFX900-NEXT:    ;;#ASMSTART
6107; GFX900-NEXT:    ; use s[8:9]
6108; GFX900-NEXT:    ;;#ASMEND
6109; GFX900-NEXT:    s_setpc_b64 s[30:31]
6110;
6111; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_0:
6112; GFX90A:       ; %bb.0:
6113; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6114; GFX90A-NEXT:    ;;#ASMSTART
6115; GFX90A-NEXT:    ; def s[4:5]
6116; GFX90A-NEXT:    ;;#ASMEND
6117; GFX90A-NEXT:    ;;#ASMSTART
6118; GFX90A-NEXT:    ; def s[6:7]
6119; GFX90A-NEXT:    ;;#ASMEND
6120; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
6121; GFX90A-NEXT:    s_mov_b32 s9, s4
6122; GFX90A-NEXT:    ;;#ASMSTART
6123; GFX90A-NEXT:    ; use s[8:9]
6124; GFX90A-NEXT:    ;;#ASMEND
6125; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6126;
6127; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_0:
6128; GFX940:       ; %bb.0:
6129; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6130; GFX940-NEXT:    ;;#ASMSTART
6131; GFX940-NEXT:    ; def s[0:1]
6132; GFX940-NEXT:    ;;#ASMEND
6133; GFX940-NEXT:    ;;#ASMSTART
6134; GFX940-NEXT:    ; def s[2:3]
6135; GFX940-NEXT:    ;;#ASMEND
6136; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s2
6137; GFX940-NEXT:    s_mov_b32 s9, s0
6138; GFX940-NEXT:    ;;#ASMSTART
6139; GFX940-NEXT:    ; use s[8:9]
6140; GFX940-NEXT:    ;;#ASMEND
6141; GFX940-NEXT:    s_setpc_b64 s[30:31]
6142  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6143  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6144  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6145  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6146  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 0>
6147  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6148  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6149  ret void
6150}
6151
; Scalar-register shuffle of two <3 x i16> values with mask <5, 4, 0>:
; lane 0 takes element 2 and lane 1 takes element 1 of the second source
; (%extract31); lane 2 takes element 0 of the first source (%extract3).
; The widened result is consumed by an asm use of s[8:9].
6152define void @s_shuffle_v3i16_v3i16__5_4_0() {
6153; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_0:
6154; GFX900:       ; %bb.0:
6155; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6156; GFX900-NEXT:    ;;#ASMSTART
6157; GFX900-NEXT:    ; def s[4:5]
6158; GFX900-NEXT:    ;;#ASMEND
6159; GFX900-NEXT:    ;;#ASMSTART
6160; GFX900-NEXT:    ; def s[6:7]
6161; GFX900-NEXT:    ;;#ASMEND
6162; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
6163; GFX900-NEXT:    s_mov_b32 s9, s4
6164; GFX900-NEXT:    ;;#ASMSTART
6165; GFX900-NEXT:    ; use s[8:9]
6166; GFX900-NEXT:    ;;#ASMEND
6167; GFX900-NEXT:    s_setpc_b64 s[30:31]
6168;
6169; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_0:
6170; GFX90A:       ; %bb.0:
6171; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6172; GFX90A-NEXT:    ;;#ASMSTART
6173; GFX90A-NEXT:    ; def s[4:5]
6174; GFX90A-NEXT:    ;;#ASMEND
6175; GFX90A-NEXT:    ;;#ASMSTART
6176; GFX90A-NEXT:    ; def s[6:7]
6177; GFX90A-NEXT:    ;;#ASMEND
6178; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
6179; GFX90A-NEXT:    s_mov_b32 s9, s4
6180; GFX90A-NEXT:    ;;#ASMSTART
6181; GFX90A-NEXT:    ; use s[8:9]
6182; GFX90A-NEXT:    ;;#ASMEND
6183; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6184;
6185; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_0:
6186; GFX940:       ; %bb.0:
6187; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6188; GFX940-NEXT:    ;;#ASMSTART
6189; GFX940-NEXT:    ; def s[0:1]
6190; GFX940-NEXT:    ;;#ASMEND
6191; GFX940-NEXT:    ;;#ASMSTART
6192; GFX940-NEXT:    ; def s[2:3]
6193; GFX940-NEXT:    ;;#ASMEND
6194; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s2
6195; GFX940-NEXT:    s_mov_b32 s9, s0
6196; GFX940-NEXT:    ;;#ASMSTART
6197; GFX940-NEXT:    ; use s[8:9]
6198; GFX940-NEXT:    ;;#ASMEND
6199; GFX940-NEXT:    s_setpc_b64 s[30:31]
6200  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6201  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6202  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6203  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6204  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 0>
6205  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6206  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6207  ret void
6208}
6209
; Single-source scalar-register shuffle with mask <poison, 1, 1>: lane 0 is
; unconstrained, lanes 1 and 2 both take element 1 of %extract3. All three
; RUN configurations share the GFX9 prefix here, i.e. they are expected to
; emit identical code (one s_lshr_b32 on the value defined in s[8:9]).
6210define void @s_shuffle_v3i16_v3i16__u_1_1() {
6211; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_1_1:
6212; GFX9:       ; %bb.0:
6213; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6214; GFX9-NEXT:    ;;#ASMSTART
6215; GFX9-NEXT:    ; def s[8:9]
6216; GFX9-NEXT:    ;;#ASMEND
6217; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
6218; GFX9-NEXT:    ;;#ASMSTART
6219; GFX9-NEXT:    ; use s[8:9]
6220; GFX9-NEXT:    ;;#ASMEND
6221; GFX9-NEXT:    s_setpc_b64 s[30:31]
6222  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6223  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6224  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 1, i32 1>
6225  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6226  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6227  ret void
6228}
6229
; Single-source scalar-register shuffle with mask <0, 1, 1>: lanes 0 and 1
; keep elements 0 and 1 of %extract3 in place, lane 2 duplicates element 1.
; All three RUN configurations share the GFX9 prefix here, i.e. they are
; expected to emit identical code (one s_lshr_b32 on the value in s[8:9]).
6230define void @s_shuffle_v3i16_v3i16__0_1_1() {
6231; GFX9-LABEL: s_shuffle_v3i16_v3i16__0_1_1:
6232; GFX9:       ; %bb.0:
6233; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6234; GFX9-NEXT:    ;;#ASMSTART
6235; GFX9-NEXT:    ; def s[8:9]
6236; GFX9-NEXT:    ;;#ASMEND
6237; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
6238; GFX9-NEXT:    ;;#ASMSTART
6239; GFX9-NEXT:    ; use s[8:9]
6240; GFX9-NEXT:    ;;#ASMEND
6241; GFX9-NEXT:    s_setpc_b64 s[30:31]
6242  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6243  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6244  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 1, i32 1>
6245  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6246  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6247  ret void
6248}
6249
; Single-source scalar-register shuffle with mask <1, 1, 1>: element 1 of
; %extract3 is broadcast to all three result lanes. The widened <4 x i16>
; result is consumed by an asm use pinned to s[8:9].
6250define void @s_shuffle_v3i16_v3i16__1_1_1() {
6251; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_1_1:
6252; GFX900:       ; %bb.0:
6253; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6254; GFX900-NEXT:    ;;#ASMSTART
6255; GFX900-NEXT:    ; def s[4:5]
6256; GFX900-NEXT:    ;;#ASMEND
6257; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
6258; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
6259; GFX900-NEXT:    ;;#ASMSTART
6260; GFX900-NEXT:    ; use s[8:9]
6261; GFX900-NEXT:    ;;#ASMEND
6262; GFX900-NEXT:    s_setpc_b64 s[30:31]
6263;
6264; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_1_1:
6265; GFX90A:       ; %bb.0:
6266; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6267; GFX90A-NEXT:    ;;#ASMSTART
6268; GFX90A-NEXT:    ; def s[4:5]
6269; GFX90A-NEXT:    ;;#ASMEND
6270; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
6271; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
6272; GFX90A-NEXT:    ;;#ASMSTART
6273; GFX90A-NEXT:    ; use s[8:9]
6274; GFX90A-NEXT:    ;;#ASMEND
6275; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6276;
6277; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_1_1:
6278; GFX940:       ; %bb.0:
6279; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6280; GFX940-NEXT:    ;;#ASMSTART
6281; GFX940-NEXT:    ; def s[0:1]
6282; GFX940-NEXT:    ;;#ASMEND
6283; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
6284; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s0
6285; GFX940-NEXT:    ;;#ASMSTART
6286; GFX940-NEXT:    ; use s[8:9]
6287; GFX940-NEXT:    ;;#ASMEND
6288; GFX940-NEXT:    s_setpc_b64 s[30:31]
6289  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6290  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6291  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 1, i32 1>
6292  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6293  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6294  ret void
6295}
6296
; Single-source scalar-register shuffle with mask <2, 1, 1>: lane 0 takes
; element 2 of %extract3, lanes 1 and 2 both take element 1. The widened
; <4 x i16> result is consumed by an asm use pinned to s[8:9].
6297define void @s_shuffle_v3i16_v3i16__2_1_1() {
6298; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_1_1:
6299; GFX900:       ; %bb.0:
6300; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6301; GFX900-NEXT:    ;;#ASMSTART
6302; GFX900-NEXT:    ; def s[4:5]
6303; GFX900-NEXT:    ;;#ASMEND
6304; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
6305; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
6306; GFX900-NEXT:    ;;#ASMSTART
6307; GFX900-NEXT:    ; use s[8:9]
6308; GFX900-NEXT:    ;;#ASMEND
6309; GFX900-NEXT:    s_setpc_b64 s[30:31]
6310;
6311; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_1_1:
6312; GFX90A:       ; %bb.0:
6313; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6314; GFX90A-NEXT:    ;;#ASMSTART
6315; GFX90A-NEXT:    ; def s[4:5]
6316; GFX90A-NEXT:    ;;#ASMEND
6317; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
6318; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
6319; GFX90A-NEXT:    ;;#ASMSTART
6320; GFX90A-NEXT:    ; use s[8:9]
6321; GFX90A-NEXT:    ;;#ASMEND
6322; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6323;
6324; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_1_1:
6325; GFX940:       ; %bb.0:
6326; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6327; GFX940-NEXT:    ;;#ASMSTART
6328; GFX940-NEXT:    ; def s[0:1]
6329; GFX940-NEXT:    ;;#ASMEND
6330; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
6331; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
6332; GFX940-NEXT:    ;;#ASMSTART
6333; GFX940-NEXT:    ; use s[8:9]
6334; GFX940-NEXT:    ;;#ASMEND
6335; GFX940-NEXT:    s_setpc_b64 s[30:31]
6336  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6337  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6338  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 1, i32 1>
6339  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6340  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6341  ret void
6342}
6343
; Single-source scalar-register shuffle with mask <3, 1, 1>. The second
; shuffle operand is poison, so index 3 (its element 0) leaves lane 0
; undefined; lanes 1 and 2 both take element 1 of %extract3. All three RUN
; configurations share the GFX9 prefix here, i.e. they are expected to emit
; identical code (one s_lshr_b32 on the value defined in s[8:9]).
6344define void @s_shuffle_v3i16_v3i16__3_1_1() {
6345; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_1_1:
6346; GFX9:       ; %bb.0:
6347; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6348; GFX9-NEXT:    ;;#ASMSTART
6349; GFX9-NEXT:    ; def s[8:9]
6350; GFX9-NEXT:    ;;#ASMEND
6351; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
6352; GFX9-NEXT:    ;;#ASMSTART
6353; GFX9-NEXT:    ; use s[8:9]
6354; GFX9-NEXT:    ;;#ASMEND
6355; GFX9-NEXT:    s_setpc_b64 s[30:31]
6356  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6357  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6358  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 1, i32 1>
6359  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6360  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6361  ret void
6362}
6363
; Scalar-register shuffle of two <3 x i16> values with mask <4, 1, 1>:
; lane 0 takes element 1 of the second source (%extract31), lanes 1 and 2
; both take element 1 of the first source (%extract3). The widened result
; is consumed by an asm use pinned to s[8:9].
6364define void @s_shuffle_v3i16_v3i16__4_1_1() {
6365; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_1_1:
6366; GFX900:       ; %bb.0:
6367; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6368; GFX900-NEXT:    ;;#ASMSTART
6369; GFX900-NEXT:    ; def s[4:5]
6370; GFX900-NEXT:    ;;#ASMEND
6371; GFX900-NEXT:    ;;#ASMSTART
6372; GFX900-NEXT:    ; def s[6:7]
6373; GFX900-NEXT:    ;;#ASMEND
6374; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s6, s4
6375; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
6376; GFX900-NEXT:    ;;#ASMSTART
6377; GFX900-NEXT:    ; use s[8:9]
6378; GFX900-NEXT:    ;;#ASMEND
6379; GFX900-NEXT:    s_setpc_b64 s[30:31]
6380;
6381; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_1_1:
6382; GFX90A:       ; %bb.0:
6383; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6384; GFX90A-NEXT:    ;;#ASMSTART
6385; GFX90A-NEXT:    ; def s[4:5]
6386; GFX90A-NEXT:    ;;#ASMEND
6387; GFX90A-NEXT:    ;;#ASMSTART
6388; GFX90A-NEXT:    ; def s[6:7]
6389; GFX90A-NEXT:    ;;#ASMEND
6390; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s6, s4
6391; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
6392; GFX90A-NEXT:    ;;#ASMSTART
6393; GFX90A-NEXT:    ; use s[8:9]
6394; GFX90A-NEXT:    ;;#ASMEND
6395; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6396;
6397; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_1_1:
6398; GFX940:       ; %bb.0:
6399; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6400; GFX940-NEXT:    ;;#ASMSTART
6401; GFX940-NEXT:    ; def s[0:1]
6402; GFX940-NEXT:    ;;#ASMEND
6403; GFX940-NEXT:    ;;#ASMSTART
6404; GFX940-NEXT:    ; def s[2:3]
6405; GFX940-NEXT:    ;;#ASMEND
6406; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s2, s0
6407; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
6408; GFX940-NEXT:    ;;#ASMSTART
6409; GFX940-NEXT:    ; use s[8:9]
6410; GFX940-NEXT:    ;;#ASMEND
6411; GFX940-NEXT:    s_setpc_b64 s[30:31]
6412  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6413  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6414  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6415  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6416  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 1, i32 1>
6417  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6418  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6419  ret void
6420}
6421
; Scalar-register shuffle of two <3 x i16> values with mask <5, 1, 1>:
; lane 0 takes element 2 of the second source (%extract31), lanes 1 and 2
; both take element 1 of the first source (%extract3). The widened result
; is consumed by an asm use pinned to s[8:9].
6422define void @s_shuffle_v3i16_v3i16__5_1_1() {
6423; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_1:
6424; GFX900:       ; %bb.0:
6425; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6426; GFX900-NEXT:    ;;#ASMSTART
6427; GFX900-NEXT:    ; def s[4:5]
6428; GFX900-NEXT:    ;;#ASMEND
6429; GFX900-NEXT:    ;;#ASMSTART
6430; GFX900-NEXT:    ; def s[6:7]
6431; GFX900-NEXT:    ;;#ASMEND
6432; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
6433; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
6434; GFX900-NEXT:    ;;#ASMSTART
6435; GFX900-NEXT:    ; use s[8:9]
6436; GFX900-NEXT:    ;;#ASMEND
6437; GFX900-NEXT:    s_setpc_b64 s[30:31]
6438;
6439; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_1:
6440; GFX90A:       ; %bb.0:
6441; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6442; GFX90A-NEXT:    ;;#ASMSTART
6443; GFX90A-NEXT:    ; def s[4:5]
6444; GFX90A-NEXT:    ;;#ASMEND
6445; GFX90A-NEXT:    ;;#ASMSTART
6446; GFX90A-NEXT:    ; def s[6:7]
6447; GFX90A-NEXT:    ;;#ASMEND
6448; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
6449; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
6450; GFX90A-NEXT:    ;;#ASMSTART
6451; GFX90A-NEXT:    ; use s[8:9]
6452; GFX90A-NEXT:    ;;#ASMEND
6453; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6454;
6455; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_1:
6456; GFX940:       ; %bb.0:
6457; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6458; GFX940-NEXT:    ;;#ASMSTART
6459; GFX940-NEXT:    ; def s[0:1]
6460; GFX940-NEXT:    ;;#ASMEND
6461; GFX940-NEXT:    ;;#ASMSTART
6462; GFX940-NEXT:    ; def s[2:3]
6463; GFX940-NEXT:    ;;#ASMEND
6464; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
6465; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
6466; GFX940-NEXT:    ;;#ASMSTART
6467; GFX940-NEXT:    ; use s[8:9]
6468; GFX940-NEXT:    ;;#ASMEND
6469; GFX940-NEXT:    s_setpc_b64 s[30:31]
6470  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6471  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6472  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6473  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6474  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 1>
6475  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6476  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6477  ret void
6478}
6479
; Scalar-register shuffle of two <3 x i16> values with mask <5, poison, 1>:
; lane 0 takes element 2 of the second source (%extract31), lane 1 is left
; unconstrained (poison index), and lane 2 takes element 1 of the first
; source (%extract3). The widened result is consumed by an asm use of s[8:9].
6480define void @s_shuffle_v3i16_v3i16__5_u_1() {
6481; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_1:
6482; GFX900:       ; %bb.0:
6483; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6484; GFX900-NEXT:    ;;#ASMSTART
6485; GFX900-NEXT:    ; def s[4:5]
6486; GFX900-NEXT:    ;;#ASMEND
6487; GFX900-NEXT:    ;;#ASMSTART
6488; GFX900-NEXT:    ; def s[6:7]
6489; GFX900-NEXT:    ;;#ASMEND
6490; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
6491; GFX900-NEXT:    s_mov_b32 s8, s7
6492; GFX900-NEXT:    ;;#ASMSTART
6493; GFX900-NEXT:    ; use s[8:9]
6494; GFX900-NEXT:    ;;#ASMEND
6495; GFX900-NEXT:    s_setpc_b64 s[30:31]
6496;
6497; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_1:
6498; GFX90A:       ; %bb.0:
6499; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6500; GFX90A-NEXT:    ;;#ASMSTART
6501; GFX90A-NEXT:    ; def s[4:5]
6502; GFX90A-NEXT:    ;;#ASMEND
6503; GFX90A-NEXT:    ;;#ASMSTART
6504; GFX90A-NEXT:    ; def s[6:7]
6505; GFX90A-NEXT:    ;;#ASMEND
6506; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
6507; GFX90A-NEXT:    s_mov_b32 s8, s7
6508; GFX90A-NEXT:    ;;#ASMSTART
6509; GFX90A-NEXT:    ; use s[8:9]
6510; GFX90A-NEXT:    ;;#ASMEND
6511; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6512;
6513; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_1:
6514; GFX940:       ; %bb.0:
6515; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6516; GFX940-NEXT:    ;;#ASMSTART
6517; GFX940-NEXT:    ; def s[0:1]
6518; GFX940-NEXT:    ;;#ASMEND
6519; GFX940-NEXT:    ;;#ASMSTART
6520; GFX940-NEXT:    ; def s[2:3]
6521; GFX940-NEXT:    ;;#ASMEND
6522; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
6523; GFX940-NEXT:    s_mov_b32 s8, s3
6524; GFX940-NEXT:    ;;#ASMSTART
6525; GFX940-NEXT:    ; use s[8:9]
6526; GFX940-NEXT:    ;;#ASMEND
6527; GFX940-NEXT:    s_setpc_b64 s[30:31]
6528  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6529  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6530  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6531  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6532  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 1>
6533  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6534  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6535  ret void
6536}
6537
; Scalar-register shuffle of two <3 x i16> values with mask <5, 0, 1>:
; lane 0 takes element 2 of the second source (%extract31), lane 1 takes
; element 0 and lane 2 takes element 1 of the first source (%extract3).
; The widened result is consumed by an asm use pinned to s[8:9].
6538define void @s_shuffle_v3i16_v3i16__5_0_1() {
6539; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_1:
6540; GFX900:       ; %bb.0:
6541; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6542; GFX900-NEXT:    ;;#ASMSTART
6543; GFX900-NEXT:    ; def s[4:5]
6544; GFX900-NEXT:    ;;#ASMEND
6545; GFX900-NEXT:    ;;#ASMSTART
6546; GFX900-NEXT:    ; def s[6:7]
6547; GFX900-NEXT:    ;;#ASMEND
6548; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
6549; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
6550; GFX900-NEXT:    ;;#ASMSTART
6551; GFX900-NEXT:    ; use s[8:9]
6552; GFX900-NEXT:    ;;#ASMEND
6553; GFX900-NEXT:    s_setpc_b64 s[30:31]
6554;
6555; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_1:
6556; GFX90A:       ; %bb.0:
6557; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6558; GFX90A-NEXT:    ;;#ASMSTART
6559; GFX90A-NEXT:    ; def s[4:5]
6560; GFX90A-NEXT:    ;;#ASMEND
6561; GFX90A-NEXT:    ;;#ASMSTART
6562; GFX90A-NEXT:    ; def s[6:7]
6563; GFX90A-NEXT:    ;;#ASMEND
6564; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
6565; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
6566; GFX90A-NEXT:    ;;#ASMSTART
6567; GFX90A-NEXT:    ; use s[8:9]
6568; GFX90A-NEXT:    ;;#ASMEND
6569; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6570;
6571; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_1:
6572; GFX940:       ; %bb.0:
6573; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6574; GFX940-NEXT:    ;;#ASMSTART
6575; GFX940-NEXT:    ; def s[0:1]
6576; GFX940-NEXT:    ;;#ASMEND
6577; GFX940-NEXT:    ;;#ASMSTART
6578; GFX940-NEXT:    ; def s[2:3]
6579; GFX940-NEXT:    ;;#ASMEND
6580; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
6581; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
6582; GFX940-NEXT:    ;;#ASMSTART
6583; GFX940-NEXT:    ; use s[8:9]
6584; GFX940-NEXT:    ;;#ASMEND
6585; GFX940-NEXT:    s_setpc_b64 s[30:31]
6586  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6587  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6588  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6589  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6590  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 1>
6591  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6592  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6593  ret void
6594}
6595
; SGPR shuffle test, mask <5, 2, 1>: result lane 0 is element 2 of the second
; input (%extract31); lane 1 is element 2 and lane 2 is element 1 of the first
; input (%extract3). The result is widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: one s_pack_ll_b32_b16 followed by one
; s_lshr_b32. The check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
6596define void @s_shuffle_v3i16_v3i16__5_2_1() {
6597; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_1:
6598; GFX900:       ; %bb.0:
6599; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6600; GFX900-NEXT:    ;;#ASMSTART
6601; GFX900-NEXT:    ; def s[4:5]
6602; GFX900-NEXT:    ;;#ASMEND
6603; GFX900-NEXT:    ;;#ASMSTART
6604; GFX900-NEXT:    ; def s[6:7]
6605; GFX900-NEXT:    ;;#ASMEND
6606; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
6607; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
6608; GFX900-NEXT:    ;;#ASMSTART
6609; GFX900-NEXT:    ; use s[8:9]
6610; GFX900-NEXT:    ;;#ASMEND
6611; GFX900-NEXT:    s_setpc_b64 s[30:31]
6612;
6613; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_1:
6614; GFX90A:       ; %bb.0:
6615; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6616; GFX90A-NEXT:    ;;#ASMSTART
6617; GFX90A-NEXT:    ; def s[4:5]
6618; GFX90A-NEXT:    ;;#ASMEND
6619; GFX90A-NEXT:    ;;#ASMSTART
6620; GFX90A-NEXT:    ; def s[6:7]
6621; GFX90A-NEXT:    ;;#ASMEND
6622; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
6623; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
6624; GFX90A-NEXT:    ;;#ASMSTART
6625; GFX90A-NEXT:    ; use s[8:9]
6626; GFX90A-NEXT:    ;;#ASMEND
6627; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6628;
6629; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_1:
6630; GFX940:       ; %bb.0:
6631; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6632; GFX940-NEXT:    ;;#ASMSTART
6633; GFX940-NEXT:    ; def s[0:1]
6634; GFX940-NEXT:    ;;#ASMEND
6635; GFX940-NEXT:    ;;#ASMSTART
6636; GFX940-NEXT:    ; def s[2:3]
6637; GFX940-NEXT:    ;;#ASMEND
6638; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
6639; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
6640; GFX940-NEXT:    ;;#ASMSTART
6641; GFX940-NEXT:    ; use s[8:9]
6642; GFX940-NEXT:    ;;#ASMEND
6643; GFX940-NEXT:    s_setpc_b64 s[30:31]
6644  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6645  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6646  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6647  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6648  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 1>
6649  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6650  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6651  ret void
6652}
6653
; SGPR shuffle test, mask <5, 3, 1>: result lanes 0 and 1 are elements 2 and 0
; of the second input (%extract31); lane 2 is element 1 of the first input
; (%extract3). The result is widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: one s_pack_ll_b32_b16 followed by one
; s_lshr_b32. The check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
6654define void @s_shuffle_v3i16_v3i16__5_3_1() {
6655; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_1:
6656; GFX900:       ; %bb.0:
6657; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6658; GFX900-NEXT:    ;;#ASMSTART
6659; GFX900-NEXT:    ; def s[4:5]
6660; GFX900-NEXT:    ;;#ASMEND
6661; GFX900-NEXT:    ;;#ASMSTART
6662; GFX900-NEXT:    ; def s[6:7]
6663; GFX900-NEXT:    ;;#ASMEND
6664; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
6665; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
6666; GFX900-NEXT:    ;;#ASMSTART
6667; GFX900-NEXT:    ; use s[8:9]
6668; GFX900-NEXT:    ;;#ASMEND
6669; GFX900-NEXT:    s_setpc_b64 s[30:31]
6670;
6671; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_1:
6672; GFX90A:       ; %bb.0:
6673; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6674; GFX90A-NEXT:    ;;#ASMSTART
6675; GFX90A-NEXT:    ; def s[4:5]
6676; GFX90A-NEXT:    ;;#ASMEND
6677; GFX90A-NEXT:    ;;#ASMSTART
6678; GFX90A-NEXT:    ; def s[6:7]
6679; GFX90A-NEXT:    ;;#ASMEND
6680; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s6
6681; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
6682; GFX90A-NEXT:    ;;#ASMSTART
6683; GFX90A-NEXT:    ; use s[8:9]
6684; GFX90A-NEXT:    ;;#ASMEND
6685; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6686;
6687; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_1:
6688; GFX940:       ; %bb.0:
6689; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6690; GFX940-NEXT:    ;;#ASMSTART
6691; GFX940-NEXT:    ; def s[0:1]
6692; GFX940-NEXT:    ;;#ASMEND
6693; GFX940-NEXT:    ;;#ASMSTART
6694; GFX940-NEXT:    ; def s[2:3]
6695; GFX940-NEXT:    ;;#ASMEND
6696; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s2
6697; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
6698; GFX940-NEXT:    ;;#ASMSTART
6699; GFX940-NEXT:    ; use s[8:9]
6700; GFX940-NEXT:    ;;#ASMEND
6701; GFX940-NEXT:    s_setpc_b64 s[30:31]
6702  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6703  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6704  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6705  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6706  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 1>
6707  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6708  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6709  ret void
6710}
6711
; SGPR shuffle test, mask <5, 4, 1>: result lanes 0 and 1 are elements 2 and 1
; of the second input (%extract31); lane 2 is element 1 of the first input
; (%extract3). The result is widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: one s_pack_lh_b32_b16 followed by one
; s_lshr_b32. The check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
6712define void @s_shuffle_v3i16_v3i16__5_4_1() {
6713; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_1:
6714; GFX900:       ; %bb.0:
6715; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6716; GFX900-NEXT:    ;;#ASMSTART
6717; GFX900-NEXT:    ; def s[4:5]
6718; GFX900-NEXT:    ;;#ASMEND
6719; GFX900-NEXT:    ;;#ASMSTART
6720; GFX900-NEXT:    ; def s[6:7]
6721; GFX900-NEXT:    ;;#ASMEND
6722; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
6723; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
6724; GFX900-NEXT:    ;;#ASMSTART
6725; GFX900-NEXT:    ; use s[8:9]
6726; GFX900-NEXT:    ;;#ASMEND
6727; GFX900-NEXT:    s_setpc_b64 s[30:31]
6728;
6729; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_1:
6730; GFX90A:       ; %bb.0:
6731; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6732; GFX90A-NEXT:    ;;#ASMSTART
6733; GFX90A-NEXT:    ; def s[4:5]
6734; GFX90A-NEXT:    ;;#ASMEND
6735; GFX90A-NEXT:    ;;#ASMSTART
6736; GFX90A-NEXT:    ; def s[6:7]
6737; GFX90A-NEXT:    ;;#ASMEND
6738; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s6
6739; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
6740; GFX90A-NEXT:    ;;#ASMSTART
6741; GFX90A-NEXT:    ; use s[8:9]
6742; GFX90A-NEXT:    ;;#ASMEND
6743; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6744;
6745; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_1:
6746; GFX940:       ; %bb.0:
6747; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6748; GFX940-NEXT:    ;;#ASMSTART
6749; GFX940-NEXT:    ; def s[0:1]
6750; GFX940-NEXT:    ;;#ASMEND
6751; GFX940-NEXT:    ;;#ASMSTART
6752; GFX940-NEXT:    ; def s[2:3]
6753; GFX940-NEXT:    ;;#ASMEND
6754; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s2
6755; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
6756; GFX940-NEXT:    ;;#ASMSTART
6757; GFX940-NEXT:    ; use s[8:9]
6758; GFX940-NEXT:    ;;#ASMEND
6759; GFX940-NEXT:    s_setpc_b64 s[30:31]
6760  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6761  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6762  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6763  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6764  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 1>
6765  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6766  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6767  ret void
6768}
6769
; SGPR shuffle test with a single input, mask <poison, 2, 2>: result lane 0 is
; unspecified; lanes 1 and 2 are both element 2 of %extract3. The result is
; widened to <4 x i16> and consumed in s[8:9].
; Expected (one shared check block for all RUN lines): a single s_lshl_b32
; by 16 writing s8. The check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
6770define void @s_shuffle_v3i16_v3i16__u_2_2() {
6771; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_2_2:
6772; GFX9:       ; %bb.0:
6773; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6774; GFX9-NEXT:    ;;#ASMSTART
6775; GFX9-NEXT:    ; def s[8:9]
6776; GFX9-NEXT:    ;;#ASMEND
6777; GFX9-NEXT:    s_lshl_b32 s8, s9, 16
6778; GFX9-NEXT:    ;;#ASMSTART
6779; GFX9-NEXT:    ; use s[8:9]
6780; GFX9-NEXT:    ;;#ASMEND
6781; GFX9-NEXT:    s_setpc_b64 s[30:31]
6782  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6783  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6784  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 2, i32 2>
6785  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6786  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6787  ret void
6788}
6789
; SGPR shuffle test with a single input, mask <0, 2, 2>: result lane 0 is
; element 0 of %extract3; lanes 1 and 2 are both element 2. The result is
; widened to <4 x i16> and consumed in s[8:9].
; Expected (one shared check block for all RUN lines): a single
; s_pack_ll_b32_b16 writing s8. The check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
6790define void @s_shuffle_v3i16_v3i16__0_2_2() {
6791; GFX9-LABEL: s_shuffle_v3i16_v3i16__0_2_2:
6792; GFX9:       ; %bb.0:
6793; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6794; GFX9-NEXT:    ;;#ASMSTART
6795; GFX9-NEXT:    ; def s[8:9]
6796; GFX9-NEXT:    ;;#ASMEND
6797; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s8, s9
6798; GFX9-NEXT:    ;;#ASMSTART
6799; GFX9-NEXT:    ; use s[8:9]
6800; GFX9-NEXT:    ;;#ASMEND
6801; GFX9-NEXT:    s_setpc_b64 s[30:31]
6802  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6803  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6804  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 2, i32 2>
6805  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6806  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6807  ret void
6808}
6809
; SGPR shuffle test with a single input, mask <1, 2, 2>: result lane 0 is
; element 1 of %extract3; lanes 1 and 2 are both element 2. The result is
; widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: one s_lshr_b32 into a scratch SGPR
; followed by one s_pack_ll_b32_b16 writing s8; only the scratch register
; differs (s4 vs. s0). The check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
6810define void @s_shuffle_v3i16_v3i16__1_2_2() {
6811; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_2_2:
6812; GFX900:       ; %bb.0:
6813; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6814; GFX900-NEXT:    ;;#ASMSTART
6815; GFX900-NEXT:    ; def s[8:9]
6816; GFX900-NEXT:    ;;#ASMEND
6817; GFX900-NEXT:    s_lshr_b32 s4, s8, 16
6818; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
6819; GFX900-NEXT:    ;;#ASMSTART
6820; GFX900-NEXT:    ; use s[8:9]
6821; GFX900-NEXT:    ;;#ASMEND
6822; GFX900-NEXT:    s_setpc_b64 s[30:31]
6823;
6824; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_2_2:
6825; GFX90A:       ; %bb.0:
6826; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6827; GFX90A-NEXT:    ;;#ASMSTART
6828; GFX90A-NEXT:    ; def s[8:9]
6829; GFX90A-NEXT:    ;;#ASMEND
6830; GFX90A-NEXT:    s_lshr_b32 s4, s8, 16
6831; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
6832; GFX90A-NEXT:    ;;#ASMSTART
6833; GFX90A-NEXT:    ; use s[8:9]
6834; GFX90A-NEXT:    ;;#ASMEND
6835; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6836;
6837; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_2_2:
6838; GFX940:       ; %bb.0:
6839; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6840; GFX940-NEXT:    ;;#ASMSTART
6841; GFX940-NEXT:    ; def s[8:9]
6842; GFX940-NEXT:    ;;#ASMEND
6843; GFX940-NEXT:    s_lshr_b32 s0, s8, 16
6844; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
6845; GFX940-NEXT:    ;;#ASMSTART
6846; GFX940-NEXT:    ; use s[8:9]
6847; GFX940-NEXT:    ;;#ASMEND
6848; GFX940-NEXT:    s_setpc_b64 s[30:31]
6849  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6850  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6851  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 2, i32 2>
6852  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6853  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6854  ret void
6855}
6856
; SGPR shuffle test with a single input, mask <2, 2, 2>: every result lane is
; element 2 of %extract3 (a splat). The result is widened to <4 x i16> and
; consumed in s[8:9].
; Expected (one shared check block for all RUN lines): a single
; s_pack_ll_b32_b16 of s9 with itself into s8. The check lines are
; autogenerated by utils/update_llc_test_checks.py; regenerate them instead of
; editing by hand.
6857define void @s_shuffle_v3i16_v3i16__2_2_2() {
6858; GFX9-LABEL: s_shuffle_v3i16_v3i16__2_2_2:
6859; GFX9:       ; %bb.0:
6860; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6861; GFX9-NEXT:    ;;#ASMSTART
6862; GFX9-NEXT:    ; def s[8:9]
6863; GFX9-NEXT:    ;;#ASMEND
6864; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
6865; GFX9-NEXT:    ;;#ASMSTART
6866; GFX9-NEXT:    ; use s[8:9]
6867; GFX9-NEXT:    ;;#ASMEND
6868; GFX9-NEXT:    s_setpc_b64 s[30:31]
6869  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6870  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6871  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 2, i32 2>
6872  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6873  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6874  ret void
6875}
6876
; SGPR shuffle test with a single input, mask <3, 2, 2>: index 3 selects
; element 0 of the second shuffle operand, which is poison here, so result
; lane 0 is unspecified; lanes 1 and 2 are both element 2 of %extract3. The
; result is widened to <4 x i16> and consumed in s[8:9].
; Expected (one shared check block for all RUN lines): a single s_lshl_b32
; by 16 writing s8. The check lines are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of editing by hand.
6877define void @s_shuffle_v3i16_v3i16__3_2_2() {
6878; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_2_2:
6879; GFX9:       ; %bb.0:
6880; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6881; GFX9-NEXT:    ;;#ASMSTART
6882; GFX9-NEXT:    ; def s[8:9]
6883; GFX9-NEXT:    ;;#ASMEND
6884; GFX9-NEXT:    s_lshl_b32 s8, s9, 16
6885; GFX9-NEXT:    ;;#ASMSTART
6886; GFX9-NEXT:    ; use s[8:9]
6887; GFX9-NEXT:    ;;#ASMEND
6888; GFX9-NEXT:    s_setpc_b64 s[30:31]
6889  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6890  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6891  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 2, i32 2>
6892  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6893  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6894  ret void
6895}
6896
; SGPR shuffle test, mask <4, 2, 2>: result lane 0 is element 1 of the second
; input (%extract31); lanes 1 and 2 are both element 2 of the first input
; (%extract3). The result is widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: one s_lshr_b32 followed by one
; s_pack_ll_b32_b16 writing s8.
; NOTE(review): the two "def" asm calls have no sideeffect marker, so the
; order of the def lines in the output presumably need not match IR order.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
6897define void @s_shuffle_v3i16_v3i16__4_2_2() {
6898; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_2_2:
6899; GFX900:       ; %bb.0:
6900; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6901; GFX900-NEXT:    ;;#ASMSTART
6902; GFX900-NEXT:    ; def s[4:5]
6903; GFX900-NEXT:    ;;#ASMEND
6904; GFX900-NEXT:    ;;#ASMSTART
6905; GFX900-NEXT:    ; def s[8:9]
6906; GFX900-NEXT:    ;;#ASMEND
6907; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
6908; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
6909; GFX900-NEXT:    ;;#ASMSTART
6910; GFX900-NEXT:    ; use s[8:9]
6911; GFX900-NEXT:    ;;#ASMEND
6912; GFX900-NEXT:    s_setpc_b64 s[30:31]
6913;
6914; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_2_2:
6915; GFX90A:       ; %bb.0:
6916; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6917; GFX90A-NEXT:    ;;#ASMSTART
6918; GFX90A-NEXT:    ; def s[4:5]
6919; GFX90A-NEXT:    ;;#ASMEND
6920; GFX90A-NEXT:    ;;#ASMSTART
6921; GFX90A-NEXT:    ; def s[8:9]
6922; GFX90A-NEXT:    ;;#ASMEND
6923; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
6924; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
6925; GFX90A-NEXT:    ;;#ASMSTART
6926; GFX90A-NEXT:    ; use s[8:9]
6927; GFX90A-NEXT:    ;;#ASMEND
6928; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6929;
6930; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_2_2:
6931; GFX940:       ; %bb.0:
6932; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6933; GFX940-NEXT:    ;;#ASMSTART
6934; GFX940-NEXT:    ; def s[0:1]
6935; GFX940-NEXT:    ;;#ASMEND
6936; GFX940-NEXT:    ;;#ASMSTART
6937; GFX940-NEXT:    ; def s[8:9]
6938; GFX940-NEXT:    ;;#ASMEND
6939; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
6940; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
6941; GFX940-NEXT:    ;;#ASMSTART
6942; GFX940-NEXT:    ; use s[8:9]
6943; GFX940-NEXT:    ;;#ASMEND
6944; GFX940-NEXT:    s_setpc_b64 s[30:31]
6945  %vec0 = call <4 x i16> asm "; def $0", "=s"()
6946  %vec1 = call <4 x i16> asm "; def $0", "=s"()
6947  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6948  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
6949  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 2, i32 2>
6950  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
6951  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
6952  ret void
6953}
6954
; SGPR shuffle test, mask <5, 2, 2>: result lane 0 is element 2 of the second
; input (%extract31); lanes 1 and 2 are both element 2 of the first input
; (%extract3). The result is widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: a single s_pack_ll_b32_b16 writing
; s8. The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
6955define void @s_shuffle_v3i16_v3i16__5_2_2() {
6956; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_2:
6957; GFX900:       ; %bb.0:
6958; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6959; GFX900-NEXT:    ;;#ASMSTART
6960; GFX900-NEXT:    ; def s[8:9]
6961; GFX900-NEXT:    ;;#ASMEND
6962; GFX900-NEXT:    ;;#ASMSTART
6963; GFX900-NEXT:    ; def s[4:5]
6964; GFX900-NEXT:    ;;#ASMEND
6965; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
6966; GFX900-NEXT:    ;;#ASMSTART
6967; GFX900-NEXT:    ; use s[8:9]
6968; GFX900-NEXT:    ;;#ASMEND
6969; GFX900-NEXT:    s_setpc_b64 s[30:31]
6970;
6971; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_2:
6972; GFX90A:       ; %bb.0:
6973; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6974; GFX90A-NEXT:    ;;#ASMSTART
6975; GFX90A-NEXT:    ; def s[8:9]
6976; GFX90A-NEXT:    ;;#ASMEND
6977; GFX90A-NEXT:    ;;#ASMSTART
6978; GFX90A-NEXT:    ; def s[4:5]
6979; GFX90A-NEXT:    ;;#ASMEND
6980; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
6981; GFX90A-NEXT:    ;;#ASMSTART
6982; GFX90A-NEXT:    ; use s[8:9]
6983; GFX90A-NEXT:    ;;#ASMEND
6984; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6985;
6986; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_2:
6987; GFX940:       ; %bb.0:
6988; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6989; GFX940-NEXT:    ;;#ASMSTART
6990; GFX940-NEXT:    ; def s[8:9]
6991; GFX940-NEXT:    ;;#ASMEND
6992; GFX940-NEXT:    ;;#ASMSTART
6993; GFX940-NEXT:    ; def s[0:1]
6994; GFX940-NEXT:    ;;#ASMEND
6995; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
6996; GFX940-NEXT:    ;;#ASMSTART
6997; GFX940-NEXT:    ; use s[8:9]
6998; GFX940-NEXT:    ;;#ASMEND
6999; GFX940-NEXT:    s_setpc_b64 s[30:31]
7000  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7001  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7002  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7003  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7004  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 2>
7005  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7006  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7007  ret void
7008}
7009
; SGPR shuffle test, mask <5, poison, 2>: result lane 0 is element 2 of the
; second input (%extract31); lane 1 is unspecified; lane 2 is element 2 of the
; first input (%extract3). The result is widened to <4 x i16> and consumed in
; s[8:9].
; Expected on all three checked targets: a single s_mov_b32 writing s8. The
; check lines are autogenerated by utils/update_llc_test_checks.py; regenerate
; them instead of editing by hand.
7010define void @s_shuffle_v3i16_v3i16__5_u_2() {
7011; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_2:
7012; GFX900:       ; %bb.0:
7013; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7014; GFX900-NEXT:    ;;#ASMSTART
7015; GFX900-NEXT:    ; def s[8:9]
7016; GFX900-NEXT:    ;;#ASMEND
7017; GFX900-NEXT:    ;;#ASMSTART
7018; GFX900-NEXT:    ; def s[4:5]
7019; GFX900-NEXT:    ;;#ASMEND
7020; GFX900-NEXT:    s_mov_b32 s8, s5
7021; GFX900-NEXT:    ;;#ASMSTART
7022; GFX900-NEXT:    ; use s[8:9]
7023; GFX900-NEXT:    ;;#ASMEND
7024; GFX900-NEXT:    s_setpc_b64 s[30:31]
7025;
7026; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_2:
7027; GFX90A:       ; %bb.0:
7028; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7029; GFX90A-NEXT:    ;;#ASMSTART
7030; GFX90A-NEXT:    ; def s[8:9]
7031; GFX90A-NEXT:    ;;#ASMEND
7032; GFX90A-NEXT:    ;;#ASMSTART
7033; GFX90A-NEXT:    ; def s[4:5]
7034; GFX90A-NEXT:    ;;#ASMEND
7035; GFX90A-NEXT:    s_mov_b32 s8, s5
7036; GFX90A-NEXT:    ;;#ASMSTART
7037; GFX90A-NEXT:    ; use s[8:9]
7038; GFX90A-NEXT:    ;;#ASMEND
7039; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7040;
7041; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_2:
7042; GFX940:       ; %bb.0:
7043; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7044; GFX940-NEXT:    ;;#ASMSTART
7045; GFX940-NEXT:    ; def s[8:9]
7046; GFX940-NEXT:    ;;#ASMEND
7047; GFX940-NEXT:    ;;#ASMSTART
7048; GFX940-NEXT:    ; def s[0:1]
7049; GFX940-NEXT:    ;;#ASMEND
7050; GFX940-NEXT:    s_mov_b32 s8, s1
7051; GFX940-NEXT:    ;;#ASMSTART
7052; GFX940-NEXT:    ; use s[8:9]
7053; GFX940-NEXT:    ;;#ASMEND
7054; GFX940-NEXT:    s_setpc_b64 s[30:31]
7055  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7056  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7057  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7058  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7059  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 2>
7060  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7061  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7062  ret void
7063}
7064
; SGPR shuffle test, mask <5, 0, 2>: result lane 0 is element 2 of the second
; input (%extract31); lanes 1 and 2 are elements 0 and 2 of the first input
; (%extract3). The result is widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: a single s_pack_ll_b32_b16 writing
; s8. The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
7065define void @s_shuffle_v3i16_v3i16__5_0_2() {
7066; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_2:
7067; GFX900:       ; %bb.0:
7068; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7069; GFX900-NEXT:    ;;#ASMSTART
7070; GFX900-NEXT:    ; def s[8:9]
7071; GFX900-NEXT:    ;;#ASMEND
7072; GFX900-NEXT:    ;;#ASMSTART
7073; GFX900-NEXT:    ; def s[4:5]
7074; GFX900-NEXT:    ;;#ASMEND
7075; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s8
7076; GFX900-NEXT:    ;;#ASMSTART
7077; GFX900-NEXT:    ; use s[8:9]
7078; GFX900-NEXT:    ;;#ASMEND
7079; GFX900-NEXT:    s_setpc_b64 s[30:31]
7080;
7081; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_2:
7082; GFX90A:       ; %bb.0:
7083; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7084; GFX90A-NEXT:    ;;#ASMSTART
7085; GFX90A-NEXT:    ; def s[8:9]
7086; GFX90A-NEXT:    ;;#ASMEND
7087; GFX90A-NEXT:    ;;#ASMSTART
7088; GFX90A-NEXT:    ; def s[4:5]
7089; GFX90A-NEXT:    ;;#ASMEND
7090; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s8
7091; GFX90A-NEXT:    ;;#ASMSTART
7092; GFX90A-NEXT:    ; use s[8:9]
7093; GFX90A-NEXT:    ;;#ASMEND
7094; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7095;
7096; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_2:
7097; GFX940:       ; %bb.0:
7098; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7099; GFX940-NEXT:    ;;#ASMSTART
7100; GFX940-NEXT:    ; def s[8:9]
7101; GFX940-NEXT:    ;;#ASMEND
7102; GFX940-NEXT:    ;;#ASMSTART
7103; GFX940-NEXT:    ; def s[0:1]
7104; GFX940-NEXT:    ;;#ASMEND
7105; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s8
7106; GFX940-NEXT:    ;;#ASMSTART
7107; GFX940-NEXT:    ; use s[8:9]
7108; GFX940-NEXT:    ;;#ASMEND
7109; GFX940-NEXT:    s_setpc_b64 s[30:31]
7110  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7111  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7112  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7113  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7114  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 2>
7115  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7116  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7117  ret void
7118}
7119
; SGPR shuffle test, mask <5, 1, 2>: result lane 0 is element 2 of the second
; input (%extract31); lanes 1 and 2 are elements 1 and 2 of the first input
; (%extract3). The result is widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: a single s_pack_lh_b32_b16 writing
; s8. The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
7120define void @s_shuffle_v3i16_v3i16__5_1_2() {
7121; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_2:
7122; GFX900:       ; %bb.0:
7123; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7124; GFX900-NEXT:    ;;#ASMSTART
7125; GFX900-NEXT:    ; def s[8:9]
7126; GFX900-NEXT:    ;;#ASMEND
7127; GFX900-NEXT:    ;;#ASMSTART
7128; GFX900-NEXT:    ; def s[4:5]
7129; GFX900-NEXT:    ;;#ASMEND
7130; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s8
7131; GFX900-NEXT:    ;;#ASMSTART
7132; GFX900-NEXT:    ; use s[8:9]
7133; GFX900-NEXT:    ;;#ASMEND
7134; GFX900-NEXT:    s_setpc_b64 s[30:31]
7135;
7136; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_2:
7137; GFX90A:       ; %bb.0:
7138; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7139; GFX90A-NEXT:    ;;#ASMSTART
7140; GFX90A-NEXT:    ; def s[8:9]
7141; GFX90A-NEXT:    ;;#ASMEND
7142; GFX90A-NEXT:    ;;#ASMSTART
7143; GFX90A-NEXT:    ; def s[4:5]
7144; GFX90A-NEXT:    ;;#ASMEND
7145; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s8
7146; GFX90A-NEXT:    ;;#ASMSTART
7147; GFX90A-NEXT:    ; use s[8:9]
7148; GFX90A-NEXT:    ;;#ASMEND
7149; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7150;
7151; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_2:
7152; GFX940:       ; %bb.0:
7153; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7154; GFX940-NEXT:    ;;#ASMSTART
7155; GFX940-NEXT:    ; def s[8:9]
7156; GFX940-NEXT:    ;;#ASMEND
7157; GFX940-NEXT:    ;;#ASMSTART
7158; GFX940-NEXT:    ; def s[0:1]
7159; GFX940-NEXT:    ;;#ASMEND
7160; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s8
7161; GFX940-NEXT:    ;;#ASMSTART
7162; GFX940-NEXT:    ; use s[8:9]
7163; GFX940-NEXT:    ;;#ASMEND
7164; GFX940-NEXT:    s_setpc_b64 s[30:31]
7165  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7166  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7167  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7168  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7169  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 2>
7170  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7171  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7172  ret void
7173}
7174
; SGPR shuffle test, mask <5, 3, 2>: result lanes 0 and 1 are elements 2 and 0
; of the second input (%extract31); lane 2 is element 2 of the first input
; (%extract3). The result is widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: a single s_pack_ll_b32_b16 writing
; s8. The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
7175define void @s_shuffle_v3i16_v3i16__5_3_2() {
7176; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_2:
7177; GFX900:       ; %bb.0:
7178; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7179; GFX900-NEXT:    ;;#ASMSTART
7180; GFX900-NEXT:    ; def s[8:9]
7181; GFX900-NEXT:    ;;#ASMEND
7182; GFX900-NEXT:    ;;#ASMSTART
7183; GFX900-NEXT:    ; def s[4:5]
7184; GFX900-NEXT:    ;;#ASMEND
7185; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
7186; GFX900-NEXT:    ;;#ASMSTART
7187; GFX900-NEXT:    ; use s[8:9]
7188; GFX900-NEXT:    ;;#ASMEND
7189; GFX900-NEXT:    s_setpc_b64 s[30:31]
7190;
7191; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_2:
7192; GFX90A:       ; %bb.0:
7193; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7194; GFX90A-NEXT:    ;;#ASMSTART
7195; GFX90A-NEXT:    ; def s[8:9]
7196; GFX90A-NEXT:    ;;#ASMEND
7197; GFX90A-NEXT:    ;;#ASMSTART
7198; GFX90A-NEXT:    ; def s[4:5]
7199; GFX90A-NEXT:    ;;#ASMEND
7200; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
7201; GFX90A-NEXT:    ;;#ASMSTART
7202; GFX90A-NEXT:    ; use s[8:9]
7203; GFX90A-NEXT:    ;;#ASMEND
7204; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7205;
7206; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_2:
7207; GFX940:       ; %bb.0:
7208; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7209; GFX940-NEXT:    ;;#ASMSTART
7210; GFX940-NEXT:    ; def s[8:9]
7211; GFX940-NEXT:    ;;#ASMEND
7212; GFX940-NEXT:    ;;#ASMSTART
7213; GFX940-NEXT:    ; def s[0:1]
7214; GFX940-NEXT:    ;;#ASMEND
7215; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
7216; GFX940-NEXT:    ;;#ASMSTART
7217; GFX940-NEXT:    ; use s[8:9]
7218; GFX940-NEXT:    ;;#ASMEND
7219; GFX940-NEXT:    s_setpc_b64 s[30:31]
7220  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7221  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7222  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7223  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7224  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 2>
7225  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7226  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7227  ret void
7228}
7229
; SGPR shuffle test, mask <5, 4, 2>: result lanes 0 and 1 are elements 2 and 1
; of the second input (%extract31); lane 2 is element 2 of the first input
; (%extract3). The result is widened to <4 x i16> and consumed in s[8:9].
; Expected on all three checked targets: a single s_pack_lh_b32_b16 writing
; s8. The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
7230define void @s_shuffle_v3i16_v3i16__5_4_2() {
7231; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_2:
7232; GFX900:       ; %bb.0:
7233; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7234; GFX900-NEXT:    ;;#ASMSTART
7235; GFX900-NEXT:    ; def s[8:9]
7236; GFX900-NEXT:    ;;#ASMEND
7237; GFX900-NEXT:    ;;#ASMSTART
7238; GFX900-NEXT:    ; def s[4:5]
7239; GFX900-NEXT:    ;;#ASMEND
7240; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
7241; GFX900-NEXT:    ;;#ASMSTART
7242; GFX900-NEXT:    ; use s[8:9]
7243; GFX900-NEXT:    ;;#ASMEND
7244; GFX900-NEXT:    s_setpc_b64 s[30:31]
7245;
7246; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_2:
7247; GFX90A:       ; %bb.0:
7248; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7249; GFX90A-NEXT:    ;;#ASMSTART
7250; GFX90A-NEXT:    ; def s[8:9]
7251; GFX90A-NEXT:    ;;#ASMEND
7252; GFX90A-NEXT:    ;;#ASMSTART
7253; GFX90A-NEXT:    ; def s[4:5]
7254; GFX90A-NEXT:    ;;#ASMEND
7255; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
7256; GFX90A-NEXT:    ;;#ASMSTART
7257; GFX90A-NEXT:    ; use s[8:9]
7258; GFX90A-NEXT:    ;;#ASMEND
7259; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7260;
7261; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_2:
7262; GFX940:       ; %bb.0:
7263; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7264; GFX940-NEXT:    ;;#ASMSTART
7265; GFX940-NEXT:    ; def s[8:9]
7266; GFX940-NEXT:    ;;#ASMEND
7267; GFX940-NEXT:    ;;#ASMSTART
7268; GFX940-NEXT:    ; def s[0:1]
7269; GFX940-NEXT:    ;;#ASMEND
7270; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
7271; GFX940-NEXT:    ;;#ASMSTART
7272; GFX940-NEXT:    ; use s[8:9]
7273; GFX940-NEXT:    ;;#ASMEND
7274; GFX940-NEXT:    s_setpc_b64 s[30:31]
7275  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7276  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7277  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7278  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7279  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 2>
7280  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7281  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7282  ret void
7283}
7284
; SGPR shuffle test with a single input, mask <poison, 3, 3>: index 3 selects
; element 0 of the second shuffle operand, which is poison here, so no result
; lane depends on %extract3.
; Expected (one shared check block for all RUN lines): no def and no shuffle
; instruction at all, only the "use s[8:9]" asm. The check lines are
; autogenerated by utils/update_llc_test_checks.py; regenerate them instead of
; editing by hand.
7285define void @s_shuffle_v3i16_v3i16__u_3_3() {
7286; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_3_3:
7287; GFX9:       ; %bb.0:
7288; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7289; GFX9-NEXT:    ;;#ASMSTART
7290; GFX9-NEXT:    ; use s[8:9]
7291; GFX9-NEXT:    ;;#ASMEND
7292; GFX9-NEXT:    s_setpc_b64 s[30:31]
7293  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7294  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7295  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 poison, i32 3, i32 3>
7296  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7297  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7298  ret void
7299}
7300
; SGPR shuffle test with a single input, mask <0, 3, 3>: result lane 0 is
; element 0 of %extract3; index 3 selects element 0 of the second shuffle
; operand, which is poison here, so lanes 1 and 2 are unspecified.
; Expected: the input is defined directly in s[8:9] and used with no shuffle
; instruction in between. The gfx940 output additionally has an "s_nop 0"
; between def and use.
; NOTE(review): presumably a target-specific hazard no-op; the reason is not
; visible in this file.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them instead of editing by hand.
7301define void @s_shuffle_v3i16_v3i16__0_3_3() {
7302; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_3_3:
7303; GFX900:       ; %bb.0:
7304; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7305; GFX900-NEXT:    ;;#ASMSTART
7306; GFX900-NEXT:    ; def s[8:9]
7307; GFX900-NEXT:    ;;#ASMEND
7308; GFX900-NEXT:    ;;#ASMSTART
7309; GFX900-NEXT:    ; use s[8:9]
7310; GFX900-NEXT:    ;;#ASMEND
7311; GFX900-NEXT:    s_setpc_b64 s[30:31]
7312;
7313; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_3_3:
7314; GFX90A:       ; %bb.0:
7315; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7316; GFX90A-NEXT:    ;;#ASMSTART
7317; GFX90A-NEXT:    ; def s[8:9]
7318; GFX90A-NEXT:    ;;#ASMEND
7319; GFX90A-NEXT:    ;;#ASMSTART
7320; GFX90A-NEXT:    ; use s[8:9]
7321; GFX90A-NEXT:    ;;#ASMEND
7322; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7323;
7324; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_3_3:
7325; GFX940:       ; %bb.0:
7326; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7327; GFX940-NEXT:    ;;#ASMSTART
7328; GFX940-NEXT:    ; def s[8:9]
7329; GFX940-NEXT:    ;;#ASMEND
7330; GFX940-NEXT:    s_nop 0
7331; GFX940-NEXT:    ;;#ASMSTART
7332; GFX940-NEXT:    ; use s[8:9]
7333; GFX940-NEXT:    ;;#ASMEND
7334; GFX940-NEXT:    s_setpc_b64 s[30:31]
7335  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7336  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7337  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 0, i32 3, i32 3>
7338  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7339  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7340  ret void
7341}
7342
; Scalar-register variant of the <1, 3, 3> shuffle with a poison second
; operand: lane 0 takes element 1 of %extract3, while lanes 1 and 2 index the
; poison operand. The checks expect a single s_lshr_b32 by 16 of the first
; register of the asm-defined pair, written to s8. The gfx940 run expects the
; pair in s[0:1]; the other two runs expect s[4:5].
7343define void @s_shuffle_v3i16_v3i16__1_3_3() {
7344; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_3_3:
7345; GFX900:       ; %bb.0:
7346; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7347; GFX900-NEXT:    ;;#ASMSTART
7348; GFX900-NEXT:    ; def s[4:5]
7349; GFX900-NEXT:    ;;#ASMEND
7350; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
7351; GFX900-NEXT:    ;;#ASMSTART
7352; GFX900-NEXT:    ; use s[8:9]
7353; GFX900-NEXT:    ;;#ASMEND
7354; GFX900-NEXT:    s_setpc_b64 s[30:31]
7355;
7356; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_3_3:
7357; GFX90A:       ; %bb.0:
7358; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7359; GFX90A-NEXT:    ;;#ASMSTART
7360; GFX90A-NEXT:    ; def s[4:5]
7361; GFX90A-NEXT:    ;;#ASMEND
7362; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
7363; GFX90A-NEXT:    ;;#ASMSTART
7364; GFX90A-NEXT:    ; use s[8:9]
7365; GFX90A-NEXT:    ;;#ASMEND
7366; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7367;
7368; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_3_3:
7369; GFX940:       ; %bb.0:
7370; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7371; GFX940-NEXT:    ;;#ASMSTART
7372; GFX940-NEXT:    ; def s[0:1]
7373; GFX940-NEXT:    ;;#ASMEND
7374; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
7375; GFX940-NEXT:    ;;#ASMSTART
7376; GFX940-NEXT:    ; use s[8:9]
7377; GFX940-NEXT:    ;;#ASMEND
7378; GFX940-NEXT:    s_setpc_b64 s[30:31]
7379  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7380  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7381  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 1, i32 3, i32 3>
7382  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7383  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7384  ret void
7385}
7386
; Scalar-register variant of the <2, 3, 3> shuffle with a poison second
; operand: lane 0 takes element 2 of %extract3, while lanes 1 and 2 index the
; poison operand. The checks expect a single s_mov_b32 copying the second
; register of the asm-defined pair into s8. The gfx940 run expects the pair in
; s[0:1]; the other two runs expect s[4:5].
7387define void @s_shuffle_v3i16_v3i16__2_3_3() {
7388; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_3_3:
7389; GFX900:       ; %bb.0:
7390; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7391; GFX900-NEXT:    ;;#ASMSTART
7392; GFX900-NEXT:    ; def s[4:5]
7393; GFX900-NEXT:    ;;#ASMEND
7394; GFX900-NEXT:    s_mov_b32 s8, s5
7395; GFX900-NEXT:    ;;#ASMSTART
7396; GFX900-NEXT:    ; use s[8:9]
7397; GFX900-NEXT:    ;;#ASMEND
7398; GFX900-NEXT:    s_setpc_b64 s[30:31]
7399;
7400; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_3_3:
7401; GFX90A:       ; %bb.0:
7402; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7403; GFX90A-NEXT:    ;;#ASMSTART
7404; GFX90A-NEXT:    ; def s[4:5]
7405; GFX90A-NEXT:    ;;#ASMEND
7406; GFX90A-NEXT:    s_mov_b32 s8, s5
7407; GFX90A-NEXT:    ;;#ASMSTART
7408; GFX90A-NEXT:    ; use s[8:9]
7409; GFX90A-NEXT:    ;;#ASMEND
7410; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7411;
7412; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_3_3:
7413; GFX940:       ; %bb.0:
7414; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7415; GFX940-NEXT:    ;;#ASMSTART
7416; GFX940-NEXT:    ; def s[0:1]
7417; GFX940-NEXT:    ;;#ASMEND
7418; GFX940-NEXT:    s_mov_b32 s8, s1
7419; GFX940-NEXT:    ;;#ASMSTART
7420; GFX940-NEXT:    ; use s[8:9]
7421; GFX940-NEXT:    ;;#ASMEND
7422; GFX940-NEXT:    s_setpc_b64 s[30:31]
7423  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7424  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7425  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 2, i32 3, i32 3>
7426  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7427  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7428  ret void
7429}
7430
; Scalar-register variant of the <3, 3, 3> shuffle with a poison second
; operand: every lane indexes the poison operand, so no lane reads %vec0.
; The expected output for all three RUN lines contains only the
; "; use s[8:9]" asm block: no "; def" block and no shuffle instructions.
7431define void @s_shuffle_v3i16_v3i16__3_3_3() {
7432; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_3_3:
7433; GFX9:       ; %bb.0:
7434; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7435; GFX9-NEXT:    ;;#ASMSTART
7436; GFX9-NEXT:    ; use s[8:9]
7437; GFX9-NEXT:    ;;#ASMEND
7438; GFX9-NEXT:    s_setpc_b64 s[30:31]
7439  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7440  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7441  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> poison, <3 x i32> <i32 3, i32 3, i32 3>
7442  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7443  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7444  ret void
7445}
7446
; Scalar-register variant of the two-input <4, 3, 3> shuffle. All three mask
; indices are >= 3, so every lane selects from the second operand
; (%extract31) and %vec0 is unused; the checks accordingly expect only one
; "; def" asm block. The expected lowering is s_lshr_b32 by 16 of the first
; defined register, s_pack_ll_b32_b16 of that shifted value with the first
; defined register into s8, and s_mov_b32 of the first defined register into
; s9. The gfx940 run expects the pair in s[0:1]; the other two runs expect
; s[4:5].
7447define void @s_shuffle_v3i16_v3i16__4_3_3() {
7448; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_3_3:
7449; GFX900:       ; %bb.0:
7450; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7451; GFX900-NEXT:    ;;#ASMSTART
7452; GFX900-NEXT:    ; def s[4:5]
7453; GFX900-NEXT:    ;;#ASMEND
7454; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
7455; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
7456; GFX900-NEXT:    s_mov_b32 s9, s4
7457; GFX900-NEXT:    ;;#ASMSTART
7458; GFX900-NEXT:    ; use s[8:9]
7459; GFX900-NEXT:    ;;#ASMEND
7460; GFX900-NEXT:    s_setpc_b64 s[30:31]
7461;
7462; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_3_3:
7463; GFX90A:       ; %bb.0:
7464; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7465; GFX90A-NEXT:    ;;#ASMSTART
7466; GFX90A-NEXT:    ; def s[4:5]
7467; GFX90A-NEXT:    ;;#ASMEND
7468; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
7469; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
7470; GFX90A-NEXT:    s_mov_b32 s9, s4
7471; GFX90A-NEXT:    ;;#ASMSTART
7472; GFX90A-NEXT:    ; use s[8:9]
7473; GFX90A-NEXT:    ;;#ASMEND
7474; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7475;
7476; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_3_3:
7477; GFX940:       ; %bb.0:
7478; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7479; GFX940-NEXT:    ;;#ASMSTART
7480; GFX940-NEXT:    ; def s[0:1]
7481; GFX940-NEXT:    ;;#ASMEND
7482; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
7483; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
7484; GFX940-NEXT:    s_mov_b32 s9, s0
7485; GFX940-NEXT:    ;;#ASMSTART
7486; GFX940-NEXT:    ; use s[8:9]
7487; GFX940-NEXT:    ;;#ASMEND
7488; GFX940-NEXT:    s_setpc_b64 s[30:31]
7489  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7490  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7491  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7492  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7493  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 3, i32 3>
7494  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7495  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7496  ret void
7497}
7498
; Scalar-register variant of the two-input <5, 3, 3> shuffle. All three mask
; indices are >= 3, so every lane selects from the second operand
; (%extract31) and %vec0 is unused; the checks accordingly expect only one
; "; def" asm block. The expected lowering is s_pack_ll_b32_b16 of the second
; and first defined registers into s8, followed by s_mov_b32 of the first
; defined register into s9. The gfx940 run expects the pair in s[0:1]; the
; other two runs expect s[4:5].
7499define void @s_shuffle_v3i16_v3i16__5_3_3() {
7500; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_3:
7501; GFX900:       ; %bb.0:
7502; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7503; GFX900-NEXT:    ;;#ASMSTART
7504; GFX900-NEXT:    ; def s[4:5]
7505; GFX900-NEXT:    ;;#ASMEND
7506; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
7507; GFX900-NEXT:    s_mov_b32 s9, s4
7508; GFX900-NEXT:    ;;#ASMSTART
7509; GFX900-NEXT:    ; use s[8:9]
7510; GFX900-NEXT:    ;;#ASMEND
7511; GFX900-NEXT:    s_setpc_b64 s[30:31]
7512;
7513; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_3:
7514; GFX90A:       ; %bb.0:
7515; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7516; GFX90A-NEXT:    ;;#ASMSTART
7517; GFX90A-NEXT:    ; def s[4:5]
7518; GFX90A-NEXT:    ;;#ASMEND
7519; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
7520; GFX90A-NEXT:    s_mov_b32 s9, s4
7521; GFX90A-NEXT:    ;;#ASMSTART
7522; GFX90A-NEXT:    ; use s[8:9]
7523; GFX90A-NEXT:    ;;#ASMEND
7524; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7525;
7526; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_3:
7527; GFX940:       ; %bb.0:
7528; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7529; GFX940-NEXT:    ;;#ASMSTART
7530; GFX940-NEXT:    ; def s[0:1]
7531; GFX940-NEXT:    ;;#ASMEND
7532; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
7533; GFX940-NEXT:    s_mov_b32 s9, s0
7534; GFX940-NEXT:    ;;#ASMSTART
7535; GFX940-NEXT:    ; use s[8:9]
7536; GFX940-NEXT:    ;;#ASMEND
7537; GFX940-NEXT:    s_setpc_b64 s[30:31]
7538  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7539  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7540  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7541  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7542  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 3>
7543  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7544  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7545  ret void
7546}
7547
; Scalar-register variant of the two-input <5, poison, 3> shuffle. Both
; defined mask indices are >= 3, so they select from the second operand
; (%extract31) and %vec0 is unused; the checks accordingly expect only one
; "; def" asm block. The expected lowering is two s_mov_b32 copies: the
; second defined register into s8 and the first defined register into s9.
; The gfx940 run expects the pair in s[0:1]; the other two runs expect
; s[4:5].
7548define void @s_shuffle_v3i16_v3i16__5_u_3() {
7549; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_3:
7550; GFX900:       ; %bb.0:
7551; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7552; GFX900-NEXT:    ;;#ASMSTART
7553; GFX900-NEXT:    ; def s[4:5]
7554; GFX900-NEXT:    ;;#ASMEND
7555; GFX900-NEXT:    s_mov_b32 s8, s5
7556; GFX900-NEXT:    s_mov_b32 s9, s4
7557; GFX900-NEXT:    ;;#ASMSTART
7558; GFX900-NEXT:    ; use s[8:9]
7559; GFX900-NEXT:    ;;#ASMEND
7560; GFX900-NEXT:    s_setpc_b64 s[30:31]
7561;
7562; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_3:
7563; GFX90A:       ; %bb.0:
7564; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7565; GFX90A-NEXT:    ;;#ASMSTART
7566; GFX90A-NEXT:    ; def s[4:5]
7567; GFX90A-NEXT:    ;;#ASMEND
7568; GFX90A-NEXT:    s_mov_b32 s8, s5
7569; GFX90A-NEXT:    s_mov_b32 s9, s4
7570; GFX90A-NEXT:    ;;#ASMSTART
7571; GFX90A-NEXT:    ; use s[8:9]
7572; GFX90A-NEXT:    ;;#ASMEND
7573; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7574;
7575; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_3:
7576; GFX940:       ; %bb.0:
7577; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7578; GFX940-NEXT:    ;;#ASMSTART
7579; GFX940-NEXT:    ; def s[0:1]
7580; GFX940-NEXT:    ;;#ASMEND
7581; GFX940-NEXT:    s_mov_b32 s8, s1
7582; GFX940-NEXT:    s_mov_b32 s9, s0
7583; GFX940-NEXT:    ;;#ASMSTART
7584; GFX940-NEXT:    ; use s[8:9]
7585; GFX940-NEXT:    ;;#ASMEND
7586; GFX940-NEXT:    s_setpc_b64 s[30:31]
7587  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7588  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7589  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7590  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7591  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 3>
7592  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7593  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7594  ret void
7595}
7596
; Scalar-register variant of the two-input <5, 0, 3> shuffle. Index 0 selects
; from the first operand (%extract3) and indices 5 and 3 select from the
; second (%extract31), so both asm-defined values are live and the checks
; expect two "; def" blocks. The expected lowering is s_pack_ll_b32_b16 into
; s8, taking the second register of the second pair and the first register
; of the first pair, followed by s_mov_b32 of the first register of the
; second pair into s9. The gfx940 run expects the pairs in s[0:1] and s[2:3];
; the other two runs expect s[4:5] and s[6:7].
7597define void @s_shuffle_v3i16_v3i16__5_0_3() {
7598; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_3:
7599; GFX900:       ; %bb.0:
7600; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7601; GFX900-NEXT:    ;;#ASMSTART
7602; GFX900-NEXT:    ; def s[4:5]
7603; GFX900-NEXT:    ;;#ASMEND
7604; GFX900-NEXT:    ;;#ASMSTART
7605; GFX900-NEXT:    ; def s[6:7]
7606; GFX900-NEXT:    ;;#ASMEND
7607; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
7608; GFX900-NEXT:    s_mov_b32 s9, s6
7609; GFX900-NEXT:    ;;#ASMSTART
7610; GFX900-NEXT:    ; use s[8:9]
7611; GFX900-NEXT:    ;;#ASMEND
7612; GFX900-NEXT:    s_setpc_b64 s[30:31]
7613;
7614; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_3:
7615; GFX90A:       ; %bb.0:
7616; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7617; GFX90A-NEXT:    ;;#ASMSTART
7618; GFX90A-NEXT:    ; def s[4:5]
7619; GFX90A-NEXT:    ;;#ASMEND
7620; GFX90A-NEXT:    ;;#ASMSTART
7621; GFX90A-NEXT:    ; def s[6:7]
7622; GFX90A-NEXT:    ;;#ASMEND
7623; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
7624; GFX90A-NEXT:    s_mov_b32 s9, s6
7625; GFX90A-NEXT:    ;;#ASMSTART
7626; GFX90A-NEXT:    ; use s[8:9]
7627; GFX90A-NEXT:    ;;#ASMEND
7628; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7629;
7630; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_3:
7631; GFX940:       ; %bb.0:
7632; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7633; GFX940-NEXT:    ;;#ASMSTART
7634; GFX940-NEXT:    ; def s[0:1]
7635; GFX940-NEXT:    ;;#ASMEND
7636; GFX940-NEXT:    ;;#ASMSTART
7637; GFX940-NEXT:    ; def s[2:3]
7638; GFX940-NEXT:    ;;#ASMEND
7639; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
7640; GFX940-NEXT:    s_mov_b32 s9, s2
7641; GFX940-NEXT:    ;;#ASMSTART
7642; GFX940-NEXT:    ; use s[8:9]
7643; GFX940-NEXT:    ;;#ASMEND
7644; GFX940-NEXT:    s_setpc_b64 s[30:31]
7645  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7646  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7647  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7648  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7649  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 3>
7650  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7651  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7652  ret void
7653}
7654
; Scalar-register variant of the two-input <5, 1, 3> shuffle. Index 1 selects
; from the first operand (%extract3) and indices 5 and 3 select from the
; second (%extract31), so both asm-defined values are live and the checks
; expect two "; def" blocks. The expected lowering is s_pack_lh_b32_b16 into
; s8, taking the second register of the second pair and the first register
; of the first pair, followed by s_mov_b32 of the first register of the
; second pair into s9. The gfx940 run expects the pairs in s[0:1] and s[2:3];
; the other two runs expect s[4:5] and s[6:7].
7655define void @s_shuffle_v3i16_v3i16__5_1_3() {
7656; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_3:
7657; GFX900:       ; %bb.0:
7658; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7659; GFX900-NEXT:    ;;#ASMSTART
7660; GFX900-NEXT:    ; def s[4:5]
7661; GFX900-NEXT:    ;;#ASMEND
7662; GFX900-NEXT:    ;;#ASMSTART
7663; GFX900-NEXT:    ; def s[6:7]
7664; GFX900-NEXT:    ;;#ASMEND
7665; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
7666; GFX900-NEXT:    s_mov_b32 s9, s6
7667; GFX900-NEXT:    ;;#ASMSTART
7668; GFX900-NEXT:    ; use s[8:9]
7669; GFX900-NEXT:    ;;#ASMEND
7670; GFX900-NEXT:    s_setpc_b64 s[30:31]
7671;
7672; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_3:
7673; GFX90A:       ; %bb.0:
7674; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7675; GFX90A-NEXT:    ;;#ASMSTART
7676; GFX90A-NEXT:    ; def s[4:5]
7677; GFX90A-NEXT:    ;;#ASMEND
7678; GFX90A-NEXT:    ;;#ASMSTART
7679; GFX90A-NEXT:    ; def s[6:7]
7680; GFX90A-NEXT:    ;;#ASMEND
7681; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
7682; GFX90A-NEXT:    s_mov_b32 s9, s6
7683; GFX90A-NEXT:    ;;#ASMSTART
7684; GFX90A-NEXT:    ; use s[8:9]
7685; GFX90A-NEXT:    ;;#ASMEND
7686; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7687;
7688; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_3:
7689; GFX940:       ; %bb.0:
7690; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7691; GFX940-NEXT:    ;;#ASMSTART
7692; GFX940-NEXT:    ; def s[0:1]
7693; GFX940-NEXT:    ;;#ASMEND
7694; GFX940-NEXT:    ;;#ASMSTART
7695; GFX940-NEXT:    ; def s[2:3]
7696; GFX940-NEXT:    ;;#ASMEND
7697; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
7698; GFX940-NEXT:    s_mov_b32 s9, s2
7699; GFX940-NEXT:    ;;#ASMSTART
7700; GFX940-NEXT:    ; use s[8:9]
7701; GFX940-NEXT:    ;;#ASMEND
7702; GFX940-NEXT:    s_setpc_b64 s[30:31]
7703  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7704  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7705  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7706  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7707  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 3>
7708  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7709  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7710  ret void
7711}
7712
; Scalar-register variant of the two-input <5, 2, 3> shuffle. Index 2 selects
; from the first operand (%extract3) and indices 5 and 3 select from the
; second (%extract31), so both asm-defined values are live and the checks
; expect two "; def" blocks. The expected lowering is s_pack_ll_b32_b16 into
; s8, taking the second register of the second pair and the second register
; of the first pair, followed by s_mov_b32 of the first register of the
; second pair into s9. The gfx940 run expects the pairs in s[0:1] and s[2:3];
; the other two runs expect s[4:5] and s[6:7].
7713define void @s_shuffle_v3i16_v3i16__5_2_3() {
7714; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_3:
7715; GFX900:       ; %bb.0:
7716; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7717; GFX900-NEXT:    ;;#ASMSTART
7718; GFX900-NEXT:    ; def s[4:5]
7719; GFX900-NEXT:    ;;#ASMEND
7720; GFX900-NEXT:    ;;#ASMSTART
7721; GFX900-NEXT:    ; def s[6:7]
7722; GFX900-NEXT:    ;;#ASMEND
7723; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
7724; GFX900-NEXT:    s_mov_b32 s9, s6
7725; GFX900-NEXT:    ;;#ASMSTART
7726; GFX900-NEXT:    ; use s[8:9]
7727; GFX900-NEXT:    ;;#ASMEND
7728; GFX900-NEXT:    s_setpc_b64 s[30:31]
7729;
7730; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_3:
7731; GFX90A:       ; %bb.0:
7732; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7733; GFX90A-NEXT:    ;;#ASMSTART
7734; GFX90A-NEXT:    ; def s[4:5]
7735; GFX90A-NEXT:    ;;#ASMEND
7736; GFX90A-NEXT:    ;;#ASMSTART
7737; GFX90A-NEXT:    ; def s[6:7]
7738; GFX90A-NEXT:    ;;#ASMEND
7739; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
7740; GFX90A-NEXT:    s_mov_b32 s9, s6
7741; GFX90A-NEXT:    ;;#ASMSTART
7742; GFX90A-NEXT:    ; use s[8:9]
7743; GFX90A-NEXT:    ;;#ASMEND
7744; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7745;
7746; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_3:
7747; GFX940:       ; %bb.0:
7748; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7749; GFX940-NEXT:    ;;#ASMSTART
7750; GFX940-NEXT:    ; def s[0:1]
7751; GFX940-NEXT:    ;;#ASMEND
7752; GFX940-NEXT:    ;;#ASMSTART
7753; GFX940-NEXT:    ; def s[2:3]
7754; GFX940-NEXT:    ;;#ASMEND
7755; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
7756; GFX940-NEXT:    s_mov_b32 s9, s2
7757; GFX940-NEXT:    ;;#ASMSTART
7758; GFX940-NEXT:    ; use s[8:9]
7759; GFX940-NEXT:    ;;#ASMEND
7760; GFX940-NEXT:    s_setpc_b64 s[30:31]
7761  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7762  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7763  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7764  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7765  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 3>
7766  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7767  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7768  ret void
7769}
7770
; Scalar-register variant of the two-input <5, 4, 3> shuffle. All three mask
; indices are >= 3, so every lane selects from the second operand
; (%extract31) and %vec0 is unused; the checks accordingly expect only one
; "; def" asm block. The expected lowering is s_pack_lh_b32_b16 of the second
; and first defined registers into s8, followed by s_mov_b32 of the first
; defined register into s9. The gfx940 run expects the pair in s[0:1]; the
; other two runs expect s[4:5].
7771define void @s_shuffle_v3i16_v3i16__5_4_3() {
7772; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_3:
7773; GFX900:       ; %bb.0:
7774; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7775; GFX900-NEXT:    ;;#ASMSTART
7776; GFX900-NEXT:    ; def s[4:5]
7777; GFX900-NEXT:    ;;#ASMEND
7778; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
7779; GFX900-NEXT:    s_mov_b32 s9, s4
7780; GFX900-NEXT:    ;;#ASMSTART
7781; GFX900-NEXT:    ; use s[8:9]
7782; GFX900-NEXT:    ;;#ASMEND
7783; GFX900-NEXT:    s_setpc_b64 s[30:31]
7784;
7785; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_3:
7786; GFX90A:       ; %bb.0:
7787; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7788; GFX90A-NEXT:    ;;#ASMSTART
7789; GFX90A-NEXT:    ; def s[4:5]
7790; GFX90A-NEXT:    ;;#ASMEND
7791; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
7792; GFX90A-NEXT:    s_mov_b32 s9, s4
7793; GFX90A-NEXT:    ;;#ASMSTART
7794; GFX90A-NEXT:    ; use s[8:9]
7795; GFX90A-NEXT:    ;;#ASMEND
7796; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7797;
7798; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_3:
7799; GFX940:       ; %bb.0:
7800; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7801; GFX940-NEXT:    ;;#ASMSTART
7802; GFX940-NEXT:    ; def s[0:1]
7803; GFX940-NEXT:    ;;#ASMEND
7804; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
7805; GFX940-NEXT:    s_mov_b32 s9, s0
7806; GFX940-NEXT:    ;;#ASMSTART
7807; GFX940-NEXT:    ; use s[8:9]
7808; GFX940-NEXT:    ;;#ASMEND
7809; GFX940-NEXT:    s_setpc_b64 s[30:31]
7810  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7811  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7812  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7813  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7814  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 3>
7815  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7816  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7817  ret void
7818}
7819
; Scalar-register variant of the two-input <poison, 4, 4> shuffle. Both
; defined mask indices are >= 3, so they select from the second operand
; (%extract31) and %vec0 is unused; the checks accordingly expect only one
; "; def" asm block. All three RUN lines share the same expected output: the
; pair is defined directly in s[8:9] and a single s_lshr_b32 by 16 of s8
; writes s9 before the "; use" asm.
7820define void @s_shuffle_v3i16_v3i16__u_4_4() {
7821; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_4_4:
7822; GFX9:       ; %bb.0:
7823; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7824; GFX9-NEXT:    ;;#ASMSTART
7825; GFX9-NEXT:    ; def s[8:9]
7826; GFX9-NEXT:    ;;#ASMEND
7827; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
7828; GFX9-NEXT:    ;;#ASMSTART
7829; GFX9-NEXT:    ; use s[8:9]
7830; GFX9-NEXT:    ;;#ASMEND
7831; GFX9-NEXT:    s_setpc_b64 s[30:31]
7832  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7833  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7834  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7835  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7836  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 poison, i32 4, i32 4>
7837  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7838  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7839  ret void
7840}
7841
; Scalar-register variant of the two-input <0, 4, 4> shuffle. Index 0 selects
; from the first operand (%extract3) and index 4 selects from the second
; (%extract31), so both asm-defined values are live and the checks expect two
; "; def" blocks. The expected lowering is s_pack_lh_b32_b16 into s8, taking
; the first register of the first pair and the first register of the second
; pair, followed by s_lshr_b32 by 16 of the first register of the second pair
; into s9. The gfx940 run expects the pairs in s[0:1] and s[2:3]; the other
; two runs expect s[4:5] and s[6:7].
7842define void @s_shuffle_v3i16_v3i16__0_4_4() {
7843; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_4_4:
7844; GFX900:       ; %bb.0:
7845; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7846; GFX900-NEXT:    ;;#ASMSTART
7847; GFX900-NEXT:    ; def s[4:5]
7848; GFX900-NEXT:    ;;#ASMEND
7849; GFX900-NEXT:    ;;#ASMSTART
7850; GFX900-NEXT:    ; def s[6:7]
7851; GFX900-NEXT:    ;;#ASMEND
7852; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s4, s6
7853; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
7854; GFX900-NEXT:    ;;#ASMSTART
7855; GFX900-NEXT:    ; use s[8:9]
7856; GFX900-NEXT:    ;;#ASMEND
7857; GFX900-NEXT:    s_setpc_b64 s[30:31]
7858;
7859; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_4_4:
7860; GFX90A:       ; %bb.0:
7861; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7862; GFX90A-NEXT:    ;;#ASMSTART
7863; GFX90A-NEXT:    ; def s[4:5]
7864; GFX90A-NEXT:    ;;#ASMEND
7865; GFX90A-NEXT:    ;;#ASMSTART
7866; GFX90A-NEXT:    ; def s[6:7]
7867; GFX90A-NEXT:    ;;#ASMEND
7868; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s4, s6
7869; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
7870; GFX90A-NEXT:    ;;#ASMSTART
7871; GFX90A-NEXT:    ; use s[8:9]
7872; GFX90A-NEXT:    ;;#ASMEND
7873; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7874;
7875; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_4_4:
7876; GFX940:       ; %bb.0:
7877; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7878; GFX940-NEXT:    ;;#ASMSTART
7879; GFX940-NEXT:    ; def s[0:1]
7880; GFX940-NEXT:    ;;#ASMEND
7881; GFX940-NEXT:    ;;#ASMSTART
7882; GFX940-NEXT:    ; def s[2:3]
7883; GFX940-NEXT:    ;;#ASMEND
7884; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s0, s2
7885; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
7886; GFX940-NEXT:    ;;#ASMSTART
7887; GFX940-NEXT:    ; use s[8:9]
7888; GFX940-NEXT:    ;;#ASMEND
7889; GFX940-NEXT:    s_setpc_b64 s[30:31]
7890  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7891  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7892  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7893  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7894  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 0, i32 4, i32 4>
7895  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7896  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7897  ret void
7898}
7899
; Scalar-register variant of the two-input <1, 4, 4> shuffle. Index 1 selects
; from the first operand (%extract3) and index 4 selects from the second
; (%extract31), so both asm-defined values are live and the checks expect two
; "; def" blocks. The expected lowering is s_pack_hh_b32_b16 into s8, taking
; the first register of the first pair and the first register of the second
; pair, followed by s_lshr_b32 by 16 of the first register of the second pair
; into s9. The gfx940 run expects the pairs in s[0:1] and s[2:3]; the other
; two runs expect s[4:5] and s[6:7].
7900define void @s_shuffle_v3i16_v3i16__1_4_4() {
7901; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_4_4:
7902; GFX900:       ; %bb.0:
7903; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7904; GFX900-NEXT:    ;;#ASMSTART
7905; GFX900-NEXT:    ; def s[4:5]
7906; GFX900-NEXT:    ;;#ASMEND
7907; GFX900-NEXT:    ;;#ASMSTART
7908; GFX900-NEXT:    ; def s[6:7]
7909; GFX900-NEXT:    ;;#ASMEND
7910; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s6
7911; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
7912; GFX900-NEXT:    ;;#ASMSTART
7913; GFX900-NEXT:    ; use s[8:9]
7914; GFX900-NEXT:    ;;#ASMEND
7915; GFX900-NEXT:    s_setpc_b64 s[30:31]
7916;
7917; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_4_4:
7918; GFX90A:       ; %bb.0:
7919; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7920; GFX90A-NEXT:    ;;#ASMSTART
7921; GFX90A-NEXT:    ; def s[4:5]
7922; GFX90A-NEXT:    ;;#ASMEND
7923; GFX90A-NEXT:    ;;#ASMSTART
7924; GFX90A-NEXT:    ; def s[6:7]
7925; GFX90A-NEXT:    ;;#ASMEND
7926; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s6
7927; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
7928; GFX90A-NEXT:    ;;#ASMSTART
7929; GFX90A-NEXT:    ; use s[8:9]
7930; GFX90A-NEXT:    ;;#ASMEND
7931; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7932;
7933; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_4_4:
7934; GFX940:       ; %bb.0:
7935; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7936; GFX940-NEXT:    ;;#ASMSTART
7937; GFX940-NEXT:    ; def s[0:1]
7938; GFX940-NEXT:    ;;#ASMEND
7939; GFX940-NEXT:    ;;#ASMSTART
7940; GFX940-NEXT:    ; def s[2:3]
7941; GFX940-NEXT:    ;;#ASMEND
7942; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s2
7943; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
7944; GFX940-NEXT:    ;;#ASMSTART
7945; GFX940-NEXT:    ; use s[8:9]
7946; GFX940-NEXT:    ;;#ASMEND
7947; GFX940-NEXT:    s_setpc_b64 s[30:31]
7948  %vec0 = call <4 x i16> asm "; def $0", "=s"()
7949  %vec1 = call <4 x i16> asm "; def $0", "=s"()
7950  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7951  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
7952  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 1, i32 4, i32 4>
7953  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7954  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
7955  ret void
7956}
7957
; Scalar-register variant of the two-input <2, 4, 4> shuffle. Index 2 selects
; from the first operand (%extract3) and index 4 selects from the second
; (%extract31), so both asm-defined values are live and the checks expect two
; "; def" blocks. The expected lowering is s_pack_lh_b32_b16 into s8, taking
; the second register of the first pair and the first register of the second
; pair, followed by s_lshr_b32 by 16 of the first register of the second pair
; into s9. The gfx940 run expects the pairs in s[0:1] and s[2:3]; the other
; two runs expect s[4:5] and s[6:7].
7958define void @s_shuffle_v3i16_v3i16__2_4_4() {
7959; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_4_4:
7960; GFX900:       ; %bb.0:
7961; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7962; GFX900-NEXT:    ;;#ASMSTART
7963; GFX900-NEXT:    ; def s[4:5]
7964; GFX900-NEXT:    ;;#ASMEND
7965; GFX900-NEXT:    ;;#ASMSTART
7966; GFX900-NEXT:    ; def s[6:7]
7967; GFX900-NEXT:    ;;#ASMEND
7968; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s6
7969; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
7970; GFX900-NEXT:    ;;#ASMSTART
7971; GFX900-NEXT:    ; use s[8:9]
7972; GFX900-NEXT:    ;;#ASMEND
7973; GFX900-NEXT:    s_setpc_b64 s[30:31]
7974;
7975; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_4_4:
7976; GFX90A:       ; %bb.0:
7977; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7978; GFX90A-NEXT:    ;;#ASMSTART
7979; GFX90A-NEXT:    ; def s[4:5]
7980; GFX90A-NEXT:    ;;#ASMEND
7981; GFX90A-NEXT:    ;;#ASMSTART
7982; GFX90A-NEXT:    ; def s[6:7]
7983; GFX90A-NEXT:    ;;#ASMEND
7984; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s6
7985; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
7986; GFX90A-NEXT:    ;;#ASMSTART
7987; GFX90A-NEXT:    ; use s[8:9]
7988; GFX90A-NEXT:    ;;#ASMEND
7989; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7990;
7991; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_4_4:
7992; GFX940:       ; %bb.0:
7993; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7994; GFX940-NEXT:    ;;#ASMSTART
7995; GFX940-NEXT:    ; def s[0:1]
7996; GFX940-NEXT:    ;;#ASMEND
7997; GFX940-NEXT:    ;;#ASMSTART
7998; GFX940-NEXT:    ; def s[2:3]
7999; GFX940-NEXT:    ;;#ASMEND
8000; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s2
8001; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
8002; GFX940-NEXT:    ;;#ASMSTART
8003; GFX940-NEXT:    ; use s[8:9]
8004; GFX940-NEXT:    ;;#ASMEND
8005; GFX940-NEXT:    s_setpc_b64 s[30:31]
8006  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8007  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8008  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8009  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8010  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 2, i32 4, i32 4>
8011  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8012  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8013  ret void
8014}
8015
; Scalar-register variant of the two-input <3, 4, 4> shuffle. All three mask
; indices are >= 3, so every lane selects from the second operand
; (%extract31) and %vec0 is unused; the checks accordingly expect only one
; "; def" asm block. All three RUN lines share the same expected output: the
; pair is defined directly in s[8:9] and a single s_lshr_b32 by 16 of s8
; writes s9 before the "; use" asm.
8016define void @s_shuffle_v3i16_v3i16__3_4_4() {
8017; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_4_4:
8018; GFX9:       ; %bb.0:
8019; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8020; GFX9-NEXT:    ;;#ASMSTART
8021; GFX9-NEXT:    ; def s[8:9]
8022; GFX9-NEXT:    ;;#ASMEND
8023; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
8024; GFX9-NEXT:    ;;#ASMSTART
8025; GFX9-NEXT:    ; use s[8:9]
8026; GFX9-NEXT:    ;;#ASMEND
8027; GFX9-NEXT:    s_setpc_b64 s[30:31]
8028  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8029  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8030  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8031  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8032  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 3, i32 4, i32 4>
8033  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8034  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8035  ret void
8036}
8037
; Scalar-register variant of the two-input <4, 4, 4> shuffle. All three mask
; indices are >= 3, so every lane selects from the second operand
; (%extract31) and %vec0 is unused; the checks accordingly expect only one
; "; def" asm block. The expected lowering is s_lshr_b32 by 16 of the first
; defined register into s9, followed by s_pack_hh_b32_b16 of that same
; register with itself into s8. The gfx940 run expects the pair in s[0:1];
; the other two runs expect s[4:5].
8038define void @s_shuffle_v3i16_v3i16__4_4_4() {
8039; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_4_4:
8040; GFX900:       ; %bb.0:
8041; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8042; GFX900-NEXT:    ;;#ASMSTART
8043; GFX900-NEXT:    ; def s[4:5]
8044; GFX900-NEXT:    ;;#ASMEND
8045; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
8046; GFX900-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
8047; GFX900-NEXT:    ;;#ASMSTART
8048; GFX900-NEXT:    ; use s[8:9]
8049; GFX900-NEXT:    ;;#ASMEND
8050; GFX900-NEXT:    s_setpc_b64 s[30:31]
8051;
8052; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_4_4:
8053; GFX90A:       ; %bb.0:
8054; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8055; GFX90A-NEXT:    ;;#ASMSTART
8056; GFX90A-NEXT:    ; def s[4:5]
8057; GFX90A-NEXT:    ;;#ASMEND
8058; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
8059; GFX90A-NEXT:    s_pack_hh_b32_b16 s8, s4, s4
8060; GFX90A-NEXT:    ;;#ASMSTART
8061; GFX90A-NEXT:    ; use s[8:9]
8062; GFX90A-NEXT:    ;;#ASMEND
8063; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8064;
8065; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_4_4:
8066; GFX940:       ; %bb.0:
8067; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8068; GFX940-NEXT:    ;;#ASMSTART
8069; GFX940-NEXT:    ; def s[0:1]
8070; GFX940-NEXT:    ;;#ASMEND
8071; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
8072; GFX940-NEXT:    s_pack_hh_b32_b16 s8, s0, s0
8073; GFX940-NEXT:    ;;#ASMSTART
8074; GFX940-NEXT:    ; use s[8:9]
8075; GFX940-NEXT:    ;;#ASMEND
8076; GFX940-NEXT:    s_setpc_b64 s[30:31]
8077  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8078  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8079  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8080  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8081  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 4, i32 4>
8082  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8083  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8084  ret void
8085}
8086
; Shuffle of two <3 x i16> values with mask <5, 4, 4>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 3-5 name lanes of
; %extract31, so lane 0 is lane 2 of %extract31 and lanes 1 and 2 are lane 1 of
; %extract31; %vec0 is never selected.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains a single "; def" block.
8087define void @s_shuffle_v3i16_v3i16__5_4_4() {
8088; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_4_4:
8089; GFX900:       ; %bb.0:
8090; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8091; GFX900-NEXT:    ;;#ASMSTART
8092; GFX900-NEXT:    ; def s[4:5]
8093; GFX900-NEXT:    ;;#ASMEND
8094; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
8095; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
8096; GFX900-NEXT:    ;;#ASMSTART
8097; GFX900-NEXT:    ; use s[8:9]
8098; GFX900-NEXT:    ;;#ASMEND
8099; GFX900-NEXT:    s_setpc_b64 s[30:31]
8100;
8101; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_4_4:
8102; GFX90A:       ; %bb.0:
8103; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8104; GFX90A-NEXT:    ;;#ASMSTART
8105; GFX90A-NEXT:    ; def s[4:5]
8106; GFX90A-NEXT:    ;;#ASMEND
8107; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s5, s4
8108; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
8109; GFX90A-NEXT:    ;;#ASMSTART
8110; GFX90A-NEXT:    ; use s[8:9]
8111; GFX90A-NEXT:    ;;#ASMEND
8112; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8113;
8114; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_4_4:
8115; GFX940:       ; %bb.0:
8116; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8117; GFX940-NEXT:    ;;#ASMSTART
8118; GFX940-NEXT:    ; def s[0:1]
8119; GFX940-NEXT:    ;;#ASMEND
8120; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s1, s0
8121; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
8122; GFX940-NEXT:    ;;#ASMSTART
8123; GFX940-NEXT:    ; use s[8:9]
8124; GFX940-NEXT:    ;;#ASMEND
8125; GFX940-NEXT:    s_setpc_b64 s[30:31]
8126  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8127  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8128  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8129  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8130  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 4>
8131  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8132  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8133  ret void
8134}
8135
; Shuffle of two <3 x i16> values with mask <5, poison, 4>; both inputs come
; from inline asm with the "=s" constraint. Mask indices 3-5 name lanes of
; %extract31, so lane 0 is lane 2 of %extract31, lane 1 is unspecified, and
; lane 2 is lane 1 of %extract31; %vec0 is never selected.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains a single "; def" block.
8136define void @s_shuffle_v3i16_v3i16__5_u_4() {
8137; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_u_4:
8138; GFX900:       ; %bb.0:
8139; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8140; GFX900-NEXT:    ;;#ASMSTART
8141; GFX900-NEXT:    ; def s[4:5]
8142; GFX900-NEXT:    ;;#ASMEND
8143; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
8144; GFX900-NEXT:    s_mov_b32 s8, s5
8145; GFX900-NEXT:    ;;#ASMSTART
8146; GFX900-NEXT:    ; use s[8:9]
8147; GFX900-NEXT:    ;;#ASMEND
8148; GFX900-NEXT:    s_setpc_b64 s[30:31]
8149;
8150; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_u_4:
8151; GFX90A:       ; %bb.0:
8152; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8153; GFX90A-NEXT:    ;;#ASMSTART
8154; GFX90A-NEXT:    ; def s[4:5]
8155; GFX90A-NEXT:    ;;#ASMEND
8156; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
8157; GFX90A-NEXT:    s_mov_b32 s8, s5
8158; GFX90A-NEXT:    ;;#ASMSTART
8159; GFX90A-NEXT:    ; use s[8:9]
8160; GFX90A-NEXT:    ;;#ASMEND
8161; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8162;
8163; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_u_4:
8164; GFX940:       ; %bb.0:
8165; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8166; GFX940-NEXT:    ;;#ASMSTART
8167; GFX940-NEXT:    ; def s[0:1]
8168; GFX940-NEXT:    ;;#ASMEND
8169; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
8170; GFX940-NEXT:    s_mov_b32 s8, s1
8171; GFX940-NEXT:    ;;#ASMSTART
8172; GFX940-NEXT:    ; use s[8:9]
8173; GFX940-NEXT:    ;;#ASMEND
8174; GFX940-NEXT:    s_setpc_b64 s[30:31]
8175  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8176  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8177  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8178  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8179  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 4>
8180  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8181  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8182  ret void
8183}
8184
; Shuffle of two <3 x i16> values with mask <5, 0, 4>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 0-2 name lanes of %extract3
; and 3-5 name lanes of %extract31, so lane 0 is lane 2 of %extract31, lane 1
; is lane 0 of %extract3, and lane 2 is lane 1 of %extract31.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains two "; def" blocks.
8185define void @s_shuffle_v3i16_v3i16__5_0_4() {
8186; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_4:
8187; GFX900:       ; %bb.0:
8188; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8189; GFX900-NEXT:    ;;#ASMSTART
8190; GFX900-NEXT:    ; def s[4:5]
8191; GFX900-NEXT:    ;;#ASMEND
8192; GFX900-NEXT:    ;;#ASMSTART
8193; GFX900-NEXT:    ; def s[6:7]
8194; GFX900-NEXT:    ;;#ASMEND
8195; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
8196; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
8197; GFX900-NEXT:    ;;#ASMSTART
8198; GFX900-NEXT:    ; use s[8:9]
8199; GFX900-NEXT:    ;;#ASMEND
8200; GFX900-NEXT:    s_setpc_b64 s[30:31]
8201;
8202; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_4:
8203; GFX90A:       ; %bb.0:
8204; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8205; GFX90A-NEXT:    ;;#ASMSTART
8206; GFX90A-NEXT:    ; def s[4:5]
8207; GFX90A-NEXT:    ;;#ASMEND
8208; GFX90A-NEXT:    ;;#ASMSTART
8209; GFX90A-NEXT:    ; def s[6:7]
8210; GFX90A-NEXT:    ;;#ASMEND
8211; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
8212; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
8213; GFX90A-NEXT:    ;;#ASMSTART
8214; GFX90A-NEXT:    ; use s[8:9]
8215; GFX90A-NEXT:    ;;#ASMEND
8216; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8217;
8218; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_4:
8219; GFX940:       ; %bb.0:
8220; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8221; GFX940-NEXT:    ;;#ASMSTART
8222; GFX940-NEXT:    ; def s[0:1]
8223; GFX940-NEXT:    ;;#ASMEND
8224; GFX940-NEXT:    ;;#ASMSTART
8225; GFX940-NEXT:    ; def s[2:3]
8226; GFX940-NEXT:    ;;#ASMEND
8227; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
8228; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
8229; GFX940-NEXT:    ;;#ASMSTART
8230; GFX940-NEXT:    ; use s[8:9]
8231; GFX940-NEXT:    ;;#ASMEND
8232; GFX940-NEXT:    s_setpc_b64 s[30:31]
8233  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8234  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8235  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8236  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8237  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 4>
8238  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8239  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8240  ret void
8241}
8242
; Shuffle of two <3 x i16> values with mask <5, 1, 4>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 0-2 name lanes of %extract3
; and 3-5 name lanes of %extract31, so lane 0 is lane 2 of %extract31, lane 1
; is lane 1 of %extract3, and lane 2 is lane 1 of %extract31.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains two "; def" blocks.
8243define void @s_shuffle_v3i16_v3i16__5_1_4() {
8244; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_4:
8245; GFX900:       ; %bb.0:
8246; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8247; GFX900-NEXT:    ;;#ASMSTART
8248; GFX900-NEXT:    ; def s[4:5]
8249; GFX900-NEXT:    ;;#ASMEND
8250; GFX900-NEXT:    ;;#ASMSTART
8251; GFX900-NEXT:    ; def s[6:7]
8252; GFX900-NEXT:    ;;#ASMEND
8253; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
8254; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
8255; GFX900-NEXT:    ;;#ASMSTART
8256; GFX900-NEXT:    ; use s[8:9]
8257; GFX900-NEXT:    ;;#ASMEND
8258; GFX900-NEXT:    s_setpc_b64 s[30:31]
8259;
8260; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_4:
8261; GFX90A:       ; %bb.0:
8262; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8263; GFX90A-NEXT:    ;;#ASMSTART
8264; GFX90A-NEXT:    ; def s[4:5]
8265; GFX90A-NEXT:    ;;#ASMEND
8266; GFX90A-NEXT:    ;;#ASMSTART
8267; GFX90A-NEXT:    ; def s[6:7]
8268; GFX90A-NEXT:    ;;#ASMEND
8269; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s7, s4
8270; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
8271; GFX90A-NEXT:    ;;#ASMSTART
8272; GFX90A-NEXT:    ; use s[8:9]
8273; GFX90A-NEXT:    ;;#ASMEND
8274; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8275;
8276; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_4:
8277; GFX940:       ; %bb.0:
8278; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8279; GFX940-NEXT:    ;;#ASMSTART
8280; GFX940-NEXT:    ; def s[0:1]
8281; GFX940-NEXT:    ;;#ASMEND
8282; GFX940-NEXT:    ;;#ASMSTART
8283; GFX940-NEXT:    ; def s[2:3]
8284; GFX940-NEXT:    ;;#ASMEND
8285; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s3, s0
8286; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
8287; GFX940-NEXT:    ;;#ASMSTART
8288; GFX940-NEXT:    ; use s[8:9]
8289; GFX940-NEXT:    ;;#ASMEND
8290; GFX940-NEXT:    s_setpc_b64 s[30:31]
8291  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8292  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8293  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8294  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8295  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 4>
8296  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8297  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8298  ret void
8299}
8300
; Shuffle of two <3 x i16> values with mask <5, 2, 4>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 0-2 name lanes of %extract3
; and 3-5 name lanes of %extract31, so lane 0 is lane 2 of %extract31, lane 1
; is lane 2 of %extract3, and lane 2 is lane 1 of %extract31.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains two "; def" blocks.
8301define void @s_shuffle_v3i16_v3i16__5_2_4() {
8302; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_4:
8303; GFX900:       ; %bb.0:
8304; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8305; GFX900-NEXT:    ;;#ASMSTART
8306; GFX900-NEXT:    ; def s[4:5]
8307; GFX900-NEXT:    ;;#ASMEND
8308; GFX900-NEXT:    ;;#ASMSTART
8309; GFX900-NEXT:    ; def s[6:7]
8310; GFX900-NEXT:    ;;#ASMEND
8311; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
8312; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
8313; GFX900-NEXT:    ;;#ASMSTART
8314; GFX900-NEXT:    ; use s[8:9]
8315; GFX900-NEXT:    ;;#ASMEND
8316; GFX900-NEXT:    s_setpc_b64 s[30:31]
8317;
8318; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_4:
8319; GFX90A:       ; %bb.0:
8320; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8321; GFX90A-NEXT:    ;;#ASMSTART
8322; GFX90A-NEXT:    ; def s[4:5]
8323; GFX90A-NEXT:    ;;#ASMEND
8324; GFX90A-NEXT:    ;;#ASMSTART
8325; GFX90A-NEXT:    ; def s[6:7]
8326; GFX90A-NEXT:    ;;#ASMEND
8327; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
8328; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
8329; GFX90A-NEXT:    ;;#ASMSTART
8330; GFX90A-NEXT:    ; use s[8:9]
8331; GFX90A-NEXT:    ;;#ASMEND
8332; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8333;
8334; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_4:
8335; GFX940:       ; %bb.0:
8336; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8337; GFX940-NEXT:    ;;#ASMSTART
8338; GFX940-NEXT:    ; def s[0:1]
8339; GFX940-NEXT:    ;;#ASMEND
8340; GFX940-NEXT:    ;;#ASMSTART
8341; GFX940-NEXT:    ; def s[2:3]
8342; GFX940-NEXT:    ;;#ASMEND
8343; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
8344; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
8345; GFX940-NEXT:    ;;#ASMSTART
8346; GFX940-NEXT:    ; use s[8:9]
8347; GFX940-NEXT:    ;;#ASMEND
8348; GFX940-NEXT:    s_setpc_b64 s[30:31]
8349  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8350  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8351  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8352  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8353  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 4>
8354  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8355  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8356  ret void
8357}
8358
; Shuffle of two <3 x i16> values with mask <5, 3, 4>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 3-5 name lanes of
; %extract31, so the result is lanes 2, 0, 1 of %extract31 in that order and
; %vec0 is never selected.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains a single "; def" block.
8359define void @s_shuffle_v3i16_v3i16__5_3_4() {
8360; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_3_4:
8361; GFX900:       ; %bb.0:
8362; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8363; GFX900-NEXT:    ;;#ASMSTART
8364; GFX900-NEXT:    ; def s[4:5]
8365; GFX900-NEXT:    ;;#ASMEND
8366; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8367; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
8368; GFX900-NEXT:    ;;#ASMSTART
8369; GFX900-NEXT:    ; use s[8:9]
8370; GFX900-NEXT:    ;;#ASMEND
8371; GFX900-NEXT:    s_setpc_b64 s[30:31]
8372;
8373; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_3_4:
8374; GFX90A:       ; %bb.0:
8375; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8376; GFX90A-NEXT:    ;;#ASMSTART
8377; GFX90A-NEXT:    ; def s[4:5]
8378; GFX90A-NEXT:    ;;#ASMEND
8379; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8380; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
8381; GFX90A-NEXT:    ;;#ASMSTART
8382; GFX90A-NEXT:    ; use s[8:9]
8383; GFX90A-NEXT:    ;;#ASMEND
8384; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8385;
8386; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_3_4:
8387; GFX940:       ; %bb.0:
8388; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8389; GFX940-NEXT:    ;;#ASMSTART
8390; GFX940-NEXT:    ; def s[0:1]
8391; GFX940-NEXT:    ;;#ASMEND
8392; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8393; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
8394; GFX940-NEXT:    ;;#ASMSTART
8395; GFX940-NEXT:    ; use s[8:9]
8396; GFX940-NEXT:    ;;#ASMEND
8397; GFX940-NEXT:    s_setpc_b64 s[30:31]
8398  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8399  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8400  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8401  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8402  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 4>
8403  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8404  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8405  ret void
8406}
8407
; Shuffle of two <3 x i16> values with mask <poison, 5, 5>; both inputs come
; from inline asm with the "=s" constraint. Mask indices 3-5 name lanes of
; %extract31, so lane 0 is unspecified and lanes 1 and 2 are lane 2 of
; %extract31; %vec0 is never selected.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. The expected output is listed once, under the GFX9 prefix that all
; three RUN lines share, and contains a single "; def" block.
8408define void @s_shuffle_v3i16_v3i16__u_5_5() {
8409; GFX9-LABEL: s_shuffle_v3i16_v3i16__u_5_5:
8410; GFX9:       ; %bb.0:
8411; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8412; GFX9-NEXT:    ;;#ASMSTART
8413; GFX9-NEXT:    ; def s[8:9]
8414; GFX9-NEXT:    ;;#ASMEND
8415; GFX9-NEXT:    s_lshl_b32 s8, s9, 16
8416; GFX9-NEXT:    ;;#ASMSTART
8417; GFX9-NEXT:    ; use s[8:9]
8418; GFX9-NEXT:    ;;#ASMEND
8419; GFX9-NEXT:    s_setpc_b64 s[30:31]
8420  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8421  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8422  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8423  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8424  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 poison, i32 5, i32 5>
8425  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8426  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8427  ret void
8428}
8429
; Shuffle of two <3 x i16> values with mask <0, 5, 5>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 0-2 name lanes of %extract3
; and 3-5 name lanes of %extract31, so lane 0 is lane 0 of %extract3 and lanes
; 1 and 2 are lane 2 of %extract31.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains two "; def" blocks.
8430define void @s_shuffle_v3i16_v3i16__0_5_5() {
8431; GFX900-LABEL: s_shuffle_v3i16_v3i16__0_5_5:
8432; GFX900:       ; %bb.0:
8433; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8434; GFX900-NEXT:    ;;#ASMSTART
8435; GFX900-NEXT:    ; def s[8:9]
8436; GFX900-NEXT:    ;;#ASMEND
8437; GFX900-NEXT:    ;;#ASMSTART
8438; GFX900-NEXT:    ; def s[4:5]
8439; GFX900-NEXT:    ;;#ASMEND
8440; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
8441; GFX900-NEXT:    ;;#ASMSTART
8442; GFX900-NEXT:    ; use s[8:9]
8443; GFX900-NEXT:    ;;#ASMEND
8444; GFX900-NEXT:    s_setpc_b64 s[30:31]
8445;
8446; GFX90A-LABEL: s_shuffle_v3i16_v3i16__0_5_5:
8447; GFX90A:       ; %bb.0:
8448; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8449; GFX90A-NEXT:    ;;#ASMSTART
8450; GFX90A-NEXT:    ; def s[8:9]
8451; GFX90A-NEXT:    ;;#ASMEND
8452; GFX90A-NEXT:    ;;#ASMSTART
8453; GFX90A-NEXT:    ; def s[4:5]
8454; GFX90A-NEXT:    ;;#ASMEND
8455; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
8456; GFX90A-NEXT:    ;;#ASMSTART
8457; GFX90A-NEXT:    ; use s[8:9]
8458; GFX90A-NEXT:    ;;#ASMEND
8459; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8460;
8461; GFX940-LABEL: s_shuffle_v3i16_v3i16__0_5_5:
8462; GFX940:       ; %bb.0:
8463; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8464; GFX940-NEXT:    ;;#ASMSTART
8465; GFX940-NEXT:    ; def s[8:9]
8466; GFX940-NEXT:    ;;#ASMEND
8467; GFX940-NEXT:    ;;#ASMSTART
8468; GFX940-NEXT:    ; def s[0:1]
8469; GFX940-NEXT:    ;;#ASMEND
8470; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
8471; GFX940-NEXT:    ;;#ASMSTART
8472; GFX940-NEXT:    ; use s[8:9]
8473; GFX940-NEXT:    ;;#ASMEND
8474; GFX940-NEXT:    s_setpc_b64 s[30:31]
8475  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8476  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8477  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8478  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8479  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 0, i32 5, i32 5>
8480  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8481  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8482  ret void
8483}
8484
; Shuffle of two <3 x i16> values with mask <1, 5, 5>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 0-2 name lanes of %extract3
; and 3-5 name lanes of %extract31, so lane 0 is lane 1 of %extract3 and lanes
; 1 and 2 are lane 2 of %extract31.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains two "; def" blocks.
8485define void @s_shuffle_v3i16_v3i16__1_5_5() {
8486; GFX900-LABEL: s_shuffle_v3i16_v3i16__1_5_5:
8487; GFX900:       ; %bb.0:
8488; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8489; GFX900-NEXT:    ;;#ASMSTART
8490; GFX900-NEXT:    ; def s[4:5]
8491; GFX900-NEXT:    ;;#ASMEND
8492; GFX900-NEXT:    ;;#ASMSTART
8493; GFX900-NEXT:    ; def s[8:9]
8494; GFX900-NEXT:    ;;#ASMEND
8495; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
8496; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
8497; GFX900-NEXT:    ;;#ASMSTART
8498; GFX900-NEXT:    ; use s[8:9]
8499; GFX900-NEXT:    ;;#ASMEND
8500; GFX900-NEXT:    s_setpc_b64 s[30:31]
8501;
8502; GFX90A-LABEL: s_shuffle_v3i16_v3i16__1_5_5:
8503; GFX90A:       ; %bb.0:
8504; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8505; GFX90A-NEXT:    ;;#ASMSTART
8506; GFX90A-NEXT:    ; def s[4:5]
8507; GFX90A-NEXT:    ;;#ASMEND
8508; GFX90A-NEXT:    ;;#ASMSTART
8509; GFX90A-NEXT:    ; def s[8:9]
8510; GFX90A-NEXT:    ;;#ASMEND
8511; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
8512; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
8513; GFX90A-NEXT:    ;;#ASMSTART
8514; GFX90A-NEXT:    ; use s[8:9]
8515; GFX90A-NEXT:    ;;#ASMEND
8516; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8517;
8518; GFX940-LABEL: s_shuffle_v3i16_v3i16__1_5_5:
8519; GFX940:       ; %bb.0:
8520; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8521; GFX940-NEXT:    ;;#ASMSTART
8522; GFX940-NEXT:    ; def s[0:1]
8523; GFX940-NEXT:    ;;#ASMEND
8524; GFX940-NEXT:    ;;#ASMSTART
8525; GFX940-NEXT:    ; def s[8:9]
8526; GFX940-NEXT:    ;;#ASMEND
8527; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
8528; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
8529; GFX940-NEXT:    ;;#ASMSTART
8530; GFX940-NEXT:    ; use s[8:9]
8531; GFX940-NEXT:    ;;#ASMEND
8532; GFX940-NEXT:    s_setpc_b64 s[30:31]
8533  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8534  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8535  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8536  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8537  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 1, i32 5, i32 5>
8538  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8539  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8540  ret void
8541}
8542
; Shuffle of two <3 x i16> values with mask <2, 5, 5>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 0-2 name lanes of %extract3
; and 3-5 name lanes of %extract31, so lane 0 is lane 2 of %extract3 and lanes
; 1 and 2 are lane 2 of %extract31.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains two "; def" blocks.
8543define void @s_shuffle_v3i16_v3i16__2_5_5() {
8544; GFX900-LABEL: s_shuffle_v3i16_v3i16__2_5_5:
8545; GFX900:       ; %bb.0:
8546; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8547; GFX900-NEXT:    ;;#ASMSTART
8548; GFX900-NEXT:    ; def s[8:9]
8549; GFX900-NEXT:    ;;#ASMEND
8550; GFX900-NEXT:    ;;#ASMSTART
8551; GFX900-NEXT:    ; def s[4:5]
8552; GFX900-NEXT:    ;;#ASMEND
8553; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
8554; GFX900-NEXT:    ;;#ASMSTART
8555; GFX900-NEXT:    ; use s[8:9]
8556; GFX900-NEXT:    ;;#ASMEND
8557; GFX900-NEXT:    s_setpc_b64 s[30:31]
8558;
8559; GFX90A-LABEL: s_shuffle_v3i16_v3i16__2_5_5:
8560; GFX90A:       ; %bb.0:
8561; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8562; GFX90A-NEXT:    ;;#ASMSTART
8563; GFX90A-NEXT:    ; def s[8:9]
8564; GFX90A-NEXT:    ;;#ASMEND
8565; GFX90A-NEXT:    ;;#ASMSTART
8566; GFX90A-NEXT:    ; def s[4:5]
8567; GFX90A-NEXT:    ;;#ASMEND
8568; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
8569; GFX90A-NEXT:    ;;#ASMSTART
8570; GFX90A-NEXT:    ; use s[8:9]
8571; GFX90A-NEXT:    ;;#ASMEND
8572; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8573;
8574; GFX940-LABEL: s_shuffle_v3i16_v3i16__2_5_5:
8575; GFX940:       ; %bb.0:
8576; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8577; GFX940-NEXT:    ;;#ASMSTART
8578; GFX940-NEXT:    ; def s[8:9]
8579; GFX940-NEXT:    ;;#ASMEND
8580; GFX940-NEXT:    ;;#ASMSTART
8581; GFX940-NEXT:    ; def s[0:1]
8582; GFX940-NEXT:    ;;#ASMEND
8583; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
8584; GFX940-NEXT:    ;;#ASMSTART
8585; GFX940-NEXT:    ; use s[8:9]
8586; GFX940-NEXT:    ;;#ASMEND
8587; GFX940-NEXT:    s_setpc_b64 s[30:31]
8588  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8589  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8590  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8591  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8592  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 2, i32 5, i32 5>
8593  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8594  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8595  ret void
8596}
8597
; Shuffle of two <3 x i16> values with mask <3, 5, 5>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 3-5 name lanes of
; %extract31, so lane 0 is lane 0 of %extract31 and lanes 1 and 2 are lane 2 of
; %extract31; %vec0 is never selected.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. The expected output is listed once, under the GFX9 prefix that all
; three RUN lines share, and contains a single "; def" block.
8598define void @s_shuffle_v3i16_v3i16__3_5_5() {
8599; GFX9-LABEL: s_shuffle_v3i16_v3i16__3_5_5:
8600; GFX9:       ; %bb.0:
8601; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8602; GFX9-NEXT:    ;;#ASMSTART
8603; GFX9-NEXT:    ; def s[8:9]
8604; GFX9-NEXT:    ;;#ASMEND
8605; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s8, s9
8606; GFX9-NEXT:    ;;#ASMSTART
8607; GFX9-NEXT:    ; use s[8:9]
8608; GFX9-NEXT:    ;;#ASMEND
8609; GFX9-NEXT:    s_setpc_b64 s[30:31]
8610  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8611  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8612  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8613  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8614  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 3, i32 5, i32 5>
8615  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8616  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8617  ret void
8618}
8619
; Shuffle of two <3 x i16> values with mask <4, 5, 5>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 3-5 name lanes of
; %extract31, so lane 0 is lane 1 of %extract31 and lanes 1 and 2 are lane 2 of
; %extract31; %vec0 is never selected.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains a single "; def" block.
8620define void @s_shuffle_v3i16_v3i16__4_5_5() {
8621; GFX900-LABEL: s_shuffle_v3i16_v3i16__4_5_5:
8622; GFX900:       ; %bb.0:
8623; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8624; GFX900-NEXT:    ;;#ASMSTART
8625; GFX900-NEXT:    ; def s[8:9]
8626; GFX900-NEXT:    ;;#ASMEND
8627; GFX900-NEXT:    s_lshr_b32 s4, s8, 16
8628; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
8629; GFX900-NEXT:    ;;#ASMSTART
8630; GFX900-NEXT:    ; use s[8:9]
8631; GFX900-NEXT:    ;;#ASMEND
8632; GFX900-NEXT:    s_setpc_b64 s[30:31]
8633;
8634; GFX90A-LABEL: s_shuffle_v3i16_v3i16__4_5_5:
8635; GFX90A:       ; %bb.0:
8636; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8637; GFX90A-NEXT:    ;;#ASMSTART
8638; GFX90A-NEXT:    ; def s[8:9]
8639; GFX90A-NEXT:    ;;#ASMEND
8640; GFX90A-NEXT:    s_lshr_b32 s4, s8, 16
8641; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
8642; GFX90A-NEXT:    ;;#ASMSTART
8643; GFX90A-NEXT:    ; use s[8:9]
8644; GFX90A-NEXT:    ;;#ASMEND
8645; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8646;
8647; GFX940-LABEL: s_shuffle_v3i16_v3i16__4_5_5:
8648; GFX940:       ; %bb.0:
8649; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8650; GFX940-NEXT:    ;;#ASMSTART
8651; GFX940-NEXT:    ; def s[8:9]
8652; GFX940-NEXT:    ;;#ASMEND
8653; GFX940-NEXT:    s_lshr_b32 s0, s8, 16
8654; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
8655; GFX940-NEXT:    ;;#ASMSTART
8656; GFX940-NEXT:    ; use s[8:9]
8657; GFX940-NEXT:    ;;#ASMEND
8658; GFX940-NEXT:    s_setpc_b64 s[30:31]
8659  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8660  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8661  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8662  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8663  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 4, i32 5, i32 5>
8664  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8665  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8666  ret void
8667}
8668
; Shuffle of two <3 x i16> values with mask <5, poison, 5>; both inputs come
; from inline asm with the "=s" constraint. Mask indices 3-5 name lanes of
; %extract31, so lanes 0 and 2 are lane 2 of %extract31 and lane 1 is
; unspecified; %vec0 is never selected.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. The expected output is listed once, under the GFX9 prefix that all
; three RUN lines share, and contains a single "; def" block.
8669define void @s_shuffle_v3i16_v3i16__5_u_5() {
8670; GFX9-LABEL: s_shuffle_v3i16_v3i16__5_u_5:
8671; GFX9:       ; %bb.0:
8672; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8673; GFX9-NEXT:    ;;#ASMSTART
8674; GFX9-NEXT:    ; def s[8:9]
8675; GFX9-NEXT:    ;;#ASMEND
8676; GFX9-NEXT:    s_mov_b32 s8, s9
8677; GFX9-NEXT:    ;;#ASMSTART
8678; GFX9-NEXT:    ; use s[8:9]
8679; GFX9-NEXT:    ;;#ASMEND
8680; GFX9-NEXT:    s_setpc_b64 s[30:31]
8681  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8682  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8683  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8684  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8685  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 poison, i32 5>
8686  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8687  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8688  ret void
8689}
8690
; Shuffle of two <3 x i16> values with mask <5, 0, 5>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 0-2 name lanes of %extract3
; and 3-5 name lanes of %extract31, so lanes 0 and 2 are lane 2 of %extract31
; and lane 1 is lane 0 of %extract3.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains two "; def" blocks.
8691define void @s_shuffle_v3i16_v3i16__5_0_5() {
8692; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_0_5:
8693; GFX900:       ; %bb.0:
8694; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8695; GFX900-NEXT:    ;;#ASMSTART
8696; GFX900-NEXT:    ; def s[8:9]
8697; GFX900-NEXT:    ;;#ASMEND
8698; GFX900-NEXT:    ;;#ASMSTART
8699; GFX900-NEXT:    ; def s[4:5]
8700; GFX900-NEXT:    ;;#ASMEND
8701; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
8702; GFX900-NEXT:    ;;#ASMSTART
8703; GFX900-NEXT:    ; use s[8:9]
8704; GFX900-NEXT:    ;;#ASMEND
8705; GFX900-NEXT:    s_setpc_b64 s[30:31]
8706;
8707; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_0_5:
8708; GFX90A:       ; %bb.0:
8709; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8710; GFX90A-NEXT:    ;;#ASMSTART
8711; GFX90A-NEXT:    ; def s[8:9]
8712; GFX90A-NEXT:    ;;#ASMEND
8713; GFX90A-NEXT:    ;;#ASMSTART
8714; GFX90A-NEXT:    ; def s[4:5]
8715; GFX90A-NEXT:    ;;#ASMEND
8716; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
8717; GFX90A-NEXT:    ;;#ASMSTART
8718; GFX90A-NEXT:    ; use s[8:9]
8719; GFX90A-NEXT:    ;;#ASMEND
8720; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8721;
8722; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_0_5:
8723; GFX940:       ; %bb.0:
8724; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8725; GFX940-NEXT:    ;;#ASMSTART
8726; GFX940-NEXT:    ; def s[8:9]
8727; GFX940-NEXT:    ;;#ASMEND
8728; GFX940-NEXT:    ;;#ASMSTART
8729; GFX940-NEXT:    ; def s[0:1]
8730; GFX940-NEXT:    ;;#ASMEND
8731; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
8732; GFX940-NEXT:    ;;#ASMSTART
8733; GFX940-NEXT:    ; use s[8:9]
8734; GFX940-NEXT:    ;;#ASMEND
8735; GFX940-NEXT:    s_setpc_b64 s[30:31]
8736  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8737  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8738  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8739  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8740  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 0, i32 5>
8741  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8742  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8743  ret void
8744}
8745
; Shuffle of two <3 x i16> values with mask <5, 1, 5>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 0-2 name lanes of %extract3
; and 3-5 name lanes of %extract31, so lanes 0 and 2 are lane 2 of %extract31
; and lane 1 is lane 1 of %extract3.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains two "; def" blocks.
8746define void @s_shuffle_v3i16_v3i16__5_1_5() {
8747; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_1_5:
8748; GFX900:       ; %bb.0:
8749; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8750; GFX900-NEXT:    ;;#ASMSTART
8751; GFX900-NEXT:    ; def s[8:9]
8752; GFX900-NEXT:    ;;#ASMEND
8753; GFX900-NEXT:    ;;#ASMSTART
8754; GFX900-NEXT:    ; def s[4:5]
8755; GFX900-NEXT:    ;;#ASMEND
8756; GFX900-NEXT:    s_pack_lh_b32_b16 s8, s9, s4
8757; GFX900-NEXT:    ;;#ASMSTART
8758; GFX900-NEXT:    ; use s[8:9]
8759; GFX900-NEXT:    ;;#ASMEND
8760; GFX900-NEXT:    s_setpc_b64 s[30:31]
8761;
8762; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_1_5:
8763; GFX90A:       ; %bb.0:
8764; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8765; GFX90A-NEXT:    ;;#ASMSTART
8766; GFX90A-NEXT:    ; def s[8:9]
8767; GFX90A-NEXT:    ;;#ASMEND
8768; GFX90A-NEXT:    ;;#ASMSTART
8769; GFX90A-NEXT:    ; def s[4:5]
8770; GFX90A-NEXT:    ;;#ASMEND
8771; GFX90A-NEXT:    s_pack_lh_b32_b16 s8, s9, s4
8772; GFX90A-NEXT:    ;;#ASMSTART
8773; GFX90A-NEXT:    ; use s[8:9]
8774; GFX90A-NEXT:    ;;#ASMEND
8775; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8776;
8777; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_1_5:
8778; GFX940:       ; %bb.0:
8779; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8780; GFX940-NEXT:    ;;#ASMSTART
8781; GFX940-NEXT:    ; def s[8:9]
8782; GFX940-NEXT:    ;;#ASMEND
8783; GFX940-NEXT:    ;;#ASMSTART
8784; GFX940-NEXT:    ; def s[0:1]
8785; GFX940-NEXT:    ;;#ASMEND
8786; GFX940-NEXT:    s_pack_lh_b32_b16 s8, s9, s0
8787; GFX940-NEXT:    ;;#ASMSTART
8788; GFX940-NEXT:    ; use s[8:9]
8789; GFX940-NEXT:    ;;#ASMEND
8790; GFX940-NEXT:    s_setpc_b64 s[30:31]
8791  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8792  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8793  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8794  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8795  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 1, i32 5>
8796  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8797  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8798  ret void
8799}
8800
; Shuffle of two <3 x i16> values with mask <5, 2, 5>; both inputs come from
; inline asm with the "=s" constraint. Mask indices 0-2 name lanes of %extract3
; and 3-5 name lanes of %extract31, so lanes 0 and 2 are lane 2 of %extract31
; and lane 1 is lane 2 of %extract3.
; Each input is the low three lanes of an asm-defined <4 x i16>; the result is
; widened back to <4 x i16> (lane 3 poison) and consumed by an asm use pinned
; to s[8:9]. Every target's expected output contains two "; def" blocks.
8801define void @s_shuffle_v3i16_v3i16__5_2_5() {
8802; GFX900-LABEL: s_shuffle_v3i16_v3i16__5_2_5:
8803; GFX900:       ; %bb.0:
8804; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8805; GFX900-NEXT:    ;;#ASMSTART
8806; GFX900-NEXT:    ; def s[8:9]
8807; GFX900-NEXT:    ;;#ASMEND
8808; GFX900-NEXT:    ;;#ASMSTART
8809; GFX900-NEXT:    ; def s[4:5]
8810; GFX900-NEXT:    ;;#ASMEND
8811; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
8812; GFX900-NEXT:    ;;#ASMSTART
8813; GFX900-NEXT:    ; use s[8:9]
8814; GFX900-NEXT:    ;;#ASMEND
8815; GFX900-NEXT:    s_setpc_b64 s[30:31]
8816;
8817; GFX90A-LABEL: s_shuffle_v3i16_v3i16__5_2_5:
8818; GFX90A:       ; %bb.0:
8819; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8820; GFX90A-NEXT:    ;;#ASMSTART
8821; GFX90A-NEXT:    ; def s[8:9]
8822; GFX90A-NEXT:    ;;#ASMEND
8823; GFX90A-NEXT:    ;;#ASMSTART
8824; GFX90A-NEXT:    ; def s[4:5]
8825; GFX90A-NEXT:    ;;#ASMEND
8826; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
8827; GFX90A-NEXT:    ;;#ASMSTART
8828; GFX90A-NEXT:    ; use s[8:9]
8829; GFX90A-NEXT:    ;;#ASMEND
8830; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8831;
8832; GFX940-LABEL: s_shuffle_v3i16_v3i16__5_2_5:
8833; GFX940:       ; %bb.0:
8834; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8835; GFX940-NEXT:    ;;#ASMSTART
8836; GFX940-NEXT:    ; def s[8:9]
8837; GFX940-NEXT:    ;;#ASMEND
8838; GFX940-NEXT:    ;;#ASMSTART
8839; GFX940-NEXT:    ; def s[0:1]
8840; GFX940-NEXT:    ;;#ASMEND
8841; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s1
8842; GFX940-NEXT:    ;;#ASMSTART
8843; GFX940-NEXT:    ; use s[8:9]
8844; GFX940-NEXT:    ;;#ASMEND
8845; GFX940-NEXT:    s_setpc_b64 s[30:31]
8846  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8847  %vec1 = call <4 x i16> asm "; def $0", "=s"()
8848  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8849  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8850  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 2, i32 5>
8851  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8852  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8853  ret void
8854}
8855
; Scalar-register shuffle of two <3 x i16> values with mask <5, 3, 5>.
; Indices 3-5 all refer to the second shuffle operand (%extract31, lanes 0-2 of
; %vec1), so the result is <%vec1[2], %vec1[0], %vec1[2]> and %vec0 does not
; contribute to it. The expected assembly, shared by all three subtargets,
; contains a single "; def" block and one s_pack_ll_b32_b16 that writes s8.
8856define void @s_shuffle_v3i16_v3i16__5_3_5() {
8857; GFX9-LABEL: s_shuffle_v3i16_v3i16__5_3_5:
8858; GFX9:       ; %bb.0:
8859; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8860; GFX9-NEXT:    ;;#ASMSTART
8861; GFX9-NEXT:    ; def s[8:9]
8862; GFX9-NEXT:    ;;#ASMEND
8863; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s9, s8
8864; GFX9-NEXT:    ;;#ASMSTART
8865; GFX9-NEXT:    ; use s[8:9]
8866; GFX9-NEXT:    ;;#ASMEND
8867; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; Each input is produced by inline asm with an "=s" output constraint.
8868  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8869  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Narrow both <4 x i16> inputs to their first three lanes.
8870  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8871  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lanes <%vec1[2], %vec1[0], %vec1[2]>.
8872  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 3, i32 5>
  ; Widen back to <4 x i16> (fourth lane poison) and hand the value to inline
  ; asm whose operand is pinned to s[8:9].
8873  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8874  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8875  ret void
8876}
8877
; Scalar-register shuffle of two <3 x i16> values with mask <5, 4, 5>.
; Indices 3-5 all refer to the second shuffle operand (%extract31, lanes 0-2 of
; %vec1), so the result is <%vec1[2], %vec1[1], %vec1[2]> and %vec0 does not
; contribute to it. The expected assembly, shared by all three subtargets,
; contains a single "; def" block and one s_pack_lh_b32_b16 (rather than the
; s_pack_ll_b32_b16 form) that writes s8.
8878define void @s_shuffle_v3i16_v3i16__5_4_5() {
8879; GFX9-LABEL: s_shuffle_v3i16_v3i16__5_4_5:
8880; GFX9:       ; %bb.0:
8881; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8882; GFX9-NEXT:    ;;#ASMSTART
8883; GFX9-NEXT:    ; def s[8:9]
8884; GFX9-NEXT:    ;;#ASMEND
8885; GFX9-NEXT:    s_pack_lh_b32_b16 s8, s9, s8
8886; GFX9-NEXT:    ;;#ASMSTART
8887; GFX9-NEXT:    ; use s[8:9]
8888; GFX9-NEXT:    ;;#ASMEND
8889; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; Each input is produced by inline asm with an "=s" output constraint.
8890  %vec0 = call <4 x i16> asm "; def $0", "=s"()
8891  %vec1 = call <4 x i16> asm "; def $0", "=s"()
  ; Narrow both <4 x i16> inputs to their first three lanes.
8892  %extract3 = shufflevector <4 x i16> %vec0, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
8893  %extract31 = shufflevector <4 x i16> %vec1, <4 x i16> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Shuffle under test: lanes <%vec1[2], %vec1[1], %vec1[2]>.
8894  %shuf = shufflevector <3 x i16> %extract3, <3 x i16> %extract31, <3 x i32> <i32 5, i32 4, i32 5>
  ; Widen back to <4 x i16> (fourth lane poison) and hand the value to inline
  ; asm whose operand is pinned to s[8:9].
8895  %extend3 = shufflevector <3 x i16> %shuf, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8896  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x i16> %extend3)
8897  ret void
8898}
8899;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
8900; GFX90APLUS: {{.*}}
8901