xref: /llvm-project/llvm/test/CodeGen/AMDGPU/shufflevector.v3f16.v4f16.ll (revision 585858aeb6247b3892218edb9d353c63f1c33186)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
5
6
; Mask is entirely poison, so every lane of the stored <3 x half> is poison.
; The shared gfx9 checks below expect nothing but the entry wait and the
; return: neither the asm def nor a store appears in the expected output.
7define void @v_shuffle_v3f16_v4f16__u_u_u(ptr addrspace(1) inreg %ptr) {
8; GFX9-LABEL: v_shuffle_v3f16_v4f16__u_u_u:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    s_setpc_b64 s[30:31]
12  %vec0 = call <4 x half> asm "; def $0", "=v"()
13  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> poison
14  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
15  ret void
16}
17
; Mask <0, poison, poison>: result lane 0 is element 0 of %vec0; lanes 1 and 2
; are poison. The expected output stores the asm-defined register pair as is:
; a short store of the second register at offset 4 plus a dword store of the
; first register. The gfx940 checks use s[0:1] as the pointer and add sc0 sc1
; to the stores; the other two targets use s[16:17].
18define void @v_shuffle_v3f16_v4f16__0_u_u(ptr addrspace(1) inreg %ptr) {
19; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_u_u:
20; GFX900:       ; %bb.0:
21; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX900-NEXT:    v_mov_b32_e32 v2, 0
23; GFX900-NEXT:    ;;#ASMSTART
24; GFX900-NEXT:    ; def v[0:1]
25; GFX900-NEXT:    ;;#ASMEND
26; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
27; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
28; GFX900-NEXT:    s_waitcnt vmcnt(0)
29; GFX900-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_u_u:
32; GFX90A:       ; %bb.0:
33; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
35; GFX90A-NEXT:    ;;#ASMSTART
36; GFX90A-NEXT:    ; def v[0:1]
37; GFX90A-NEXT:    ;;#ASMEND
38; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
39; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
40; GFX90A-NEXT:    s_waitcnt vmcnt(0)
41; GFX90A-NEXT:    s_setpc_b64 s[30:31]
42;
43; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_u_u:
44; GFX940:       ; %bb.0:
45; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GFX940-NEXT:    v_mov_b32_e32 v2, 0
47; GFX940-NEXT:    ;;#ASMSTART
48; GFX940-NEXT:    ; def v[0:1]
49; GFX940-NEXT:    ;;#ASMEND
50; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
51; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
52; GFX940-NEXT:    s_waitcnt vmcnt(0)
53; GFX940-NEXT:    s_setpc_b64 s[30:31]
54  %vec0 = call <4 x half> asm "; def $0", "=v"()
55  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
56  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
57  ret void
58}
59
; Mask <1, poison, poison>: result lane 0 is element 1 of %vec0; lanes 1 and 2
; are poison. The expected output is a v_alignbit_b32 by 16 on the first
; register of the asm def followed by a single dword store (no separate short
; store for the third lane).
60define void @v_shuffle_v3f16_v4f16__1_u_u(ptr addrspace(1) inreg %ptr) {
61; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_u_u:
62; GFX900:       ; %bb.0:
63; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GFX900-NEXT:    ;;#ASMSTART
65; GFX900-NEXT:    ; def v[0:1]
66; GFX900-NEXT:    ;;#ASMEND
67; GFX900-NEXT:    v_mov_b32_e32 v2, 0
68; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
69; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
70; GFX900-NEXT:    s_waitcnt vmcnt(0)
71; GFX900-NEXT:    s_setpc_b64 s[30:31]
72;
73; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_u_u:
74; GFX90A:       ; %bb.0:
75; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76; GFX90A-NEXT:    ;;#ASMSTART
77; GFX90A-NEXT:    ; def v[0:1]
78; GFX90A-NEXT:    ;;#ASMEND
79; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
80; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
81; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
82; GFX90A-NEXT:    s_waitcnt vmcnt(0)
83; GFX90A-NEXT:    s_setpc_b64 s[30:31]
84;
85; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_u_u:
86; GFX940:       ; %bb.0:
87; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88; GFX940-NEXT:    ;;#ASMSTART
89; GFX940-NEXT:    ; def v[0:1]
90; GFX940-NEXT:    ;;#ASMEND
91; GFX940-NEXT:    v_mov_b32_e32 v2, 0
92; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
93; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
94; GFX940-NEXT:    s_waitcnt vmcnt(0)
95; GFX940-NEXT:    s_setpc_b64 s[30:31]
96  %vec0 = call <4 x half> asm "; def $0", "=v"()
97  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
98  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
99  ret void
100}
101
; Mask <2, poison, poison>: result lane 0 is element 2 of %vec0; lanes 1 and 2
; are poison. The expected output stores the second register of the asm def
; (v1) directly with one dword store; no shuffle instruction is checked for.
102define void @v_shuffle_v3f16_v4f16__2_u_u(ptr addrspace(1) inreg %ptr) {
103; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_u_u:
104; GFX900:       ; %bb.0:
105; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX900-NEXT:    v_mov_b32_e32 v2, 0
107; GFX900-NEXT:    ;;#ASMSTART
108; GFX900-NEXT:    ; def v[0:1]
109; GFX900-NEXT:    ;;#ASMEND
110; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
111; GFX900-NEXT:    s_waitcnt vmcnt(0)
112; GFX900-NEXT:    s_setpc_b64 s[30:31]
113;
114; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_u_u:
115; GFX90A:       ; %bb.0:
116; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
118; GFX90A-NEXT:    ;;#ASMSTART
119; GFX90A-NEXT:    ; def v[0:1]
120; GFX90A-NEXT:    ;;#ASMEND
121; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
122; GFX90A-NEXT:    s_waitcnt vmcnt(0)
123; GFX90A-NEXT:    s_setpc_b64 s[30:31]
124;
125; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_u_u:
126; GFX940:       ; %bb.0:
127; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; GFX940-NEXT:    v_mov_b32_e32 v2, 0
129; GFX940-NEXT:    ;;#ASMSTART
130; GFX940-NEXT:    ; def v[0:1]
131; GFX940-NEXT:    ;;#ASMEND
132; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
133; GFX940-NEXT:    s_waitcnt vmcnt(0)
134; GFX940-NEXT:    s_setpc_b64 s[30:31]
135  %vec0 = call <4 x half> asm "; def $0", "=v"()
136  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
137  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
138  ret void
139}
140
; Mask <3, poison, poison>: result lane 0 is element 3 of %vec0; lanes 1 and 2
; are poison. The expected output is a v_alignbit_b32 by 16 on the second
; register of the asm def (v1) followed by a single dword store.
141define void @v_shuffle_v3f16_v4f16__3_u_u(ptr addrspace(1) inreg %ptr) {
142; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_u_u:
143; GFX900:       ; %bb.0:
144; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; GFX900-NEXT:    ;;#ASMSTART
146; GFX900-NEXT:    ; def v[0:1]
147; GFX900-NEXT:    ;;#ASMEND
148; GFX900-NEXT:    v_mov_b32_e32 v2, 0
149; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
150; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
151; GFX900-NEXT:    s_waitcnt vmcnt(0)
152; GFX900-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_u_u:
155; GFX90A:       ; %bb.0:
156; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX90A-NEXT:    ;;#ASMSTART
158; GFX90A-NEXT:    ; def v[0:1]
159; GFX90A-NEXT:    ;;#ASMEND
160; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
161; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
162; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
163; GFX90A-NEXT:    s_waitcnt vmcnt(0)
164; GFX90A-NEXT:    s_setpc_b64 s[30:31]
165;
166; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_u_u:
167; GFX940:       ; %bb.0:
168; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169; GFX940-NEXT:    ;;#ASMSTART
170; GFX940-NEXT:    ; def v[0:1]
171; GFX940-NEXT:    ;;#ASMEND
172; GFX940-NEXT:    v_mov_b32_e32 v2, 0
173; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
174; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
175; GFX940-NEXT:    s_waitcnt vmcnt(0)
176; GFX940-NEXT:    s_setpc_b64 s[30:31]
177  %vec0 = call <4 x half> asm "; def $0", "=v"()
178  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
179  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
180  ret void
181}
182
; Mask <4, poison, poison>: index 4 addresses element 0 of the second shuffle
; operand (mask indices count across both operands), and that operand is
; poison in this function. Every result lane is therefore poison, and the
; shared gfx9 checks expect only the entry wait and the return.
183define void @v_shuffle_v3f16_v4f16__4_u_u(ptr addrspace(1) inreg %ptr) {
184; GFX9-LABEL: v_shuffle_v3f16_v4f16__4_u_u:
185; GFX9:       ; %bb.0:
186; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; GFX9-NEXT:    s_setpc_b64 s[30:31]
188  %vec0 = call <4 x half> asm "; def $0", "=v"()
189  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 poison, i32 poison>
190  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
191  ret void
192}
193
; Mask <5, poison, poison>: result lane 0 is element 1 of %vec1 (index 5 is
; element 1 of the second operand); lanes 1 and 2 are poison. %vec0 does not
; contribute to the result, and the expected output contains only one asm
; def, followed by a v_alignbit_b32 by 16 on v0 and a single dword store.
194define void @v_shuffle_v3f16_v4f16__5_u_u(ptr addrspace(1) inreg %ptr) {
195; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_u_u:
196; GFX900:       ; %bb.0:
197; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
198; GFX900-NEXT:    ;;#ASMSTART
199; GFX900-NEXT:    ; def v[0:1]
200; GFX900-NEXT:    ;;#ASMEND
201; GFX900-NEXT:    v_mov_b32_e32 v2, 0
202; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
203; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
204; GFX900-NEXT:    s_waitcnt vmcnt(0)
205; GFX900-NEXT:    s_setpc_b64 s[30:31]
206;
207; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_u_u:
208; GFX90A:       ; %bb.0:
209; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210; GFX90A-NEXT:    ;;#ASMSTART
211; GFX90A-NEXT:    ; def v[0:1]
212; GFX90A-NEXT:    ;;#ASMEND
213; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
214; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
215; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
216; GFX90A-NEXT:    s_waitcnt vmcnt(0)
217; GFX90A-NEXT:    s_setpc_b64 s[30:31]
218;
219; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_u_u:
220; GFX940:       ; %bb.0:
221; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; GFX940-NEXT:    ;;#ASMSTART
223; GFX940-NEXT:    ; def v[0:1]
224; GFX940-NEXT:    ;;#ASMEND
225; GFX940-NEXT:    v_mov_b32_e32 v2, 0
226; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
227; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
228; GFX940-NEXT:    s_waitcnt vmcnt(0)
229; GFX940-NEXT:    s_setpc_b64 s[30:31]
230  %vec0 = call <4 x half> asm "; def $0", "=v"()
231  %vec1 = call <4 x half> asm "; def $0", "=v"()
232  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison>
233  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
234  ret void
235}
236
; Mask <6, poison, poison>: result lane 0 is element 2 of %vec1; lanes 1 and 2
; are poison. %vec0 does not contribute to the result. The expected output
; contains a single asm def whose second register (v1) is stored directly as
; one dword.
237define void @v_shuffle_v3f16_v4f16__6_u_u(ptr addrspace(1) inreg %ptr) {
238; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_u_u:
239; GFX900:       ; %bb.0:
240; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241; GFX900-NEXT:    v_mov_b32_e32 v2, 0
242; GFX900-NEXT:    ;;#ASMSTART
243; GFX900-NEXT:    ; def v[0:1]
244; GFX900-NEXT:    ;;#ASMEND
245; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
246; GFX900-NEXT:    s_waitcnt vmcnt(0)
247; GFX900-NEXT:    s_setpc_b64 s[30:31]
248;
249; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_u_u:
250; GFX90A:       ; %bb.0:
251; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
252; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
253; GFX90A-NEXT:    ;;#ASMSTART
254; GFX90A-NEXT:    ; def v[0:1]
255; GFX90A-NEXT:    ;;#ASMEND
256; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
257; GFX90A-NEXT:    s_waitcnt vmcnt(0)
258; GFX90A-NEXT:    s_setpc_b64 s[30:31]
259;
260; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_u_u:
261; GFX940:       ; %bb.0:
262; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263; GFX940-NEXT:    v_mov_b32_e32 v2, 0
264; GFX940-NEXT:    ;;#ASMSTART
265; GFX940-NEXT:    ; def v[0:1]
266; GFX940-NEXT:    ;;#ASMEND
267; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
268; GFX940-NEXT:    s_waitcnt vmcnt(0)
269; GFX940-NEXT:    s_setpc_b64 s[30:31]
270  %vec0 = call <4 x half> asm "; def $0", "=v"()
271  %vec1 = call <4 x half> asm "; def $0", "=v"()
272  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison>
273  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
274  ret void
275}
276
; Mask <7, poison, poison>: result lane 0 is element 3 of %vec1; lanes 1 and 2
; are poison. %vec0 does not contribute to the result. The expected output
; contains a single asm def, a v_alignbit_b32 by 16 on its second register
; (v1), and one dword store.
277define void @v_shuffle_v3f16_v4f16__7_u_u(ptr addrspace(1) inreg %ptr) {
278; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_u:
279; GFX900:       ; %bb.0:
280; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; GFX900-NEXT:    ;;#ASMSTART
282; GFX900-NEXT:    ; def v[0:1]
283; GFX900-NEXT:    ;;#ASMEND
284; GFX900-NEXT:    v_mov_b32_e32 v2, 0
285; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
286; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
287; GFX900-NEXT:    s_waitcnt vmcnt(0)
288; GFX900-NEXT:    s_setpc_b64 s[30:31]
289;
290; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_u:
291; GFX90A:       ; %bb.0:
292; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293; GFX90A-NEXT:    ;;#ASMSTART
294; GFX90A-NEXT:    ; def v[0:1]
295; GFX90A-NEXT:    ;;#ASMEND
296; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
297; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
298; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
299; GFX90A-NEXT:    s_waitcnt vmcnt(0)
300; GFX90A-NEXT:    s_setpc_b64 s[30:31]
301;
302; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_u:
303; GFX940:       ; %bb.0:
304; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305; GFX940-NEXT:    ;;#ASMSTART
306; GFX940-NEXT:    ; def v[0:1]
307; GFX940-NEXT:    ;;#ASMEND
308; GFX940-NEXT:    v_mov_b32_e32 v2, 0
309; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
310; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
311; GFX940-NEXT:    s_waitcnt vmcnt(0)
312; GFX940-NEXT:    s_setpc_b64 s[30:31]
313  %vec0 = call <4 x half> asm "; def $0", "=v"()
314  %vec1 = call <4 x half> asm "; def $0", "=v"()
315  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison>
316  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
317  ret void
318}
319
; Mask <7, 0, poison>: lane 0 is element 3 of %vec1, lane 1 is element 0 of
; %vec0, lane 2 is poison. Both asm defs appear in the expected output; they
; are combined by one v_alignbit_b32 by 16 and written with a single dword
; store. The second def is checked as v[1:2] on gfx900 and v[2:3] on the other
; two targets, and the gfx940 checks additionally expect an s_nop 0 before
; the v_alignbit_b32.
320define void @v_shuffle_v3f16_v4f16__7_0_u(ptr addrspace(1) inreg %ptr) {
321; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_u:
322; GFX900:       ; %bb.0:
323; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324; GFX900-NEXT:    ;;#ASMSTART
325; GFX900-NEXT:    ; def v[0:1]
326; GFX900-NEXT:    ;;#ASMEND
327; GFX900-NEXT:    v_mov_b32_e32 v3, 0
328; GFX900-NEXT:    ;;#ASMSTART
329; GFX900-NEXT:    ; def v[1:2]
330; GFX900-NEXT:    ;;#ASMEND
331; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
332; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
333; GFX900-NEXT:    s_waitcnt vmcnt(0)
334; GFX900-NEXT:    s_setpc_b64 s[30:31]
335;
336; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_u:
337; GFX90A:       ; %bb.0:
338; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339; GFX90A-NEXT:    ;;#ASMSTART
340; GFX90A-NEXT:    ; def v[0:1]
341; GFX90A-NEXT:    ;;#ASMEND
342; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
343; GFX90A-NEXT:    ;;#ASMSTART
344; GFX90A-NEXT:    ; def v[2:3]
345; GFX90A-NEXT:    ;;#ASMEND
346; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
347; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
348; GFX90A-NEXT:    s_waitcnt vmcnt(0)
349; GFX90A-NEXT:    s_setpc_b64 s[30:31]
350;
351; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_u:
352; GFX940:       ; %bb.0:
353; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
354; GFX940-NEXT:    ;;#ASMSTART
355; GFX940-NEXT:    ; def v[0:1]
356; GFX940-NEXT:    ;;#ASMEND
357; GFX940-NEXT:    v_mov_b32_e32 v4, 0
358; GFX940-NEXT:    ;;#ASMSTART
359; GFX940-NEXT:    ; def v[2:3]
360; GFX940-NEXT:    ;;#ASMEND
361; GFX940-NEXT:    s_nop 0
362; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
363; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
364; GFX940-NEXT:    s_waitcnt vmcnt(0)
365; GFX940-NEXT:    s_setpc_b64 s[30:31]
366  %vec0 = call <4 x half> asm "; def $0", "=v"()
367  %vec1 = call <4 x half> asm "; def $0", "=v"()
368  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 poison>
369  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
370  ret void
371}
372
; Mask <7, 1, poison>: lane 0 is element 3 of %vec1, lane 1 is element 1 of
; %vec0, lane 2 is poison. Both asm defs appear in the expected output; they
; are combined by one v_perm_b32 whose selector 0x7060302 is first moved into
; an SGPR, then written with a single dword store. The gfx940 checks
; additionally expect an s_nop 0 before the v_perm_b32.
373define void @v_shuffle_v3f16_v4f16__7_1_u(ptr addrspace(1) inreg %ptr) {
374; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_u:
375; GFX900:       ; %bb.0:
376; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377; GFX900-NEXT:    ;;#ASMSTART
378; GFX900-NEXT:    ; def v[0:1]
379; GFX900-NEXT:    ;;#ASMEND
380; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
381; GFX900-NEXT:    v_mov_b32_e32 v3, 0
382; GFX900-NEXT:    ;;#ASMSTART
383; GFX900-NEXT:    ; def v[1:2]
384; GFX900-NEXT:    ;;#ASMEND
385; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
386; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
387; GFX900-NEXT:    s_waitcnt vmcnt(0)
388; GFX900-NEXT:    s_setpc_b64 s[30:31]
389;
390; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_u:
391; GFX90A:       ; %bb.0:
392; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393; GFX90A-NEXT:    ;;#ASMSTART
394; GFX90A-NEXT:    ; def v[0:1]
395; GFX90A-NEXT:    ;;#ASMEND
396; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
397; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
398; GFX90A-NEXT:    ;;#ASMSTART
399; GFX90A-NEXT:    ; def v[2:3]
400; GFX90A-NEXT:    ;;#ASMEND
401; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
402; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
403; GFX90A-NEXT:    s_waitcnt vmcnt(0)
404; GFX90A-NEXT:    s_setpc_b64 s[30:31]
405;
406; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_u:
407; GFX940:       ; %bb.0:
408; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
409; GFX940-NEXT:    ;;#ASMSTART
410; GFX940-NEXT:    ; def v[0:1]
411; GFX940-NEXT:    ;;#ASMEND
412; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
413; GFX940-NEXT:    v_mov_b32_e32 v4, 0
414; GFX940-NEXT:    ;;#ASMSTART
415; GFX940-NEXT:    ; def v[2:3]
416; GFX940-NEXT:    ;;#ASMEND
417; GFX940-NEXT:    s_nop 0
418; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
419; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
420; GFX940-NEXT:    s_waitcnt vmcnt(0)
421; GFX940-NEXT:    s_setpc_b64 s[30:31]
422  %vec0 = call <4 x half> asm "; def $0", "=v"()
423  %vec1 = call <4 x half> asm "; def $0", "=v"()
424  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 poison>
425  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
426  ret void
427}
428
; Mask <7, 2, poison>: lane 0 is element 3 of %vec1, lane 1 is element 2 of
; %vec0, lane 2 is poison. Both asm defs appear in the expected output (v[0:1]
; and v[2:3] on all three targets); their second registers are combined by
; one v_alignbit_b32 by 16 and written with a single dword store. The gfx940
; checks additionally expect an s_nop 0 before the v_alignbit_b32.
429define void @v_shuffle_v3f16_v4f16__7_2_u(ptr addrspace(1) inreg %ptr) {
430; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_u:
431; GFX900:       ; %bb.0:
432; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433; GFX900-NEXT:    ;;#ASMSTART
434; GFX900-NEXT:    ; def v[0:1]
435; GFX900-NEXT:    ;;#ASMEND
436; GFX900-NEXT:    v_mov_b32_e32 v4, 0
437; GFX900-NEXT:    ;;#ASMSTART
438; GFX900-NEXT:    ; def v[2:3]
439; GFX900-NEXT:    ;;#ASMEND
440; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
441; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
442; GFX900-NEXT:    s_waitcnt vmcnt(0)
443; GFX900-NEXT:    s_setpc_b64 s[30:31]
444;
445; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_u:
446; GFX90A:       ; %bb.0:
447; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448; GFX90A-NEXT:    ;;#ASMSTART
449; GFX90A-NEXT:    ; def v[0:1]
450; GFX90A-NEXT:    ;;#ASMEND
451; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
452; GFX90A-NEXT:    ;;#ASMSTART
453; GFX90A-NEXT:    ; def v[2:3]
454; GFX90A-NEXT:    ;;#ASMEND
455; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
456; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
457; GFX90A-NEXT:    s_waitcnt vmcnt(0)
458; GFX90A-NEXT:    s_setpc_b64 s[30:31]
459;
460; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_u:
461; GFX940:       ; %bb.0:
462; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
463; GFX940-NEXT:    ;;#ASMSTART
464; GFX940-NEXT:    ; def v[0:1]
465; GFX940-NEXT:    ;;#ASMEND
466; GFX940-NEXT:    v_mov_b32_e32 v4, 0
467; GFX940-NEXT:    ;;#ASMSTART
468; GFX940-NEXT:    ; def v[2:3]
469; GFX940-NEXT:    ;;#ASMEND
470; GFX940-NEXT:    s_nop 0
471; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
472; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
473; GFX940-NEXT:    s_waitcnt vmcnt(0)
474; GFX940-NEXT:    s_setpc_b64 s[30:31]
475  %vec0 = call <4 x half> asm "; def $0", "=v"()
476  %vec1 = call <4 x half> asm "; def $0", "=v"()
477  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 poison>
478  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
479  ret void
480}
481
; Mask <7, 3, poison>: lane 0 is element 3 of %vec1, lane 1 is element 3 of
; %vec0, lane 2 is poison. Both asm defs appear in the expected output; their
; second registers are combined by one v_perm_b32 with selector 0x7060302
; (moved into an SGPR first) and written with a single dword store. The
; gfx940 checks additionally expect an s_nop 0 before the v_perm_b32.
482define void @v_shuffle_v3f16_v4f16__7_3_u(ptr addrspace(1) inreg %ptr) {
483; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_u:
484; GFX900:       ; %bb.0:
485; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486; GFX900-NEXT:    ;;#ASMSTART
487; GFX900-NEXT:    ; def v[0:1]
488; GFX900-NEXT:    ;;#ASMEND
489; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
490; GFX900-NEXT:    v_mov_b32_e32 v4, 0
491; GFX900-NEXT:    ;;#ASMSTART
492; GFX900-NEXT:    ; def v[2:3]
493; GFX900-NEXT:    ;;#ASMEND
494; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
495; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
496; GFX900-NEXT:    s_waitcnt vmcnt(0)
497; GFX900-NEXT:    s_setpc_b64 s[30:31]
498;
499; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_u:
500; GFX90A:       ; %bb.0:
501; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502; GFX90A-NEXT:    ;;#ASMSTART
503; GFX90A-NEXT:    ; def v[0:1]
504; GFX90A-NEXT:    ;;#ASMEND
505; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
506; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
507; GFX90A-NEXT:    ;;#ASMSTART
508; GFX90A-NEXT:    ; def v[2:3]
509; GFX90A-NEXT:    ;;#ASMEND
510; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
511; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
512; GFX90A-NEXT:    s_waitcnt vmcnt(0)
513; GFX90A-NEXT:    s_setpc_b64 s[30:31]
514;
515; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_u:
516; GFX940:       ; %bb.0:
517; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
518; GFX940-NEXT:    ;;#ASMSTART
519; GFX940-NEXT:    ; def v[0:1]
520; GFX940-NEXT:    ;;#ASMEND
521; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
522; GFX940-NEXT:    v_mov_b32_e32 v4, 0
523; GFX940-NEXT:    ;;#ASMSTART
524; GFX940-NEXT:    ; def v[2:3]
525; GFX940-NEXT:    ;;#ASMEND
526; GFX940-NEXT:    s_nop 0
527; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
528; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
529; GFX940-NEXT:    s_waitcnt vmcnt(0)
530; GFX940-NEXT:    s_setpc_b64 s[30:31]
531  %vec0 = call <4 x half> asm "; def $0", "=v"()
532  %vec1 = call <4 x half> asm "; def $0", "=v"()
533  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 poison>
534  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
535  ret void
536}
537
; Mask <7, 4, poison>: lane 0 is element 3 of %vec1, lane 1 is element 0 of
; %vec1, lane 2 is poison. Both defined lanes come from the second operand,
; so %vec0 does not contribute and only one asm def appears in the expected
; output. Its two registers are combined by one v_alignbit_b32 by 16 and
; written with a single dword store.
538define void @v_shuffle_v3f16_v4f16__7_4_u(ptr addrspace(1) inreg %ptr) {
539; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_u:
540; GFX900:       ; %bb.0:
541; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
542; GFX900-NEXT:    ;;#ASMSTART
543; GFX900-NEXT:    ; def v[0:1]
544; GFX900-NEXT:    ;;#ASMEND
545; GFX900-NEXT:    v_mov_b32_e32 v2, 0
546; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
547; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
548; GFX900-NEXT:    s_waitcnt vmcnt(0)
549; GFX900-NEXT:    s_setpc_b64 s[30:31]
550;
551; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_u:
552; GFX90A:       ; %bb.0:
553; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; GFX90A-NEXT:    ;;#ASMSTART
555; GFX90A-NEXT:    ; def v[0:1]
556; GFX90A-NEXT:    ;;#ASMEND
557; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
558; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v1, 16
559; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
560; GFX90A-NEXT:    s_waitcnt vmcnt(0)
561; GFX90A-NEXT:    s_setpc_b64 s[30:31]
562;
563; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_u:
564; GFX940:       ; %bb.0:
565; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566; GFX940-NEXT:    ;;#ASMSTART
567; GFX940-NEXT:    ; def v[0:1]
568; GFX940-NEXT:    ;;#ASMEND
569; GFX940-NEXT:    v_mov_b32_e32 v2, 0
570; GFX940-NEXT:    v_alignbit_b32 v0, v0, v1, 16
571; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
572; GFX940-NEXT:    s_waitcnt vmcnt(0)
573; GFX940-NEXT:    s_setpc_b64 s[30:31]
574  %vec0 = call <4 x half> asm "; def $0", "=v"()
575  %vec1 = call <4 x half> asm "; def $0", "=v"()
576  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 poison>
577  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
578  ret void
579}
580
; Mask <7, 5, poison>: lane 0 is element 3 of %vec1, lane 1 is element 1 of
; %vec1, lane 2 is poison. %vec0 does not contribute, and only one asm def
; appears in the expected output. Its two registers are combined by one
; v_perm_b32 with selector 0x7060302 (moved into an SGPR first) and written
; with a single dword store.
581define void @v_shuffle_v3f16_v4f16__7_5_u(ptr addrspace(1) inreg %ptr) {
582; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_u:
583; GFX900:       ; %bb.0:
584; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585; GFX900-NEXT:    ;;#ASMSTART
586; GFX900-NEXT:    ; def v[0:1]
587; GFX900-NEXT:    ;;#ASMEND
588; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
589; GFX900-NEXT:    v_mov_b32_e32 v2, 0
590; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
591; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
592; GFX900-NEXT:    s_waitcnt vmcnt(0)
593; GFX900-NEXT:    s_setpc_b64 s[30:31]
594;
595; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_u:
596; GFX90A:       ; %bb.0:
597; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; GFX90A-NEXT:    ;;#ASMSTART
599; GFX90A-NEXT:    ; def v[0:1]
600; GFX90A-NEXT:    ;;#ASMEND
601; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
602; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
603; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
604; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
605; GFX90A-NEXT:    s_waitcnt vmcnt(0)
606; GFX90A-NEXT:    s_setpc_b64 s[30:31]
607;
608; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_u:
609; GFX940:       ; %bb.0:
610; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611; GFX940-NEXT:    ;;#ASMSTART
612; GFX940-NEXT:    ; def v[0:1]
613; GFX940-NEXT:    ;;#ASMEND
614; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
615; GFX940-NEXT:    v_mov_b32_e32 v2, 0
616; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
617; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
618; GFX940-NEXT:    s_waitcnt vmcnt(0)
619; GFX940-NEXT:    s_setpc_b64 s[30:31]
620  %vec0 = call <4 x half> asm "; def $0", "=v"()
621  %vec1 = call <4 x half> asm "; def $0", "=v"()
622  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
623  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
624  ret void
625}
626
; Mask <7, 6, poison>: lane 0 is element 3 of %vec1, lane 1 is element 2 of
; %vec1, lane 2 is poison. %vec0 does not contribute, and only one asm def
; appears in the expected output. The result is built by a v_alignbit_b32 by
; 16 that uses the def's second register (v1) for both sources, followed by
; a single dword store.
627define void @v_shuffle_v3f16_v4f16__7_6_u(ptr addrspace(1) inreg %ptr) {
628; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_u:
629; GFX900:       ; %bb.0:
630; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631; GFX900-NEXT:    ;;#ASMSTART
632; GFX900-NEXT:    ; def v[0:1]
633; GFX900-NEXT:    ;;#ASMEND
634; GFX900-NEXT:    v_mov_b32_e32 v2, 0
635; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
636; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
637; GFX900-NEXT:    s_waitcnt vmcnt(0)
638; GFX900-NEXT:    s_setpc_b64 s[30:31]
639;
640; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_u:
641; GFX90A:       ; %bb.0:
642; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643; GFX90A-NEXT:    ;;#ASMSTART
644; GFX90A-NEXT:    ; def v[0:1]
645; GFX90A-NEXT:    ;;#ASMEND
646; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
647; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
648; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
649; GFX90A-NEXT:    s_waitcnt vmcnt(0)
650; GFX90A-NEXT:    s_setpc_b64 s[30:31]
651;
652; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_u:
653; GFX940:       ; %bb.0:
654; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655; GFX940-NEXT:    ;;#ASMSTART
656; GFX940-NEXT:    ; def v[0:1]
657; GFX940-NEXT:    ;;#ASMEND
658; GFX940-NEXT:    v_mov_b32_e32 v2, 0
659; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
660; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
661; GFX940-NEXT:    s_waitcnt vmcnt(0)
662; GFX940-NEXT:    s_setpc_b64 s[30:31]
663  %vec0 = call <4 x half> asm "; def $0", "=v"()
664  %vec1 = call <4 x half> asm "; def $0", "=v"()
665  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 poison>
666  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
667  ret void
668}
669
; Mask <7, 7, poison>: lanes 0 and 1 are both element 3 of %vec1; lane 2 is
; poison. %vec0 does not contribute, and only one asm def appears in the
; expected output. The result is built by a v_perm_b32 with selector
; 0x7060302 that uses the def's second register (v1) for both sources,
; followed by a single dword store.
670define void @v_shuffle_v3f16_v4f16__7_7_u(ptr addrspace(1) inreg %ptr) {
671; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_u:
672; GFX900:       ; %bb.0:
673; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
674; GFX900-NEXT:    ;;#ASMSTART
675; GFX900-NEXT:    ; def v[0:1]
676; GFX900-NEXT:    ;;#ASMEND
677; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
678; GFX900-NEXT:    v_mov_b32_e32 v2, 0
679; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
680; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
681; GFX900-NEXT:    s_waitcnt vmcnt(0)
682; GFX900-NEXT:    s_setpc_b64 s[30:31]
683;
684; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_u:
685; GFX90A:       ; %bb.0:
686; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
687; GFX90A-NEXT:    ;;#ASMSTART
688; GFX90A-NEXT:    ; def v[0:1]
689; GFX90A-NEXT:    ;;#ASMEND
690; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
691; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
692; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
693; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
694; GFX90A-NEXT:    s_waitcnt vmcnt(0)
695; GFX90A-NEXT:    s_setpc_b64 s[30:31]
696;
697; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_u:
698; GFX940:       ; %bb.0:
699; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700; GFX940-NEXT:    ;;#ASMSTART
701; GFX940-NEXT:    ; def v[0:1]
702; GFX940-NEXT:    ;;#ASMEND
703; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
704; GFX940-NEXT:    v_mov_b32_e32 v2, 0
705; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
706; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
707; GFX940-NEXT:    s_waitcnt vmcnt(0)
708; GFX940-NEXT:    s_setpc_b64 s[30:31]
709  %vec0 = call <4 x half> asm "; def $0", "=v"()
710  %vec1 = call <4 x half> asm "; def $0", "=v"()
711  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
712  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
713  ret void
714}
715
; Mask <7, 7, 0>: lanes 0 and 1 are both element 3 of %vec1 and lane 2 is
; element 0 of %vec0, so all three result lanes are defined. Both asm defs
; appear in the expected output. The first two lanes are built by a
; v_perm_b32 (selector 0x7060302) on the last register of the second def and
; written with a dword store; the third lane is written with a short store
; of v0 at offset 4. The gfx940 checks additionally expect an s_nop 0 before
; the v_perm_b32.
716define void @v_shuffle_v3f16_v4f16__7_7_0(ptr addrspace(1) inreg %ptr) {
717; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_0:
718; GFX900:       ; %bb.0:
719; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
720; GFX900-NEXT:    ;;#ASMSTART
721; GFX900-NEXT:    ; def v[0:1]
722; GFX900-NEXT:    ;;#ASMEND
723; GFX900-NEXT:    v_mov_b32_e32 v3, 0
724; GFX900-NEXT:    ;;#ASMSTART
725; GFX900-NEXT:    ; def v[1:2]
726; GFX900-NEXT:    ;;#ASMEND
727; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
728; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
729; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
730; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
731; GFX900-NEXT:    s_waitcnt vmcnt(0)
732; GFX900-NEXT:    s_setpc_b64 s[30:31]
733;
734; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_0:
735; GFX90A:       ; %bb.0:
736; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
738; GFX90A-NEXT:    ;;#ASMSTART
739; GFX90A-NEXT:    ; def v[0:1]
740; GFX90A-NEXT:    ;;#ASMEND
741; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
742; GFX90A-NEXT:    ;;#ASMSTART
743; GFX90A-NEXT:    ; def v[2:3]
744; GFX90A-NEXT:    ;;#ASMEND
745; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
746; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
747; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
748; GFX90A-NEXT:    s_waitcnt vmcnt(0)
749; GFX90A-NEXT:    s_setpc_b64 s[30:31]
750;
751; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_0:
752; GFX940:       ; %bb.0:
753; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754; GFX940-NEXT:    v_mov_b32_e32 v4, 0
755; GFX940-NEXT:    ;;#ASMSTART
756; GFX940-NEXT:    ; def v[0:1]
757; GFX940-NEXT:    ;;#ASMEND
758; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
759; GFX940-NEXT:    ;;#ASMSTART
760; GFX940-NEXT:    ; def v[2:3]
761; GFX940-NEXT:    ;;#ASMEND
762; GFX940-NEXT:    s_nop 0
763; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
764; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
765; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
766; GFX940-NEXT:    s_waitcnt vmcnt(0)
767; GFX940-NEXT:    s_setpc_b64 s[30:31]
768  %vec0 = call <4 x half> asm "; def $0", "=v"()
769  %vec1 = call <4 x half> asm "; def $0", "=v"()
770  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
771  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
772  ret void
773}
774
; Shuffle mask <7, 7, 1>: result elements 0 and 1 both take element 3 of
; %vec1, and result element 2 takes element 1 of %vec0. The <3 x half> result
; is stored to %ptr. The expected code forms the first two elements with
; v_perm_b32 and writes the third with global_store_short_d16_hi at offset 4.
775define void @v_shuffle_v3f16_v4f16__7_7_1(ptr addrspace(1) inreg %ptr) {
776; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_1:
777; GFX900:       ; %bb.0:
778; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779; GFX900-NEXT:    ;;#ASMSTART
780; GFX900-NEXT:    ; def v[0:1]
781; GFX900-NEXT:    ;;#ASMEND
782; GFX900-NEXT:    v_mov_b32_e32 v3, 0
783; GFX900-NEXT:    ;;#ASMSTART
784; GFX900-NEXT:    ; def v[1:2]
785; GFX900-NEXT:    ;;#ASMEND
786; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
787; GFX900-NEXT:    v_perm_b32 v1, v2, v2, s4
788; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
789; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
790; GFX900-NEXT:    s_waitcnt vmcnt(0)
791; GFX900-NEXT:    s_setpc_b64 s[30:31]
792;
793; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_1:
794; GFX90A:       ; %bb.0:
795; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
796; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
797; GFX90A-NEXT:    ;;#ASMSTART
798; GFX90A-NEXT:    ; def v[0:1]
799; GFX90A-NEXT:    ;;#ASMEND
800; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
801; GFX90A-NEXT:    ;;#ASMSTART
802; GFX90A-NEXT:    ; def v[2:3]
803; GFX90A-NEXT:    ;;#ASMEND
804; GFX90A-NEXT:    v_perm_b32 v1, v3, v3, s4
805; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
806; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
807; GFX90A-NEXT:    s_waitcnt vmcnt(0)
808; GFX90A-NEXT:    s_setpc_b64 s[30:31]
809;
810; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_1:
811; GFX940:       ; %bb.0:
812; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
813; GFX940-NEXT:    v_mov_b32_e32 v4, 0
814; GFX940-NEXT:    ;;#ASMSTART
815; GFX940-NEXT:    ; def v[0:1]
816; GFX940-NEXT:    ;;#ASMEND
817; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
818; GFX940-NEXT:    ;;#ASMSTART
819; GFX940-NEXT:    ; def v[2:3]
820; GFX940-NEXT:    ;;#ASMEND
821; GFX940-NEXT:    s_nop 0
822; GFX940-NEXT:    v_perm_b32 v1, v3, v3, s2
823; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
824; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
825; GFX940-NEXT:    s_waitcnt vmcnt(0)
826; GFX940-NEXT:    s_setpc_b64 s[30:31]
827  %vec0 = call <4 x half> asm "; def $0", "=v"()
828  %vec1 = call <4 x half> asm "; def $0", "=v"()
829  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 1>
830  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
831  ret void
832}
833
; Shuffle mask <7, 7, 2>: result elements 0 and 1 both take element 3 of
; %vec1, and result element 2 takes element 2 of %vec0. The <3 x half> result
; is stored to %ptr. The expected code forms the first two elements with
; v_perm_b32 and writes the third with global_store_short at offset 4.
834define void @v_shuffle_v3f16_v4f16__7_7_2(ptr addrspace(1) inreg %ptr) {
835; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_2:
836; GFX900:       ; %bb.0:
837; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
838; GFX900-NEXT:    v_mov_b32_e32 v4, 0
839; GFX900-NEXT:    ;;#ASMSTART
840; GFX900-NEXT:    ; def v[0:1]
841; GFX900-NEXT:    ;;#ASMEND
842; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
843; GFX900-NEXT:    ;;#ASMSTART
844; GFX900-NEXT:    ; def v[2:3]
845; GFX900-NEXT:    ;;#ASMEND
846; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
847; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
848; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
849; GFX900-NEXT:    s_waitcnt vmcnt(0)
850; GFX900-NEXT:    s_setpc_b64 s[30:31]
851;
852; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_2:
853; GFX90A:       ; %bb.0:
854; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
856; GFX90A-NEXT:    ;;#ASMSTART
857; GFX90A-NEXT:    ; def v[0:1]
858; GFX90A-NEXT:    ;;#ASMEND
859; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
860; GFX90A-NEXT:    ;;#ASMSTART
861; GFX90A-NEXT:    ; def v[2:3]
862; GFX90A-NEXT:    ;;#ASMEND
863; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
864; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
865; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
866; GFX90A-NEXT:    s_waitcnt vmcnt(0)
867; GFX90A-NEXT:    s_setpc_b64 s[30:31]
868;
869; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_2:
870; GFX940:       ; %bb.0:
871; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872; GFX940-NEXT:    v_mov_b32_e32 v4, 0
873; GFX940-NEXT:    ;;#ASMSTART
874; GFX940-NEXT:    ; def v[0:1]
875; GFX940-NEXT:    ;;#ASMEND
876; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
877; GFX940-NEXT:    ;;#ASMSTART
878; GFX940-NEXT:    ; def v[2:3]
879; GFX940-NEXT:    ;;#ASMEND
880; GFX940-NEXT:    s_nop 0
881; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
882; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
883; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
884; GFX940-NEXT:    s_waitcnt vmcnt(0)
885; GFX940-NEXT:    s_setpc_b64 s[30:31]
886  %vec0 = call <4 x half> asm "; def $0", "=v"()
887  %vec1 = call <4 x half> asm "; def $0", "=v"()
888  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 2>
889  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
890  ret void
891}
892
; Shuffle mask <7, 7, 3>: result elements 0 and 1 both take element 3 of
; %vec1, and result element 2 takes element 3 of %vec0. The <3 x half> result
; is stored to %ptr. The expected code forms the first two elements with
; v_perm_b32 and writes the third with global_store_short_d16_hi at offset 4.
893define void @v_shuffle_v3f16_v4f16__7_7_3(ptr addrspace(1) inreg %ptr) {
894; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_3:
895; GFX900:       ; %bb.0:
896; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
897; GFX900-NEXT:    v_mov_b32_e32 v4, 0
898; GFX900-NEXT:    ;;#ASMSTART
899; GFX900-NEXT:    ; def v[0:1]
900; GFX900-NEXT:    ;;#ASMEND
901; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
902; GFX900-NEXT:    ;;#ASMSTART
903; GFX900-NEXT:    ; def v[2:3]
904; GFX900-NEXT:    ;;#ASMEND
905; GFX900-NEXT:    v_perm_b32 v0, v3, v3, s4
906; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
907; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
908; GFX900-NEXT:    s_waitcnt vmcnt(0)
909; GFX900-NEXT:    s_setpc_b64 s[30:31]
910;
911; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_3:
912; GFX90A:       ; %bb.0:
913; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
914; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
915; GFX90A-NEXT:    ;;#ASMSTART
916; GFX90A-NEXT:    ; def v[0:1]
917; GFX90A-NEXT:    ;;#ASMEND
918; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
919; GFX90A-NEXT:    ;;#ASMSTART
920; GFX90A-NEXT:    ; def v[2:3]
921; GFX90A-NEXT:    ;;#ASMEND
922; GFX90A-NEXT:    v_perm_b32 v0, v3, v3, s4
923; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
924; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
925; GFX90A-NEXT:    s_waitcnt vmcnt(0)
926; GFX90A-NEXT:    s_setpc_b64 s[30:31]
927;
928; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_3:
929; GFX940:       ; %bb.0:
930; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
931; GFX940-NEXT:    v_mov_b32_e32 v4, 0
932; GFX940-NEXT:    ;;#ASMSTART
933; GFX940-NEXT:    ; def v[0:1]
934; GFX940-NEXT:    ;;#ASMEND
935; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
936; GFX940-NEXT:    ;;#ASMSTART
937; GFX940-NEXT:    ; def v[2:3]
938; GFX940-NEXT:    ;;#ASMEND
939; GFX940-NEXT:    s_nop 0
940; GFX940-NEXT:    v_perm_b32 v0, v3, v3, s2
941; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
942; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
943; GFX940-NEXT:    s_waitcnt vmcnt(0)
944; GFX940-NEXT:    s_setpc_b64 s[30:31]
945  %vec0 = call <4 x half> asm "; def $0", "=v"()
946  %vec1 = call <4 x half> asm "; def $0", "=v"()
947  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 3>
948  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
949  ret void
950}
951
; Shuffle mask <7, 7, 4>: every mask index is >= 4, so all three result
; elements come from %vec1 (its elements 3, 3 and 0). The <3 x half> result is
; stored to %ptr. Only one inline-asm def appears in the expected output;
; presumably the unused %vec0 def is dropped (NOTE(review): confirm).
952define void @v_shuffle_v3f16_v4f16__7_7_4(ptr addrspace(1) inreg %ptr) {
953; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_4:
954; GFX900:       ; %bb.0:
955; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
956; GFX900-NEXT:    v_mov_b32_e32 v2, 0
957; GFX900-NEXT:    ;;#ASMSTART
958; GFX900-NEXT:    ; def v[0:1]
959; GFX900-NEXT:    ;;#ASMEND
960; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
961; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
962; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
963; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
964; GFX900-NEXT:    s_waitcnt vmcnt(0)
965; GFX900-NEXT:    s_setpc_b64 s[30:31]
966;
967; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_4:
968; GFX90A:       ; %bb.0:
969; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
971; GFX90A-NEXT:    ;;#ASMSTART
972; GFX90A-NEXT:    ; def v[0:1]
973; GFX90A-NEXT:    ;;#ASMEND
974; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
975; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
976; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
977; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
978; GFX90A-NEXT:    s_waitcnt vmcnt(0)
979; GFX90A-NEXT:    s_setpc_b64 s[30:31]
980;
981; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_4:
982; GFX940:       ; %bb.0:
983; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984; GFX940-NEXT:    v_mov_b32_e32 v2, 0
985; GFX940-NEXT:    ;;#ASMSTART
986; GFX940-NEXT:    ; def v[0:1]
987; GFX940-NEXT:    ;;#ASMEND
988; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
989; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
990; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
991; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
992; GFX940-NEXT:    s_waitcnt vmcnt(0)
993; GFX940-NEXT:    s_setpc_b64 s[30:31]
994  %vec0 = call <4 x half> asm "; def $0", "=v"()
995  %vec1 = call <4 x half> asm "; def $0", "=v"()
996  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 4>
997  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
998  ret void
999}
1000
; Shuffle mask <7, 7, 5>: every mask index is >= 4, so all three result
; elements come from %vec1 (its elements 3, 3 and 1). The <3 x half> result is
; stored to %ptr. Only one inline-asm def appears in the expected output;
; presumably the unused %vec0 def is dropped (NOTE(review): confirm).
1001define void @v_shuffle_v3f16_v4f16__7_7_5(ptr addrspace(1) inreg %ptr) {
1002; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_5:
1003; GFX900:       ; %bb.0:
1004; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1005; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1006; GFX900-NEXT:    ;;#ASMSTART
1007; GFX900-NEXT:    ; def v[0:1]
1008; GFX900-NEXT:    ;;#ASMEND
1009; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1010; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1011; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1012; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1013; GFX900-NEXT:    s_waitcnt vmcnt(0)
1014; GFX900-NEXT:    s_setpc_b64 s[30:31]
1015;
1016; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_5:
1017; GFX90A:       ; %bb.0:
1018; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1019; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1020; GFX90A-NEXT:    ;;#ASMSTART
1021; GFX90A-NEXT:    ; def v[0:1]
1022; GFX90A-NEXT:    ;;#ASMEND
1023; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1024; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
1025; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
1026; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1027; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1028; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1029;
1030; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_5:
1031; GFX940:       ; %bb.0:
1032; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1033; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1034; GFX940-NEXT:    ;;#ASMSTART
1035; GFX940-NEXT:    ; def v[0:1]
1036; GFX940-NEXT:    ;;#ASMEND
1037; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1038; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
1039; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
1040; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1041; GFX940-NEXT:    s_waitcnt vmcnt(0)
1042; GFX940-NEXT:    s_setpc_b64 s[30:31]
1043  %vec0 = call <4 x half> asm "; def $0", "=v"()
1044  %vec1 = call <4 x half> asm "; def $0", "=v"()
1045  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
1046  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1047  ret void
1048}
1049
; Shuffle mask <7, 7, 6>: every mask index is >= 4, so all three result
; elements come from %vec1 (its elements 3, 3 and 2). The <3 x half> result is
; stored to %ptr. Only one inline-asm def appears in the expected output;
; presumably the unused %vec0 def is dropped (NOTE(review): confirm).
1050define void @v_shuffle_v3f16_v4f16__7_7_6(ptr addrspace(1) inreg %ptr) {
1051; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_6:
1052; GFX900:       ; %bb.0:
1053; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1054; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1055; GFX900-NEXT:    ;;#ASMSTART
1056; GFX900-NEXT:    ; def v[0:1]
1057; GFX900-NEXT:    ;;#ASMEND
1058; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1059; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
1060; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
1061; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1062; GFX900-NEXT:    s_waitcnt vmcnt(0)
1063; GFX900-NEXT:    s_setpc_b64 s[30:31]
1064;
1065; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_6:
1066; GFX90A:       ; %bb.0:
1067; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1068; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1069; GFX90A-NEXT:    ;;#ASMSTART
1070; GFX90A-NEXT:    ; def v[0:1]
1071; GFX90A-NEXT:    ;;#ASMEND
1072; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1073; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
1074; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
1075; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1076; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1077; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1078;
1079; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_6:
1080; GFX940:       ; %bb.0:
1081; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1082; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1083; GFX940-NEXT:    ;;#ASMSTART
1084; GFX940-NEXT:    ; def v[0:1]
1085; GFX940-NEXT:    ;;#ASMEND
1086; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1087; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
1088; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
1089; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1090; GFX940-NEXT:    s_waitcnt vmcnt(0)
1091; GFX940-NEXT:    s_setpc_b64 s[30:31]
1092  %vec0 = call <4 x half> asm "; def $0", "=v"()
1093  %vec1 = call <4 x half> asm "; def $0", "=v"()
1094  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
1095  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1096  ret void
1097}
1098
; Shuffle mask <7, 7, 7>: all three result elements are element 3 of %vec1
; (a splat). The <3 x half> result is stored to %ptr. The expected code
; extracts the third element with a 16-bit right shift (v_lshrrev_b32) and
; emits the dword store before the short store.
1099define void @v_shuffle_v3f16_v4f16__7_7_7(ptr addrspace(1) inreg %ptr) {
1100; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_7:
1101; GFX900:       ; %bb.0:
1102; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103; GFX900-NEXT:    ;;#ASMSTART
1104; GFX900-NEXT:    ; def v[0:1]
1105; GFX900-NEXT:    ;;#ASMEND
1106; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1107; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1108; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
1109; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
1110; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1111; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1112; GFX900-NEXT:    s_waitcnt vmcnt(0)
1113; GFX900-NEXT:    s_setpc_b64 s[30:31]
1114;
1115; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_7:
1116; GFX90A:       ; %bb.0:
1117; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1118; GFX90A-NEXT:    ;;#ASMSTART
1119; GFX90A-NEXT:    ; def v[0:1]
1120; GFX90A-NEXT:    ;;#ASMEND
1121; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1122; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1123; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
1124; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
1125; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1126; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1127; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1128; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1129;
1130; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_7:
1131; GFX940:       ; %bb.0:
1132; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1133; GFX940-NEXT:    ;;#ASMSTART
1134; GFX940-NEXT:    ; def v[0:1]
1135; GFX940-NEXT:    ;;#ASMEND
1136; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1137; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1138; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
1139; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
1140; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1141; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1142; GFX940-NEXT:    s_waitcnt vmcnt(0)
1143; GFX940-NEXT:    s_setpc_b64 s[30:31]
1144  %vec0 = call <4 x half> asm "; def $0", "=v"()
1145  %vec1 = call <4 x half> asm "; def $0", "=v"()
1146  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
1147  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1148  ret void
1149}
1150
; Shuffle mask <poison, 0, 0>: result element 0 is unspecified, and result
; elements 1 and 2 both take element 0 of %vec0. The second shuffle operand is
; poison. The <3 x half> result is stored to %ptr. The expected code places
; element 0 in the high half of the stored dword with a 16-bit left shift.
1151define void @v_shuffle_v3f16_v4f16__u_0_0(ptr addrspace(1) inreg %ptr) {
1152; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_0_0:
1153; GFX900:       ; %bb.0:
1154; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1155; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1156; GFX900-NEXT:    ;;#ASMSTART
1157; GFX900-NEXT:    ; def v[0:1]
1158; GFX900-NEXT:    ;;#ASMEND
1159; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1160; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1161; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1162; GFX900-NEXT:    s_waitcnt vmcnt(0)
1163; GFX900-NEXT:    s_setpc_b64 s[30:31]
1164;
1165; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_0_0:
1166; GFX90A:       ; %bb.0:
1167; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1168; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1169; GFX90A-NEXT:    ;;#ASMSTART
1170; GFX90A-NEXT:    ; def v[0:1]
1171; GFX90A-NEXT:    ;;#ASMEND
1172; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1173; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1174; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1175; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1176; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1177;
1178; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_0_0:
1179; GFX940:       ; %bb.0:
1180; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1181; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1182; GFX940-NEXT:    ;;#ASMSTART
1183; GFX940-NEXT:    ; def v[0:1]
1184; GFX940-NEXT:    ;;#ASMEND
1185; GFX940-NEXT:    s_nop 0
1186; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1187; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1188; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1189; GFX940-NEXT:    s_waitcnt vmcnt(0)
1190; GFX940-NEXT:    s_setpc_b64 s[30:31]
1191  %vec0 = call <4 x half> asm "; def $0", "=v"()
1192  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 0, i32 0>
1193  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1194  ret void
1195}
1196
; Shuffle mask zeroinitializer, i.e. <0, 0, 0>: all three result elements are
; element 0 of %vec0 (a splat). The second shuffle operand is poison. The
; <3 x half> result is stored to %ptr. The expected code duplicates the low
; half into both halves of the stored dword with v_perm_b32.
1197define void @v_shuffle_v3f16_v4f16__0_0_0(ptr addrspace(1) inreg %ptr) {
1198; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_0_0:
1199; GFX900:       ; %bb.0:
1200; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1201; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1202; GFX900-NEXT:    ;;#ASMSTART
1203; GFX900-NEXT:    ; def v[0:1]
1204; GFX900-NEXT:    ;;#ASMEND
1205; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1206; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
1207; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1208; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1209; GFX900-NEXT:    s_waitcnt vmcnt(0)
1210; GFX900-NEXT:    s_setpc_b64 s[30:31]
1211;
1212; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_0_0:
1213; GFX90A:       ; %bb.0:
1214; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1216; GFX90A-NEXT:    ;;#ASMSTART
1217; GFX90A-NEXT:    ; def v[0:1]
1218; GFX90A-NEXT:    ;;#ASMEND
1219; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1220; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
1221; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1222; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1223; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1224; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1225;
1226; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_0_0:
1227; GFX940:       ; %bb.0:
1228; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1229; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1230; GFX940-NEXT:    ;;#ASMSTART
1231; GFX940-NEXT:    ; def v[0:1]
1232; GFX940-NEXT:    ;;#ASMEND
1233; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1234; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
1235; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1236; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1237; GFX940-NEXT:    s_waitcnt vmcnt(0)
1238; GFX940-NEXT:    s_setpc_b64 s[30:31]
1239  %vec0 = call <4 x half> asm "; def $0", "=v"()
1240  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> zeroinitializer
1241  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1242  ret void
1243}
1244
; Shuffle mask <1, 0, 0>: result element 0 takes element 1 of %vec0, and
; result elements 1 and 2 both take element 0 of %vec0. The second shuffle
; operand is poison. The <3 x half> result is stored to %ptr. The expected
; code swaps the two halves of v0 with v_alignbit_b32 by 16 bits.
1245define void @v_shuffle_v3f16_v4f16__1_0_0(ptr addrspace(1) inreg %ptr) {
1246; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_0_0:
1247; GFX900:       ; %bb.0:
1248; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1249; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1250; GFX900-NEXT:    ;;#ASMSTART
1251; GFX900-NEXT:    ; def v[0:1]
1252; GFX900-NEXT:    ;;#ASMEND
1253; GFX900-NEXT:    v_alignbit_b32 v1, v0, v0, 16
1254; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1255; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1256; GFX900-NEXT:    s_waitcnt vmcnt(0)
1257; GFX900-NEXT:    s_setpc_b64 s[30:31]
1258;
1259; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_0_0:
1260; GFX90A:       ; %bb.0:
1261; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1262; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1263; GFX90A-NEXT:    ;;#ASMSTART
1264; GFX90A-NEXT:    ; def v[0:1]
1265; GFX90A-NEXT:    ;;#ASMEND
1266; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v0, 16
1267; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1268; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1269; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1270; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1271;
1272; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_0_0:
1273; GFX940:       ; %bb.0:
1274; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1275; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1276; GFX940-NEXT:    ;;#ASMSTART
1277; GFX940-NEXT:    ; def v[0:1]
1278; GFX940-NEXT:    ;;#ASMEND
1279; GFX940-NEXT:    s_nop 0
1280; GFX940-NEXT:    v_alignbit_b32 v1, v0, v0, 16
1281; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1282; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1283; GFX940-NEXT:    s_waitcnt vmcnt(0)
1284; GFX940-NEXT:    s_setpc_b64 s[30:31]
1285  %vec0 = call <4 x half> asm "; def $0", "=v"()
1286  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 0, i32 0>
1287  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1288  ret void
1289}
1290
; Shuffle mask <2, 0, 0>: result element 0 takes element 2 of %vec0, and
; result elements 1 and 2 both take element 0 of %vec0. The second shuffle
; operand is poison. The <3 x half> result is stored to %ptr. The expected
; code combines v0 and v1 into the stored dword with v_perm_b32.
1291define void @v_shuffle_v3f16_v4f16__2_0_0(ptr addrspace(1) inreg %ptr) {
1292; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_0_0:
1293; GFX900:       ; %bb.0:
1294; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1296; GFX900-NEXT:    ;;#ASMSTART
1297; GFX900-NEXT:    ; def v[0:1]
1298; GFX900-NEXT:    ;;#ASMEND
1299; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1300; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
1301; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1302; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1303; GFX900-NEXT:    s_waitcnt vmcnt(0)
1304; GFX900-NEXT:    s_setpc_b64 s[30:31]
1305;
1306; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_0_0:
1307; GFX90A:       ; %bb.0:
1308; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1309; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1310; GFX90A-NEXT:    ;;#ASMSTART
1311; GFX90A-NEXT:    ; def v[0:1]
1312; GFX90A-NEXT:    ;;#ASMEND
1313; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1314; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
1315; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1316; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1317; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1318; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1319;
1320; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_0_0:
1321; GFX940:       ; %bb.0:
1322; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1323; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1324; GFX940-NEXT:    ;;#ASMSTART
1325; GFX940-NEXT:    ; def v[0:1]
1326; GFX940-NEXT:    ;;#ASMEND
1327; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1328; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
1329; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1330; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1331; GFX940-NEXT:    s_waitcnt vmcnt(0)
1332; GFX940-NEXT:    s_setpc_b64 s[30:31]
1333  %vec0 = call <4 x half> asm "; def $0", "=v"()
1334  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 0, i32 0>
1335  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1336  ret void
1337}
1338
; Shuffle mask <3, 0, 0>: result element 0 takes element 3 of %vec0, and
; result elements 1 and 2 both take element 0 of %vec0. The second shuffle
; operand is poison. The <3 x half> result is stored to %ptr. The expected
; code forms the stored dword from v0 and v1 with v_alignbit_b32 by 16 bits.
1339define void @v_shuffle_v3f16_v4f16__3_0_0(ptr addrspace(1) inreg %ptr) {
1340; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_0_0:
1341; GFX900:       ; %bb.0:
1342; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1344; GFX900-NEXT:    ;;#ASMSTART
1345; GFX900-NEXT:    ; def v[0:1]
1346; GFX900-NEXT:    ;;#ASMEND
1347; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1348; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1349; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1350; GFX900-NEXT:    s_waitcnt vmcnt(0)
1351; GFX900-NEXT:    s_setpc_b64 s[30:31]
1352;
1353; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_0_0:
1354; GFX90A:       ; %bb.0:
1355; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1357; GFX90A-NEXT:    ;;#ASMSTART
1358; GFX90A-NEXT:    ; def v[0:1]
1359; GFX90A-NEXT:    ;;#ASMEND
1360; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1361; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1362; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1363; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1364; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1365;
1366; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_0_0:
1367; GFX940:       ; %bb.0:
1368; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1369; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1370; GFX940-NEXT:    ;;#ASMSTART
1371; GFX940-NEXT:    ; def v[0:1]
1372; GFX940-NEXT:    ;;#ASMEND
1373; GFX940-NEXT:    s_nop 0
1374; GFX940-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1375; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1376; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1377; GFX940-NEXT:    s_waitcnt vmcnt(0)
1378; GFX940-NEXT:    s_setpc_b64 s[30:31]
1379  %vec0 = call <4 x half> asm "; def $0", "=v"()
1380  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 0, i32 0>
1381  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1382  ret void
1383}
1384
; Shuffle mask <4, 0, 0>: mask index 4 selects element 0 of the second shuffle
; operand, which is poison here, so result element 0 is unspecified. Result
; elements 1 and 2 both take element 0 of %vec0. The <3 x half> result is
; stored to %ptr. The expected instruction sequences match those of the
; __u_0_0 test.
1385define void @v_shuffle_v3f16_v4f16__4_0_0(ptr addrspace(1) inreg %ptr) {
1386; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_0_0:
1387; GFX900:       ; %bb.0:
1388; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1390; GFX900-NEXT:    ;;#ASMSTART
1391; GFX900-NEXT:    ; def v[0:1]
1392; GFX900-NEXT:    ;;#ASMEND
1393; GFX900-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1394; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1395; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1396; GFX900-NEXT:    s_waitcnt vmcnt(0)
1397; GFX900-NEXT:    s_setpc_b64 s[30:31]
1398;
1399; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_0_0:
1400; GFX90A:       ; %bb.0:
1401; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1403; GFX90A-NEXT:    ;;#ASMSTART
1404; GFX90A-NEXT:    ; def v[0:1]
1405; GFX90A-NEXT:    ;;#ASMEND
1406; GFX90A-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1407; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
1408; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1409; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1410; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1411;
1412; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_0_0:
1413; GFX940:       ; %bb.0:
1414; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1415; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1416; GFX940-NEXT:    ;;#ASMSTART
1417; GFX940-NEXT:    ; def v[0:1]
1418; GFX940-NEXT:    ;;#ASMEND
1419; GFX940-NEXT:    s_nop 0
1420; GFX940-NEXT:    v_lshlrev_b32_e32 v1, 16, v0
1421; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
1422; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1423; GFX940-NEXT:    s_waitcnt vmcnt(0)
1424; GFX940-NEXT:    s_setpc_b64 s[30:31]
1425  %vec0 = call <4 x half> asm "; def $0", "=v"()
1426  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 0, i32 0>
1427  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1428  ret void
1429}
1430
; Shuffle mask <5, 0, 0>: result element 0 takes element 1 of %vec1, and
; result elements 1 and 2 both take element 0 of %vec0. The <3 x half> result
; is stored to %ptr. The expected code forms the stored dword from one
; register of each asm def with v_alignbit_b32 by 16 bits.
1431define void @v_shuffle_v3f16_v4f16__5_0_0(ptr addrspace(1) inreg %ptr) {
1432; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_0_0:
1433; GFX900:       ; %bb.0:
1434; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1435; GFX900-NEXT:    ;;#ASMSTART
1436; GFX900-NEXT:    ; def v[0:1]
1437; GFX900-NEXT:    ;;#ASMEND
1438; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1439; GFX900-NEXT:    ;;#ASMSTART
1440; GFX900-NEXT:    ; def v[1:2]
1441; GFX900-NEXT:    ;;#ASMEND
1442; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
1443; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1444; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1445; GFX900-NEXT:    s_waitcnt vmcnt(0)
1446; GFX900-NEXT:    s_setpc_b64 s[30:31]
1447;
1448; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_0_0:
1449; GFX90A:       ; %bb.0:
1450; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1451; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1452; GFX90A-NEXT:    ;;#ASMSTART
1453; GFX90A-NEXT:    ; def v[0:1]
1454; GFX90A-NEXT:    ;;#ASMEND
1455; GFX90A-NEXT:    ;;#ASMSTART
1456; GFX90A-NEXT:    ; def v[2:3]
1457; GFX90A-NEXT:    ;;#ASMEND
1458; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v2, 16
1459; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1460; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1461; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1462; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1463;
1464; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_0_0:
1465; GFX940:       ; %bb.0:
1466; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1467; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1468; GFX940-NEXT:    ;;#ASMSTART
1469; GFX940-NEXT:    ; def v[0:1]
1470; GFX940-NEXT:    ;;#ASMEND
1471; GFX940-NEXT:    ;;#ASMSTART
1472; GFX940-NEXT:    ; def v[2:3]
1473; GFX940-NEXT:    ;;#ASMEND
1474; GFX940-NEXT:    s_nop 0
1475; GFX940-NEXT:    v_alignbit_b32 v1, v0, v2, 16
1476; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1477; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1478; GFX940-NEXT:    s_waitcnt vmcnt(0)
1479; GFX940-NEXT:    s_setpc_b64 s[30:31]
1480  %vec0 = call <4 x half> asm "; def $0", "=v"()
1481  %vec1 = call <4 x half> asm "; def $0", "=v"()
1482  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
1483  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1484  ret void
1485}
1486
; Shuffle mask <6, 0, 0>: result element 0 takes element 2 of %vec1, and
; result elements 1 and 2 both take element 0 of %vec0. The <3 x half> result
; is stored to %ptr. The expected code combines one register of each asm def
; into the stored dword with v_perm_b32.
1487define void @v_shuffle_v3f16_v4f16__6_0_0(ptr addrspace(1) inreg %ptr) {
1488; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_0_0:
1489; GFX900:       ; %bb.0:
1490; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491; GFX900-NEXT:    ;;#ASMSTART
1492; GFX900-NEXT:    ; def v[0:1]
1493; GFX900-NEXT:    ;;#ASMEND
1494; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1495; GFX900-NEXT:    ;;#ASMSTART
1496; GFX900-NEXT:    ; def v[1:2]
1497; GFX900-NEXT:    ;;#ASMEND
1498; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1499; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
1500; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1501; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1502; GFX900-NEXT:    s_waitcnt vmcnt(0)
1503; GFX900-NEXT:    s_setpc_b64 s[30:31]
1504;
1505; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_0_0:
1506; GFX90A:       ; %bb.0:
1507; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1508; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1509; GFX90A-NEXT:    ;;#ASMSTART
1510; GFX90A-NEXT:    ; def v[0:1]
1511; GFX90A-NEXT:    ;;#ASMEND
1512; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1513; GFX90A-NEXT:    ;;#ASMSTART
1514; GFX90A-NEXT:    ; def v[2:3]
1515; GFX90A-NEXT:    ;;#ASMEND
1516; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
1517; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1518; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1519; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1520; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1521;
1522; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_0_0:
1523; GFX940:       ; %bb.0:
1524; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1525; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1526; GFX940-NEXT:    ;;#ASMSTART
1527; GFX940-NEXT:    ; def v[0:1]
1528; GFX940-NEXT:    ;;#ASMEND
1529; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1530; GFX940-NEXT:    ;;#ASMSTART
1531; GFX940-NEXT:    ; def v[2:3]
1532; GFX940-NEXT:    ;;#ASMEND
1533; GFX940-NEXT:    s_nop 0
1534; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
1535; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1536; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1537; GFX940-NEXT:    s_waitcnt vmcnt(0)
1538; GFX940-NEXT:    s_setpc_b64 s[30:31]
1539  %vec0 = call <4 x half> asm "; def $0", "=v"()
1540  %vec1 = call <4 x half> asm "; def $0", "=v"()
1541  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
1542  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1543  ret void
1544}
1545
; Shuffle mask <7, 0, 0>: result lane 0 is element 3 of %vec1 (mask indices
; 4-7 address the second operand); lanes 1 and 2 are both element 0 of %vec0.
; The <3 x half> result is stored to %ptr. The checks expect lanes 0-1 to be
; packed with a single v_alignbit_b32 (shift of 16) and lane 2 to be written
; by a separate short store.
1546define void @v_shuffle_v3f16_v4f16__7_0_0(ptr addrspace(1) inreg %ptr) {
1547; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_0:
1548; GFX900:       ; %bb.0:
1549; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1550; GFX900-NEXT:    ;;#ASMSTART
1551; GFX900-NEXT:    ; def v[0:1]
1552; GFX900-NEXT:    ;;#ASMEND
1553; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1554; GFX900-NEXT:    ;;#ASMSTART
1555; GFX900-NEXT:    ; def v[1:2]
1556; GFX900-NEXT:    ;;#ASMEND
1557; GFX900-NEXT:    v_alignbit_b32 v1, v0, v2, 16
1558; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1559; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1560; GFX900-NEXT:    s_waitcnt vmcnt(0)
1561; GFX900-NEXT:    s_setpc_b64 s[30:31]
1562;
1563; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_0:
1564; GFX90A:       ; %bb.0:
1565; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1566; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1567; GFX90A-NEXT:    ;;#ASMSTART
1568; GFX90A-NEXT:    ; def v[0:1]
1569; GFX90A-NEXT:    ;;#ASMEND
1570; GFX90A-NEXT:    ;;#ASMSTART
1571; GFX90A-NEXT:    ; def v[2:3]
1572; GFX90A-NEXT:    ;;#ASMEND
1573; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v3, 16
1574; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1575; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1576; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1577; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1578;
1579; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_0:
1580; GFX940:       ; %bb.0:
1581; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1582; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1583; GFX940-NEXT:    ;;#ASMSTART
1584; GFX940-NEXT:    ; def v[0:1]
1585; GFX940-NEXT:    ;;#ASMEND
1586; GFX940-NEXT:    ;;#ASMSTART
1587; GFX940-NEXT:    ; def v[2:3]
1588; GFX940-NEXT:    ;;#ASMEND
1589; GFX940-NEXT:    s_nop 0
1590; GFX940-NEXT:    v_alignbit_b32 v1, v0, v3, 16
1591; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1592; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1593; GFX940-NEXT:    s_waitcnt vmcnt(0)
1594; GFX940-NEXT:    s_setpc_b64 s[30:31]
1595  %vec0 = call <4 x half> asm "; def $0", "=v"()
1596  %vec1 = call <4 x half> asm "; def $0", "=v"()
1597  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
1598  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1599  ret void
1600}
1601
; Shuffle mask <7, poison, 0>: result lane 0 is element 3 of %vec1, lane 1 is
; poison (its contents are unconstrained), and lane 2 is element 0 of %vec0.
; The <3 x half> result is stored to %ptr. The checks expect v_alignbit_b32
; (shift of 16) whose first source is an SGPR rather than a half of %vec0,
; since nothing has to be preserved in the poison lane.
1602define void @v_shuffle_v3f16_v4f16__7_u_0(ptr addrspace(1) inreg %ptr) {
1603; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_0:
1604; GFX900:       ; %bb.0:
1605; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1606; GFX900-NEXT:    ;;#ASMSTART
1607; GFX900-NEXT:    ; def v[0:1]
1608; GFX900-NEXT:    ;;#ASMEND
1609; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1610; GFX900-NEXT:    ;;#ASMSTART
1611; GFX900-NEXT:    ; def v[1:2]
1612; GFX900-NEXT:    ;;#ASMEND
1613; GFX900-NEXT:    v_alignbit_b32 v1, s4, v2, 16
1614; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1615; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1616; GFX900-NEXT:    s_waitcnt vmcnt(0)
1617; GFX900-NEXT:    s_setpc_b64 s[30:31]
1618;
1619; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_0:
1620; GFX90A:       ; %bb.0:
1621; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1622; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1623; GFX90A-NEXT:    ;;#ASMSTART
1624; GFX90A-NEXT:    ; def v[0:1]
1625; GFX90A-NEXT:    ;;#ASMEND
1626; GFX90A-NEXT:    ;;#ASMSTART
1627; GFX90A-NEXT:    ; def v[2:3]
1628; GFX90A-NEXT:    ;;#ASMEND
1629; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v3, 16
1630; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1631; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1632; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1633; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1634;
1635; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_0:
1636; GFX940:       ; %bb.0:
1637; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1639; GFX940-NEXT:    ;;#ASMSTART
1640; GFX940-NEXT:    ; def v[0:1]
1641; GFX940-NEXT:    ;;#ASMEND
1642; GFX940-NEXT:    ;;#ASMSTART
1643; GFX940-NEXT:    ; def v[2:3]
1644; GFX940-NEXT:    ;;#ASMEND
1645; GFX940-NEXT:    s_nop 0
1646; GFX940-NEXT:    v_alignbit_b32 v1, s0, v3, 16
1647; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1648; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1649; GFX940-NEXT:    s_waitcnt vmcnt(0)
1650; GFX940-NEXT:    s_setpc_b64 s[30:31]
1651  %vec0 = call <4 x half> asm "; def $0", "=v"()
1652  %vec1 = call <4 x half> asm "; def $0", "=v"()
1653  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
1654  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1655  ret void
1656}
1657
; Shuffle mask <7, 1, 0>: result lane 0 is element 3 of %vec1 (mask indices
; 4-7 address the second operand), lane 1 is element 1 of %vec0, and lane 2 is
; element 0 of %vec0. The <3 x half> result is stored to %ptr. The checks
; expect lanes 0-1 to be packed with v_perm_b32 (selector 0x7060302, i.e. the
; high halves of both sources) and lane 2 to be written by a short store.
1658define void @v_shuffle_v3f16_v4f16__7_1_0(ptr addrspace(1) inreg %ptr) {
1659; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_0:
1660; GFX900:       ; %bb.0:
1661; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1662; GFX900-NEXT:    ;;#ASMSTART
1663; GFX900-NEXT:    ; def v[0:1]
1664; GFX900-NEXT:    ;;#ASMEND
1665; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1666; GFX900-NEXT:    ;;#ASMSTART
1667; GFX900-NEXT:    ; def v[1:2]
1668; GFX900-NEXT:    ;;#ASMEND
1669; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1670; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
1671; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1672; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1673; GFX900-NEXT:    s_waitcnt vmcnt(0)
1674; GFX900-NEXT:    s_setpc_b64 s[30:31]
1675;
1676; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_0:
1677; GFX90A:       ; %bb.0:
1678; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1679; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1680; GFX90A-NEXT:    ;;#ASMSTART
1681; GFX90A-NEXT:    ; def v[0:1]
1682; GFX90A-NEXT:    ;;#ASMEND
1683; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1684; GFX90A-NEXT:    ;;#ASMSTART
1685; GFX90A-NEXT:    ; def v[2:3]
1686; GFX90A-NEXT:    ;;#ASMEND
1687; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
1688; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1689; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1690; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1691; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1692;
1693; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_0:
1694; GFX940:       ; %bb.0:
1695; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1696; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1697; GFX940-NEXT:    ;;#ASMSTART
1698; GFX940-NEXT:    ; def v[0:1]
1699; GFX940-NEXT:    ;;#ASMEND
1700; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1701; GFX940-NEXT:    ;;#ASMSTART
1702; GFX940-NEXT:    ; def v[2:3]
1703; GFX940-NEXT:    ;;#ASMEND
1704; GFX940-NEXT:    s_nop 0
1705; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
1706; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1707; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1708; GFX940-NEXT:    s_waitcnt vmcnt(0)
1709; GFX940-NEXT:    s_setpc_b64 s[30:31]
1710  %vec0 = call <4 x half> asm "; def $0", "=v"()
1711  %vec1 = call <4 x half> asm "; def $0", "=v"()
1712  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
1713  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1714  ret void
1715}
1716
; Shuffle mask <7, 2, 0>: result lane 0 is element 3 of %vec1 (mask indices
; 4-7 address the second operand), lane 1 is element 2 of %vec0, and lane 2 is
; element 0 of %vec0. The <3 x half> result is stored to %ptr. The checks
; expect lanes 0-1 to be packed with a single v_alignbit_b32 (shift of 16) and
; lane 2 to be written by a separate short store.
1717define void @v_shuffle_v3f16_v4f16__7_2_0(ptr addrspace(1) inreg %ptr) {
1718; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_0:
1719; GFX900:       ; %bb.0:
1720; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1721; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1722; GFX900-NEXT:    ;;#ASMSTART
1723; GFX900-NEXT:    ; def v[0:1]
1724; GFX900-NEXT:    ;;#ASMEND
1725; GFX900-NEXT:    ;;#ASMSTART
1726; GFX900-NEXT:    ; def v[2:3]
1727; GFX900-NEXT:    ;;#ASMEND
1728; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
1729; GFX900-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1730; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
1731; GFX900-NEXT:    s_waitcnt vmcnt(0)
1732; GFX900-NEXT:    s_setpc_b64 s[30:31]
1733;
1734; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_0:
1735; GFX90A:       ; %bb.0:
1736; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1737; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1738; GFX90A-NEXT:    ;;#ASMSTART
1739; GFX90A-NEXT:    ; def v[0:1]
1740; GFX90A-NEXT:    ;;#ASMEND
1741; GFX90A-NEXT:    ;;#ASMSTART
1742; GFX90A-NEXT:    ; def v[2:3]
1743; GFX90A-NEXT:    ;;#ASMEND
1744; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v3, 16
1745; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1746; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1747; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1748; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1749;
1750; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_0:
1751; GFX940:       ; %bb.0:
1752; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1753; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1754; GFX940-NEXT:    ;;#ASMSTART
1755; GFX940-NEXT:    ; def v[0:1]
1756; GFX940-NEXT:    ;;#ASMEND
1757; GFX940-NEXT:    ;;#ASMSTART
1758; GFX940-NEXT:    ; def v[2:3]
1759; GFX940-NEXT:    ;;#ASMEND
1760; GFX940-NEXT:    s_nop 0
1761; GFX940-NEXT:    v_alignbit_b32 v1, v1, v3, 16
1762; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1763; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1764; GFX940-NEXT:    s_waitcnt vmcnt(0)
1765; GFX940-NEXT:    s_setpc_b64 s[30:31]
1766  %vec0 = call <4 x half> asm "; def $0", "=v"()
1767  %vec1 = call <4 x half> asm "; def $0", "=v"()
1768  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
1769  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1770  ret void
1771}
1772
; Shuffle mask <7, 3, 0>: result lane 0 is element 3 of %vec1 (mask indices
; 4-7 address the second operand), lane 1 is element 3 of %vec0, and lane 2 is
; element 0 of %vec0. The <3 x half> result is stored to %ptr. The checks
; expect lanes 0-1 to be packed with v_perm_b32 (selector 0x7060302, i.e. the
; high halves of both sources) and lane 2 to be written by a short store.
1773define void @v_shuffle_v3f16_v4f16__7_3_0(ptr addrspace(1) inreg %ptr) {
1774; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_0:
1775; GFX900:       ; %bb.0:
1776; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1777; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1778; GFX900-NEXT:    ;;#ASMSTART
1779; GFX900-NEXT:    ; def v[0:1]
1780; GFX900-NEXT:    ;;#ASMEND
1781; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1782; GFX900-NEXT:    ;;#ASMSTART
1783; GFX900-NEXT:    ; def v[2:3]
1784; GFX900-NEXT:    ;;#ASMEND
1785; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
1786; GFX900-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1787; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
1788; GFX900-NEXT:    s_waitcnt vmcnt(0)
1789; GFX900-NEXT:    s_setpc_b64 s[30:31]
1790;
1791; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_0:
1792; GFX90A:       ; %bb.0:
1793; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1794; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1795; GFX90A-NEXT:    ;;#ASMSTART
1796; GFX90A-NEXT:    ; def v[0:1]
1797; GFX90A-NEXT:    ;;#ASMEND
1798; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1799; GFX90A-NEXT:    ;;#ASMSTART
1800; GFX90A-NEXT:    ; def v[2:3]
1801; GFX90A-NEXT:    ;;#ASMEND
1802; GFX90A-NEXT:    v_perm_b32 v1, v1, v3, s4
1803; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1804; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1805; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1806; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1807;
1808; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_0:
1809; GFX940:       ; %bb.0:
1810; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1811; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1812; GFX940-NEXT:    ;;#ASMSTART
1813; GFX940-NEXT:    ; def v[0:1]
1814; GFX940-NEXT:    ;;#ASMEND
1815; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1816; GFX940-NEXT:    ;;#ASMSTART
1817; GFX940-NEXT:    ; def v[2:3]
1818; GFX940-NEXT:    ;;#ASMEND
1819; GFX940-NEXT:    s_nop 0
1820; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s2
1821; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1822; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1823; GFX940-NEXT:    s_waitcnt vmcnt(0)
1824; GFX940-NEXT:    s_setpc_b64 s[30:31]
1825  %vec0 = call <4 x half> asm "; def $0", "=v"()
1826  %vec1 = call <4 x half> asm "; def $0", "=v"()
1827  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
1828  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1829  ret void
1830}
1831
; Shuffle mask <7, 4, 0>: result lane 0 is element 3 of %vec1 and lane 1 is
; element 0 of %vec1 (mask indices 4-7 address the second operand); lane 2 is
; element 0 of %vec0. The <3 x half> result is stored to %ptr. The checks
; expect lanes 0-1 to be packed with a single v_alignbit_b32 (shift of 16)
; over the two dwords of %vec1, and lane 2 to be written by a short store.
1832define void @v_shuffle_v3f16_v4f16__7_4_0(ptr addrspace(1) inreg %ptr) {
1833; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_0:
1834; GFX900:       ; %bb.0:
1835; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1836; GFX900-NEXT:    ;;#ASMSTART
1837; GFX900-NEXT:    ; def v[0:1]
1838; GFX900-NEXT:    ;;#ASMEND
1839; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1840; GFX900-NEXT:    ;;#ASMSTART
1841; GFX900-NEXT:    ; def v[1:2]
1842; GFX900-NEXT:    ;;#ASMEND
1843; GFX900-NEXT:    v_alignbit_b32 v1, v1, v2, 16
1844; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1845; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1846; GFX900-NEXT:    s_waitcnt vmcnt(0)
1847; GFX900-NEXT:    s_setpc_b64 s[30:31]
1848;
1849; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_0:
1850; GFX90A:       ; %bb.0:
1851; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1852; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1853; GFX90A-NEXT:    ;;#ASMSTART
1854; GFX90A-NEXT:    ; def v[0:1]
1855; GFX90A-NEXT:    ;;#ASMEND
1856; GFX90A-NEXT:    ;;#ASMSTART
1857; GFX90A-NEXT:    ; def v[2:3]
1858; GFX90A-NEXT:    ;;#ASMEND
1859; GFX90A-NEXT:    v_alignbit_b32 v1, v2, v3, 16
1860; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1861; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1862; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1863; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1864;
1865; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_0:
1866; GFX940:       ; %bb.0:
1867; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1868; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1869; GFX940-NEXT:    ;;#ASMSTART
1870; GFX940-NEXT:    ; def v[0:1]
1871; GFX940-NEXT:    ;;#ASMEND
1872; GFX940-NEXT:    ;;#ASMSTART
1873; GFX940-NEXT:    ; def v[2:3]
1874; GFX940-NEXT:    ;;#ASMEND
1875; GFX940-NEXT:    s_nop 0
1876; GFX940-NEXT:    v_alignbit_b32 v1, v2, v3, 16
1877; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1878; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1879; GFX940-NEXT:    s_waitcnt vmcnt(0)
1880; GFX940-NEXT:    s_setpc_b64 s[30:31]
1881  %vec0 = call <4 x half> asm "; def $0", "=v"()
1882  %vec1 = call <4 x half> asm "; def $0", "=v"()
1883  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
1884  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1885  ret void
1886}
1887
; Shuffle mask <7, 5, 0>: result lane 0 is element 3 of %vec1 and lane 1 is
; element 1 of %vec1 (mask indices 4-7 address the second operand); lane 2 is
; element 0 of %vec0. The <3 x half> result is stored to %ptr. The checks
; expect lanes 0-1 to be packed with v_perm_b32 (selector 0x7060302, i.e. the
; high halves of both dwords of %vec1) and lane 2 to use a short store.
1888define void @v_shuffle_v3f16_v4f16__7_5_0(ptr addrspace(1) inreg %ptr) {
1889; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_0:
1890; GFX900:       ; %bb.0:
1891; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1892; GFX900-NEXT:    ;;#ASMSTART
1893; GFX900-NEXT:    ; def v[0:1]
1894; GFX900-NEXT:    ;;#ASMEND
1895; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1896; GFX900-NEXT:    ;;#ASMSTART
1897; GFX900-NEXT:    ; def v[1:2]
1898; GFX900-NEXT:    ;;#ASMEND
1899; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1900; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
1901; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1902; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1903; GFX900-NEXT:    s_waitcnt vmcnt(0)
1904; GFX900-NEXT:    s_setpc_b64 s[30:31]
1905;
1906; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_0:
1907; GFX90A:       ; %bb.0:
1908; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1909; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1910; GFX90A-NEXT:    ;;#ASMSTART
1911; GFX90A-NEXT:    ; def v[0:1]
1912; GFX90A-NEXT:    ;;#ASMEND
1913; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1914; GFX90A-NEXT:    ;;#ASMSTART
1915; GFX90A-NEXT:    ; def v[2:3]
1916; GFX90A-NEXT:    ;;#ASMEND
1917; GFX90A-NEXT:    v_perm_b32 v1, v2, v3, s4
1918; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1919; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1920; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1921; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1922;
1923; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_0:
1924; GFX940:       ; %bb.0:
1925; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1927; GFX940-NEXT:    ;;#ASMSTART
1928; GFX940-NEXT:    ; def v[0:1]
1929; GFX940-NEXT:    ;;#ASMEND
1930; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1931; GFX940-NEXT:    ;;#ASMSTART
1932; GFX940-NEXT:    ; def v[2:3]
1933; GFX940-NEXT:    ;;#ASMEND
1934; GFX940-NEXT:    s_nop 0
1935; GFX940-NEXT:    v_perm_b32 v1, v2, v3, s2
1936; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1937; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1938; GFX940-NEXT:    s_waitcnt vmcnt(0)
1939; GFX940-NEXT:    s_setpc_b64 s[30:31]
1940  %vec0 = call <4 x half> asm "; def $0", "=v"()
1941  %vec1 = call <4 x half> asm "; def $0", "=v"()
1942  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
1943  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
1944  ret void
1945}
1946
; Shuffle mask <7, 6, 0>: result lane 0 is element 3 of %vec1 and lane 1 is
; element 2 of %vec1 (mask indices 4-7 address the second operand); lane 2 is
; element 0 of %vec0. The <3 x half> result is stored to %ptr. The checks
; expect v_alignbit_b32 (shift of 16) with the same register as both sources,
; which swaps the two halves of the upper dword of %vec1.
1947define void @v_shuffle_v3f16_v4f16__7_6_0(ptr addrspace(1) inreg %ptr) {
1948; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_0:
1949; GFX900:       ; %bb.0:
1950; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951; GFX900-NEXT:    ;;#ASMSTART
1952; GFX900-NEXT:    ; def v[0:1]
1953; GFX900-NEXT:    ;;#ASMEND
1954; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1955; GFX900-NEXT:    ;;#ASMSTART
1956; GFX900-NEXT:    ; def v[1:2]
1957; GFX900-NEXT:    ;;#ASMEND
1958; GFX900-NEXT:    v_alignbit_b32 v1, v2, v2, 16
1959; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
1960; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
1961; GFX900-NEXT:    s_waitcnt vmcnt(0)
1962; GFX900-NEXT:    s_setpc_b64 s[30:31]
1963;
1964; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_0:
1965; GFX90A:       ; %bb.0:
1966; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1967; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1968; GFX90A-NEXT:    ;;#ASMSTART
1969; GFX90A-NEXT:    ; def v[0:1]
1970; GFX90A-NEXT:    ;;#ASMEND
1971; GFX90A-NEXT:    ;;#ASMSTART
1972; GFX90A-NEXT:    ; def v[2:3]
1973; GFX90A-NEXT:    ;;#ASMEND
1974; GFX90A-NEXT:    v_alignbit_b32 v1, v3, v3, 16
1975; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
1976; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
1977; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1978; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1979;
1980; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_0:
1981; GFX940:       ; %bb.0:
1982; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1983; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1984; GFX940-NEXT:    ;;#ASMSTART
1985; GFX940-NEXT:    ; def v[0:1]
1986; GFX940-NEXT:    ;;#ASMEND
1987; GFX940-NEXT:    ;;#ASMSTART
1988; GFX940-NEXT:    ; def v[2:3]
1989; GFX940-NEXT:    ;;#ASMEND
1990; GFX940-NEXT:    s_nop 0
1991; GFX940-NEXT:    v_alignbit_b32 v1, v3, v3, 16
1992; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
1993; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
1994; GFX940-NEXT:    s_waitcnt vmcnt(0)
1995; GFX940-NEXT:    s_setpc_b64 s[30:31]
1996  %vec0 = call <4 x half> asm "; def $0", "=v"()
1997  %vec1 = call <4 x half> asm "; def $0", "=v"()
1998  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
1999  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2000  ret void
2001}
2002
; Shuffle mask <poison, 1, 1>: result lane 0 is poison (unconstrained) and
; lanes 1 and 2 are both element 1 of %vec0; the second shuffle operand is
; poison. The <3 x half> result is stored to %ptr. The checks expect the first
; dword of %vec0 to be stored unchanged for lanes 0-1, then shifted right by
; 16 to provide lane 2 for the short store.
2003define void @v_shuffle_v3f16_v4f16__u_1_1(ptr addrspace(1) inreg %ptr) {
2004; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_1_1:
2005; GFX900:       ; %bb.0:
2006; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2007; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2008; GFX900-NEXT:    ;;#ASMSTART
2009; GFX900-NEXT:    ; def v[0:1]
2010; GFX900-NEXT:    ;;#ASMEND
2011; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2012; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2013; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2014; GFX900-NEXT:    s_waitcnt vmcnt(0)
2015; GFX900-NEXT:    s_setpc_b64 s[30:31]
2016;
2017; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_1_1:
2018; GFX90A:       ; %bb.0:
2019; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2020; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2021; GFX90A-NEXT:    ;;#ASMSTART
2022; GFX90A-NEXT:    ; def v[0:1]
2023; GFX90A-NEXT:    ;;#ASMEND
2024; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2025; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2026; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2027; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2028; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2029;
2030; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_1_1:
2031; GFX940:       ; %bb.0:
2032; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2033; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2034; GFX940-NEXT:    ;;#ASMSTART
2035; GFX940-NEXT:    ; def v[0:1]
2036; GFX940-NEXT:    ;;#ASMEND
2037; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2038; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2039; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2040; GFX940-NEXT:    s_waitcnt vmcnt(0)
2041; GFX940-NEXT:    s_setpc_b64 s[30:31]
2042  %vec0 = call <4 x half> asm "; def $0", "=v"()
2043  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 1, i32 1>
2044  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2045  ret void
2046}
2047
; Shuffle mask <0, 1, 1>: result lane 0 is element 0 of %vec0 and lanes 1 and
; 2 are both element 1 of %vec0; the second shuffle operand is poison. The
; <3 x half> result is stored to %ptr. Lanes 0-1 already match the first dword
; of %vec0, so the checks expect it to be stored unchanged, then shifted right
; by 16 to provide lane 2 for the short store.
2048define void @v_shuffle_v3f16_v4f16__0_1_1(ptr addrspace(1) inreg %ptr) {
2049; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_1_1:
2050; GFX900:       ; %bb.0:
2051; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2052; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2053; GFX900-NEXT:    ;;#ASMSTART
2054; GFX900-NEXT:    ; def v[0:1]
2055; GFX900-NEXT:    ;;#ASMEND
2056; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2057; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2058; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2059; GFX900-NEXT:    s_waitcnt vmcnt(0)
2060; GFX900-NEXT:    s_setpc_b64 s[30:31]
2061;
2062; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_1_1:
2063; GFX90A:       ; %bb.0:
2064; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2065; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2066; GFX90A-NEXT:    ;;#ASMSTART
2067; GFX90A-NEXT:    ; def v[0:1]
2068; GFX90A-NEXT:    ;;#ASMEND
2069; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2070; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2071; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2072; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2073; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2074;
2075; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_1_1:
2076; GFX940:       ; %bb.0:
2077; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2078; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2079; GFX940-NEXT:    ;;#ASMSTART
2080; GFX940-NEXT:    ; def v[0:1]
2081; GFX940-NEXT:    ;;#ASMEND
2082; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2083; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2084; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2085; GFX940-NEXT:    s_waitcnt vmcnt(0)
2086; GFX940-NEXT:    s_setpc_b64 s[30:31]
2087  %vec0 = call <4 x half> asm "; def $0", "=v"()
2088  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 1>
2089  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2090  ret void
2091}
2092
; Shuffle mask <1, 1, 1>: all three result lanes are element 1 of %vec0 (a
; splat); the second shuffle operand is poison. The <3 x half> result is
; stored to %ptr. The checks expect v_perm_b32 (selector 0x7060302) with the
; first dword of %vec0 as both sources to duplicate its high half into lanes
; 0-1, and a right shift by 16 to provide lane 2 for the short store.
2093define void @v_shuffle_v3f16_v4f16__1_1_1(ptr addrspace(1) inreg %ptr) {
2094; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_1_1:
2095; GFX900:       ; %bb.0:
2096; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2097; GFX900-NEXT:    ;;#ASMSTART
2098; GFX900-NEXT:    ; def v[0:1]
2099; GFX900-NEXT:    ;;#ASMEND
2100; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2101; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2102; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
2103; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2104; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
2105; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2106; GFX900-NEXT:    s_waitcnt vmcnt(0)
2107; GFX900-NEXT:    s_setpc_b64 s[30:31]
2108;
2109; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_1_1:
2110; GFX90A:       ; %bb.0:
2111; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2112; GFX90A-NEXT:    ;;#ASMSTART
2113; GFX90A-NEXT:    ; def v[0:1]
2114; GFX90A-NEXT:    ;;#ASMEND
2115; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2116; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2117; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
2118; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2119; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
2120; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2121; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2122; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2123;
2124; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_1_1:
2125; GFX940:       ; %bb.0:
2126; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2127; GFX940-NEXT:    ;;#ASMSTART
2128; GFX940-NEXT:    ; def v[0:1]
2129; GFX940-NEXT:    ;;#ASMEND
2130; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2131; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2132; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
2133; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2134; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
2135; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2136; GFX940-NEXT:    s_waitcnt vmcnt(0)
2137; GFX940-NEXT:    s_setpc_b64 s[30:31]
2138  %vec0 = call <4 x half> asm "; def $0", "=v"()
2139  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 1, i32 1>
2140  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2141  ret void
2142}
2143
; Shuffle mask <2, 1, 1>: result lane 0 is element 2 of %vec0 and lanes 1 and
; 2 are both element 1 of %vec0; the second shuffle operand is poison. The
; <3 x half> result is stored to %ptr. The checks expect v_bfi_b32 with mask
; 0xffff to merge the low half of the second dword of %vec0 with the high half
; of the first, and a right shift by 16 to provide lane 2 for the short store.
2144define void @v_shuffle_v3f16_v4f16__2_1_1(ptr addrspace(1) inreg %ptr) {
2145; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_1_1:
2146; GFX900:       ; %bb.0:
2147; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2148; GFX900-NEXT:    ;;#ASMSTART
2149; GFX900-NEXT:    ; def v[0:1]
2150; GFX900-NEXT:    ;;#ASMEND
2151; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2152; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2153; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
2154; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2155; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2156; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
2157; GFX900-NEXT:    s_waitcnt vmcnt(0)
2158; GFX900-NEXT:    s_setpc_b64 s[30:31]
2159;
2160; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_1_1:
2161; GFX90A:       ; %bb.0:
2162; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2163; GFX90A-NEXT:    ;;#ASMSTART
2164; GFX90A-NEXT:    ; def v[0:1]
2165; GFX90A-NEXT:    ;;#ASMEND
2166; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2167; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2168; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v0
2169; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2170; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2171; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
2172; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2173; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2174;
2175; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_1_1:
2176; GFX940:       ; %bb.0:
2177; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2178; GFX940-NEXT:    ;;#ASMSTART
2179; GFX940-NEXT:    ; def v[0:1]
2180; GFX940-NEXT:    ;;#ASMEND
2181; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2182; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2183; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v0
2184; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2185; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2186; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
2187; GFX940-NEXT:    s_waitcnt vmcnt(0)
2188; GFX940-NEXT:    s_setpc_b64 s[30:31]
2189  %vec0 = call <4 x half> asm "; def $0", "=v"()
2190  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 1, i32 1>
2191  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2192  ret void
2193}
2194
; Shuffle mask <3, 1, 1>: result lane 0 is element 3 of %vec0 and lanes 1 and
; 2 are both element 1 of %vec0; the second shuffle operand is poison. The
; <3 x half> result is stored to %ptr. The checks expect v_perm_b32 (selector
; 0x7060302) to pack the high halves of both dwords of %vec0 into lanes 0-1,
; and a right shift by 16 to provide lane 2 for the short store.
2195define void @v_shuffle_v3f16_v4f16__3_1_1(ptr addrspace(1) inreg %ptr) {
2196; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_1_1:
2197; GFX900:       ; %bb.0:
2198; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2199; GFX900-NEXT:    ;;#ASMSTART
2200; GFX900-NEXT:    ; def v[0:1]
2201; GFX900-NEXT:    ;;#ASMEND
2202; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2203; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2204; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
2205; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2206; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
2207; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2208; GFX900-NEXT:    s_waitcnt vmcnt(0)
2209; GFX900-NEXT:    s_setpc_b64 s[30:31]
2210;
2211; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_1_1:
2212; GFX90A:       ; %bb.0:
2213; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2214; GFX90A-NEXT:    ;;#ASMSTART
2215; GFX90A-NEXT:    ; def v[0:1]
2216; GFX90A-NEXT:    ;;#ASMEND
2217; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2218; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2219; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
2220; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2221; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
2222; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2223; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2224; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2225;
2226; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_1_1:
2227; GFX940:       ; %bb.0:
2228; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2229; GFX940-NEXT:    ;;#ASMSTART
2230; GFX940-NEXT:    ; def v[0:1]
2231; GFX940-NEXT:    ;;#ASMEND
2232; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2233; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2234; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
2235; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2236; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
2237; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2238; GFX940-NEXT:    s_waitcnt vmcnt(0)
2239; GFX940-NEXT:    s_setpc_b64 s[30:31]
2240  %vec0 = call <4 x half> asm "; def $0", "=v"()
2241  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 1, i32 1>
2242  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2243  ret void
2244}
2245
; Shuffle mask <4, 1, 1>: mask index 4 addresses element 0 of the second
; operand, and lanes 1 and 2 are both element 1 of %vec0. The <3 x half>
; result is stored to %ptr.
; NOTE(review): the second operand here is poison rather than a second asm
; def, so lane 0 is effectively poison and the expected code is the same as
; for the __u_1_1 case (plain dword store plus shift) - confirm this is the
; intended coverage for index 4.
2246define void @v_shuffle_v3f16_v4f16__4_1_1(ptr addrspace(1) inreg %ptr) {
2247; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_1_1:
2248; GFX900:       ; %bb.0:
2249; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2250; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2251; GFX900-NEXT:    ;;#ASMSTART
2252; GFX900-NEXT:    ; def v[0:1]
2253; GFX900-NEXT:    ;;#ASMEND
2254; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2255; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2256; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2257; GFX900-NEXT:    s_waitcnt vmcnt(0)
2258; GFX900-NEXT:    s_setpc_b64 s[30:31]
2259;
2260; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_1_1:
2261; GFX90A:       ; %bb.0:
2262; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2263; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2264; GFX90A-NEXT:    ;;#ASMSTART
2265; GFX90A-NEXT:    ; def v[0:1]
2266; GFX90A-NEXT:    ;;#ASMEND
2267; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2268; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2269; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
2270; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2271; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2272;
2273; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_1_1:
2274; GFX940:       ; %bb.0:
2275; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2276; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2277; GFX940-NEXT:    ;;#ASMSTART
2278; GFX940-NEXT:    ; def v[0:1]
2279; GFX940-NEXT:    ;;#ASMEND
2280; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2281; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2282; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
2283; GFX940-NEXT:    s_waitcnt vmcnt(0)
2284; GFX940-NEXT:    s_setpc_b64 s[30:31]
2285  %vec0 = call <4 x half> asm "; def $0", "=v"()
2286  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 1, i32 1>
2287  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2288  ret void
2289}
2290
; Two-source shuffle with mask <5, 1, 1>. Result lane 0 is element 1 of %vec1
; (mask index 5); lanes 1 and 2 both read element 1 of %vec0. Both sources are
; produced by inline asm, and the <3 x half> result is stored to %ptr (align 8).
2291define void @v_shuffle_v3f16_v4f16__5_1_1(ptr addrspace(1) inreg %ptr) {
2292; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_1_1:
2293; GFX900:       ; %bb.0:
2294; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2295; GFX900-NEXT:    ;;#ASMSTART
2296; GFX900-NEXT:    ; def v[0:1]
2297; GFX900-NEXT:    ;;#ASMEND
2298; GFX900-NEXT:    ;;#ASMSTART
2299; GFX900-NEXT:    ; def v[1:2]
2300; GFX900-NEXT:    ;;#ASMEND
2301; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2302; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2303; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
2304; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2305; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2306; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
2307; GFX900-NEXT:    s_waitcnt vmcnt(0)
2308; GFX900-NEXT:    s_setpc_b64 s[30:31]
2309;
2310; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_1_1:
2311; GFX90A:       ; %bb.0:
2312; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313; GFX90A-NEXT:    ;;#ASMSTART
2314; GFX90A-NEXT:    ; def v[0:1]
2315; GFX90A-NEXT:    ;;#ASMEND
2316; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2317; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2318; GFX90A-NEXT:    ;;#ASMSTART
2319; GFX90A-NEXT:    ; def v[2:3]
2320; GFX90A-NEXT:    ;;#ASMEND
2321; GFX90A-NEXT:    v_perm_b32 v1, v0, v2, s4
2322; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2323; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2324; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
2325; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2326; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2327;
2328; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_1_1:
2329; GFX940:       ; %bb.0:
2330; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2331; GFX940-NEXT:    ;;#ASMSTART
2332; GFX940-NEXT:    ; def v[0:1]
2333; GFX940-NEXT:    ;;#ASMEND
2334; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2335; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2336; GFX940-NEXT:    ;;#ASMSTART
2337; GFX940-NEXT:    ; def v[2:3]
2338; GFX940-NEXT:    ;;#ASMEND
2339; GFX940-NEXT:    s_nop 0
2340; GFX940-NEXT:    v_perm_b32 v1, v0, v2, s2
2341; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2342; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2343; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2344; GFX940-NEXT:    s_waitcnt vmcnt(0)
2345; GFX940-NEXT:    s_setpc_b64 s[30:31]
2346  %vec0 = call <4 x half> asm "; def $0", "=v"()
2347  %vec1 = call <4 x half> asm "; def $0", "=v"()
2348  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
2349  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2350  ret void
2351}
2352
; Two-source shuffle with mask <6, 1, 1>. Result lane 0 is element 2 of %vec1
; (mask index 6); lanes 1 and 2 both read element 1 of %vec0. Both sources are
; produced by inline asm, and the <3 x half> result is stored to %ptr (align 8).
2353define void @v_shuffle_v3f16_v4f16__6_1_1(ptr addrspace(1) inreg %ptr) {
2354; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_1_1:
2355; GFX900:       ; %bb.0:
2356; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2357; GFX900-NEXT:    ;;#ASMSTART
2358; GFX900-NEXT:    ; def v[0:1]
2359; GFX900-NEXT:    ;;#ASMEND
2360; GFX900-NEXT:    ;;#ASMSTART
2361; GFX900-NEXT:    ; def v[1:2]
2362; GFX900-NEXT:    ;;#ASMEND
2363; GFX900-NEXT:    s_mov_b32 s4, 0xffff
2364; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2365; GFX900-NEXT:    v_bfi_b32 v1, s4, v2, v0
2366; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2367; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
2368; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2369; GFX900-NEXT:    s_waitcnt vmcnt(0)
2370; GFX900-NEXT:    s_setpc_b64 s[30:31]
2371;
2372; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_1_1:
2373; GFX90A:       ; %bb.0:
2374; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2375; GFX90A-NEXT:    ;;#ASMSTART
2376; GFX90A-NEXT:    ; def v[0:1]
2377; GFX90A-NEXT:    ;;#ASMEND
2378; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
2379; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2380; GFX90A-NEXT:    ;;#ASMSTART
2381; GFX90A-NEXT:    ; def v[2:3]
2382; GFX90A-NEXT:    ;;#ASMEND
2383; GFX90A-NEXT:    v_bfi_b32 v1, s4, v3, v0
2384; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2385; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
2386; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2387; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2388; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2389;
2390; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_1_1:
2391; GFX940:       ; %bb.0:
2392; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2393; GFX940-NEXT:    ;;#ASMSTART
2394; GFX940-NEXT:    ; def v[0:1]
2395; GFX940-NEXT:    ;;#ASMEND
2396; GFX940-NEXT:    s_mov_b32 s2, 0xffff
2397; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2398; GFX940-NEXT:    ;;#ASMSTART
2399; GFX940-NEXT:    ; def v[2:3]
2400; GFX940-NEXT:    ;;#ASMEND
2401; GFX940-NEXT:    s_nop 0
2402; GFX940-NEXT:    v_bfi_b32 v1, s2, v3, v0
2403; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2404; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2405; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2406; GFX940-NEXT:    s_waitcnt vmcnt(0)
2407; GFX940-NEXT:    s_setpc_b64 s[30:31]
2408  %vec0 = call <4 x half> asm "; def $0", "=v"()
2409  %vec1 = call <4 x half> asm "; def $0", "=v"()
2410  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
2411  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2412  ret void
2413}
2414
; Two-source shuffle with mask <7, 1, 1>. Result lane 0 is element 3 of %vec1
; (mask index 7); lanes 1 and 2 both read element 1 of %vec0. Both sources are
; produced by inline asm, and the <3 x half> result is stored to %ptr (align 8).
2415define void @v_shuffle_v3f16_v4f16__7_1_1(ptr addrspace(1) inreg %ptr) {
2416; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_1:
2417; GFX900:       ; %bb.0:
2418; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2419; GFX900-NEXT:    ;;#ASMSTART
2420; GFX900-NEXT:    ; def v[0:1]
2421; GFX900-NEXT:    ;;#ASMEND
2422; GFX900-NEXT:    ;;#ASMSTART
2423; GFX900-NEXT:    ; def v[1:2]
2424; GFX900-NEXT:    ;;#ASMEND
2425; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2426; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2427; GFX900-NEXT:    v_perm_b32 v1, v0, v2, s4
2428; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2429; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2430; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
2431; GFX900-NEXT:    s_waitcnt vmcnt(0)
2432; GFX900-NEXT:    s_setpc_b64 s[30:31]
2433;
2434; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_1:
2435; GFX90A:       ; %bb.0:
2436; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2437; GFX90A-NEXT:    ;;#ASMSTART
2438; GFX90A-NEXT:    ; def v[0:1]
2439; GFX90A-NEXT:    ;;#ASMEND
2440; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2441; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2442; GFX90A-NEXT:    ;;#ASMSTART
2443; GFX90A-NEXT:    ; def v[2:3]
2444; GFX90A-NEXT:    ;;#ASMEND
2445; GFX90A-NEXT:    v_perm_b32 v1, v0, v3, s4
2446; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2447; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2448; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
2449; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2450; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2451;
2452; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_1:
2453; GFX940:       ; %bb.0:
2454; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2455; GFX940-NEXT:    ;;#ASMSTART
2456; GFX940-NEXT:    ; def v[0:1]
2457; GFX940-NEXT:    ;;#ASMEND
2458; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2459; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2460; GFX940-NEXT:    ;;#ASMSTART
2461; GFX940-NEXT:    ; def v[2:3]
2462; GFX940-NEXT:    ;;#ASMEND
2463; GFX940-NEXT:    s_nop 0
2464; GFX940-NEXT:    v_perm_b32 v1, v0, v3, s2
2465; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2466; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2467; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
2468; GFX940-NEXT:    s_waitcnt vmcnt(0)
2469; GFX940-NEXT:    s_setpc_b64 s[30:31]
2470  %vec0 = call <4 x half> asm "; def $0", "=v"()
2471  %vec1 = call <4 x half> asm "; def $0", "=v"()
2472  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
2473  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2474  ret void
2475}
2476
; Two-source shuffle with mask <7, poison, 1>. Result lane 0 is element 3 of
; %vec1 (mask index 7), lane 1 is poison, and lane 2 is element 1 of %vec0.
; The <3 x half> result is stored to %ptr (align 8).
2477define void @v_shuffle_v3f16_v4f16__7_u_1(ptr addrspace(1) inreg %ptr) {
2478; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_1:
2479; GFX900:       ; %bb.0:
2480; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2481; GFX900-NEXT:    ;;#ASMSTART
2482; GFX900-NEXT:    ; def v[0:1]
2483; GFX900-NEXT:    ;;#ASMEND
2484; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2485; GFX900-NEXT:    ;;#ASMSTART
2486; GFX900-NEXT:    ; def v[1:2]
2487; GFX900-NEXT:    ;;#ASMEND
2488; GFX900-NEXT:    v_alignbit_b32 v1, s4, v2, 16
2489; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2490; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2491; GFX900-NEXT:    s_waitcnt vmcnt(0)
2492; GFX900-NEXT:    s_setpc_b64 s[30:31]
2493;
2494; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_1:
2495; GFX90A:       ; %bb.0:
2496; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2497; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2498; GFX90A-NEXT:    ;;#ASMSTART
2499; GFX90A-NEXT:    ; def v[0:1]
2500; GFX90A-NEXT:    ;;#ASMEND
2501; GFX90A-NEXT:    ;;#ASMSTART
2502; GFX90A-NEXT:    ; def v[2:3]
2503; GFX90A-NEXT:    ;;#ASMEND
2504; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v3, 16
2505; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2506; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2507; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2508; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2509;
2510; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_1:
2511; GFX940:       ; %bb.0:
2512; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2513; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2514; GFX940-NEXT:    ;;#ASMSTART
2515; GFX940-NEXT:    ; def v[0:1]
2516; GFX940-NEXT:    ;;#ASMEND
2517; GFX940-NEXT:    ;;#ASMSTART
2518; GFX940-NEXT:    ; def v[2:3]
2519; GFX940-NEXT:    ;;#ASMEND
2520; GFX940-NEXT:    s_nop 0
2521; GFX940-NEXT:    v_alignbit_b32 v1, s0, v3, 16
2522; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2523; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2524; GFX940-NEXT:    s_waitcnt vmcnt(0)
2525; GFX940-NEXT:    s_setpc_b64 s[30:31]
2526  %vec0 = call <4 x half> asm "; def $0", "=v"()
2527  %vec1 = call <4 x half> asm "; def $0", "=v"()
2528  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
2529  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2530  ret void
2531}
2532
; Two-source shuffle with mask <7, 0, 1>. Result lane 0 is element 3 of %vec1
; (mask index 7); lanes 1 and 2 are elements 0 and 1 of %vec0.
; The <3 x half> result is stored to %ptr (align 8).
2533define void @v_shuffle_v3f16_v4f16__7_0_1(ptr addrspace(1) inreg %ptr) {
2534; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_1:
2535; GFX900:       ; %bb.0:
2536; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2537; GFX900-NEXT:    ;;#ASMSTART
2538; GFX900-NEXT:    ; def v[0:1]
2539; GFX900-NEXT:    ;;#ASMEND
2540; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2541; GFX900-NEXT:    ;;#ASMSTART
2542; GFX900-NEXT:    ; def v[1:2]
2543; GFX900-NEXT:    ;;#ASMEND
2544; GFX900-NEXT:    v_alignbit_b32 v1, v0, v2, 16
2545; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2546; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2547; GFX900-NEXT:    s_waitcnt vmcnt(0)
2548; GFX900-NEXT:    s_setpc_b64 s[30:31]
2549;
2550; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_1:
2551; GFX90A:       ; %bb.0:
2552; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2553; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2554; GFX90A-NEXT:    ;;#ASMSTART
2555; GFX90A-NEXT:    ; def v[0:1]
2556; GFX90A-NEXT:    ;;#ASMEND
2557; GFX90A-NEXT:    ;;#ASMSTART
2558; GFX90A-NEXT:    ; def v[2:3]
2559; GFX90A-NEXT:    ;;#ASMEND
2560; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v3, 16
2561; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2562; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2563; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2564; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2565;
2566; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_1:
2567; GFX940:       ; %bb.0:
2568; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2569; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2570; GFX940-NEXT:    ;;#ASMSTART
2571; GFX940-NEXT:    ; def v[0:1]
2572; GFX940-NEXT:    ;;#ASMEND
2573; GFX940-NEXT:    ;;#ASMSTART
2574; GFX940-NEXT:    ; def v[2:3]
2575; GFX940-NEXT:    ;;#ASMEND
2576; GFX940-NEXT:    s_nop 0
2577; GFX940-NEXT:    v_alignbit_b32 v1, v0, v3, 16
2578; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2579; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2580; GFX940-NEXT:    s_waitcnt vmcnt(0)
2581; GFX940-NEXT:    s_setpc_b64 s[30:31]
2582  %vec0 = call <4 x half> asm "; def $0", "=v"()
2583  %vec1 = call <4 x half> asm "; def $0", "=v"()
2584  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
2585  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2586  ret void
2587}
2588
; Two-source shuffle with mask <7, 2, 1>. Result lane 0 is element 3 of %vec1
; (mask index 7); lanes 1 and 2 are elements 2 and 1 of %vec0.
; The <3 x half> result is stored to %ptr (align 8).
2589define void @v_shuffle_v3f16_v4f16__7_2_1(ptr addrspace(1) inreg %ptr) {
2590; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_1:
2591; GFX900:       ; %bb.0:
2592; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2593; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2594; GFX900-NEXT:    ;;#ASMSTART
2595; GFX900-NEXT:    ; def v[0:1]
2596; GFX900-NEXT:    ;;#ASMEND
2597; GFX900-NEXT:    ;;#ASMSTART
2598; GFX900-NEXT:    ; def v[2:3]
2599; GFX900-NEXT:    ;;#ASMEND
2600; GFX900-NEXT:    v_alignbit_b32 v1, v1, v3, 16
2601; GFX900-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2602; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
2603; GFX900-NEXT:    s_waitcnt vmcnt(0)
2604; GFX900-NEXT:    s_setpc_b64 s[30:31]
2605;
2606; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_1:
2607; GFX90A:       ; %bb.0:
2608; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2609; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2610; GFX90A-NEXT:    ;;#ASMSTART
2611; GFX90A-NEXT:    ; def v[0:1]
2612; GFX90A-NEXT:    ;;#ASMEND
2613; GFX90A-NEXT:    ;;#ASMSTART
2614; GFX90A-NEXT:    ; def v[2:3]
2615; GFX90A-NEXT:    ;;#ASMEND
2616; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v3, 16
2617; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2618; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2619; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2620; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2621;
2622; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_1:
2623; GFX940:       ; %bb.0:
2624; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2625; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2626; GFX940-NEXT:    ;;#ASMSTART
2627; GFX940-NEXT:    ; def v[0:1]
2628; GFX940-NEXT:    ;;#ASMEND
2629; GFX940-NEXT:    ;;#ASMSTART
2630; GFX940-NEXT:    ; def v[2:3]
2631; GFX940-NEXT:    ;;#ASMEND
2632; GFX940-NEXT:    s_nop 0
2633; GFX940-NEXT:    v_alignbit_b32 v1, v1, v3, 16
2634; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2635; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2636; GFX940-NEXT:    s_waitcnt vmcnt(0)
2637; GFX940-NEXT:    s_setpc_b64 s[30:31]
2638  %vec0 = call <4 x half> asm "; def $0", "=v"()
2639  %vec1 = call <4 x half> asm "; def $0", "=v"()
2640  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
2641  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2642  ret void
2643}
2644
; Two-source shuffle with mask <7, 3, 1>. Result lane 0 is element 3 of %vec1
; (mask index 7); lanes 1 and 2 are elements 3 and 1 of %vec0.
; The <3 x half> result is stored to %ptr (align 8).
2645define void @v_shuffle_v3f16_v4f16__7_3_1(ptr addrspace(1) inreg %ptr) {
2646; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_1:
2647; GFX900:       ; %bb.0:
2648; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2649; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2650; GFX900-NEXT:    ;;#ASMSTART
2651; GFX900-NEXT:    ; def v[0:1]
2652; GFX900-NEXT:    ;;#ASMEND
2653; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2654; GFX900-NEXT:    ;;#ASMSTART
2655; GFX900-NEXT:    ; def v[2:3]
2656; GFX900-NEXT:    ;;#ASMEND
2657; GFX900-NEXT:    v_perm_b32 v1, v1, v3, s4
2658; GFX900-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2659; GFX900-NEXT:    global_store_dword v4, v1, s[16:17]
2660; GFX900-NEXT:    s_waitcnt vmcnt(0)
2661; GFX900-NEXT:    s_setpc_b64 s[30:31]
2662;
2663; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_1:
2664; GFX90A:       ; %bb.0:
2665; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2666; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2667; GFX90A-NEXT:    ;;#ASMSTART
2668; GFX90A-NEXT:    ; def v[0:1]
2669; GFX90A-NEXT:    ;;#ASMEND
2670; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2671; GFX90A-NEXT:    ;;#ASMSTART
2672; GFX90A-NEXT:    ; def v[2:3]
2673; GFX90A-NEXT:    ;;#ASMEND
2674; GFX90A-NEXT:    v_perm_b32 v1, v1, v3, s4
2675; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2676; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2677; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2678; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2679;
2680; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_1:
2681; GFX940:       ; %bb.0:
2682; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2683; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2684; GFX940-NEXT:    ;;#ASMSTART
2685; GFX940-NEXT:    ; def v[0:1]
2686; GFX940-NEXT:    ;;#ASMEND
2687; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2688; GFX940-NEXT:    ;;#ASMSTART
2689; GFX940-NEXT:    ; def v[2:3]
2690; GFX940-NEXT:    ;;#ASMEND
2691; GFX940-NEXT:    s_nop 0
2692; GFX940-NEXT:    v_perm_b32 v1, v1, v3, s2
2693; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2694; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2695; GFX940-NEXT:    s_waitcnt vmcnt(0)
2696; GFX940-NEXT:    s_setpc_b64 s[30:31]
2697  %vec0 = call <4 x half> asm "; def $0", "=v"()
2698  %vec1 = call <4 x half> asm "; def $0", "=v"()
2699  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
2700  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2701  ret void
2702}
2703
; Two-source shuffle with mask <7, 4, 1>. Result lanes 0 and 1 are elements 3
; and 0 of %vec1 (mask indices 7 and 4); lane 2 is element 1 of %vec0.
; The <3 x half> result is stored to %ptr (align 8).
2704define void @v_shuffle_v3f16_v4f16__7_4_1(ptr addrspace(1) inreg %ptr) {
2705; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_1:
2706; GFX900:       ; %bb.0:
2707; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2708; GFX900-NEXT:    ;;#ASMSTART
2709; GFX900-NEXT:    ; def v[0:1]
2710; GFX900-NEXT:    ;;#ASMEND
2711; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2712; GFX900-NEXT:    ;;#ASMSTART
2713; GFX900-NEXT:    ; def v[1:2]
2714; GFX900-NEXT:    ;;#ASMEND
2715; GFX900-NEXT:    v_alignbit_b32 v1, v1, v2, 16
2716; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2717; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2718; GFX900-NEXT:    s_waitcnt vmcnt(0)
2719; GFX900-NEXT:    s_setpc_b64 s[30:31]
2720;
2721; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_1:
2722; GFX90A:       ; %bb.0:
2723; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2724; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2725; GFX90A-NEXT:    ;;#ASMSTART
2726; GFX90A-NEXT:    ; def v[0:1]
2727; GFX90A-NEXT:    ;;#ASMEND
2728; GFX90A-NEXT:    ;;#ASMSTART
2729; GFX90A-NEXT:    ; def v[2:3]
2730; GFX90A-NEXT:    ;;#ASMEND
2731; GFX90A-NEXT:    v_alignbit_b32 v1, v2, v3, 16
2732; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2733; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2734; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2735; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2736;
2737; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_1:
2738; GFX940:       ; %bb.0:
2739; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2740; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2741; GFX940-NEXT:    ;;#ASMSTART
2742; GFX940-NEXT:    ; def v[0:1]
2743; GFX940-NEXT:    ;;#ASMEND
2744; GFX940-NEXT:    ;;#ASMSTART
2745; GFX940-NEXT:    ; def v[2:3]
2746; GFX940-NEXT:    ;;#ASMEND
2747; GFX940-NEXT:    s_nop 0
2748; GFX940-NEXT:    v_alignbit_b32 v1, v2, v3, 16
2749; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2750; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2751; GFX940-NEXT:    s_waitcnt vmcnt(0)
2752; GFX940-NEXT:    s_setpc_b64 s[30:31]
2753  %vec0 = call <4 x half> asm "; def $0", "=v"()
2754  %vec1 = call <4 x half> asm "; def $0", "=v"()
2755  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
2756  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2757  ret void
2758}
2759
; Two-source shuffle with mask <7, 5, 1>. Result lanes 0 and 1 are elements 3
; and 1 of %vec1 (mask indices 7 and 5); lane 2 is element 1 of %vec0.
; The <3 x half> result is stored to %ptr (align 8).
2760define void @v_shuffle_v3f16_v4f16__7_5_1(ptr addrspace(1) inreg %ptr) {
2761; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_1:
2762; GFX900:       ; %bb.0:
2763; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2764; GFX900-NEXT:    ;;#ASMSTART
2765; GFX900-NEXT:    ; def v[0:1]
2766; GFX900-NEXT:    ;;#ASMEND
2767; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2768; GFX900-NEXT:    ;;#ASMSTART
2769; GFX900-NEXT:    ; def v[1:2]
2770; GFX900-NEXT:    ;;#ASMEND
2771; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
2772; GFX900-NEXT:    v_perm_b32 v1, v1, v2, s4
2773; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2774; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2775; GFX900-NEXT:    s_waitcnt vmcnt(0)
2776; GFX900-NEXT:    s_setpc_b64 s[30:31]
2777;
2778; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_1:
2779; GFX90A:       ; %bb.0:
2780; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2781; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2782; GFX90A-NEXT:    ;;#ASMSTART
2783; GFX90A-NEXT:    ; def v[0:1]
2784; GFX90A-NEXT:    ;;#ASMEND
2785; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
2786; GFX90A-NEXT:    ;;#ASMSTART
2787; GFX90A-NEXT:    ; def v[2:3]
2788; GFX90A-NEXT:    ;;#ASMEND
2789; GFX90A-NEXT:    v_perm_b32 v1, v2, v3, s4
2790; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2791; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2792; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2793; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2794;
2795; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_1:
2796; GFX940:       ; %bb.0:
2797; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2798; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2799; GFX940-NEXT:    ;;#ASMSTART
2800; GFX940-NEXT:    ; def v[0:1]
2801; GFX940-NEXT:    ;;#ASMEND
2802; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
2803; GFX940-NEXT:    ;;#ASMSTART
2804; GFX940-NEXT:    ; def v[2:3]
2805; GFX940-NEXT:    ;;#ASMEND
2806; GFX940-NEXT:    s_nop 0
2807; GFX940-NEXT:    v_perm_b32 v1, v2, v3, s2
2808; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2809; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2810; GFX940-NEXT:    s_waitcnt vmcnt(0)
2811; GFX940-NEXT:    s_setpc_b64 s[30:31]
2812  %vec0 = call <4 x half> asm "; def $0", "=v"()
2813  %vec1 = call <4 x half> asm "; def $0", "=v"()
2814  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
2815  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2816  ret void
2817}
2818
; Two-source shuffle with mask <7, 6, 1>. Result lanes 0 and 1 are elements 3
; and 2 of %vec1 (mask indices 7 and 6); lane 2 is element 1 of %vec0.
; The <3 x half> result is stored to %ptr (align 8).
2819define void @v_shuffle_v3f16_v4f16__7_6_1(ptr addrspace(1) inreg %ptr) {
2820; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_1:
2821; GFX900:       ; %bb.0:
2822; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2823; GFX900-NEXT:    ;;#ASMSTART
2824; GFX900-NEXT:    ; def v[0:1]
2825; GFX900-NEXT:    ;;#ASMEND
2826; GFX900-NEXT:    v_mov_b32_e32 v3, 0
2827; GFX900-NEXT:    ;;#ASMSTART
2828; GFX900-NEXT:    ; def v[1:2]
2829; GFX900-NEXT:    ;;#ASMEND
2830; GFX900-NEXT:    v_alignbit_b32 v1, v2, v2, 16
2831; GFX900-NEXT:    global_store_short_d16_hi v3, v0, s[16:17] offset:4
2832; GFX900-NEXT:    global_store_dword v3, v1, s[16:17]
2833; GFX900-NEXT:    s_waitcnt vmcnt(0)
2834; GFX900-NEXT:    s_setpc_b64 s[30:31]
2835;
2836; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_1:
2837; GFX90A:       ; %bb.0:
2838; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2839; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2840; GFX90A-NEXT:    ;;#ASMSTART
2841; GFX90A-NEXT:    ; def v[0:1]
2842; GFX90A-NEXT:    ;;#ASMEND
2843; GFX90A-NEXT:    ;;#ASMSTART
2844; GFX90A-NEXT:    ; def v[2:3]
2845; GFX90A-NEXT:    ;;#ASMEND
2846; GFX90A-NEXT:    v_alignbit_b32 v1, v3, v3, 16
2847; GFX90A-NEXT:    global_store_short_d16_hi v4, v0, s[16:17] offset:4
2848; GFX90A-NEXT:    global_store_dword v4, v1, s[16:17]
2849; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2850; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2851;
2852; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_1:
2853; GFX940:       ; %bb.0:
2854; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2855; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2856; GFX940-NEXT:    ;;#ASMSTART
2857; GFX940-NEXT:    ; def v[0:1]
2858; GFX940-NEXT:    ;;#ASMEND
2859; GFX940-NEXT:    ;;#ASMSTART
2860; GFX940-NEXT:    ; def v[2:3]
2861; GFX940-NEXT:    ;;#ASMEND
2862; GFX940-NEXT:    s_nop 0
2863; GFX940-NEXT:    v_alignbit_b32 v1, v3, v3, 16
2864; GFX940-NEXT:    global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1
2865; GFX940-NEXT:    global_store_dword v4, v1, s[0:1] sc0 sc1
2866; GFX940-NEXT:    s_waitcnt vmcnt(0)
2867; GFX940-NEXT:    s_setpc_b64 s[30:31]
2868  %vec0 = call <4 x half> asm "; def $0", "=v"()
2869  %vec1 = call <4 x half> asm "; def $0", "=v"()
2870  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
2871  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2872  ret void
2873}
2874
; Single-source shuffle with mask <poison, 2, 2> (second operand is poison).
; Result lane 0 is poison; lanes 1 and 2 both read element 2 of %vec0.
; The <3 x half> result is stored to %ptr (align 8).
2875define void @v_shuffle_v3f16_v4f16__u_2_2(ptr addrspace(1) inreg %ptr) {
2876; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_2_2:
2877; GFX900:       ; %bb.0:
2878; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2879; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2880; GFX900-NEXT:    ;;#ASMSTART
2881; GFX900-NEXT:    ; def v[0:1]
2882; GFX900-NEXT:    ;;#ASMEND
2883; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2884; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2885; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2886; GFX900-NEXT:    s_waitcnt vmcnt(0)
2887; GFX900-NEXT:    s_setpc_b64 s[30:31]
2888;
2889; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_2_2:
2890; GFX90A:       ; %bb.0:
2891; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2892; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2893; GFX90A-NEXT:    ;;#ASMSTART
2894; GFX90A-NEXT:    ; def v[0:1]
2895; GFX90A-NEXT:    ;;#ASMEND
2896; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2897; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2898; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2899; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2900; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2901;
2902; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_2_2:
2903; GFX940:       ; %bb.0:
2904; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2905; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2906; GFX940-NEXT:    ;;#ASMSTART
2907; GFX940-NEXT:    ; def v[0:1]
2908; GFX940-NEXT:    ;;#ASMEND
2909; GFX940-NEXT:    s_nop 0
2910; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
2911; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2912; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2913; GFX940-NEXT:    s_waitcnt vmcnt(0)
2914; GFX940-NEXT:    s_setpc_b64 s[30:31]
2915  %vec0 = call <4 x half> asm "; def $0", "=v"()
2916  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 2, i32 2>
2917  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2918  ret void
2919}
2920
; Single-source shuffle with mask <0, 2, 2> (second operand is poison).
; Result lane 0 is element 0 of %vec0; lanes 1 and 2 both read element 2.
; The <3 x half> result is stored to %ptr (align 8).
2921define void @v_shuffle_v3f16_v4f16__0_2_2(ptr addrspace(1) inreg %ptr) {
2922; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_2_2:
2923; GFX900:       ; %bb.0:
2924; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2925; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2926; GFX900-NEXT:    ;;#ASMSTART
2927; GFX900-NEXT:    ; def v[0:1]
2928; GFX900-NEXT:    ;;#ASMEND
2929; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2930; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
2931; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2932; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2933; GFX900-NEXT:    s_waitcnt vmcnt(0)
2934; GFX900-NEXT:    s_setpc_b64 s[30:31]
2935;
2936; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_2_2:
2937; GFX90A:       ; %bb.0:
2938; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2939; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2940; GFX90A-NEXT:    ;;#ASMSTART
2941; GFX90A-NEXT:    ; def v[0:1]
2942; GFX90A-NEXT:    ;;#ASMEND
2943; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2944; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
2945; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2946; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2947; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2948; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2949;
2950; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_2_2:
2951; GFX940:       ; %bb.0:
2952; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2953; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2954; GFX940-NEXT:    ;;#ASMSTART
2955; GFX940-NEXT:    ; def v[0:1]
2956; GFX940-NEXT:    ;;#ASMEND
2957; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2958; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
2959; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
2960; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2961; GFX940-NEXT:    s_waitcnt vmcnt(0)
2962; GFX940-NEXT:    s_setpc_b64 s[30:31]
2963  %vec0 = call <4 x half> asm "; def $0", "=v"()
2964  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 2, i32 2>
2965  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
2966  ret void
2967}
2968
; Single-source shuffle with mask <1, 2, 2> (second operand is poison).
; Result lane 0 is element 1 of %vec0; lanes 1 and 2 both read element 2.
; The <3 x half> result is stored to %ptr (align 8).
2969define void @v_shuffle_v3f16_v4f16__1_2_2(ptr addrspace(1) inreg %ptr) {
2970; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_2_2:
2971; GFX900:       ; %bb.0:
2972; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2973; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2974; GFX900-NEXT:    ;;#ASMSTART
2975; GFX900-NEXT:    ; def v[0:1]
2976; GFX900-NEXT:    ;;#ASMEND
2977; GFX900-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2978; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2979; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2980; GFX900-NEXT:    s_waitcnt vmcnt(0)
2981; GFX900-NEXT:    s_setpc_b64 s[30:31]
2982;
2983; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_2_2:
2984; GFX90A:       ; %bb.0:
2985; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2986; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2987; GFX90A-NEXT:    ;;#ASMSTART
2988; GFX90A-NEXT:    ; def v[0:1]
2989; GFX90A-NEXT:    ;;#ASMEND
2990; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2991; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
2992; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2993; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2994; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2995;
2996; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_2_2:
2997; GFX940:       ; %bb.0:
2998; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2999; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3000; GFX940-NEXT:    ;;#ASMSTART
3001; GFX940-NEXT:    ; def v[0:1]
3002; GFX940-NEXT:    ;;#ASMEND
3003; GFX940-NEXT:    s_nop 0
3004; GFX940-NEXT:    v_alignbit_b32 v0, v1, v0, 16
3005; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3006; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3007; GFX940-NEXT:    s_waitcnt vmcnt(0)
3008; GFX940-NEXT:    s_setpc_b64 s[30:31]
3009  %vec0 = call <4 x half> asm "; def $0", "=v"()
3010  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 2, i32 2>
3011  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3012  ret void
3013}
3014
; Single-source shuffle with mask <2, 2, 2> (second operand is poison).
; All three result lanes read element 2 of %vec0.
; The <3 x half> result is stored to %ptr (align 8).
3015define void @v_shuffle_v3f16_v4f16__2_2_2(ptr addrspace(1) inreg %ptr) {
3016; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_2_2:
3017; GFX900:       ; %bb.0:
3018; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3019; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3020; GFX900-NEXT:    ;;#ASMSTART
3021; GFX900-NEXT:    ; def v[0:1]
3022; GFX900-NEXT:    ;;#ASMEND
3023; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3024; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
3025; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3026; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3027; GFX900-NEXT:    s_waitcnt vmcnt(0)
3028; GFX900-NEXT:    s_setpc_b64 s[30:31]
3029;
3030; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_2_2:
3031; GFX90A:       ; %bb.0:
3032; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3033; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3034; GFX90A-NEXT:    ;;#ASMSTART
3035; GFX90A-NEXT:    ; def v[0:1]
3036; GFX90A-NEXT:    ;;#ASMEND
3037; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3038; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
3039; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3040; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3041; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3042; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3043;
3044; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_2_2:
3045; GFX940:       ; %bb.0:
3046; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3047; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3048; GFX940-NEXT:    ;;#ASMSTART
3049; GFX940-NEXT:    ; def v[0:1]
3050; GFX940-NEXT:    ;;#ASMEND
3051; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3052; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
3053; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3054; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3055; GFX940-NEXT:    s_waitcnt vmcnt(0)
3056; GFX940-NEXT:    s_setpc_b64 s[30:31]
3057  %vec0 = call <4 x half> asm "; def $0", "=v"()
3058  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 2, i32 2>
3059  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3060  ret void
3061}
3062
; Shuffle mask <3, 2, 2> with a single live source (second operand is poison):
; lane 0 is element 3 of %vec0, lanes 1 and 2 are element 2 of %vec0.
; %vec0 comes from an inline-asm def ("=v" constraint); the <3 x half> result
; is stored through the inreg global pointer with align 8.
; The checks expect v_alignbit_b32 v0, v1, v1, 16 for the dword at offset 0
; and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3063define void @v_shuffle_v3f16_v4f16__3_2_2(ptr addrspace(1) inreg %ptr) {
3064; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_2_2:
3065; GFX900:       ; %bb.0:
3066; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3067; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3068; GFX900-NEXT:    ;;#ASMSTART
3069; GFX900-NEXT:    ; def v[0:1]
3070; GFX900-NEXT:    ;;#ASMEND
3071; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
3072; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3073; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3074; GFX900-NEXT:    s_waitcnt vmcnt(0)
3075; GFX900-NEXT:    s_setpc_b64 s[30:31]
3076;
3077; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_2_2:
3078; GFX90A:       ; %bb.0:
3079; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3080; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3081; GFX90A-NEXT:    ;;#ASMSTART
3082; GFX90A-NEXT:    ; def v[0:1]
3083; GFX90A-NEXT:    ;;#ASMEND
3084; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
3085; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3086; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3087; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3088; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3089;
3090; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_2_2:
3091; GFX940:       ; %bb.0:
3092; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3093; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3094; GFX940-NEXT:    ;;#ASMSTART
3095; GFX940-NEXT:    ; def v[0:1]
3096; GFX940-NEXT:    ;;#ASMEND
3097; GFX940-NEXT:    s_nop 0
3098; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
3099; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3100; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3101; GFX940-NEXT:    s_waitcnt vmcnt(0)
3102; GFX940-NEXT:    s_setpc_b64 s[30:31]
3103  %vec0 = call <4 x half> asm "; def $0", "=v"()
3104  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 2, i32 2>
3105  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3106  ret void
3107}
3108
; Shuffle mask <4, 2, 2> where the second operand is poison: index 4 refers to
; element 0 of that poison operand, so lane 0 has no defined value; lanes 1
; and 2 are element 2 of %vec0. %vec0 comes from an inline-asm def ("=v"
; constraint); the <3 x half> result is stored through the inreg global
; pointer with align 8.
; The checks expect only v_lshlrev_b32 v0, 16, v1 for the dword at offset 0
; and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3109define void @v_shuffle_v3f16_v4f16__4_2_2(ptr addrspace(1) inreg %ptr) {
3110; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_2_2:
3111; GFX900:       ; %bb.0:
3112; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3113; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3114; GFX900-NEXT:    ;;#ASMSTART
3115; GFX900-NEXT:    ; def v[0:1]
3116; GFX900-NEXT:    ;;#ASMEND
3117; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
3118; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3119; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3120; GFX900-NEXT:    s_waitcnt vmcnt(0)
3121; GFX900-NEXT:    s_setpc_b64 s[30:31]
3122;
3123; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_2_2:
3124; GFX90A:       ; %bb.0:
3125; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3126; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3127; GFX90A-NEXT:    ;;#ASMSTART
3128; GFX90A-NEXT:    ; def v[0:1]
3129; GFX90A-NEXT:    ;;#ASMEND
3130; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
3131; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3132; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3133; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3134; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3135;
3136; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_2_2:
3137; GFX940:       ; %bb.0:
3138; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3139; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3140; GFX940-NEXT:    ;;#ASMSTART
3141; GFX940-NEXT:    ; def v[0:1]
3142; GFX940-NEXT:    ;;#ASMEND
3143; GFX940-NEXT:    s_nop 0
3144; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
3145; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3146; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3147; GFX940-NEXT:    s_waitcnt vmcnt(0)
3148; GFX940-NEXT:    s_setpc_b64 s[30:31]
3149  %vec0 = call <4 x half> asm "; def $0", "=v"()
3150  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 2, i32 2>
3151  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3152  ret void
3153}
3154
; Shuffle mask <5, 2, 2> over two live <4 x half> sources: lane 0 is element 1
; of %vec1 (mask index 5), lanes 1 and 2 are element 2 of %vec0. Both sources
; come from separate inline-asm defs ("=v" constraint); the <3 x half> result
; is stored through the inreg global pointer with align 8.
; The checks expect the defs in v[0:1] and v[2:3], v_alignbit_b32 v0, v1, v2, 16
; for the dword at offset 0, and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3155define void @v_shuffle_v3f16_v4f16__5_2_2(ptr addrspace(1) inreg %ptr) {
3156; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_2_2:
3157; GFX900:       ; %bb.0:
3158; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3159; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3160; GFX900-NEXT:    ;;#ASMSTART
3161; GFX900-NEXT:    ; def v[0:1]
3162; GFX900-NEXT:    ;;#ASMEND
3163; GFX900-NEXT:    ;;#ASMSTART
3164; GFX900-NEXT:    ; def v[2:3]
3165; GFX900-NEXT:    ;;#ASMEND
3166; GFX900-NEXT:    v_alignbit_b32 v0, v1, v2, 16
3167; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3168; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3169; GFX900-NEXT:    s_waitcnt vmcnt(0)
3170; GFX900-NEXT:    s_setpc_b64 s[30:31]
3171;
3172; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_2_2:
3173; GFX90A:       ; %bb.0:
3174; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3175; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3176; GFX90A-NEXT:    ;;#ASMSTART
3177; GFX90A-NEXT:    ; def v[0:1]
3178; GFX90A-NEXT:    ;;#ASMEND
3179; GFX90A-NEXT:    ;;#ASMSTART
3180; GFX90A-NEXT:    ; def v[2:3]
3181; GFX90A-NEXT:    ;;#ASMEND
3182; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v2, 16
3183; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3184; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3185; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3186; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3187;
3188; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_2_2:
3189; GFX940:       ; %bb.0:
3190; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3191; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3192; GFX940-NEXT:    ;;#ASMSTART
3193; GFX940-NEXT:    ; def v[0:1]
3194; GFX940-NEXT:    ;;#ASMEND
3195; GFX940-NEXT:    ;;#ASMSTART
3196; GFX940-NEXT:    ; def v[2:3]
3197; GFX940-NEXT:    ;;#ASMEND
3198; GFX940-NEXT:    s_nop 0
3199; GFX940-NEXT:    v_alignbit_b32 v0, v1, v2, 16
3200; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3201; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3202; GFX940-NEXT:    s_waitcnt vmcnt(0)
3203; GFX940-NEXT:    s_setpc_b64 s[30:31]
3204  %vec0 = call <4 x half> asm "; def $0", "=v"()
3205  %vec1 = call <4 x half> asm "; def $0", "=v"()
3206  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
3207  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3208  ret void
3209}
3210
; Shuffle mask <6, 2, 2> over two live <4 x half> sources: lane 0 is element 2
; of %vec1 (mask index 6), lanes 1 and 2 are element 2 of %vec0. Both sources
; come from separate inline-asm defs ("=v" constraint); the <3 x half> result
; is stored through the inreg global pointer with align 8.
; The checks expect a v_perm_b32 of (v1, v3) with selector 0x5040100 for the
; dword at offset 0 and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3211define void @v_shuffle_v3f16_v4f16__6_2_2(ptr addrspace(1) inreg %ptr) {
3212; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_2_2:
3213; GFX900:       ; %bb.0:
3214; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3215; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3216; GFX900-NEXT:    ;;#ASMSTART
3217; GFX900-NEXT:    ; def v[0:1]
3218; GFX900-NEXT:    ;;#ASMEND
3219; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
3220; GFX900-NEXT:    ;;#ASMSTART
3221; GFX900-NEXT:    ; def v[2:3]
3222; GFX900-NEXT:    ;;#ASMEND
3223; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
3224; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3225; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3226; GFX900-NEXT:    s_waitcnt vmcnt(0)
3227; GFX900-NEXT:    s_setpc_b64 s[30:31]
3228;
3229; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_2_2:
3230; GFX90A:       ; %bb.0:
3231; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3232; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3233; GFX90A-NEXT:    ;;#ASMSTART
3234; GFX90A-NEXT:    ; def v[0:1]
3235; GFX90A-NEXT:    ;;#ASMEND
3236; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
3237; GFX90A-NEXT:    ;;#ASMSTART
3238; GFX90A-NEXT:    ; def v[2:3]
3239; GFX90A-NEXT:    ;;#ASMEND
3240; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
3241; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3242; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3243; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3244; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3245;
3246; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_2_2:
3247; GFX940:       ; %bb.0:
3248; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3249; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3250; GFX940-NEXT:    ;;#ASMSTART
3251; GFX940-NEXT:    ; def v[0:1]
3252; GFX940-NEXT:    ;;#ASMEND
3253; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
3254; GFX940-NEXT:    ;;#ASMSTART
3255; GFX940-NEXT:    ; def v[2:3]
3256; GFX940-NEXT:    ;;#ASMEND
3257; GFX940-NEXT:    s_nop 0
3258; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
3259; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3260; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3261; GFX940-NEXT:    s_waitcnt vmcnt(0)
3262; GFX940-NEXT:    s_setpc_b64 s[30:31]
3263  %vec0 = call <4 x half> asm "; def $0", "=v"()
3264  %vec1 = call <4 x half> asm "; def $0", "=v"()
3265  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
3266  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3267  ret void
3268}
3269
; Shuffle mask <7, 2, 2> over two live <4 x half> sources: lane 0 is element 3
; of %vec1 (mask index 7), lanes 1 and 2 are element 2 of %vec0. Both sources
; come from separate inline-asm defs ("=v" constraint); the <3 x half> result
; is stored through the inreg global pointer with align 8.
; The checks expect v_alignbit_b32 v0, v1, v3, 16 for the dword at offset 0
; and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3270define void @v_shuffle_v3f16_v4f16__7_2_2(ptr addrspace(1) inreg %ptr) {
3271; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_2:
3272; GFX900:       ; %bb.0:
3273; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3274; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3275; GFX900-NEXT:    ;;#ASMSTART
3276; GFX900-NEXT:    ; def v[0:1]
3277; GFX900-NEXT:    ;;#ASMEND
3278; GFX900-NEXT:    ;;#ASMSTART
3279; GFX900-NEXT:    ; def v[2:3]
3280; GFX900-NEXT:    ;;#ASMEND
3281; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
3282; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3283; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3284; GFX900-NEXT:    s_waitcnt vmcnt(0)
3285; GFX900-NEXT:    s_setpc_b64 s[30:31]
3286;
3287; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_2:
3288; GFX90A:       ; %bb.0:
3289; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3290; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3291; GFX90A-NEXT:    ;;#ASMSTART
3292; GFX90A-NEXT:    ; def v[0:1]
3293; GFX90A-NEXT:    ;;#ASMEND
3294; GFX90A-NEXT:    ;;#ASMSTART
3295; GFX90A-NEXT:    ; def v[2:3]
3296; GFX90A-NEXT:    ;;#ASMEND
3297; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
3298; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3299; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3300; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3301; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3302;
3303; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_2:
3304; GFX940:       ; %bb.0:
3305; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3306; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3307; GFX940-NEXT:    ;;#ASMSTART
3308; GFX940-NEXT:    ; def v[0:1]
3309; GFX940-NEXT:    ;;#ASMEND
3310; GFX940-NEXT:    ;;#ASMSTART
3311; GFX940-NEXT:    ; def v[2:3]
3312; GFX940-NEXT:    ;;#ASMEND
3313; GFX940-NEXT:    s_nop 0
3314; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
3315; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3316; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3317; GFX940-NEXT:    s_waitcnt vmcnt(0)
3318; GFX940-NEXT:    s_setpc_b64 s[30:31]
3319  %vec0 = call <4 x half> asm "; def $0", "=v"()
3320  %vec1 = call <4 x half> asm "; def $0", "=v"()
3321  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
3322  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3323  ret void
3324}
3325
; Shuffle mask <7, poison, 2> over two live <4 x half> sources: lane 0 is
; element 3 of %vec1 (mask index 7), lane 1 is poison ("u" in the test name),
; lane 2 is element 2 of %vec0. Both sources come from separate inline-asm
; defs ("=v" constraint); the <3 x half> result is stored through the inreg
; global pointer with align 8.
; The checks expect v_alignbit_b32 v0, <sgpr>, v3, 16 where the SGPR operand
; (s4, or s0 in the gfx940 run) is not written anywhere in the checked
; sequence, i.e. no value is materialized for the poison lane.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3326define void @v_shuffle_v3f16_v4f16__7_u_2(ptr addrspace(1) inreg %ptr) {
3327; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_2:
3328; GFX900:       ; %bb.0:
3329; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3330; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3331; GFX900-NEXT:    ;;#ASMSTART
3332; GFX900-NEXT:    ; def v[0:1]
3333; GFX900-NEXT:    ;;#ASMEND
3334; GFX900-NEXT:    ;;#ASMSTART
3335; GFX900-NEXT:    ; def v[2:3]
3336; GFX900-NEXT:    ;;#ASMEND
3337; GFX900-NEXT:    v_alignbit_b32 v0, s4, v3, 16
3338; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3339; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3340; GFX900-NEXT:    s_waitcnt vmcnt(0)
3341; GFX900-NEXT:    s_setpc_b64 s[30:31]
3342;
3343; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_2:
3344; GFX90A:       ; %bb.0:
3345; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3346; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3347; GFX90A-NEXT:    ;;#ASMSTART
3348; GFX90A-NEXT:    ; def v[0:1]
3349; GFX90A-NEXT:    ;;#ASMEND
3350; GFX90A-NEXT:    ;;#ASMSTART
3351; GFX90A-NEXT:    ; def v[2:3]
3352; GFX90A-NEXT:    ;;#ASMEND
3353; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
3354; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3355; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3356; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3357; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3358;
3359; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_2:
3360; GFX940:       ; %bb.0:
3361; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3362; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3363; GFX940-NEXT:    ;;#ASMSTART
3364; GFX940-NEXT:    ; def v[0:1]
3365; GFX940-NEXT:    ;;#ASMEND
3366; GFX940-NEXT:    ;;#ASMSTART
3367; GFX940-NEXT:    ; def v[2:3]
3368; GFX940-NEXT:    ;;#ASMEND
3369; GFX940-NEXT:    s_nop 0
3370; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
3371; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3372; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3373; GFX940-NEXT:    s_waitcnt vmcnt(0)
3374; GFX940-NEXT:    s_setpc_b64 s[30:31]
3375  %vec0 = call <4 x half> asm "; def $0", "=v"()
3376  %vec1 = call <4 x half> asm "; def $0", "=v"()
3377  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
3378  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3379  ret void
3380}
3381
; Shuffle mask <7, 0, 2> over two live <4 x half> sources: lane 0 is element 3
; of %vec1 (mask index 7), lane 1 is element 0 of %vec0, lane 2 is element 2
; of %vec0. Both sources come from separate inline-asm defs ("=v" constraint);
; the <3 x half> result is stored through the inreg global pointer, align 8.
; The checks expect v_alignbit_b32 v0, v0, v3, 16 for the dword at offset 0
; and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3382define void @v_shuffle_v3f16_v4f16__7_0_2(ptr addrspace(1) inreg %ptr) {
3383; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_2:
3384; GFX900:       ; %bb.0:
3385; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3386; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3387; GFX900-NEXT:    ;;#ASMSTART
3388; GFX900-NEXT:    ; def v[0:1]
3389; GFX900-NEXT:    ;;#ASMEND
3390; GFX900-NEXT:    ;;#ASMSTART
3391; GFX900-NEXT:    ; def v[2:3]
3392; GFX900-NEXT:    ;;#ASMEND
3393; GFX900-NEXT:    v_alignbit_b32 v0, v0, v3, 16
3394; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3395; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3396; GFX900-NEXT:    s_waitcnt vmcnt(0)
3397; GFX900-NEXT:    s_setpc_b64 s[30:31]
3398;
3399; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_2:
3400; GFX90A:       ; %bb.0:
3401; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3402; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3403; GFX90A-NEXT:    ;;#ASMSTART
3404; GFX90A-NEXT:    ; def v[0:1]
3405; GFX90A-NEXT:    ;;#ASMEND
3406; GFX90A-NEXT:    ;;#ASMSTART
3407; GFX90A-NEXT:    ; def v[2:3]
3408; GFX90A-NEXT:    ;;#ASMEND
3409; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
3410; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3411; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3412; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3413; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3414;
3415; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_2:
3416; GFX940:       ; %bb.0:
3417; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3418; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3419; GFX940-NEXT:    ;;#ASMSTART
3420; GFX940-NEXT:    ; def v[0:1]
3421; GFX940-NEXT:    ;;#ASMEND
3422; GFX940-NEXT:    ;;#ASMSTART
3423; GFX940-NEXT:    ; def v[2:3]
3424; GFX940-NEXT:    ;;#ASMEND
3425; GFX940-NEXT:    s_nop 0
3426; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
3427; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3428; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3429; GFX940-NEXT:    s_waitcnt vmcnt(0)
3430; GFX940-NEXT:    s_setpc_b64 s[30:31]
3431  %vec0 = call <4 x half> asm "; def $0", "=v"()
3432  %vec1 = call <4 x half> asm "; def $0", "=v"()
3433  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
3434  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3435  ret void
3436}
3437
; Shuffle mask <7, 1, 2> over two live <4 x half> sources: lane 0 is element 3
; of %vec1 (mask index 7), lane 1 is element 1 of %vec0, lane 2 is element 2
; of %vec0. Both sources come from separate inline-asm defs ("=v" constraint);
; the <3 x half> result is stored through the inreg global pointer, align 8.
; The checks expect a v_perm_b32 of (v0, v3) with selector 0x7060302 for the
; dword at offset 0 and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3438define void @v_shuffle_v3f16_v4f16__7_1_2(ptr addrspace(1) inreg %ptr) {
3439; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_2:
3440; GFX900:       ; %bb.0:
3441; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3442; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3443; GFX900-NEXT:    ;;#ASMSTART
3444; GFX900-NEXT:    ; def v[0:1]
3445; GFX900-NEXT:    ;;#ASMEND
3446; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3447; GFX900-NEXT:    ;;#ASMSTART
3448; GFX900-NEXT:    ; def v[2:3]
3449; GFX900-NEXT:    ;;#ASMEND
3450; GFX900-NEXT:    v_perm_b32 v0, v0, v3, s4
3451; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3452; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3453; GFX900-NEXT:    s_waitcnt vmcnt(0)
3454; GFX900-NEXT:    s_setpc_b64 s[30:31]
3455;
3456; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_2:
3457; GFX90A:       ; %bb.0:
3458; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3459; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3460; GFX90A-NEXT:    ;;#ASMSTART
3461; GFX90A-NEXT:    ; def v[0:1]
3462; GFX90A-NEXT:    ;;#ASMEND
3463; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3464; GFX90A-NEXT:    ;;#ASMSTART
3465; GFX90A-NEXT:    ; def v[2:3]
3466; GFX90A-NEXT:    ;;#ASMEND
3467; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
3468; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3469; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3470; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3471; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3472;
3473; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_2:
3474; GFX940:       ; %bb.0:
3475; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3476; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3477; GFX940-NEXT:    ;;#ASMSTART
3478; GFX940-NEXT:    ; def v[0:1]
3479; GFX940-NEXT:    ;;#ASMEND
3480; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3481; GFX940-NEXT:    ;;#ASMSTART
3482; GFX940-NEXT:    ; def v[2:3]
3483; GFX940-NEXT:    ;;#ASMEND
3484; GFX940-NEXT:    s_nop 0
3485; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
3486; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3487; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3488; GFX940-NEXT:    s_waitcnt vmcnt(0)
3489; GFX940-NEXT:    s_setpc_b64 s[30:31]
3490  %vec0 = call <4 x half> asm "; def $0", "=v"()
3491  %vec1 = call <4 x half> asm "; def $0", "=v"()
3492  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
3493  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3494  ret void
3495}
3496
; Shuffle mask <7, 3, 2> over two live <4 x half> sources: lane 0 is element 3
; of %vec1 (mask index 7), lane 1 is element 3 of %vec0, lane 2 is element 2
; of %vec0. Both sources come from separate inline-asm defs ("=v" constraint);
; the <3 x half> result is stored through the inreg global pointer, align 8.
; The checks expect a v_perm_b32 of (v1, v3) with selector 0x7060302 for the
; dword at offset 0 and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3497define void @v_shuffle_v3f16_v4f16__7_3_2(ptr addrspace(1) inreg %ptr) {
3498; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_2:
3499; GFX900:       ; %bb.0:
3500; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3501; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3502; GFX900-NEXT:    ;;#ASMSTART
3503; GFX900-NEXT:    ; def v[0:1]
3504; GFX900-NEXT:    ;;#ASMEND
3505; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3506; GFX900-NEXT:    ;;#ASMSTART
3507; GFX900-NEXT:    ; def v[2:3]
3508; GFX900-NEXT:    ;;#ASMEND
3509; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
3510; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3511; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3512; GFX900-NEXT:    s_waitcnt vmcnt(0)
3513; GFX900-NEXT:    s_setpc_b64 s[30:31]
3514;
3515; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_2:
3516; GFX90A:       ; %bb.0:
3517; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3518; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3519; GFX90A-NEXT:    ;;#ASMSTART
3520; GFX90A-NEXT:    ; def v[0:1]
3521; GFX90A-NEXT:    ;;#ASMEND
3522; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3523; GFX90A-NEXT:    ;;#ASMSTART
3524; GFX90A-NEXT:    ; def v[2:3]
3525; GFX90A-NEXT:    ;;#ASMEND
3526; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
3527; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3528; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3529; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3530; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3531;
3532; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_2:
3533; GFX940:       ; %bb.0:
3534; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3535; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3536; GFX940-NEXT:    ;;#ASMSTART
3537; GFX940-NEXT:    ; def v[0:1]
3538; GFX940-NEXT:    ;;#ASMEND
3539; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3540; GFX940-NEXT:    ;;#ASMSTART
3541; GFX940-NEXT:    ; def v[2:3]
3542; GFX940-NEXT:    ;;#ASMEND
3543; GFX940-NEXT:    s_nop 0
3544; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
3545; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3546; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3547; GFX940-NEXT:    s_waitcnt vmcnt(0)
3548; GFX940-NEXT:    s_setpc_b64 s[30:31]
3549  %vec0 = call <4 x half> asm "; def $0", "=v"()
3550  %vec1 = call <4 x half> asm "; def $0", "=v"()
3551  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
3552  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3553  ret void
3554}
3555
; Shuffle mask <7, 4, 2> over two live <4 x half> sources: lane 0 is element 3
; of %vec1 (mask index 7), lane 1 is element 0 of %vec1 (mask index 4), lane 2
; is element 2 of %vec0. Both sources come from separate inline-asm defs
; ("=v" constraint); the <3 x half> result is stored through the inreg global
; pointer with align 8.
; The checks expect v_alignbit_b32 v0, v2, v3, 16 for the dword at offset 0
; and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3556define void @v_shuffle_v3f16_v4f16__7_4_2(ptr addrspace(1) inreg %ptr) {
3557; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_2:
3558; GFX900:       ; %bb.0:
3559; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3560; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3561; GFX900-NEXT:    ;;#ASMSTART
3562; GFX900-NEXT:    ; def v[0:1]
3563; GFX900-NEXT:    ;;#ASMEND
3564; GFX900-NEXT:    ;;#ASMSTART
3565; GFX900-NEXT:    ; def v[2:3]
3566; GFX900-NEXT:    ;;#ASMEND
3567; GFX900-NEXT:    v_alignbit_b32 v0, v2, v3, 16
3568; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3569; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3570; GFX900-NEXT:    s_waitcnt vmcnt(0)
3571; GFX900-NEXT:    s_setpc_b64 s[30:31]
3572;
3573; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_2:
3574; GFX90A:       ; %bb.0:
3575; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3576; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3577; GFX90A-NEXT:    ;;#ASMSTART
3578; GFX90A-NEXT:    ; def v[0:1]
3579; GFX90A-NEXT:    ;;#ASMEND
3580; GFX90A-NEXT:    ;;#ASMSTART
3581; GFX90A-NEXT:    ; def v[2:3]
3582; GFX90A-NEXT:    ;;#ASMEND
3583; GFX90A-NEXT:    v_alignbit_b32 v0, v2, v3, 16
3584; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3585; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3586; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3587; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3588;
3589; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_2:
3590; GFX940:       ; %bb.0:
3591; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3592; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3593; GFX940-NEXT:    ;;#ASMSTART
3594; GFX940-NEXT:    ; def v[0:1]
3595; GFX940-NEXT:    ;;#ASMEND
3596; GFX940-NEXT:    ;;#ASMSTART
3597; GFX940-NEXT:    ; def v[2:3]
3598; GFX940-NEXT:    ;;#ASMEND
3599; GFX940-NEXT:    s_nop 0
3600; GFX940-NEXT:    v_alignbit_b32 v0, v2, v3, 16
3601; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3602; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3603; GFX940-NEXT:    s_waitcnt vmcnt(0)
3604; GFX940-NEXT:    s_setpc_b64 s[30:31]
3605  %vec0 = call <4 x half> asm "; def $0", "=v"()
3606  %vec1 = call <4 x half> asm "; def $0", "=v"()
3607  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
3608  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3609  ret void
3610}
3611
; Shuffle mask <7, 5, 2> over two live <4 x half> sources: lane 0 is element 3
; of %vec1 (mask index 7), lane 1 is element 1 of %vec1 (mask index 5), lane 2
; is element 2 of %vec0. Both sources come from separate inline-asm defs
; ("=v" constraint); the <3 x half> result is stored through the inreg global
; pointer with align 8.
; The checks expect a v_perm_b32 of (v2, v3) with selector 0x7060302 for the
; dword at offset 0 and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3612define void @v_shuffle_v3f16_v4f16__7_5_2(ptr addrspace(1) inreg %ptr) {
3613; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_2:
3614; GFX900:       ; %bb.0:
3615; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3616; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3617; GFX900-NEXT:    ;;#ASMSTART
3618; GFX900-NEXT:    ; def v[0:1]
3619; GFX900-NEXT:    ;;#ASMEND
3620; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3621; GFX900-NEXT:    ;;#ASMSTART
3622; GFX900-NEXT:    ; def v[2:3]
3623; GFX900-NEXT:    ;;#ASMEND
3624; GFX900-NEXT:    v_perm_b32 v0, v2, v3, s4
3625; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3626; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3627; GFX900-NEXT:    s_waitcnt vmcnt(0)
3628; GFX900-NEXT:    s_setpc_b64 s[30:31]
3629;
3630; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_2:
3631; GFX90A:       ; %bb.0:
3632; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3633; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3634; GFX90A-NEXT:    ;;#ASMSTART
3635; GFX90A-NEXT:    ; def v[0:1]
3636; GFX90A-NEXT:    ;;#ASMEND
3637; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3638; GFX90A-NEXT:    ;;#ASMSTART
3639; GFX90A-NEXT:    ; def v[2:3]
3640; GFX90A-NEXT:    ;;#ASMEND
3641; GFX90A-NEXT:    v_perm_b32 v0, v2, v3, s4
3642; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3643; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3644; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3645; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3646;
3647; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_2:
3648; GFX940:       ; %bb.0:
3649; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3650; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3651; GFX940-NEXT:    ;;#ASMSTART
3652; GFX940-NEXT:    ; def v[0:1]
3653; GFX940-NEXT:    ;;#ASMEND
3654; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3655; GFX940-NEXT:    ;;#ASMSTART
3656; GFX940-NEXT:    ; def v[2:3]
3657; GFX940-NEXT:    ;;#ASMEND
3658; GFX940-NEXT:    s_nop 0
3659; GFX940-NEXT:    v_perm_b32 v0, v2, v3, s2
3660; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3661; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3662; GFX940-NEXT:    s_waitcnt vmcnt(0)
3663; GFX940-NEXT:    s_setpc_b64 s[30:31]
3664  %vec0 = call <4 x half> asm "; def $0", "=v"()
3665  %vec1 = call <4 x half> asm "; def $0", "=v"()
3666  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
3667  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3668  ret void
3669}
3670
; Shuffle mask <7, 6, 2> over two live <4 x half> sources: lane 0 is element 3
; of %vec1 (mask index 7), lane 1 is element 2 of %vec1 (mask index 6), lane 2
; is element 2 of %vec0. Both sources come from separate inline-asm defs
; ("=v" constraint); the <3 x half> result is stored through the inreg global
; pointer with align 8.
; The checks expect v_alignbit_b32 v0, v3, v3, 16 for the dword at offset 0
; and a 16-bit store of v1 at offset 4.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3671define void @v_shuffle_v3f16_v4f16__7_6_2(ptr addrspace(1) inreg %ptr) {
3672; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_2:
3673; GFX900:       ; %bb.0:
3674; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3675; GFX900-NEXT:    v_mov_b32_e32 v4, 0
3676; GFX900-NEXT:    ;;#ASMSTART
3677; GFX900-NEXT:    ; def v[0:1]
3678; GFX900-NEXT:    ;;#ASMEND
3679; GFX900-NEXT:    ;;#ASMSTART
3680; GFX900-NEXT:    ; def v[2:3]
3681; GFX900-NEXT:    ;;#ASMEND
3682; GFX900-NEXT:    v_alignbit_b32 v0, v3, v3, 16
3683; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3684; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
3685; GFX900-NEXT:    s_waitcnt vmcnt(0)
3686; GFX900-NEXT:    s_setpc_b64 s[30:31]
3687;
3688; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_2:
3689; GFX90A:       ; %bb.0:
3690; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3691; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
3692; GFX90A-NEXT:    ;;#ASMSTART
3693; GFX90A-NEXT:    ; def v[0:1]
3694; GFX90A-NEXT:    ;;#ASMEND
3695; GFX90A-NEXT:    ;;#ASMSTART
3696; GFX90A-NEXT:    ; def v[2:3]
3697; GFX90A-NEXT:    ;;#ASMEND
3698; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v3, 16
3699; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
3700; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
3701; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3702; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3703;
3704; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_2:
3705; GFX940:       ; %bb.0:
3706; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3707; GFX940-NEXT:    v_mov_b32_e32 v4, 0
3708; GFX940-NEXT:    ;;#ASMSTART
3709; GFX940-NEXT:    ; def v[0:1]
3710; GFX940-NEXT:    ;;#ASMEND
3711; GFX940-NEXT:    ;;#ASMSTART
3712; GFX940-NEXT:    ; def v[2:3]
3713; GFX940-NEXT:    ;;#ASMEND
3714; GFX940-NEXT:    s_nop 0
3715; GFX940-NEXT:    v_alignbit_b32 v0, v3, v3, 16
3716; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
3717; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
3718; GFX940-NEXT:    s_waitcnt vmcnt(0)
3719; GFX940-NEXT:    s_setpc_b64 s[30:31]
3720  %vec0 = call <4 x half> asm "; def $0", "=v"()
3721  %vec1 = call <4 x half> asm "; def $0", "=v"()
3722  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
3723  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3724  ret void
3725}
3726
; Shuffle mask <poison, 3, 3> with a single live source (second operand is
; poison): lane 0 is poison ("u" in the test name), lanes 1 and 2 are element
; 3 of %vec0. %vec0 comes from an inline-asm def ("=v" constraint); the
; <3 x half> result is stored through the inreg global pointer with align 8.
; The checks expect a v_bfi_b32 with mask 0xffff combining v0 and v1 for the
; dword at offset 0, and v_lshrrev_b32 v1, 16, v1 feeding the 16-bit store at
; offset 4. Here the dword store is checked before the short store.
; Check lines are autogenerated by update_llc_test_checks.py; regenerate them
; instead of editing by hand.
3727define void @v_shuffle_v3f16_v4f16__u_3_3(ptr addrspace(1) inreg %ptr) {
3728; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_3_3:
3729; GFX900:       ; %bb.0:
3730; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3731; GFX900-NEXT:    ;;#ASMSTART
3732; GFX900-NEXT:    ; def v[0:1]
3733; GFX900-NEXT:    ;;#ASMEND
3734; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3735; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3736; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
3737; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3738; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3739; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3740; GFX900-NEXT:    s_waitcnt vmcnt(0)
3741; GFX900-NEXT:    s_setpc_b64 s[30:31]
3742;
3743; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_3_3:
3744; GFX90A:       ; %bb.0:
3745; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3746; GFX90A-NEXT:    ;;#ASMSTART
3747; GFX90A-NEXT:    ; def v[0:1]
3748; GFX90A-NEXT:    ;;#ASMEND
3749; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3750; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3751; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
3752; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3753; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3754; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3755; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3756; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3757;
3758; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_3_3:
3759; GFX940:       ; %bb.0:
3760; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3761; GFX940-NEXT:    ;;#ASMSTART
3762; GFX940-NEXT:    ; def v[0:1]
3763; GFX940-NEXT:    ;;#ASMEND
3764; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3765; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3766; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
3767; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3768; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3769; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3770; GFX940-NEXT:    s_waitcnt vmcnt(0)
3771; GFX940-NEXT:    s_setpc_b64 s[30:31]
3772  %vec0 = call <4 x half> asm "; def $0", "=v"()
3773  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 3, i32 3>
3774  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3775  ret void
3776}
3777
; Shuffle mask <0, 3, 3> with a poison second operand: all three result lanes
; come from %vec0 (elements 0, 3, 3). The expected code merges the low half of
; v0 with the high half of v1 using v_bfi_b32 (mask 0xffff) for the dword
; store, and shifts v1 right by 16 to store element 3 again as the third half.
3778define void @v_shuffle_v3f16_v4f16__0_3_3(ptr addrspace(1) inreg %ptr) {
3779; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_3_3:
3780; GFX900:       ; %bb.0:
3781; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3782; GFX900-NEXT:    ;;#ASMSTART
3783; GFX900-NEXT:    ; def v[0:1]
3784; GFX900-NEXT:    ;;#ASMEND
3785; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3786; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3787; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
3788; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3789; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3790; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3791; GFX900-NEXT:    s_waitcnt vmcnt(0)
3792; GFX900-NEXT:    s_setpc_b64 s[30:31]
3793;
3794; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_3_3:
3795; GFX90A:       ; %bb.0:
3796; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3797; GFX90A-NEXT:    ;;#ASMSTART
3798; GFX90A-NEXT:    ; def v[0:1]
3799; GFX90A-NEXT:    ;;#ASMEND
3800; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3801; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3802; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
3803; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3804; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3805; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3806; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3807; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3808;
3809; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_3_3:
3810; GFX940:       ; %bb.0:
3811; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3812; GFX940-NEXT:    ;;#ASMSTART
3813; GFX940-NEXT:    ; def v[0:1]
3814; GFX940-NEXT:    ;;#ASMEND
3815; GFX940-NEXT:    s_mov_b32 s2, 0xffff
3816; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3817; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
3818; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3819; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3820; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3821; GFX940-NEXT:    s_waitcnt vmcnt(0)
3822; GFX940-NEXT:    s_setpc_b64 s[30:31]
3823  %vec0 = call <4 x half> asm "; def $0", "=v"()
3824  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 3, i32 3>
3825  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3826  ret void
3827}
3828
; Shuffle mask <1, 3, 3> with a poison second operand: result lanes are
; elements 1, 3, 3 of %vec0. Both selected elements sit in the high half of
; their register, so the expected code packs them with v_perm_b32 (selector
; 0x7060302) and stores element 3 again from v1 shifted right by 16.
3829define void @v_shuffle_v3f16_v4f16__1_3_3(ptr addrspace(1) inreg %ptr) {
3830; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_3_3:
3831; GFX900:       ; %bb.0:
3832; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3833; GFX900-NEXT:    ;;#ASMSTART
3834; GFX900-NEXT:    ; def v[0:1]
3835; GFX900-NEXT:    ;;#ASMEND
3836; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3837; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3838; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
3839; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3840; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3841; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3842; GFX900-NEXT:    s_waitcnt vmcnt(0)
3843; GFX900-NEXT:    s_setpc_b64 s[30:31]
3844;
3845; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_3_3:
3846; GFX90A:       ; %bb.0:
3847; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3848; GFX90A-NEXT:    ;;#ASMSTART
3849; GFX90A-NEXT:    ; def v[0:1]
3850; GFX90A-NEXT:    ;;#ASMEND
3851; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3852; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3853; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
3854; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3855; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
3856; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3857; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3858; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3859;
3860; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_3_3:
3861; GFX940:       ; %bb.0:
3862; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3863; GFX940-NEXT:    ;;#ASMSTART
3864; GFX940-NEXT:    ; def v[0:1]
3865; GFX940-NEXT:    ;;#ASMEND
3866; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3867; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3868; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
3869; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3870; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
3871; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
3872; GFX940-NEXT:    s_waitcnt vmcnt(0)
3873; GFX940-NEXT:    s_setpc_b64 s[30:31]
3874  %vec0 = call <4 x half> asm "; def $0", "=v"()
3875  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 3, i32 3>
3876  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3877  ret void
3878}
3879
; Shuffle mask <2, 3, 3> with a poison second operand: result lanes are
; elements 2, 3, 3 of %vec0. Elements 2 and 3 already form register v1, so the
; expected code needs no ALU shuffle: v1 is stored as the dword and its high
; half is stored again through global_store_short_d16_hi at offset 4.
3880define void @v_shuffle_v3f16_v4f16__2_3_3(ptr addrspace(1) inreg %ptr) {
3881; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_3_3:
3882; GFX900:       ; %bb.0:
3883; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3884; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3885; GFX900-NEXT:    ;;#ASMSTART
3886; GFX900-NEXT:    ; def v[0:1]
3887; GFX900-NEXT:    ;;#ASMEND
3888; GFX900-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
3889; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3890; GFX900-NEXT:    s_waitcnt vmcnt(0)
3891; GFX900-NEXT:    s_setpc_b64 s[30:31]
3892;
3893; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_3_3:
3894; GFX90A:       ; %bb.0:
3895; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3896; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3897; GFX90A-NEXT:    ;;#ASMSTART
3898; GFX90A-NEXT:    ; def v[0:1]
3899; GFX90A-NEXT:    ;;#ASMEND
3900; GFX90A-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
3901; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3902; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3903; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3904;
3905; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_3_3:
3906; GFX940:       ; %bb.0:
3907; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3908; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3909; GFX940-NEXT:    ;;#ASMSTART
3910; GFX940-NEXT:    ; def v[0:1]
3911; GFX940-NEXT:    ;;#ASMEND
3912; GFX940-NEXT:    global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1
3913; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3914; GFX940-NEXT:    s_waitcnt vmcnt(0)
3915; GFX940-NEXT:    s_setpc_b64 s[30:31]
3916  %vec0 = call <4 x half> asm "; def $0", "=v"()
3917  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 3, i32 3>
3918  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3919  ret void
3920}
3921
; Shuffle mask <3, 3, 3> with a poison second operand: element 3 of %vec0 is
; splatted into all three result lanes. The expected code uses v_perm_b32 with
; v1 as both sources (selector 0x7060302) to duplicate the high half of v1
; into the dword, and v1 shifted right by 16 for the trailing short store.
3922define void @v_shuffle_v3f16_v4f16__3_3_3(ptr addrspace(1) inreg %ptr) {
3923; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_3_3:
3924; GFX900:       ; %bb.0:
3925; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3926; GFX900-NEXT:    ;;#ASMSTART
3927; GFX900-NEXT:    ; def v[0:1]
3928; GFX900-NEXT:    ;;#ASMEND
3929; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
3930; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3931; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
3932; GFX900-NEXT:    v_perm_b32 v1, v1, v1, s4
3933; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
3934; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3935; GFX900-NEXT:    s_waitcnt vmcnt(0)
3936; GFX900-NEXT:    s_setpc_b64 s[30:31]
3937;
3938; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_3_3:
3939; GFX90A:       ; %bb.0:
3940; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3941; GFX90A-NEXT:    ;;#ASMSTART
3942; GFX90A-NEXT:    ; def v[0:1]
3943; GFX90A-NEXT:    ;;#ASMEND
3944; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
3945; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3946; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
3947; GFX90A-NEXT:    v_perm_b32 v1, v1, v1, s4
3948; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
3949; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
3950; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3951; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3952;
3953; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_3_3:
3954; GFX940:       ; %bb.0:
3955; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3956; GFX940-NEXT:    ;;#ASMSTART
3957; GFX940-NEXT:    ; def v[0:1]
3958; GFX940-NEXT:    ;;#ASMEND
3959; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
3960; GFX940-NEXT:    v_mov_b32_e32 v2, 0
3961; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
3962; GFX940-NEXT:    v_perm_b32 v1, v1, v1, s2
3963; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
3964; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
3965; GFX940-NEXT:    s_waitcnt vmcnt(0)
3966; GFX940-NEXT:    s_setpc_b64 s[30:31]
3967  %vec0 = call <4 x half> asm "; def $0", "=v"()
3968  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 3, i32 3>
3969  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
3970  ret void
3971}
3972
; Shuffle mask <4, 3, 3>. Index 4 names element 0 of the second operand, which
; is poison in this function (no %vec1 is defined), so lane 0 is effectively
; undefined and only element 3 of %vec0 is meaningful. The expected code keeps
; whatever is in the low half of v0 for lane 0 and merges it with the high
; half of v1 via v_bfi_b32.
; NOTE(review): the sibling tests for indices 5-7 shuffle against a real
; %vec1; presumably the poison operand here is what the test generator
; intended - confirm before regenerating the checks.
3973define void @v_shuffle_v3f16_v4f16__4_3_3(ptr addrspace(1) inreg %ptr) {
3974; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_3_3:
3975; GFX900:       ; %bb.0:
3976; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3977; GFX900-NEXT:    ;;#ASMSTART
3978; GFX900-NEXT:    ; def v[0:1]
3979; GFX900-NEXT:    ;;#ASMEND
3980; GFX900-NEXT:    s_mov_b32 s4, 0xffff
3981; GFX900-NEXT:    v_mov_b32_e32 v2, 0
3982; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
3983; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3984; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
3985; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
3986; GFX900-NEXT:    s_waitcnt vmcnt(0)
3987; GFX900-NEXT:    s_setpc_b64 s[30:31]
3988;
3989; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_3_3:
3990; GFX90A:       ; %bb.0:
3991; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3992; GFX90A-NEXT:    ;;#ASMSTART
3993; GFX90A-NEXT:    ; def v[0:1]
3994; GFX90A-NEXT:    ;;#ASMEND
3995; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
3996; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
3997; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
3998; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3999; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4000; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4001; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4002; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4003;
4004; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_3_3:
4005; GFX940:       ; %bb.0:
4006; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4007; GFX940-NEXT:    ;;#ASMSTART
4008; GFX940-NEXT:    ; def v[0:1]
4009; GFX940-NEXT:    ;;#ASMEND
4010; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4011; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4012; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
4013; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4014; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4015; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4016; GFX940-NEXT:    s_waitcnt vmcnt(0)
4017; GFX940-NEXT:    s_setpc_b64 s[30:31]
4018  %vec0 = call <4 x half> asm "; def $0", "=v"()
4019  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 3, i32 3>
4020  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4021  ret void
4022}
4023
; Two-input shuffle, mask <5, 3, 3>: lane 0 is element 1 of %vec1 (index 5),
; lanes 1 and 2 are element 3 of %vec0. %vec0 is defined into v[0:1] and %vec1
; into v[2:3]; the expected code packs the high halves of v2 and v1 with
; v_perm_b32 (selector 0x7060302). The gfx940 checks additionally expect an
; s_nop 0 between the second inline-asm def and the v_perm_b32.
4024define void @v_shuffle_v3f16_v4f16__5_3_3(ptr addrspace(1) inreg %ptr) {
4025; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_3_3:
4026; GFX900:       ; %bb.0:
4027; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4028; GFX900-NEXT:    ;;#ASMSTART
4029; GFX900-NEXT:    ; def v[0:1]
4030; GFX900-NEXT:    ;;#ASMEND
4031; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4032; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4033; GFX900-NEXT:    ;;#ASMSTART
4034; GFX900-NEXT:    ; def v[2:3]
4035; GFX900-NEXT:    ;;#ASMEND
4036; GFX900-NEXT:    v_perm_b32 v0, v1, v2, s4
4037; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4038; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4039; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4040; GFX900-NEXT:    s_waitcnt vmcnt(0)
4041; GFX900-NEXT:    s_setpc_b64 s[30:31]
4042;
4043; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_3_3:
4044; GFX90A:       ; %bb.0:
4045; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4046; GFX90A-NEXT:    ;;#ASMSTART
4047; GFX90A-NEXT:    ; def v[0:1]
4048; GFX90A-NEXT:    ;;#ASMEND
4049; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4050; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4051; GFX90A-NEXT:    ;;#ASMSTART
4052; GFX90A-NEXT:    ; def v[2:3]
4053; GFX90A-NEXT:    ;;#ASMEND
4054; GFX90A-NEXT:    v_perm_b32 v0, v1, v2, s4
4055; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4056; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4057; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4058; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4059; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4060;
4061; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_3_3:
4062; GFX940:       ; %bb.0:
4063; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4064; GFX940-NEXT:    ;;#ASMSTART
4065; GFX940-NEXT:    ; def v[0:1]
4066; GFX940-NEXT:    ;;#ASMEND
4067; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4068; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4069; GFX940-NEXT:    ;;#ASMSTART
4070; GFX940-NEXT:    ; def v[2:3]
4071; GFX940-NEXT:    ;;#ASMEND
4072; GFX940-NEXT:    s_nop 0
4073; GFX940-NEXT:    v_perm_b32 v0, v1, v2, s2
4074; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4075; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4076; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4077; GFX940-NEXT:    s_waitcnt vmcnt(0)
4078; GFX940-NEXT:    s_setpc_b64 s[30:31]
4079  %vec0 = call <4 x half> asm "; def $0", "=v"()
4080  %vec1 = call <4 x half> asm "; def $0", "=v"()
4081  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
4082  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4083  ret void
4084}
4085
; Two-input shuffle, mask <6, 3, 3>: lane 0 is element 2 of %vec1 (index 6),
; lanes 1 and 2 are element 3 of %vec0. The expected code takes the low half
; of v3 and the high half of v1 with v_bfi_b32 (mask 0xffff). In these checks
; the short store at offset 4 is emitted before the dword store.
4086define void @v_shuffle_v3f16_v4f16__6_3_3(ptr addrspace(1) inreg %ptr) {
4087; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_3_3:
4088; GFX900:       ; %bb.0:
4089; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4090; GFX900-NEXT:    ;;#ASMSTART
4091; GFX900-NEXT:    ; def v[0:1]
4092; GFX900-NEXT:    ;;#ASMEND
4093; GFX900-NEXT:    s_mov_b32 s4, 0xffff
4094; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4095; GFX900-NEXT:    ;;#ASMSTART
4096; GFX900-NEXT:    ; def v[2:3]
4097; GFX900-NEXT:    ;;#ASMEND
4098; GFX900-NEXT:    v_bfi_b32 v0, s4, v3, v1
4099; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4100; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4101; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4102; GFX900-NEXT:    s_waitcnt vmcnt(0)
4103; GFX900-NEXT:    s_setpc_b64 s[30:31]
4104;
4105; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_3_3:
4106; GFX90A:       ; %bb.0:
4107; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4108; GFX90A-NEXT:    ;;#ASMSTART
4109; GFX90A-NEXT:    ; def v[0:1]
4110; GFX90A-NEXT:    ;;#ASMEND
4111; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
4112; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4113; GFX90A-NEXT:    ;;#ASMSTART
4114; GFX90A-NEXT:    ; def v[2:3]
4115; GFX90A-NEXT:    ;;#ASMEND
4116; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v1
4117; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4118; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4119; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4120; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4121; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4122;
4123; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_3_3:
4124; GFX940:       ; %bb.0:
4125; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4126; GFX940-NEXT:    ;;#ASMSTART
4127; GFX940-NEXT:    ; def v[0:1]
4128; GFX940-NEXT:    ;;#ASMEND
4129; GFX940-NEXT:    s_mov_b32 s2, 0xffff
4130; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4131; GFX940-NEXT:    ;;#ASMSTART
4132; GFX940-NEXT:    ; def v[2:3]
4133; GFX940-NEXT:    ;;#ASMEND
4134; GFX940-NEXT:    s_nop 0
4135; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v1
4136; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4137; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4138; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4139; GFX940-NEXT:    s_waitcnt vmcnt(0)
4140; GFX940-NEXT:    s_setpc_b64 s[30:31]
4141  %vec0 = call <4 x half> asm "; def $0", "=v"()
4142  %vec1 = call <4 x half> asm "; def $0", "=v"()
4143  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
4144  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4145  ret void
4146}
4147
; Two-input shuffle, mask <7, 3, 3>: lane 0 is element 3 of %vec1 (index 7),
; lanes 1 and 2 are element 3 of %vec0. Both selected elements are the high
; half of their register (v3 and v1), so the expected code packs them with
; v_perm_b32 (selector 0x7060302) and stores v1 shifted right by 16 as the
; third half.
4148define void @v_shuffle_v3f16_v4f16__7_3_3(ptr addrspace(1) inreg %ptr) {
4149; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_3:
4150; GFX900:       ; %bb.0:
4151; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4152; GFX900-NEXT:    ;;#ASMSTART
4153; GFX900-NEXT:    ; def v[0:1]
4154; GFX900-NEXT:    ;;#ASMEND
4155; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4156; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4157; GFX900-NEXT:    ;;#ASMSTART
4158; GFX900-NEXT:    ; def v[2:3]
4159; GFX900-NEXT:    ;;#ASMEND
4160; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
4161; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4162; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4163; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4164; GFX900-NEXT:    s_waitcnt vmcnt(0)
4165; GFX900-NEXT:    s_setpc_b64 s[30:31]
4166;
4167; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_3:
4168; GFX90A:       ; %bb.0:
4169; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4170; GFX90A-NEXT:    ;;#ASMSTART
4171; GFX90A-NEXT:    ; def v[0:1]
4172; GFX90A-NEXT:    ;;#ASMEND
4173; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4174; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4175; GFX90A-NEXT:    ;;#ASMSTART
4176; GFX90A-NEXT:    ; def v[2:3]
4177; GFX90A-NEXT:    ;;#ASMEND
4178; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
4179; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4180; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4181; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
4182; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4183; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4184;
4185; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_3:
4186; GFX940:       ; %bb.0:
4187; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4188; GFX940-NEXT:    ;;#ASMSTART
4189; GFX940-NEXT:    ; def v[0:1]
4190; GFX940-NEXT:    ;;#ASMEND
4191; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4192; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4193; GFX940-NEXT:    ;;#ASMSTART
4194; GFX940-NEXT:    ; def v[2:3]
4195; GFX940-NEXT:    ;;#ASMEND
4196; GFX940-NEXT:    s_nop 0
4197; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
4198; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4199; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4200; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
4201; GFX940-NEXT:    s_waitcnt vmcnt(0)
4202; GFX940-NEXT:    s_setpc_b64 s[30:31]
4203  %vec0 = call <4 x half> asm "; def $0", "=v"()
4204  %vec1 = call <4 x half> asm "; def $0", "=v"()
4205  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
4206  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4207  ret void
4208}
4209
; Two-input shuffle, mask <7, poison, 3>: lane 0 is element 3 of %vec1
; (index 7), lane 1 is undefined, lane 2 is element 3 of %vec0. The expected
; code moves the high half of v3 into the low half of the dword with
; v_alignbit_b32 (shift 16); the operand supplying the undefined lane is a
; don't-care SGPR (s4, or s0 in the gfx940 checks). Lane 2 is stored straight
; from the high half of v1 via global_store_short_d16_hi.
4210define void @v_shuffle_v3f16_v4f16__7_u_3(ptr addrspace(1) inreg %ptr) {
4211; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_3:
4212; GFX900:       ; %bb.0:
4213; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4214; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4215; GFX900-NEXT:    ;;#ASMSTART
4216; GFX900-NEXT:    ; def v[0:1]
4217; GFX900-NEXT:    ;;#ASMEND
4218; GFX900-NEXT:    ;;#ASMSTART
4219; GFX900-NEXT:    ; def v[2:3]
4220; GFX900-NEXT:    ;;#ASMEND
4221; GFX900-NEXT:    v_alignbit_b32 v0, s4, v3, 16
4222; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4223; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4224; GFX900-NEXT:    s_waitcnt vmcnt(0)
4225; GFX900-NEXT:    s_setpc_b64 s[30:31]
4226;
4227; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_3:
4228; GFX90A:       ; %bb.0:
4229; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4230; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4231; GFX90A-NEXT:    ;;#ASMSTART
4232; GFX90A-NEXT:    ; def v[0:1]
4233; GFX90A-NEXT:    ;;#ASMEND
4234; GFX90A-NEXT:    ;;#ASMSTART
4235; GFX90A-NEXT:    ; def v[2:3]
4236; GFX90A-NEXT:    ;;#ASMEND
4237; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v3, 16
4238; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4239; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4240; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4241; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4242;
4243; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_3:
4244; GFX940:       ; %bb.0:
4245; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4246; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4247; GFX940-NEXT:    ;;#ASMSTART
4248; GFX940-NEXT:    ; def v[0:1]
4249; GFX940-NEXT:    ;;#ASMEND
4250; GFX940-NEXT:    ;;#ASMSTART
4251; GFX940-NEXT:    ; def v[2:3]
4252; GFX940-NEXT:    ;;#ASMEND
4253; GFX940-NEXT:    s_nop 0
4254; GFX940-NEXT:    v_alignbit_b32 v0, s0, v3, 16
4255; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4256; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4257; GFX940-NEXT:    s_waitcnt vmcnt(0)
4258; GFX940-NEXT:    s_setpc_b64 s[30:31]
4259  %vec0 = call <4 x half> asm "; def $0", "=v"()
4260  %vec1 = call <4 x half> asm "; def $0", "=v"()
4261  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
4262  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4263  ret void
4264}
4265
; Two-input shuffle, mask <7, 0, 3>: lane 0 is element 3 of %vec1 (index 7),
; lane 1 is element 0 of %vec0, lane 2 is element 3 of %vec0. The wanted
; halves are the high half of v3 followed by the low half of v0, so the
; expected code forms the dword with v_alignbit_b32 v0, v0, v3, 16 and stores
; lane 2 from the high half of v1 via global_store_short_d16_hi.
4266define void @v_shuffle_v3f16_v4f16__7_0_3(ptr addrspace(1) inreg %ptr) {
4267; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_3:
4268; GFX900:       ; %bb.0:
4269; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4270; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4271; GFX900-NEXT:    ;;#ASMSTART
4272; GFX900-NEXT:    ; def v[0:1]
4273; GFX900-NEXT:    ;;#ASMEND
4274; GFX900-NEXT:    ;;#ASMSTART
4275; GFX900-NEXT:    ; def v[2:3]
4276; GFX900-NEXT:    ;;#ASMEND
4277; GFX900-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4278; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4279; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4280; GFX900-NEXT:    s_waitcnt vmcnt(0)
4281; GFX900-NEXT:    s_setpc_b64 s[30:31]
4282;
4283; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_3:
4284; GFX90A:       ; %bb.0:
4285; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4286; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4287; GFX90A-NEXT:    ;;#ASMSTART
4288; GFX90A-NEXT:    ; def v[0:1]
4289; GFX90A-NEXT:    ;;#ASMEND
4290; GFX90A-NEXT:    ;;#ASMSTART
4291; GFX90A-NEXT:    ; def v[2:3]
4292; GFX90A-NEXT:    ;;#ASMEND
4293; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4294; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4295; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4296; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4297; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4298;
4299; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_3:
4300; GFX940:       ; %bb.0:
4301; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4302; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4303; GFX940-NEXT:    ;;#ASMSTART
4304; GFX940-NEXT:    ; def v[0:1]
4305; GFX940-NEXT:    ;;#ASMEND
4306; GFX940-NEXT:    ;;#ASMSTART
4307; GFX940-NEXT:    ; def v[2:3]
4308; GFX940-NEXT:    ;;#ASMEND
4309; GFX940-NEXT:    s_nop 0
4310; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
4311; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4312; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4313; GFX940-NEXT:    s_waitcnt vmcnt(0)
4314; GFX940-NEXT:    s_setpc_b64 s[30:31]
4315  %vec0 = call <4 x half> asm "; def $0", "=v"()
4316  %vec1 = call <4 x half> asm "; def $0", "=v"()
4317  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
4318  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4319  ret void
4320}
4321
; Two-input shuffle, mask <7, 1, 3>: lane 0 is element 3 of %vec1 (index 7),
; lane 1 is element 1 of %vec0, lane 2 is element 3 of %vec0. Lanes 0 and 1
; are both high halves (of v3 and v0), so the expected code packs them with
; v_perm_b32 (selector 0x7060302); lane 2 is stored from the high half of v1
; via global_store_short_d16_hi.
4322define void @v_shuffle_v3f16_v4f16__7_1_3(ptr addrspace(1) inreg %ptr) {
4323; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_3:
4324; GFX900:       ; %bb.0:
4325; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4326; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4327; GFX900-NEXT:    ;;#ASMSTART
4328; GFX900-NEXT:    ; def v[0:1]
4329; GFX900-NEXT:    ;;#ASMEND
4330; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4331; GFX900-NEXT:    ;;#ASMSTART
4332; GFX900-NEXT:    ; def v[2:3]
4333; GFX900-NEXT:    ;;#ASMEND
4334; GFX900-NEXT:    v_perm_b32 v0, v0, v3, s4
4335; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4336; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4337; GFX900-NEXT:    s_waitcnt vmcnt(0)
4338; GFX900-NEXT:    s_setpc_b64 s[30:31]
4339;
4340; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_3:
4341; GFX90A:       ; %bb.0:
4342; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4343; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4344; GFX90A-NEXT:    ;;#ASMSTART
4345; GFX90A-NEXT:    ; def v[0:1]
4346; GFX90A-NEXT:    ;;#ASMEND
4347; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4348; GFX90A-NEXT:    ;;#ASMSTART
4349; GFX90A-NEXT:    ; def v[2:3]
4350; GFX90A-NEXT:    ;;#ASMEND
4351; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
4352; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4353; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4354; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4355; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4356;
4357; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_3:
4358; GFX940:       ; %bb.0:
4359; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4360; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4361; GFX940-NEXT:    ;;#ASMSTART
4362; GFX940-NEXT:    ; def v[0:1]
4363; GFX940-NEXT:    ;;#ASMEND
4364; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4365; GFX940-NEXT:    ;;#ASMSTART
4366; GFX940-NEXT:    ; def v[2:3]
4367; GFX940-NEXT:    ;;#ASMEND
4368; GFX940-NEXT:    s_nop 0
4369; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
4370; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4371; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4372; GFX940-NEXT:    s_waitcnt vmcnt(0)
4373; GFX940-NEXT:    s_setpc_b64 s[30:31]
4374  %vec0 = call <4 x half> asm "; def $0", "=v"()
4375  %vec1 = call <4 x half> asm "; def $0", "=v"()
4376  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
4377  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4378  ret void
4379}
4380
; Two-input shuffle, mask <7, 2, 3>: lane 0 is element 3 of %vec1 (index 7),
; lanes 1 and 2 are elements 2 and 3 of %vec0. The dword needs the high half
; of v3 followed by the low half of v1, which the expected code produces with
; v_alignbit_b32 v0, v1, v3, 16; lane 2 is stored from the high half of v1 via
; global_store_short_d16_hi.
4381define void @v_shuffle_v3f16_v4f16__7_2_3(ptr addrspace(1) inreg %ptr) {
4382; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_3:
4383; GFX900:       ; %bb.0:
4384; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4385; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4386; GFX900-NEXT:    ;;#ASMSTART
4387; GFX900-NEXT:    ; def v[0:1]
4388; GFX900-NEXT:    ;;#ASMEND
4389; GFX900-NEXT:    ;;#ASMSTART
4390; GFX900-NEXT:    ; def v[2:3]
4391; GFX900-NEXT:    ;;#ASMEND
4392; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
4393; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4394; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4395; GFX900-NEXT:    s_waitcnt vmcnt(0)
4396; GFX900-NEXT:    s_setpc_b64 s[30:31]
4397;
4398; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_3:
4399; GFX90A:       ; %bb.0:
4400; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4401; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4402; GFX90A-NEXT:    ;;#ASMSTART
4403; GFX90A-NEXT:    ; def v[0:1]
4404; GFX90A-NEXT:    ;;#ASMEND
4405; GFX90A-NEXT:    ;;#ASMSTART
4406; GFX90A-NEXT:    ; def v[2:3]
4407; GFX90A-NEXT:    ;;#ASMEND
4408; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
4409; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4410; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4411; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4412; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4413;
4414; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_3:
4415; GFX940:       ; %bb.0:
4416; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4417; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4418; GFX940-NEXT:    ;;#ASMSTART
4419; GFX940-NEXT:    ; def v[0:1]
4420; GFX940-NEXT:    ;;#ASMEND
4421; GFX940-NEXT:    ;;#ASMSTART
4422; GFX940-NEXT:    ; def v[2:3]
4423; GFX940-NEXT:    ;;#ASMEND
4424; GFX940-NEXT:    s_nop 0
4425; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
4426; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4427; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4428; GFX940-NEXT:    s_waitcnt vmcnt(0)
4429; GFX940-NEXT:    s_setpc_b64 s[30:31]
4430  %vec0 = call <4 x half> asm "; def $0", "=v"()
4431  %vec1 = call <4 x half> asm "; def $0", "=v"()
4432  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
4433  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4434  ret void
4435}
4436
; Two-input shuffle, mask <7, 4, 3>: lanes 0 and 1 are elements 3 and 0 of
; %vec1 (indices 7 and 4), lane 2 is element 3 of %vec0. The dword needs the
; high half of v3 followed by the low half of v2, which the expected code
; produces with v_alignbit_b32 v0, v2, v3, 16; lane 2 is stored from the high
; half of v1 via global_store_short_d16_hi.
4437define void @v_shuffle_v3f16_v4f16__7_4_3(ptr addrspace(1) inreg %ptr) {
4438; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_3:
4439; GFX900:       ; %bb.0:
4440; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4441; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4442; GFX900-NEXT:    ;;#ASMSTART
4443; GFX900-NEXT:    ; def v[0:1]
4444; GFX900-NEXT:    ;;#ASMEND
4445; GFX900-NEXT:    ;;#ASMSTART
4446; GFX900-NEXT:    ; def v[2:3]
4447; GFX900-NEXT:    ;;#ASMEND
4448; GFX900-NEXT:    v_alignbit_b32 v0, v2, v3, 16
4449; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4450; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4451; GFX900-NEXT:    s_waitcnt vmcnt(0)
4452; GFX900-NEXT:    s_setpc_b64 s[30:31]
4453;
4454; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_3:
4455; GFX90A:       ; %bb.0:
4456; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4457; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4458; GFX90A-NEXT:    ;;#ASMSTART
4459; GFX90A-NEXT:    ; def v[0:1]
4460; GFX90A-NEXT:    ;;#ASMEND
4461; GFX90A-NEXT:    ;;#ASMSTART
4462; GFX90A-NEXT:    ; def v[2:3]
4463; GFX90A-NEXT:    ;;#ASMEND
4464; GFX90A-NEXT:    v_alignbit_b32 v0, v2, v3, 16
4465; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4466; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4467; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4468; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4469;
4470; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_3:
4471; GFX940:       ; %bb.0:
4472; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4473; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4474; GFX940-NEXT:    ;;#ASMSTART
4475; GFX940-NEXT:    ; def v[0:1]
4476; GFX940-NEXT:    ;;#ASMEND
4477; GFX940-NEXT:    ;;#ASMSTART
4478; GFX940-NEXT:    ; def v[2:3]
4479; GFX940-NEXT:    ;;#ASMEND
4480; GFX940-NEXT:    s_nop 0
4481; GFX940-NEXT:    v_alignbit_b32 v0, v2, v3, 16
4482; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4483; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4484; GFX940-NEXT:    s_waitcnt vmcnt(0)
4485; GFX940-NEXT:    s_setpc_b64 s[30:31]
4486  %vec0 = call <4 x half> asm "; def $0", "=v"()
4487  %vec1 = call <4 x half> asm "; def $0", "=v"()
4488  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
4489  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4490  ret void
4491}
4492
; Two-input shuffle, mask <7, 5, 3>: lanes 0 and 1 are elements 3 and 1 of
; %vec1 (indices 7 and 5), lane 2 is element 3 of %vec0. Lanes 0 and 1 are
; both high halves (of v3 and v2), so the expected code packs them with
; v_perm_b32 (selector 0x7060302); lane 2 is stored from the high half of v1
; via global_store_short_d16_hi.
4493define void @v_shuffle_v3f16_v4f16__7_5_3(ptr addrspace(1) inreg %ptr) {
4494; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_3:
4495; GFX900:       ; %bb.0:
4496; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4497; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4498; GFX900-NEXT:    ;;#ASMSTART
4499; GFX900-NEXT:    ; def v[0:1]
4500; GFX900-NEXT:    ;;#ASMEND
4501; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
4502; GFX900-NEXT:    ;;#ASMSTART
4503; GFX900-NEXT:    ; def v[2:3]
4504; GFX900-NEXT:    ;;#ASMEND
4505; GFX900-NEXT:    v_perm_b32 v0, v2, v3, s4
4506; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4507; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4508; GFX900-NEXT:    s_waitcnt vmcnt(0)
4509; GFX900-NEXT:    s_setpc_b64 s[30:31]
4510;
4511; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_3:
4512; GFX90A:       ; %bb.0:
4513; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4514; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4515; GFX90A-NEXT:    ;;#ASMSTART
4516; GFX90A-NEXT:    ; def v[0:1]
4517; GFX90A-NEXT:    ;;#ASMEND
4518; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
4519; GFX90A-NEXT:    ;;#ASMSTART
4520; GFX90A-NEXT:    ; def v[2:3]
4521; GFX90A-NEXT:    ;;#ASMEND
4522; GFX90A-NEXT:    v_perm_b32 v0, v2, v3, s4
4523; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4524; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4525; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4526; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4527;
4528; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_3:
4529; GFX940:       ; %bb.0:
4530; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4531; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4532; GFX940-NEXT:    ;;#ASMSTART
4533; GFX940-NEXT:    ; def v[0:1]
4534; GFX940-NEXT:    ;;#ASMEND
4535; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
4536; GFX940-NEXT:    ;;#ASMSTART
4537; GFX940-NEXT:    ; def v[2:3]
4538; GFX940-NEXT:    ;;#ASMEND
4539; GFX940-NEXT:    s_nop 0
4540; GFX940-NEXT:    v_perm_b32 v0, v2, v3, s2
4541; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4542; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4543; GFX940-NEXT:    s_waitcnt vmcnt(0)
4544; GFX940-NEXT:    s_setpc_b64 s[30:31]
4545  %vec0 = call <4 x half> asm "; def $0", "=v"()
4546  %vec1 = call <4 x half> asm "; def $0", "=v"()
4547  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
4548  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4549  ret void
4550}
4551
; Mask <7, 6, 3>: shufflevector indices 0-3 pick lanes of %vec0 and indices
; 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[3], %vec1[2], %vec0[3]>.
4552define void @v_shuffle_v3f16_v4f16__7_6_3(ptr addrspace(1) inreg %ptr) {
4553; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_3:
4554; GFX900:       ; %bb.0:
4555; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4556; GFX900-NEXT:    v_mov_b32_e32 v4, 0
4557; GFX900-NEXT:    ;;#ASMSTART
4558; GFX900-NEXT:    ; def v[0:1]
4559; GFX900-NEXT:    ;;#ASMEND
4560; GFX900-NEXT:    ;;#ASMSTART
4561; GFX900-NEXT:    ; def v[2:3]
4562; GFX900-NEXT:    ;;#ASMEND
4563; GFX900-NEXT:    v_alignbit_b32 v0, v3, v3, 16
4564; GFX900-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4565; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
4566; GFX900-NEXT:    s_waitcnt vmcnt(0)
4567; GFX900-NEXT:    s_setpc_b64 s[30:31]
4568;
4569; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_3:
4570; GFX90A:       ; %bb.0:
4571; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4572; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
4573; GFX90A-NEXT:    ;;#ASMSTART
4574; GFX90A-NEXT:    ; def v[0:1]
4575; GFX90A-NEXT:    ;;#ASMEND
4576; GFX90A-NEXT:    ;;#ASMSTART
4577; GFX90A-NEXT:    ; def v[2:3]
4578; GFX90A-NEXT:    ;;#ASMEND
4579; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v3, 16
4580; GFX90A-NEXT:    global_store_short_d16_hi v4, v1, s[16:17] offset:4
4581; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
4582; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4583; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4584;
4585; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_3:
4586; GFX940:       ; %bb.0:
4587; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4588; GFX940-NEXT:    v_mov_b32_e32 v4, 0
4589; GFX940-NEXT:    ;;#ASMSTART
4590; GFX940-NEXT:    ; def v[0:1]
4591; GFX940-NEXT:    ;;#ASMEND
4592; GFX940-NEXT:    ;;#ASMSTART
4593; GFX940-NEXT:    ; def v[2:3]
4594; GFX940-NEXT:    ;;#ASMEND
4595; GFX940-NEXT:    s_nop 0
4596; GFX940-NEXT:    v_alignbit_b32 v0, v3, v3, 16
4597; GFX940-NEXT:    global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1
4598; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
4599; GFX940-NEXT:    s_waitcnt vmcnt(0)
4600; GFX940-NEXT:    s_setpc_b64 s[30:31]
4601  %vec0 = call <4 x half> asm "; def $0", "=v"()
4602  %vec1 = call <4 x half> asm "; def $0", "=v"()
4603  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
4604  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4605  ret void
4606}
4607
; Mask <poison, 4, 4> with a poison second operand: lane 0 has a poison index
; and lanes 1-2 select lane 0 of the poison operand, so no lane of %vec0 is
; used. The assertions below expect only the entry wait and the return.
4608define void @v_shuffle_v3f16_v4f16__u_4_4(ptr addrspace(1) inreg %ptr) {
4609; GFX9-LABEL: v_shuffle_v3f16_v4f16__u_4_4:
4610; GFX9:       ; %bb.0:
4611; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4612; GFX9-NEXT:    s_setpc_b64 s[30:31]
4613  %vec0 = call <4 x half> asm "; def $0", "=v"()
4614  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 4, i32 4>
4615  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4616  ret void
4617}
4618
; Mask <0, 4, 4> with a poison second operand: lane 0 is %vec0[0]; lanes 1-2
; select lane 0 of the poison operand, so only lane 0 carries a defined value.
4619define void @v_shuffle_v3f16_v4f16__0_4_4(ptr addrspace(1) inreg %ptr) {
4620; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_4_4:
4621; GFX900:       ; %bb.0:
4622; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4623; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4624; GFX900-NEXT:    ;;#ASMSTART
4625; GFX900-NEXT:    ; def v[0:1]
4626; GFX900-NEXT:    ;;#ASMEND
4627; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4628; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4629; GFX900-NEXT:    s_waitcnt vmcnt(0)
4630; GFX900-NEXT:    s_setpc_b64 s[30:31]
4631;
4632; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_4_4:
4633; GFX90A:       ; %bb.0:
4634; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4635; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4636; GFX90A-NEXT:    ;;#ASMSTART
4637; GFX90A-NEXT:    ; def v[0:1]
4638; GFX90A-NEXT:    ;;#ASMEND
4639; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
4640; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4641; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4642; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4643;
4644; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_4_4:
4645; GFX940:       ; %bb.0:
4646; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4647; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4648; GFX940-NEXT:    ;;#ASMSTART
4649; GFX940-NEXT:    ; def v[0:1]
4650; GFX940-NEXT:    ;;#ASMEND
4651; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
4652; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4653; GFX940-NEXT:    s_waitcnt vmcnt(0)
4654; GFX940-NEXT:    s_setpc_b64 s[30:31]
4655  %vec0 = call <4 x half> asm "; def $0", "=v"()
4656  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 4, i32 4>
4657  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4658  ret void
4659}
4660
; Mask <1, 4, 4> with a poison second operand: lane 0 is %vec0[1]; lanes 1-2
; select lane 0 of the poison operand, so only lane 0 carries a defined value.
; The assertions below expect a single dword store and no separate short store.
4661define void @v_shuffle_v3f16_v4f16__1_4_4(ptr addrspace(1) inreg %ptr) {
4662; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_4_4:
4663; GFX900:       ; %bb.0:
4664; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4665; GFX900-NEXT:    ;;#ASMSTART
4666; GFX900-NEXT:    ; def v[0:1]
4667; GFX900-NEXT:    ;;#ASMEND
4668; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4669; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
4670; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4671; GFX900-NEXT:    s_waitcnt vmcnt(0)
4672; GFX900-NEXT:    s_setpc_b64 s[30:31]
4673;
4674; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_4_4:
4675; GFX90A:       ; %bb.0:
4676; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4677; GFX90A-NEXT:    ;;#ASMSTART
4678; GFX90A-NEXT:    ; def v[0:1]
4679; GFX90A-NEXT:    ;;#ASMEND
4680; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4681; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
4682; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4683; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4684; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4685;
4686; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_4_4:
4687; GFX940:       ; %bb.0:
4688; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4689; GFX940-NEXT:    ;;#ASMSTART
4690; GFX940-NEXT:    ; def v[0:1]
4691; GFX940-NEXT:    ;;#ASMEND
4692; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4693; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
4694; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4695; GFX940-NEXT:    s_waitcnt vmcnt(0)
4696; GFX940-NEXT:    s_setpc_b64 s[30:31]
4697  %vec0 = call <4 x half> asm "; def $0", "=v"()
4698  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 4, i32 4>
4699  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4700  ret void
4701}
4702
; Mask <2, 4, 4> with a poison second operand: lane 0 is %vec0[2]; lanes 1-2
; select lane 0 of the poison operand, so only lane 0 carries a defined value.
; The assertions below expect a single dword store and no separate short store.
4703define void @v_shuffle_v3f16_v4f16__2_4_4(ptr addrspace(1) inreg %ptr) {
4704; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_4_4:
4705; GFX900:       ; %bb.0:
4706; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4707; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4708; GFX900-NEXT:    ;;#ASMSTART
4709; GFX900-NEXT:    ; def v[0:1]
4710; GFX900-NEXT:    ;;#ASMEND
4711; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4712; GFX900-NEXT:    s_waitcnt vmcnt(0)
4713; GFX900-NEXT:    s_setpc_b64 s[30:31]
4714;
4715; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_4_4:
4716; GFX90A:       ; %bb.0:
4717; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4718; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4719; GFX90A-NEXT:    ;;#ASMSTART
4720; GFX90A-NEXT:    ; def v[0:1]
4721; GFX90A-NEXT:    ;;#ASMEND
4722; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4723; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4724; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4725;
4726; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_4_4:
4727; GFX940:       ; %bb.0:
4728; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4729; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4730; GFX940-NEXT:    ;;#ASMSTART
4731; GFX940-NEXT:    ; def v[0:1]
4732; GFX940-NEXT:    ;;#ASMEND
4733; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4734; GFX940-NEXT:    s_waitcnt vmcnt(0)
4735; GFX940-NEXT:    s_setpc_b64 s[30:31]
4736  %vec0 = call <4 x half> asm "; def $0", "=v"()
4737  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 4, i32 4>
4738  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4739  ret void
4740}
4741
; Mask <3, 4, 4> with a poison second operand: lane 0 is %vec0[3]; lanes 1-2
; select lane 0 of the poison operand, so only lane 0 carries a defined value.
; The assertions below expect a single dword store and no separate short store.
4742define void @v_shuffle_v3f16_v4f16__3_4_4(ptr addrspace(1) inreg %ptr) {
4743; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_4_4:
4744; GFX900:       ; %bb.0:
4745; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4746; GFX900-NEXT:    ;;#ASMSTART
4747; GFX900-NEXT:    ; def v[0:1]
4748; GFX900-NEXT:    ;;#ASMEND
4749; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4750; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
4751; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
4752; GFX900-NEXT:    s_waitcnt vmcnt(0)
4753; GFX900-NEXT:    s_setpc_b64 s[30:31]
4754;
4755; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_4_4:
4756; GFX90A:       ; %bb.0:
4757; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4758; GFX90A-NEXT:    ;;#ASMSTART
4759; GFX90A-NEXT:    ; def v[0:1]
4760; GFX90A-NEXT:    ;;#ASMEND
4761; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4762; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
4763; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
4764; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4765; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4766;
4767; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_4_4:
4768; GFX940:       ; %bb.0:
4769; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4770; GFX940-NEXT:    ;;#ASMSTART
4771; GFX940-NEXT:    ; def v[0:1]
4772; GFX940-NEXT:    ;;#ASMEND
4773; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4774; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
4775; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
4776; GFX940-NEXT:    s_waitcnt vmcnt(0)
4777; GFX940-NEXT:    s_setpc_b64 s[30:31]
4778  %vec0 = call <4 x half> asm "; def $0", "=v"()
4779  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 4, i32 4>
4780  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4781  ret void
4782}
4783
; Mask <4, 4, 4> with a poison second operand: every lane selects lane 0 of
; the poison operand, so no lane of %vec0 is used. The assertions below expect
; only the entry wait and the return.
4784define void @v_shuffle_v3f16_v4f16__4_4_4(ptr addrspace(1) inreg %ptr) {
4785; GFX9-LABEL: v_shuffle_v3f16_v4f16__4_4_4:
4786; GFX9:       ; %bb.0:
4787; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4788; GFX9-NEXT:    s_setpc_b64 s[30:31]
4789  %vec0 = call <4 x half> asm "; def $0", "=v"()
4790  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 4, i32 4>
4791  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4792  ret void
4793}
4794
; Mask <5, 4, 4>: indices 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[1], %vec1[0], %vec1[0]>. No mask index refers to %vec0, and the
; assertions below expect only one inline-asm def.
4795define void @v_shuffle_v3f16_v4f16__5_4_4(ptr addrspace(1) inreg %ptr) {
4796; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_4_4:
4797; GFX900:       ; %bb.0:
4798; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4799; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4800; GFX900-NEXT:    ;;#ASMSTART
4801; GFX900-NEXT:    ; def v[0:1]
4802; GFX900-NEXT:    ;;#ASMEND
4803; GFX900-NEXT:    v_alignbit_b32 v1, v0, v0, 16
4804; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4805; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4806; GFX900-NEXT:    s_waitcnt vmcnt(0)
4807; GFX900-NEXT:    s_setpc_b64 s[30:31]
4808;
4809; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_4_4:
4810; GFX90A:       ; %bb.0:
4811; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4812; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4813; GFX90A-NEXT:    ;;#ASMSTART
4814; GFX90A-NEXT:    ; def v[0:1]
4815; GFX90A-NEXT:    ;;#ASMEND
4816; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v0, 16
4817; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4818; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4819; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4820; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4821;
4822; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_4_4:
4823; GFX940:       ; %bb.0:
4824; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4825; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4826; GFX940-NEXT:    ;;#ASMSTART
4827; GFX940-NEXT:    ; def v[0:1]
4828; GFX940-NEXT:    ;;#ASMEND
4829; GFX940-NEXT:    s_nop 0
4830; GFX940-NEXT:    v_alignbit_b32 v1, v0, v0, 16
4831; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4832; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4833; GFX940-NEXT:    s_waitcnt vmcnt(0)
4834; GFX940-NEXT:    s_setpc_b64 s[30:31]
4835  %vec0 = call <4 x half> asm "; def $0", "=v"()
4836  %vec1 = call <4 x half> asm "; def $0", "=v"()
4837  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
4838  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4839  ret void
4840}
4841
; Mask <6, 4, 4>: indices 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[2], %vec1[0], %vec1[0]>. No mask index refers to %vec0, and the
; assertions below expect only one inline-asm def.
4842define void @v_shuffle_v3f16_v4f16__6_4_4(ptr addrspace(1) inreg %ptr) {
4843; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_4_4:
4844; GFX900:       ; %bb.0:
4845; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4846; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4847; GFX900-NEXT:    ;;#ASMSTART
4848; GFX900-NEXT:    ; def v[0:1]
4849; GFX900-NEXT:    ;;#ASMEND
4850; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
4851; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
4852; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4853; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4854; GFX900-NEXT:    s_waitcnt vmcnt(0)
4855; GFX900-NEXT:    s_setpc_b64 s[30:31]
4856;
4857; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_4_4:
4858; GFX90A:       ; %bb.0:
4859; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4860; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4861; GFX90A-NEXT:    ;;#ASMSTART
4862; GFX90A-NEXT:    ; def v[0:1]
4863; GFX90A-NEXT:    ;;#ASMEND
4864; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
4865; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
4866; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4867; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4868; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4869; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4870;
4871; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_4_4:
4872; GFX940:       ; %bb.0:
4873; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4874; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4875; GFX940-NEXT:    ;;#ASMSTART
4876; GFX940-NEXT:    ; def v[0:1]
4877; GFX940-NEXT:    ;;#ASMEND
4878; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
4879; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
4880; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4881; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4882; GFX940-NEXT:    s_waitcnt vmcnt(0)
4883; GFX940-NEXT:    s_setpc_b64 s[30:31]
4884  %vec0 = call <4 x half> asm "; def $0", "=v"()
4885  %vec1 = call <4 x half> asm "; def $0", "=v"()
4886  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
4887  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4888  ret void
4889}
4890
; Mask <7, 4, 4>: indices 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[3], %vec1[0], %vec1[0]>. No mask index refers to %vec0, and the
; assertions below expect only one inline-asm def.
4891define void @v_shuffle_v3f16_v4f16__7_4_4(ptr addrspace(1) inreg %ptr) {
4892; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_4:
4893; GFX900:       ; %bb.0:
4894; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4895; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4896; GFX900-NEXT:    ;;#ASMSTART
4897; GFX900-NEXT:    ; def v[0:1]
4898; GFX900-NEXT:    ;;#ASMEND
4899; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
4900; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4901; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4902; GFX900-NEXT:    s_waitcnt vmcnt(0)
4903; GFX900-NEXT:    s_setpc_b64 s[30:31]
4904;
4905; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_4:
4906; GFX90A:       ; %bb.0:
4907; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4908; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4909; GFX90A-NEXT:    ;;#ASMSTART
4910; GFX90A-NEXT:    ; def v[0:1]
4911; GFX90A-NEXT:    ;;#ASMEND
4912; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v1, 16
4913; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4914; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4915; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4916; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4917;
4918; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_4:
4919; GFX940:       ; %bb.0:
4920; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4921; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4922; GFX940-NEXT:    ;;#ASMSTART
4923; GFX940-NEXT:    ; def v[0:1]
4924; GFX940-NEXT:    ;;#ASMEND
4925; GFX940-NEXT:    s_nop 0
4926; GFX940-NEXT:    v_alignbit_b32 v1, v0, v1, 16
4927; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4928; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4929; GFX940-NEXT:    s_waitcnt vmcnt(0)
4930; GFX940-NEXT:    s_setpc_b64 s[30:31]
4931  %vec0 = call <4 x half> asm "; def $0", "=v"()
4932  %vec1 = call <4 x half> asm "; def $0", "=v"()
4933  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
4934  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4935  ret void
4936}
4937
; Mask <7, poison, 4>: indices 4-7 pick lanes of %vec1, so lane 0 is %vec1[3],
; lane 1 is poison and lane 2 is %vec1[0]. No mask index refers to %vec0, and
; the assertions below expect only one inline-asm def.
4938define void @v_shuffle_v3f16_v4f16__7_u_4(ptr addrspace(1) inreg %ptr) {
4939; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_4:
4940; GFX900:       ; %bb.0:
4941; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4942; GFX900-NEXT:    v_mov_b32_e32 v2, 0
4943; GFX900-NEXT:    ;;#ASMSTART
4944; GFX900-NEXT:    ; def v[0:1]
4945; GFX900-NEXT:    ;;#ASMEND
4946; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
4947; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4948; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
4949; GFX900-NEXT:    s_waitcnt vmcnt(0)
4950; GFX900-NEXT:    s_setpc_b64 s[30:31]
4951;
4952; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_4:
4953; GFX90A:       ; %bb.0:
4954; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4955; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
4956; GFX90A-NEXT:    ;;#ASMSTART
4957; GFX90A-NEXT:    ; def v[0:1]
4958; GFX90A-NEXT:    ;;#ASMEND
4959; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v1, 16
4960; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
4961; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
4962; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4963; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4964;
4965; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_4:
4966; GFX940:       ; %bb.0:
4967; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4968; GFX940-NEXT:    v_mov_b32_e32 v2, 0
4969; GFX940-NEXT:    ;;#ASMSTART
4970; GFX940-NEXT:    ; def v[0:1]
4971; GFX940-NEXT:    ;;#ASMEND
4972; GFX940-NEXT:    s_nop 0
4973; GFX940-NEXT:    v_alignbit_b32 v1, s0, v1, 16
4974; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
4975; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
4976; GFX940-NEXT:    s_waitcnt vmcnt(0)
4977; GFX940-NEXT:    s_setpc_b64 s[30:31]
4978  %vec0 = call <4 x half> asm "; def $0", "=v"()
4979  %vec1 = call <4 x half> asm "; def $0", "=v"()
4980  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
4981  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
4982  ret void
4983}
4984
; Mask <7, 0, 4>: shufflevector indices 0-3 pick lanes of %vec0 and indices
; 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[3], %vec0[0], %vec1[0]>.
4985define void @v_shuffle_v3f16_v4f16__7_0_4(ptr addrspace(1) inreg %ptr) {
4986; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_4:
4987; GFX900:       ; %bb.0:
4988; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4989; GFX900-NEXT:    ;;#ASMSTART
4990; GFX900-NEXT:    ; def v[0:1]
4991; GFX900-NEXT:    ;;#ASMEND
4992; GFX900-NEXT:    v_mov_b32_e32 v3, 0
4993; GFX900-NEXT:    ;;#ASMSTART
4994; GFX900-NEXT:    ; def v[1:2]
4995; GFX900-NEXT:    ;;#ASMEND
4996; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
4997; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
4998; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
4999; GFX900-NEXT:    s_waitcnt vmcnt(0)
5000; GFX900-NEXT:    s_setpc_b64 s[30:31]
5001;
5002; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_4:
5003; GFX90A:       ; %bb.0:
5004; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5005; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5006; GFX90A-NEXT:    ;;#ASMSTART
5007; GFX90A-NEXT:    ; def v[0:1]
5008; GFX90A-NEXT:    ;;#ASMEND
5009; GFX90A-NEXT:    ;;#ASMSTART
5010; GFX90A-NEXT:    ; def v[2:3]
5011; GFX90A-NEXT:    ;;#ASMEND
5012; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
5013; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5014; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5015; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5016; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5017;
5018; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_4:
5019; GFX940:       ; %bb.0:
5020; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5021; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5022; GFX940-NEXT:    ;;#ASMSTART
5023; GFX940-NEXT:    ; def v[0:1]
5024; GFX940-NEXT:    ;;#ASMEND
5025; GFX940-NEXT:    ;;#ASMSTART
5026; GFX940-NEXT:    ; def v[2:3]
5027; GFX940-NEXT:    ;;#ASMEND
5028; GFX940-NEXT:    s_nop 0
5029; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
5030; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5031; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5032; GFX940-NEXT:    s_waitcnt vmcnt(0)
5033; GFX940-NEXT:    s_setpc_b64 s[30:31]
5034  %vec0 = call <4 x half> asm "; def $0", "=v"()
5035  %vec1 = call <4 x half> asm "; def $0", "=v"()
5036  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
5037  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5038  ret void
5039}
5040
; Mask <7, 1, 4>: shufflevector indices 0-3 pick lanes of %vec0 and indices
; 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[3], %vec0[1], %vec1[0]>.
5041define void @v_shuffle_v3f16_v4f16__7_1_4(ptr addrspace(1) inreg %ptr) {
5042; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_4:
5043; GFX900:       ; %bb.0:
5044; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5045; GFX900-NEXT:    ;;#ASMSTART
5046; GFX900-NEXT:    ; def v[0:1]
5047; GFX900-NEXT:    ;;#ASMEND
5048; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5049; GFX900-NEXT:    ;;#ASMSTART
5050; GFX900-NEXT:    ; def v[1:2]
5051; GFX900-NEXT:    ;;#ASMEND
5052; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5053; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
5054; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
5055; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5056; GFX900-NEXT:    s_waitcnt vmcnt(0)
5057; GFX900-NEXT:    s_setpc_b64 s[30:31]
5058;
5059; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_4:
5060; GFX90A:       ; %bb.0:
5061; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5062; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5063; GFX90A-NEXT:    ;;#ASMSTART
5064; GFX90A-NEXT:    ; def v[0:1]
5065; GFX90A-NEXT:    ;;#ASMEND
5066; GFX90A-NEXT:    ;;#ASMSTART
5067; GFX90A-NEXT:    ; def v[2:3]
5068; GFX90A-NEXT:    ;;#ASMEND
5069; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5070; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
5071; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5072; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5073; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5074; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5075;
5076; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_4:
5077; GFX940:       ; %bb.0:
5078; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5079; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5080; GFX940-NEXT:    ;;#ASMSTART
5081; GFX940-NEXT:    ; def v[0:1]
5082; GFX940-NEXT:    ;;#ASMEND
5083; GFX940-NEXT:    ;;#ASMSTART
5084; GFX940-NEXT:    ; def v[2:3]
5085; GFX940-NEXT:    ;;#ASMEND
5086; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5087; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
5088; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5089; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5090; GFX940-NEXT:    s_waitcnt vmcnt(0)
5091; GFX940-NEXT:    s_setpc_b64 s[30:31]
5092  %vec0 = call <4 x half> asm "; def $0", "=v"()
5093  %vec1 = call <4 x half> asm "; def $0", "=v"()
5094  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
5095  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5096  ret void
5097}
5098
; Mask <7, 2, 4>: shufflevector indices 0-3 pick lanes of %vec0 and indices
; 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[3], %vec0[2], %vec1[0]>.
5099define void @v_shuffle_v3f16_v4f16__7_2_4(ptr addrspace(1) inreg %ptr) {
5100; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_4:
5101; GFX900:       ; %bb.0:
5102; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5103; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5104; GFX900-NEXT:    ;;#ASMSTART
5105; GFX900-NEXT:    ; def v[0:1]
5106; GFX900-NEXT:    ;;#ASMEND
5107; GFX900-NEXT:    ;;#ASMSTART
5108; GFX900-NEXT:    ; def v[2:3]
5109; GFX900-NEXT:    ;;#ASMEND
5110; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5111; GFX900-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5112; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5113; GFX900-NEXT:    s_waitcnt vmcnt(0)
5114; GFX900-NEXT:    s_setpc_b64 s[30:31]
5115;
5116; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_4:
5117; GFX90A:       ; %bb.0:
5118; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5119; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5120; GFX90A-NEXT:    ;;#ASMSTART
5121; GFX90A-NEXT:    ; def v[0:1]
5122; GFX90A-NEXT:    ;;#ASMEND
5123; GFX90A-NEXT:    ;;#ASMSTART
5124; GFX90A-NEXT:    ; def v[2:3]
5125; GFX90A-NEXT:    ;;#ASMEND
5126; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5127; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5128; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5129; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5130; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5131;
5132; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_4:
5133; GFX940:       ; %bb.0:
5134; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5135; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5136; GFX940-NEXT:    ;;#ASMSTART
5137; GFX940-NEXT:    ; def v[0:1]
5138; GFX940-NEXT:    ;;#ASMEND
5139; GFX940-NEXT:    ;;#ASMSTART
5140; GFX940-NEXT:    ; def v[2:3]
5141; GFX940-NEXT:    ;;#ASMEND
5142; GFX940-NEXT:    s_nop 0
5143; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5144; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5145; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5146; GFX940-NEXT:    s_waitcnt vmcnt(0)
5147; GFX940-NEXT:    s_setpc_b64 s[30:31]
5148  %vec0 = call <4 x half> asm "; def $0", "=v"()
5149  %vec1 = call <4 x half> asm "; def $0", "=v"()
5150  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
5151  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5152  ret void
5153}
5154
; Mask <7, 3, 4>: shufflevector indices 0-3 pick lanes of %vec0 and indices
; 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[3], %vec0[3], %vec1[0]>.
5155define void @v_shuffle_v3f16_v4f16__7_3_4(ptr addrspace(1) inreg %ptr) {
5156; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_4:
5157; GFX900:       ; %bb.0:
5158; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5159; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5160; GFX900-NEXT:    ;;#ASMSTART
5161; GFX900-NEXT:    ; def v[0:1]
5162; GFX900-NEXT:    ;;#ASMEND
5163; GFX900-NEXT:    ;;#ASMSTART
5164; GFX900-NEXT:    ; def v[2:3]
5165; GFX900-NEXT:    ;;#ASMEND
5166; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5167; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
5168; GFX900-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5169; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5170; GFX900-NEXT:    s_waitcnt vmcnt(0)
5171; GFX900-NEXT:    s_setpc_b64 s[30:31]
5172;
5173; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_4:
5174; GFX90A:       ; %bb.0:
5175; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5176; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5177; GFX90A-NEXT:    ;;#ASMSTART
5178; GFX90A-NEXT:    ; def v[0:1]
5179; GFX90A-NEXT:    ;;#ASMEND
5180; GFX90A-NEXT:    ;;#ASMSTART
5181; GFX90A-NEXT:    ; def v[2:3]
5182; GFX90A-NEXT:    ;;#ASMEND
5183; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5184; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
5185; GFX90A-NEXT:    global_store_short v4, v2, s[16:17] offset:4
5186; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5187; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5188; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5189;
5190; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_4:
5191; GFX940:       ; %bb.0:
5192; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5193; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5194; GFX940-NEXT:    ;;#ASMSTART
5195; GFX940-NEXT:    ; def v[0:1]
5196; GFX940-NEXT:    ;;#ASMEND
5197; GFX940-NEXT:    ;;#ASMSTART
5198; GFX940-NEXT:    ; def v[2:3]
5199; GFX940-NEXT:    ;;#ASMEND
5200; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5201; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
5202; GFX940-NEXT:    global_store_short v4, v2, s[0:1] offset:4 sc0 sc1
5203; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5204; GFX940-NEXT:    s_waitcnt vmcnt(0)
5205; GFX940-NEXT:    s_setpc_b64 s[30:31]
5206  %vec0 = call <4 x half> asm "; def $0", "=v"()
5207  %vec1 = call <4 x half> asm "; def $0", "=v"()
5208  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
5209  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5210  ret void
5211}
5212
; Mask <7, 5, 4>: indices 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[3], %vec1[1], %vec1[0]>. No mask index refers to %vec0, and the
; assertions below expect only one inline-asm def.
5213define void @v_shuffle_v3f16_v4f16__7_5_4(ptr addrspace(1) inreg %ptr) {
5214; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_4:
5215; GFX900:       ; %bb.0:
5216; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5217; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5218; GFX900-NEXT:    ;;#ASMSTART
5219; GFX900-NEXT:    ; def v[0:1]
5220; GFX900-NEXT:    ;;#ASMEND
5221; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5222; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
5223; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5224; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5225; GFX900-NEXT:    s_waitcnt vmcnt(0)
5226; GFX900-NEXT:    s_setpc_b64 s[30:31]
5227;
5228; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_4:
5229; GFX90A:       ; %bb.0:
5230; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5231; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5232; GFX90A-NEXT:    ;;#ASMSTART
5233; GFX90A-NEXT:    ; def v[0:1]
5234; GFX90A-NEXT:    ;;#ASMEND
5235; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5236; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
5237; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5238; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5239; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5240; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5241;
5242; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_4:
5243; GFX940:       ; %bb.0:
5244; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5245; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5246; GFX940-NEXT:    ;;#ASMSTART
5247; GFX940-NEXT:    ; def v[0:1]
5248; GFX940-NEXT:    ;;#ASMEND
5249; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5250; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
5251; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5252; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5253; GFX940-NEXT:    s_waitcnt vmcnt(0)
5254; GFX940-NEXT:    s_setpc_b64 s[30:31]
5255  %vec0 = call <4 x half> asm "; def $0", "=v"()
5256  %vec1 = call <4 x half> asm "; def $0", "=v"()
5257  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
5258  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5259  ret void
5260}
5261
; Mask <7, 6, 4>: indices 4-7 pick lanes of %vec1, so the stored <3 x half> is
; <%vec1[3], %vec1[2], %vec1[0]>. No mask index refers to %vec0, and the
; assertions below expect only one inline-asm def.
5262define void @v_shuffle_v3f16_v4f16__7_6_4(ptr addrspace(1) inreg %ptr) {
5263; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_4:
5264; GFX900:       ; %bb.0:
5265; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5266; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5267; GFX900-NEXT:    ;;#ASMSTART
5268; GFX900-NEXT:    ; def v[0:1]
5269; GFX900-NEXT:    ;;#ASMEND
5270; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
5271; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5272; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5273; GFX900-NEXT:    s_waitcnt vmcnt(0)
5274; GFX900-NEXT:    s_setpc_b64 s[30:31]
5275;
5276; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_4:
5277; GFX90A:       ; %bb.0:
5278; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5279; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5280; GFX90A-NEXT:    ;;#ASMSTART
5281; GFX90A-NEXT:    ; def v[0:1]
5282; GFX90A-NEXT:    ;;#ASMEND
5283; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v1, 16
5284; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5285; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5286; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5287; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5288;
5289; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_4:
5290; GFX940:       ; %bb.0:
5291; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5292; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5293; GFX940-NEXT:    ;;#ASMSTART
5294; GFX940-NEXT:    ; def v[0:1]
5295; GFX940-NEXT:    ;;#ASMEND
5296; GFX940-NEXT:    s_nop 0
5297; GFX940-NEXT:    v_alignbit_b32 v1, v1, v1, 16
5298; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5299; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5300; GFX940-NEXT:    s_waitcnt vmcnt(0)
5301; GFX940-NEXT:    s_setpc_b64 s[30:31]
5302  %vec0 = call <4 x half> asm "; def $0", "=v"()
5303  %vec1 = call <4 x half> asm "; def $0", "=v"()
5304  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
5305  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5306  ret void
5307}
5308
; Shuffle mask <poison, 5, 5>: lanes 1 and 2 are both element 1 of %vec1
; (mask index 5); lane 0 is unconstrained. %vec0 is never read and the
; expected output contains a single `; def`. The expected code stores the
; first dword of that register pair unchanged, then shifts it right by 16 and
; stores the resulting half again as the third lane at offset 4.
5309define void @v_shuffle_v3f16_v4f16__u_5_5(ptr addrspace(1) inreg %ptr) {
5310; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_5_5:
5311; GFX900:       ; %bb.0:
5312; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5313; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5314; GFX900-NEXT:    ;;#ASMSTART
5315; GFX900-NEXT:    ; def v[0:1]
5316; GFX900-NEXT:    ;;#ASMEND
5317; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
5318; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5319; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5320; GFX900-NEXT:    s_waitcnt vmcnt(0)
5321; GFX900-NEXT:    s_setpc_b64 s[30:31]
5322;
5323; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_5_5:
5324; GFX90A:       ; %bb.0:
5325; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5326; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5327; GFX90A-NEXT:    ;;#ASMSTART
5328; GFX90A-NEXT:    ; def v[0:1]
5329; GFX90A-NEXT:    ;;#ASMEND
5330; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
5331; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5332; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5333; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5334; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5335;
5336; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_5_5:
5337; GFX940:       ; %bb.0:
5338; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5339; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5340; GFX940-NEXT:    ;;#ASMSTART
5341; GFX940-NEXT:    ; def v[0:1]
5342; GFX940-NEXT:    ;;#ASMEND
5343; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
5344; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5345; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5346; GFX940-NEXT:    s_waitcnt vmcnt(0)
5347; GFX940-NEXT:    s_setpc_b64 s[30:31]
5348  %vec0 = call <4 x half> asm "; def $0", "=v"()
5349  %vec1 = call <4 x half> asm "; def $0", "=v"()
5350  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
5351  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5352  ret void
5353}
5354
; Shuffle mask <0, 5, 5>: lane 0 is element 0 of %vec0, lanes 1 and 2 are both
; element 1 of %vec1 (mask index 5). Both asm defs appear in the expected
; output. v_bfi_b32 with the 0xffff mask keeps the low half of the first def's
; first dword and the high half of the second def's first dword; the third
; lane is that second dword shifted right by 16 and stored at offset 4.
; Note the second def is v[1:2] in the gfx900 checks but v[2:3] in the
; gfx90a/gfx940 checks.
5355define void @v_shuffle_v3f16_v4f16__0_5_5(ptr addrspace(1) inreg %ptr) {
5356; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_5_5:
5357; GFX900:       ; %bb.0:
5358; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5359; GFX900-NEXT:    ;;#ASMSTART
5360; GFX900-NEXT:    ; def v[0:1]
5361; GFX900-NEXT:    ;;#ASMEND
5362; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5363; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5364; GFX900-NEXT:    ;;#ASMSTART
5365; GFX900-NEXT:    ; def v[1:2]
5366; GFX900-NEXT:    ;;#ASMEND
5367; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
5368; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5369; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
5370; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
5371; GFX900-NEXT:    s_waitcnt vmcnt(0)
5372; GFX900-NEXT:    s_setpc_b64 s[30:31]
5373;
5374; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_5_5:
5375; GFX90A:       ; %bb.0:
5376; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5377; GFX90A-NEXT:    ;;#ASMSTART
5378; GFX90A-NEXT:    ; def v[0:1]
5379; GFX90A-NEXT:    ;;#ASMEND
5380; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5381; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5382; GFX90A-NEXT:    ;;#ASMSTART
5383; GFX90A-NEXT:    ; def v[2:3]
5384; GFX90A-NEXT:    ;;#ASMEND
5385; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v2
5386; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5387; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
5388; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
5389; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5390; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5391;
5392; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_5_5:
5393; GFX940:       ; %bb.0:
5394; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5395; GFX940-NEXT:    ;;#ASMSTART
5396; GFX940-NEXT:    ; def v[0:1]
5397; GFX940-NEXT:    ;;#ASMEND
5398; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5399; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5400; GFX940-NEXT:    ;;#ASMSTART
5401; GFX940-NEXT:    ; def v[2:3]
5402; GFX940-NEXT:    ;;#ASMEND
5403; GFX940-NEXT:    s_nop 0
5404; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v2
5405; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5406; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
5407; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
5408; GFX940-NEXT:    s_waitcnt vmcnt(0)
5409; GFX940-NEXT:    s_setpc_b64 s[30:31]
5410  %vec0 = call <4 x half> asm "; def $0", "=v"()
5411  %vec1 = call <4 x half> asm "; def $0", "=v"()
5412  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
5413  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5414  ret void
5415}
5416
; Shuffle mask <1, 5, 5>: lane 0 is element 1 of %vec0, lanes 1 and 2 are both
; element 1 of %vec1 (mask index 5). Both asm defs appear in the expected
; output. v_perm_b32 with selector 0x7060302 packs the high halves of the two
; defs' first dwords into one dword; the third lane is the second def's first
; dword shifted right by 16 and stored at offset 4.
; Note the second def is v[1:2] in the gfx900 checks but v[2:3] in the
; gfx90a/gfx940 checks.
5417define void @v_shuffle_v3f16_v4f16__1_5_5(ptr addrspace(1) inreg %ptr) {
5418; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_5_5:
5419; GFX900:       ; %bb.0:
5420; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5421; GFX900-NEXT:    ;;#ASMSTART
5422; GFX900-NEXT:    ; def v[0:1]
5423; GFX900-NEXT:    ;;#ASMEND
5424; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5425; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5426; GFX900-NEXT:    ;;#ASMSTART
5427; GFX900-NEXT:    ; def v[1:2]
5428; GFX900-NEXT:    ;;#ASMEND
5429; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
5430; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5431; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
5432; GFX900-NEXT:    global_store_short v3, v0, s[16:17] offset:4
5433; GFX900-NEXT:    s_waitcnt vmcnt(0)
5434; GFX900-NEXT:    s_setpc_b64 s[30:31]
5435;
5436; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_5_5:
5437; GFX90A:       ; %bb.0:
5438; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5439; GFX90A-NEXT:    ;;#ASMSTART
5440; GFX90A-NEXT:    ; def v[0:1]
5441; GFX90A-NEXT:    ;;#ASMEND
5442; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5443; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5444; GFX90A-NEXT:    ;;#ASMSTART
5445; GFX90A-NEXT:    ; def v[2:3]
5446; GFX90A-NEXT:    ;;#ASMEND
5447; GFX90A-NEXT:    v_perm_b32 v0, v2, v0, s4
5448; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5449; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
5450; GFX90A-NEXT:    global_store_short v4, v0, s[16:17] offset:4
5451; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5452; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5453;
5454; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_5_5:
5455; GFX940:       ; %bb.0:
5456; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5457; GFX940-NEXT:    ;;#ASMSTART
5458; GFX940-NEXT:    ; def v[0:1]
5459; GFX940-NEXT:    ;;#ASMEND
5460; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5461; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5462; GFX940-NEXT:    ;;#ASMSTART
5463; GFX940-NEXT:    ; def v[2:3]
5464; GFX940-NEXT:    ;;#ASMEND
5465; GFX940-NEXT:    s_nop 0
5466; GFX940-NEXT:    v_perm_b32 v0, v2, v0, s2
5467; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5468; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v2
5469; GFX940-NEXT:    global_store_short v4, v0, s[0:1] offset:4 sc0 sc1
5470; GFX940-NEXT:    s_waitcnt vmcnt(0)
5471; GFX940-NEXT:    s_setpc_b64 s[30:31]
5472  %vec0 = call <4 x half> asm "; def $0", "=v"()
5473  %vec1 = call <4 x half> asm "; def $0", "=v"()
5474  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
5475  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5476  ret void
5477}
5478
; Shuffle mask <2, 5, 5>: lane 0 is element 2 of %vec0, lanes 1 and 2 are both
; element 1 of %vec1 (mask index 5). Both asm defs appear in the expected
; output as v[0:1] and v[2:3]. v_bfi_b32 with the 0xffff mask keeps the low
; half of v1 (second dword of the first def) and the high half of v2 (first
; dword of the second def); the third lane is v2 shifted right by 16 and
; stored at offset 4.
5479define void @v_shuffle_v3f16_v4f16__2_5_5(ptr addrspace(1) inreg %ptr) {
5480; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_5_5:
5481; GFX900:       ; %bb.0:
5482; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5483; GFX900-NEXT:    ;;#ASMSTART
5484; GFX900-NEXT:    ; def v[0:1]
5485; GFX900-NEXT:    ;;#ASMEND
5486; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5487; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5488; GFX900-NEXT:    ;;#ASMSTART
5489; GFX900-NEXT:    ; def v[2:3]
5490; GFX900-NEXT:    ;;#ASMEND
5491; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v2
5492; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5493; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
5494; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5495; GFX900-NEXT:    s_waitcnt vmcnt(0)
5496; GFX900-NEXT:    s_setpc_b64 s[30:31]
5497;
5498; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_5_5:
5499; GFX90A:       ; %bb.0:
5500; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5501; GFX90A-NEXT:    ;;#ASMSTART
5502; GFX90A-NEXT:    ; def v[0:1]
5503; GFX90A-NEXT:    ;;#ASMEND
5504; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5505; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5506; GFX90A-NEXT:    ;;#ASMSTART
5507; GFX90A-NEXT:    ; def v[2:3]
5508; GFX90A-NEXT:    ;;#ASMEND
5509; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v2
5510; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5511; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
5512; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5513; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5514; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5515;
5516; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_5_5:
5517; GFX940:       ; %bb.0:
5518; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5519; GFX940-NEXT:    ;;#ASMSTART
5520; GFX940-NEXT:    ; def v[0:1]
5521; GFX940-NEXT:    ;;#ASMEND
5522; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5523; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5524; GFX940-NEXT:    ;;#ASMSTART
5525; GFX940-NEXT:    ; def v[2:3]
5526; GFX940-NEXT:    ;;#ASMEND
5527; GFX940-NEXT:    s_nop 0
5528; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v2
5529; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5530; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
5531; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5532; GFX940-NEXT:    s_waitcnt vmcnt(0)
5533; GFX940-NEXT:    s_setpc_b64 s[30:31]
5534  %vec0 = call <4 x half> asm "; def $0", "=v"()
5535  %vec1 = call <4 x half> asm "; def $0", "=v"()
5536  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
5537  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5538  ret void
5539}
5540
; Shuffle mask <3, 5, 5>: lane 0 is element 3 of %vec0, lanes 1 and 2 are both
; element 1 of %vec1 (mask index 5). Both asm defs appear in the expected
; output as v[0:1] and v[2:3]. v_perm_b32 with selector 0x7060302 packs the
; high half of v1 (second dword of the first def) with the high half of v2
; (first dword of the second def); the third lane is v2 shifted right by 16
; and stored at offset 4.
5541define void @v_shuffle_v3f16_v4f16__3_5_5(ptr addrspace(1) inreg %ptr) {
5542; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_5_5:
5543; GFX900:       ; %bb.0:
5544; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5545; GFX900-NEXT:    ;;#ASMSTART
5546; GFX900-NEXT:    ; def v[0:1]
5547; GFX900-NEXT:    ;;#ASMEND
5548; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5549; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5550; GFX900-NEXT:    ;;#ASMSTART
5551; GFX900-NEXT:    ; def v[2:3]
5552; GFX900-NEXT:    ;;#ASMEND
5553; GFX900-NEXT:    v_perm_b32 v0, v2, v1, s4
5554; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5555; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5556; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
5557; GFX900-NEXT:    s_waitcnt vmcnt(0)
5558; GFX900-NEXT:    s_setpc_b64 s[30:31]
5559;
5560; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_5_5:
5561; GFX90A:       ; %bb.0:
5562; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5563; GFX90A-NEXT:    ;;#ASMSTART
5564; GFX90A-NEXT:    ; def v[0:1]
5565; GFX90A-NEXT:    ;;#ASMEND
5566; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5567; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5568; GFX90A-NEXT:    ;;#ASMSTART
5569; GFX90A-NEXT:    ; def v[2:3]
5570; GFX90A-NEXT:    ;;#ASMEND
5571; GFX90A-NEXT:    v_perm_b32 v0, v2, v1, s4
5572; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5573; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5574; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
5575; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5576; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5577;
5578; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_5_5:
5579; GFX940:       ; %bb.0:
5580; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5581; GFX940-NEXT:    ;;#ASMSTART
5582; GFX940-NEXT:    ; def v[0:1]
5583; GFX940-NEXT:    ;;#ASMEND
5584; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5585; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5586; GFX940-NEXT:    ;;#ASMSTART
5587; GFX940-NEXT:    ; def v[2:3]
5588; GFX940-NEXT:    ;;#ASMEND
5589; GFX940-NEXT:    s_nop 0
5590; GFX940-NEXT:    v_perm_b32 v0, v2, v1, s2
5591; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
5592; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5593; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
5594; GFX940-NEXT:    s_waitcnt vmcnt(0)
5595; GFX940-NEXT:    s_setpc_b64 s[30:31]
5596  %vec0 = call <4 x half> asm "; def $0", "=v"()
5597  %vec1 = call <4 x half> asm "; def $0", "=v"()
5598  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
5599  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5600  ret void
5601}
5602
; Shuffle mask <4, 5, 5>: all three lanes come from %vec1 (mask indices 4-7
; select from the second shufflevector operand), i.e. its elements 0, 1 and 1;
; %vec0 is never read and the expected output contains a single `; def`.
; The first two lanes are already adjacent, so the expected code stores the
; first dword unchanged, then shifts it right by 16 and stores the resulting
; half as the third lane at offset 4.
5603define void @v_shuffle_v3f16_v4f16__4_5_5(ptr addrspace(1) inreg %ptr) {
5604; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_5_5:
5605; GFX900:       ; %bb.0:
5606; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5607; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5608; GFX900-NEXT:    ;;#ASMSTART
5609; GFX900-NEXT:    ; def v[0:1]
5610; GFX900-NEXT:    ;;#ASMEND
5611; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
5612; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5613; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5614; GFX900-NEXT:    s_waitcnt vmcnt(0)
5615; GFX900-NEXT:    s_setpc_b64 s[30:31]
5616;
5617; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_5_5:
5618; GFX90A:       ; %bb.0:
5619; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5620; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5621; GFX90A-NEXT:    ;;#ASMSTART
5622; GFX90A-NEXT:    ; def v[0:1]
5623; GFX90A-NEXT:    ;;#ASMEND
5624; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
5625; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5626; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5627; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5628; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5629;
5630; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_5_5:
5631; GFX940:       ; %bb.0:
5632; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5633; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5634; GFX940-NEXT:    ;;#ASMSTART
5635; GFX940-NEXT:    ; def v[0:1]
5636; GFX940-NEXT:    ;;#ASMEND
5637; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
5638; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5639; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5640; GFX940-NEXT:    s_waitcnt vmcnt(0)
5641; GFX940-NEXT:    s_setpc_b64 s[30:31]
5642  %vec0 = call <4 x half> asm "; def $0", "=v"()
5643  %vec1 = call <4 x half> asm "; def $0", "=v"()
5644  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
5645  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5646  ret void
5647}
5648
; Shuffle mask <5, 5, 5>: a splat of element 1 of %vec1 (mask index 5) into
; all three lanes; %vec0 is never read and the expected output contains a
; single `; def`. v_perm_b32 with selector 0x7060302 and v0 as both sources
; duplicates the high half of v0 into both halves of the stored dword; the
; third lane is v0 shifted right by 16 and stored at offset 4.
5649define void @v_shuffle_v3f16_v4f16__5_5_5(ptr addrspace(1) inreg %ptr) {
5650; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_5_5:
5651; GFX900:       ; %bb.0:
5652; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5653; GFX900-NEXT:    ;;#ASMSTART
5654; GFX900-NEXT:    ; def v[0:1]
5655; GFX900-NEXT:    ;;#ASMEND
5656; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5657; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5658; GFX900-NEXT:    v_perm_b32 v1, v0, v0, s4
5659; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5660; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5661; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5662; GFX900-NEXT:    s_waitcnt vmcnt(0)
5663; GFX900-NEXT:    s_setpc_b64 s[30:31]
5664;
5665; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_5_5:
5666; GFX90A:       ; %bb.0:
5667; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5668; GFX90A-NEXT:    ;;#ASMSTART
5669; GFX90A-NEXT:    ; def v[0:1]
5670; GFX90A-NEXT:    ;;#ASMEND
5671; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5672; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5673; GFX90A-NEXT:    v_perm_b32 v1, v0, v0, s4
5674; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5675; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5676; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5677; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5678; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5679;
5680; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_5_5:
5681; GFX940:       ; %bb.0:
5682; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5683; GFX940-NEXT:    ;;#ASMSTART
5684; GFX940-NEXT:    ; def v[0:1]
5685; GFX940-NEXT:    ;;#ASMEND
5686; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5687; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5688; GFX940-NEXT:    v_perm_b32 v1, v0, v0, s2
5689; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5690; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5691; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5692; GFX940-NEXT:    s_waitcnt vmcnt(0)
5693; GFX940-NEXT:    s_setpc_b64 s[30:31]
5694  %vec0 = call <4 x half> asm "; def $0", "=v"()
5695  %vec1 = call <4 x half> asm "; def $0", "=v"()
5696  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
5697  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5698  ret void
5699}
5700
; Shuffle mask <6, 5, 5>: all three lanes come from %vec1 (mask indices 4-7
; select from the second shufflevector operand), i.e. its elements 2, 1 and 1;
; %vec0 is never read and the expected output contains a single `; def`.
; v_bfi_b32 with the 0xffff mask keeps the low half of v1 and the high half
; of v0; the third lane is v0 shifted right by 16 and stored at offset 4.
5701define void @v_shuffle_v3f16_v4f16__6_5_5(ptr addrspace(1) inreg %ptr) {
5702; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_5_5:
5703; GFX900:       ; %bb.0:
5704; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5705; GFX900-NEXT:    ;;#ASMSTART
5706; GFX900-NEXT:    ; def v[0:1]
5707; GFX900-NEXT:    ;;#ASMEND
5708; GFX900-NEXT:    s_mov_b32 s4, 0xffff
5709; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5710; GFX900-NEXT:    v_bfi_b32 v1, s4, v1, v0
5711; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5712; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5713; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5714; GFX900-NEXT:    s_waitcnt vmcnt(0)
5715; GFX900-NEXT:    s_setpc_b64 s[30:31]
5716;
5717; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_5_5:
5718; GFX90A:       ; %bb.0:
5719; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5720; GFX90A-NEXT:    ;;#ASMSTART
5721; GFX90A-NEXT:    ; def v[0:1]
5722; GFX90A-NEXT:    ;;#ASMEND
5723; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
5724; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5725; GFX90A-NEXT:    v_bfi_b32 v1, s4, v1, v0
5726; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5727; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5728; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5729; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5730; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5731;
5732; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_5_5:
5733; GFX940:       ; %bb.0:
5734; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5735; GFX940-NEXT:    ;;#ASMSTART
5736; GFX940-NEXT:    ; def v[0:1]
5737; GFX940-NEXT:    ;;#ASMEND
5738; GFX940-NEXT:    s_mov_b32 s2, 0xffff
5739; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5740; GFX940-NEXT:    v_bfi_b32 v1, s2, v1, v0
5741; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5742; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5743; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5744; GFX940-NEXT:    s_waitcnt vmcnt(0)
5745; GFX940-NEXT:    s_setpc_b64 s[30:31]
5746  %vec0 = call <4 x half> asm "; def $0", "=v"()
5747  %vec1 = call <4 x half> asm "; def $0", "=v"()
5748  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
5749  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5750  ret void
5751}
5752
; Shuffle mask <7, 5, 5>: all three lanes come from %vec1 (mask indices 4-7
; select from the second shufflevector operand), i.e. its elements 3, 1 and 1;
; %vec0 is never read and the expected output contains a single `; def`.
; v_perm_b32 with selector 0x7060302 packs the high half of v1 with the high
; half of v0; the third lane is v0 shifted right by 16 and stored at offset 4.
5753define void @v_shuffle_v3f16_v4f16__7_5_5(ptr addrspace(1) inreg %ptr) {
5754; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_5:
5755; GFX900:       ; %bb.0:
5756; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5757; GFX900-NEXT:    ;;#ASMSTART
5758; GFX900-NEXT:    ; def v[0:1]
5759; GFX900-NEXT:    ;;#ASMEND
5760; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5761; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5762; GFX900-NEXT:    v_perm_b32 v1, v0, v1, s4
5763; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5764; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5765; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5766; GFX900-NEXT:    s_waitcnt vmcnt(0)
5767; GFX900-NEXT:    s_setpc_b64 s[30:31]
5768;
5769; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_5:
5770; GFX90A:       ; %bb.0:
5771; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5772; GFX90A-NEXT:    ;;#ASMSTART
5773; GFX90A-NEXT:    ; def v[0:1]
5774; GFX90A-NEXT:    ;;#ASMEND
5775; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5776; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5777; GFX90A-NEXT:    v_perm_b32 v1, v0, v1, s4
5778; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5779; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5780; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
5781; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5782; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5783;
5784; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_5:
5785; GFX940:       ; %bb.0:
5786; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5787; GFX940-NEXT:    ;;#ASMSTART
5788; GFX940-NEXT:    ; def v[0:1]
5789; GFX940-NEXT:    ;;#ASMEND
5790; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5791; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5792; GFX940-NEXT:    v_perm_b32 v1, v0, v1, s2
5793; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
5794; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5795; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
5796; GFX940-NEXT:    s_waitcnt vmcnt(0)
5797; GFX940-NEXT:    s_setpc_b64 s[30:31]
5798  %vec0 = call <4 x half> asm "; def $0", "=v"()
5799  %vec1 = call <4 x half> asm "; def $0", "=v"()
5800  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
5801  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5802  ret void
5803}
5804
; Shuffle mask <7, poison, 5>: lane 0 is element 3 of %vec1, lane 2 is
; element 1 of %vec1, and lane 1 is unconstrained; %vec0 is never read and the
; expected output contains a single `; def`. v_alignbit_b32 moves the high
; half of v1 into the low half of the stored dword; its SGPR operand (s4, or
; s0 in the gfx940 checks) is not written anywhere in the expected sequence
; and only supplies the poison lane. The third lane is stored straight from
; the high half of v0 with global_store_short_d16_hi.
5805define void @v_shuffle_v3f16_v4f16__7_u_5(ptr addrspace(1) inreg %ptr) {
5806; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_5:
5807; GFX900:       ; %bb.0:
5808; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5809; GFX900-NEXT:    v_mov_b32_e32 v2, 0
5810; GFX900-NEXT:    ;;#ASMSTART
5811; GFX900-NEXT:    ; def v[0:1]
5812; GFX900-NEXT:    ;;#ASMEND
5813; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
5814; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
5815; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
5816; GFX900-NEXT:    s_waitcnt vmcnt(0)
5817; GFX900-NEXT:    s_setpc_b64 s[30:31]
5818;
5819; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_5:
5820; GFX90A:       ; %bb.0:
5821; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5822; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
5823; GFX90A-NEXT:    ;;#ASMSTART
5824; GFX90A-NEXT:    ; def v[0:1]
5825; GFX90A-NEXT:    ;;#ASMEND
5826; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v1, 16
5827; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
5828; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
5829; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5830; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5831;
5832; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_5:
5833; GFX940:       ; %bb.0:
5834; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5835; GFX940-NEXT:    v_mov_b32_e32 v2, 0
5836; GFX940-NEXT:    ;;#ASMSTART
5837; GFX940-NEXT:    ; def v[0:1]
5838; GFX940-NEXT:    ;;#ASMEND
5839; GFX940-NEXT:    s_nop 0
5840; GFX940-NEXT:    v_alignbit_b32 v1, s0, v1, 16
5841; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
5842; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
5843; GFX940-NEXT:    s_waitcnt vmcnt(0)
5844; GFX940-NEXT:    s_setpc_b64 s[30:31]
5845  %vec0 = call <4 x half> asm "; def $0", "=v"()
5846  %vec1 = call <4 x half> asm "; def $0", "=v"()
5847  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
5848  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5849  ret void
5850}
5851
; Shuffle mask <7, 0, 5>: lane 0 is element 3 of %vec1, lane 1 is element 0 of
; %vec0, lane 2 is element 1 of %vec1. Both asm defs appear in the expected
; output. v_alignbit_b32 by 16 places the high half of the second def's second
; dword below the low half of the first def's first dword; the third lane is
; stored straight from the high half of the second def's first dword with
; global_store_short_d16_hi.
; Note the second def is v[1:2] in the gfx900 checks but v[2:3] in the
; gfx90a/gfx940 checks.
5852define void @v_shuffle_v3f16_v4f16__7_0_5(ptr addrspace(1) inreg %ptr) {
5853; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_5:
5854; GFX900:       ; %bb.0:
5855; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5856; GFX900-NEXT:    ;;#ASMSTART
5857; GFX900-NEXT:    ; def v[0:1]
5858; GFX900-NEXT:    ;;#ASMEND
5859; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5860; GFX900-NEXT:    ;;#ASMSTART
5861; GFX900-NEXT:    ; def v[1:2]
5862; GFX900-NEXT:    ;;#ASMEND
5863; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
5864; GFX900-NEXT:    global_store_short_d16_hi v3, v1, s[16:17] offset:4
5865; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5866; GFX900-NEXT:    s_waitcnt vmcnt(0)
5867; GFX900-NEXT:    s_setpc_b64 s[30:31]
5868;
5869; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_5:
5870; GFX90A:       ; %bb.0:
5871; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5872; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5873; GFX90A-NEXT:    ;;#ASMSTART
5874; GFX90A-NEXT:    ; def v[0:1]
5875; GFX90A-NEXT:    ;;#ASMEND
5876; GFX90A-NEXT:    ;;#ASMSTART
5877; GFX90A-NEXT:    ; def v[2:3]
5878; GFX90A-NEXT:    ;;#ASMEND
5879; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
5880; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
5881; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5882; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5883; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5884;
5885; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_5:
5886; GFX940:       ; %bb.0:
5887; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5888; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5889; GFX940-NEXT:    ;;#ASMSTART
5890; GFX940-NEXT:    ; def v[0:1]
5891; GFX940-NEXT:    ;;#ASMEND
5892; GFX940-NEXT:    ;;#ASMSTART
5893; GFX940-NEXT:    ; def v[2:3]
5894; GFX940-NEXT:    ;;#ASMEND
5895; GFX940-NEXT:    s_nop 0
5896; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
5897; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
5898; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5899; GFX940-NEXT:    s_waitcnt vmcnt(0)
5900; GFX940-NEXT:    s_setpc_b64 s[30:31]
5901  %vec0 = call <4 x half> asm "; def $0", "=v"()
5902  %vec1 = call <4 x half> asm "; def $0", "=v"()
5903  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
5904  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5905  ret void
5906}
5907
; Shuffle mask <7, 1, 5>: lane 0 is element 3 of %vec1, lane 1 is element 1 of
; %vec0, lane 2 is element 1 of %vec1. Both asm defs appear in the expected
; output. v_perm_b32 with selector 0x7060302 packs the high half of the second
; def's second dword with the high half of the first def's first dword; the
; third lane is stored straight from the high half of the second def's first
; dword with global_store_short_d16_hi.
; Note the second def is v[1:2] in the gfx900 checks but v[2:3] in the
; gfx90a/gfx940 checks.
5908define void @v_shuffle_v3f16_v4f16__7_1_5(ptr addrspace(1) inreg %ptr) {
5909; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_5:
5910; GFX900:       ; %bb.0:
5911; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5912; GFX900-NEXT:    ;;#ASMSTART
5913; GFX900-NEXT:    ; def v[0:1]
5914; GFX900-NEXT:    ;;#ASMEND
5915; GFX900-NEXT:    v_mov_b32_e32 v3, 0
5916; GFX900-NEXT:    ;;#ASMSTART
5917; GFX900-NEXT:    ; def v[1:2]
5918; GFX900-NEXT:    ;;#ASMEND
5919; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
5920; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
5921; GFX900-NEXT:    global_store_short_d16_hi v3, v1, s[16:17] offset:4
5922; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
5923; GFX900-NEXT:    s_waitcnt vmcnt(0)
5924; GFX900-NEXT:    s_setpc_b64 s[30:31]
5925;
5926; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_5:
5927; GFX90A:       ; %bb.0:
5928; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5929; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5930; GFX90A-NEXT:    ;;#ASMSTART
5931; GFX90A-NEXT:    ; def v[0:1]
5932; GFX90A-NEXT:    ;;#ASMEND
5933; GFX90A-NEXT:    ;;#ASMSTART
5934; GFX90A-NEXT:    ; def v[2:3]
5935; GFX90A-NEXT:    ;;#ASMEND
5936; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
5937; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
5938; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
5939; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5940; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5941; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5942;
5943; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_5:
5944; GFX940:       ; %bb.0:
5945; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5946; GFX940-NEXT:    v_mov_b32_e32 v4, 0
5947; GFX940-NEXT:    ;;#ASMSTART
5948; GFX940-NEXT:    ; def v[0:1]
5949; GFX940-NEXT:    ;;#ASMEND
5950; GFX940-NEXT:    ;;#ASMSTART
5951; GFX940-NEXT:    ; def v[2:3]
5952; GFX940-NEXT:    ;;#ASMEND
5953; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
5954; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
5955; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
5956; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
5957; GFX940-NEXT:    s_waitcnt vmcnt(0)
5958; GFX940-NEXT:    s_setpc_b64 s[30:31]
5959  %vec0 = call <4 x half> asm "; def $0", "=v"()
5960  %vec1 = call <4 x half> asm "; def $0", "=v"()
5961  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
5962  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
5963  ret void
5964}
5965
; Shuffle mask <7, 2, 5>: lane 0 is element 3 of %vec1, lane 1 is element 2 of
; %vec0, lane 2 is element 1 of %vec1. Both asm defs appear in the expected
; output as v[0:1] and v[2:3]. v_alignbit_b32 by 16 places the high half of v3
; (second dword of the second def) below the low half of v1 (second dword of
; the first def); the third lane is stored straight from the high half of v2
; with global_store_short_d16_hi.
5966define void @v_shuffle_v3f16_v4f16__7_2_5(ptr addrspace(1) inreg %ptr) {
5967; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_5:
5968; GFX900:       ; %bb.0:
5969; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5970; GFX900-NEXT:    v_mov_b32_e32 v4, 0
5971; GFX900-NEXT:    ;;#ASMSTART
5972; GFX900-NEXT:    ; def v[0:1]
5973; GFX900-NEXT:    ;;#ASMEND
5974; GFX900-NEXT:    ;;#ASMSTART
5975; GFX900-NEXT:    ; def v[2:3]
5976; GFX900-NEXT:    ;;#ASMEND
5977; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5978; GFX900-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
5979; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
5980; GFX900-NEXT:    s_waitcnt vmcnt(0)
5981; GFX900-NEXT:    s_setpc_b64 s[30:31]
5982;
5983; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_5:
5984; GFX90A:       ; %bb.0:
5985; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5986; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
5987; GFX90A-NEXT:    ;;#ASMSTART
5988; GFX90A-NEXT:    ; def v[0:1]
5989; GFX90A-NEXT:    ;;#ASMEND
5990; GFX90A-NEXT:    ;;#ASMSTART
5991; GFX90A-NEXT:    ; def v[2:3]
5992; GFX90A-NEXT:    ;;#ASMEND
5993; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
5994; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
5995; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
5996; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5997; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5998;
5999; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_5:
6000; GFX940:       ; %bb.0:
6001; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6002; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6003; GFX940-NEXT:    ;;#ASMSTART
6004; GFX940-NEXT:    ; def v[0:1]
6005; GFX940-NEXT:    ;;#ASMEND
6006; GFX940-NEXT:    ;;#ASMSTART
6007; GFX940-NEXT:    ; def v[2:3]
6008; GFX940-NEXT:    ;;#ASMEND
6009; GFX940-NEXT:    s_nop 0
6010; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
6011; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
6012; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6013; GFX940-NEXT:    s_waitcnt vmcnt(0)
6014; GFX940-NEXT:    s_setpc_b64 s[30:31]
6015  %vec0 = call <4 x half> asm "; def $0", "=v"()
6016  %vec1 = call <4 x half> asm "; def $0", "=v"()
6017  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
6018  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6019  ret void
6020}
6021
; Shuffle mask <7, 3, 5>: result lane 0 = %vec1[3], lane 1 = %vec0[3],
; lane 2 = %vec1[1] (mask indices 0-3 select from %vec0, 4-7 from %vec1).
; The checks below expect lanes 0 and 1 to be combined by v_perm_b32 with
; the selector 0x7060302 held in an SGPR, and lane 2 to be written with
; global_store_short_d16_hi at offset 4.
6022define void @v_shuffle_v3f16_v4f16__7_3_5(ptr addrspace(1) inreg %ptr) {
6023; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_5:
6024; GFX900:       ; %bb.0:
6025; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6026; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6027; GFX900-NEXT:    ;;#ASMSTART
6028; GFX900-NEXT:    ; def v[0:1]
6029; GFX900-NEXT:    ;;#ASMEND
6030; GFX900-NEXT:    ;;#ASMSTART
6031; GFX900-NEXT:    ; def v[2:3]
6032; GFX900-NEXT:    ;;#ASMEND
6033; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6034; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
6035; GFX900-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
6036; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6037; GFX900-NEXT:    s_waitcnt vmcnt(0)
6038; GFX900-NEXT:    s_setpc_b64 s[30:31]
6039;
6040; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_5:
6041; GFX90A:       ; %bb.0:
6042; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6043; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6044; GFX90A-NEXT:    ;;#ASMSTART
6045; GFX90A-NEXT:    ; def v[0:1]
6046; GFX90A-NEXT:    ;;#ASMEND
6047; GFX90A-NEXT:    ;;#ASMSTART
6048; GFX90A-NEXT:    ; def v[2:3]
6049; GFX90A-NEXT:    ;;#ASMEND
6050; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6051; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
6052; GFX90A-NEXT:    global_store_short_d16_hi v4, v2, s[16:17] offset:4
6053; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6054; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6055; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6056;
6057; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_5:
6058; GFX940:       ; %bb.0:
6059; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6060; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6061; GFX940-NEXT:    ;;#ASMSTART
6062; GFX940-NEXT:    ; def v[0:1]
6063; GFX940-NEXT:    ;;#ASMEND
6064; GFX940-NEXT:    ;;#ASMSTART
6065; GFX940-NEXT:    ; def v[2:3]
6066; GFX940-NEXT:    ;;#ASMEND
6067; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6068; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
6069; GFX940-NEXT:    global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1
6070; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6071; GFX940-NEXT:    s_waitcnt vmcnt(0)
6072; GFX940-NEXT:    s_setpc_b64 s[30:31]
6073  %vec0 = call <4 x half> asm "; def $0", "=v"()
6074  %vec1 = call <4 x half> asm "; def $0", "=v"()
6075  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
6076  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6077  ret void
6078}
6079
; Shuffle mask <7, 4, 5>: result lane 0 = %vec1[3], lane 1 = %vec1[0],
; lane 2 = %vec1[1]. Every mask index is in the 4-7 range, so only %vec1 is
; read; the checks below accordingly contain a single "; def v[0:1]" marker.
; Lanes 0 and 1 are expected to be packed by v_alignbit_b32 and lane 2 to be
; written with global_store_short_d16_hi at offset 4.
6080define void @v_shuffle_v3f16_v4f16__7_4_5(ptr addrspace(1) inreg %ptr) {
6081; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_5:
6082; GFX900:       ; %bb.0:
6083; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6084; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6085; GFX900-NEXT:    ;;#ASMSTART
6086; GFX900-NEXT:    ; def v[0:1]
6087; GFX900-NEXT:    ;;#ASMEND
6088; GFX900-NEXT:    v_alignbit_b32 v1, v0, v1, 16
6089; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
6090; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
6091; GFX900-NEXT:    s_waitcnt vmcnt(0)
6092; GFX900-NEXT:    s_setpc_b64 s[30:31]
6093;
6094; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_5:
6095; GFX90A:       ; %bb.0:
6096; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6097; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6098; GFX90A-NEXT:    ;;#ASMSTART
6099; GFX90A-NEXT:    ; def v[0:1]
6100; GFX90A-NEXT:    ;;#ASMEND
6101; GFX90A-NEXT:    v_alignbit_b32 v1, v0, v1, 16
6102; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
6103; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
6104; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6105; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6106;
6107; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_5:
6108; GFX940:       ; %bb.0:
6109; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6110; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6111; GFX940-NEXT:    ;;#ASMSTART
6112; GFX940-NEXT:    ; def v[0:1]
6113; GFX940-NEXT:    ;;#ASMEND
6114; GFX940-NEXT:    s_nop 0
6115; GFX940-NEXT:    v_alignbit_b32 v1, v0, v1, 16
6116; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
6117; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
6118; GFX940-NEXT:    s_waitcnt vmcnt(0)
6119; GFX940-NEXT:    s_setpc_b64 s[30:31]
6120  %vec0 = call <4 x half> asm "; def $0", "=v"()
6121  %vec1 = call <4 x half> asm "; def $0", "=v"()
6122  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
6123  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6124  ret void
6125}
6126
; Shuffle mask <7, 6, 5>: result lane 0 = %vec1[3], lane 1 = %vec1[2],
; lane 2 = %vec1[1]. Every mask index is in the 4-7 range, so only %vec1 is
; read; the checks below accordingly contain a single "; def v[0:1]" marker.
; Lanes 0 and 1 are expected to come from v_alignbit_b32 with the same VGPR
; as both sources, and lane 2 from global_store_short_d16_hi at offset 4.
6127define void @v_shuffle_v3f16_v4f16__7_6_5(ptr addrspace(1) inreg %ptr) {
6128; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_5:
6129; GFX900:       ; %bb.0:
6130; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6131; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6132; GFX900-NEXT:    ;;#ASMSTART
6133; GFX900-NEXT:    ; def v[0:1]
6134; GFX900-NEXT:    ;;#ASMEND
6135; GFX900-NEXT:    v_alignbit_b32 v1, v1, v1, 16
6136; GFX900-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
6137; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
6138; GFX900-NEXT:    s_waitcnt vmcnt(0)
6139; GFX900-NEXT:    s_setpc_b64 s[30:31]
6140;
6141; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_5:
6142; GFX90A:       ; %bb.0:
6143; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6144; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6145; GFX90A-NEXT:    ;;#ASMSTART
6146; GFX90A-NEXT:    ; def v[0:1]
6147; GFX90A-NEXT:    ;;#ASMEND
6148; GFX90A-NEXT:    v_alignbit_b32 v1, v1, v1, 16
6149; GFX90A-NEXT:    global_store_short_d16_hi v2, v0, s[16:17] offset:4
6150; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
6151; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6152; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6153;
6154; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_5:
6155; GFX940:       ; %bb.0:
6156; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6157; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6158; GFX940-NEXT:    ;;#ASMSTART
6159; GFX940-NEXT:    ; def v[0:1]
6160; GFX940-NEXT:    ;;#ASMEND
6161; GFX940-NEXT:    s_nop 0
6162; GFX940-NEXT:    v_alignbit_b32 v1, v1, v1, 16
6163; GFX940-NEXT:    global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1
6164; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
6165; GFX940-NEXT:    s_waitcnt vmcnt(0)
6166; GFX940-NEXT:    s_setpc_b64 s[30:31]
6167  %vec0 = call <4 x half> asm "; def $0", "=v"()
6168  %vec1 = call <4 x half> asm "; def $0", "=v"()
6169  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
6170  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6171  ret void
6172}
6173
; Shuffle mask <poison, 6, 6>: result lane 0 is poison; lanes 1 and 2 are
; both %vec1[2]. Only %vec1 is read, and the checks below contain a single
; "; def v[0:1]" marker. The expected code builds the stored dword with
; v_lshlrev_b32 by 16 and writes lane 2 with global_store_short at offset 4.
6174define void @v_shuffle_v3f16_v4f16__u_6_6(ptr addrspace(1) inreg %ptr) {
6175; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_6_6:
6176; GFX900:       ; %bb.0:
6177; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6178; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6179; GFX900-NEXT:    ;;#ASMSTART
6180; GFX900-NEXT:    ; def v[0:1]
6181; GFX900-NEXT:    ;;#ASMEND
6182; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
6183; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6184; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6185; GFX900-NEXT:    s_waitcnt vmcnt(0)
6186; GFX900-NEXT:    s_setpc_b64 s[30:31]
6187;
6188; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_6_6:
6189; GFX90A:       ; %bb.0:
6190; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6191; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6192; GFX90A-NEXT:    ;;#ASMSTART
6193; GFX90A-NEXT:    ; def v[0:1]
6194; GFX90A-NEXT:    ;;#ASMEND
6195; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
6196; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6197; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6198; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6199; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6200;
6201; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_6_6:
6202; GFX940:       ; %bb.0:
6203; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6204; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6205; GFX940-NEXT:    ;;#ASMSTART
6206; GFX940-NEXT:    ; def v[0:1]
6207; GFX940-NEXT:    ;;#ASMEND
6208; GFX940-NEXT:    s_nop 0
6209; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
6210; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6211; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6212; GFX940-NEXT:    s_waitcnt vmcnt(0)
6213; GFX940-NEXT:    s_setpc_b64 s[30:31]
6214  %vec0 = call <4 x half> asm "; def $0", "=v"()
6215  %vec1 = call <4 x half> asm "; def $0", "=v"()
6216  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
6217  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6218  ret void
6219}
6220
; Shuffle mask <0, 6, 6>: result lane 0 = %vec0[0], lanes 1 and 2 are both
; %vec1[2] (mask indices 0-3 select from %vec0, 4-7 from %vec1).
; The checks below expect lanes 0 and 1 to be combined by v_perm_b32 with
; the selector 0x5040100, and lane 2 to be written with global_store_short.
; Note that the gfx900 expectations place the second asm def in v[1:2],
; while the gfx90a and gfx940 expectations place it in v[2:3].
6221define void @v_shuffle_v3f16_v4f16__0_6_6(ptr addrspace(1) inreg %ptr) {
6222; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_6_6:
6223; GFX900:       ; %bb.0:
6224; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6225; GFX900-NEXT:    ;;#ASMSTART
6226; GFX900-NEXT:    ; def v[0:1]
6227; GFX900-NEXT:    ;;#ASMEND
6228; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6229; GFX900-NEXT:    ;;#ASMSTART
6230; GFX900-NEXT:    ; def v[1:2]
6231; GFX900-NEXT:    ;;#ASMEND
6232; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6233; GFX900-NEXT:    v_perm_b32 v0, v2, v0, s4
6234; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
6235; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
6236; GFX900-NEXT:    s_waitcnt vmcnt(0)
6237; GFX900-NEXT:    s_setpc_b64 s[30:31]
6238;
6239; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_6_6:
6240; GFX90A:       ; %bb.0:
6241; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6242; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6243; GFX90A-NEXT:    ;;#ASMSTART
6244; GFX90A-NEXT:    ; def v[0:1]
6245; GFX90A-NEXT:    ;;#ASMEND
6246; GFX90A-NEXT:    ;;#ASMSTART
6247; GFX90A-NEXT:    ; def v[2:3]
6248; GFX90A-NEXT:    ;;#ASMEND
6249; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6250; GFX90A-NEXT:    v_perm_b32 v0, v3, v0, s4
6251; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6252; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6253; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6254; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6255;
6256; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_6_6:
6257; GFX940:       ; %bb.0:
6258; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6259; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6260; GFX940-NEXT:    ;;#ASMSTART
6261; GFX940-NEXT:    ; def v[0:1]
6262; GFX940-NEXT:    ;;#ASMEND
6263; GFX940-NEXT:    ;;#ASMSTART
6264; GFX940-NEXT:    ; def v[2:3]
6265; GFX940-NEXT:    ;;#ASMEND
6266; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6267; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s2
6268; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6269; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6270; GFX940-NEXT:    s_waitcnt vmcnt(0)
6271; GFX940-NEXT:    s_setpc_b64 s[30:31]
6272  %vec0 = call <4 x half> asm "; def $0", "=v"()
6273  %vec1 = call <4 x half> asm "; def $0", "=v"()
6274  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
6275  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6276  ret void
6277}
6278
; Shuffle mask <1, 6, 6>: result lane 0 = %vec0[1], lanes 1 and 2 are both
; %vec1[2] (mask indices 0-3 select from %vec0, 4-7 from %vec1).
; The checks below expect lanes 0 and 1 to be packed by v_alignbit_b32 and
; lane 2 to be written with global_store_short at offset 4.
; Note that the gfx900 expectations place the second asm def in v[1:2],
; while the gfx90a and gfx940 expectations place it in v[2:3].
6279define void @v_shuffle_v3f16_v4f16__1_6_6(ptr addrspace(1) inreg %ptr) {
6280; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_6_6:
6281; GFX900:       ; %bb.0:
6282; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6283; GFX900-NEXT:    ;;#ASMSTART
6284; GFX900-NEXT:    ; def v[0:1]
6285; GFX900-NEXT:    ;;#ASMEND
6286; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6287; GFX900-NEXT:    ;;#ASMSTART
6288; GFX900-NEXT:    ; def v[1:2]
6289; GFX900-NEXT:    ;;#ASMEND
6290; GFX900-NEXT:    v_alignbit_b32 v0, v2, v0, 16
6291; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
6292; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
6293; GFX900-NEXT:    s_waitcnt vmcnt(0)
6294; GFX900-NEXT:    s_setpc_b64 s[30:31]
6295;
6296; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_6_6:
6297; GFX90A:       ; %bb.0:
6298; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6299; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6300; GFX90A-NEXT:    ;;#ASMSTART
6301; GFX90A-NEXT:    ; def v[0:1]
6302; GFX90A-NEXT:    ;;#ASMEND
6303; GFX90A-NEXT:    ;;#ASMSTART
6304; GFX90A-NEXT:    ; def v[2:3]
6305; GFX90A-NEXT:    ;;#ASMEND
6306; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v0, 16
6307; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6308; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6309; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6310; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6311;
6312; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_6_6:
6313; GFX940:       ; %bb.0:
6314; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6315; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6316; GFX940-NEXT:    ;;#ASMSTART
6317; GFX940-NEXT:    ; def v[0:1]
6318; GFX940-NEXT:    ;;#ASMEND
6319; GFX940-NEXT:    ;;#ASMSTART
6320; GFX940-NEXT:    ; def v[2:3]
6321; GFX940-NEXT:    ;;#ASMEND
6322; GFX940-NEXT:    s_nop 0
6323; GFX940-NEXT:    v_alignbit_b32 v0, v3, v0, 16
6324; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6325; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6326; GFX940-NEXT:    s_waitcnt vmcnt(0)
6327; GFX940-NEXT:    s_setpc_b64 s[30:31]
6328  %vec0 = call <4 x half> asm "; def $0", "=v"()
6329  %vec1 = call <4 x half> asm "; def $0", "=v"()
6330  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
6331  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6332  ret void
6333}
6334
; Shuffle mask <2, 6, 6>: result lane 0 = %vec0[2], lanes 1 and 2 are both
; %vec1[2] (mask indices 0-3 select from %vec0, 4-7 from %vec1).
; The checks below expect lanes 0 and 1 to be combined by v_perm_b32 with
; the selector 0x5040100, and lane 2 to be written with global_store_short
; at offset 4.
6335define void @v_shuffle_v3f16_v4f16__2_6_6(ptr addrspace(1) inreg %ptr) {
6336; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_6_6:
6337; GFX900:       ; %bb.0:
6338; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6339; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6340; GFX900-NEXT:    ;;#ASMSTART
6341; GFX900-NEXT:    ; def v[0:1]
6342; GFX900-NEXT:    ;;#ASMEND
6343; GFX900-NEXT:    ;;#ASMSTART
6344; GFX900-NEXT:    ; def v[2:3]
6345; GFX900-NEXT:    ;;#ASMEND
6346; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6347; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
6348; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6349; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6350; GFX900-NEXT:    s_waitcnt vmcnt(0)
6351; GFX900-NEXT:    s_setpc_b64 s[30:31]
6352;
6353; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_6_6:
6354; GFX90A:       ; %bb.0:
6355; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6356; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6357; GFX90A-NEXT:    ;;#ASMSTART
6358; GFX90A-NEXT:    ; def v[0:1]
6359; GFX90A-NEXT:    ;;#ASMEND
6360; GFX90A-NEXT:    ;;#ASMSTART
6361; GFX90A-NEXT:    ; def v[2:3]
6362; GFX90A-NEXT:    ;;#ASMEND
6363; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6364; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
6365; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6366; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6367; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6368; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6369;
6370; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_6_6:
6371; GFX940:       ; %bb.0:
6372; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6373; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6374; GFX940-NEXT:    ;;#ASMSTART
6375; GFX940-NEXT:    ; def v[0:1]
6376; GFX940-NEXT:    ;;#ASMEND
6377; GFX940-NEXT:    ;;#ASMSTART
6378; GFX940-NEXT:    ; def v[2:3]
6379; GFX940-NEXT:    ;;#ASMEND
6380; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6381; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
6382; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6383; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6384; GFX940-NEXT:    s_waitcnt vmcnt(0)
6385; GFX940-NEXT:    s_setpc_b64 s[30:31]
6386  %vec0 = call <4 x half> asm "; def $0", "=v"()
6387  %vec1 = call <4 x half> asm "; def $0", "=v"()
6388  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
6389  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6390  ret void
6391}
6392
; Shuffle mask <3, 6, 6>: result lane 0 = %vec0[3], lanes 1 and 2 are both
; %vec1[2] (mask indices 0-3 select from %vec0, 4-7 from %vec1).
; The checks below expect lanes 0 and 1 to be packed by v_alignbit_b32 and
; lane 2 to be written with global_store_short at offset 4.
6393define void @v_shuffle_v3f16_v4f16__3_6_6(ptr addrspace(1) inreg %ptr) {
6394; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_6_6:
6395; GFX900:       ; %bb.0:
6396; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6397; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6398; GFX900-NEXT:    ;;#ASMSTART
6399; GFX900-NEXT:    ; def v[0:1]
6400; GFX900-NEXT:    ;;#ASMEND
6401; GFX900-NEXT:    ;;#ASMSTART
6402; GFX900-NEXT:    ; def v[2:3]
6403; GFX900-NEXT:    ;;#ASMEND
6404; GFX900-NEXT:    v_alignbit_b32 v0, v3, v1, 16
6405; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6406; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6407; GFX900-NEXT:    s_waitcnt vmcnt(0)
6408; GFX900-NEXT:    s_setpc_b64 s[30:31]
6409;
6410; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_6_6:
6411; GFX90A:       ; %bb.0:
6412; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6413; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6414; GFX90A-NEXT:    ;;#ASMSTART
6415; GFX90A-NEXT:    ; def v[0:1]
6416; GFX90A-NEXT:    ;;#ASMEND
6417; GFX90A-NEXT:    ;;#ASMSTART
6418; GFX90A-NEXT:    ; def v[2:3]
6419; GFX90A-NEXT:    ;;#ASMEND
6420; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v1, 16
6421; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6422; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6423; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6424; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6425;
6426; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_6_6:
6427; GFX940:       ; %bb.0:
6428; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6429; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6430; GFX940-NEXT:    ;;#ASMSTART
6431; GFX940-NEXT:    ; def v[0:1]
6432; GFX940-NEXT:    ;;#ASMEND
6433; GFX940-NEXT:    ;;#ASMSTART
6434; GFX940-NEXT:    ; def v[2:3]
6435; GFX940-NEXT:    ;;#ASMEND
6436; GFX940-NEXT:    s_nop 0
6437; GFX940-NEXT:    v_alignbit_b32 v0, v3, v1, 16
6438; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6439; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6440; GFX940-NEXT:    s_waitcnt vmcnt(0)
6441; GFX940-NEXT:    s_setpc_b64 s[30:31]
6442  %vec0 = call <4 x half> asm "; def $0", "=v"()
6443  %vec1 = call <4 x half> asm "; def $0", "=v"()
6444  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
6445  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6446  ret void
6447}
6448
; Shuffle mask <4, 6, 6>: result lane 0 = %vec1[0], lanes 1 and 2 are both
; %vec1[2]. Every mask index is in the 4-7 range, so only %vec1 is read; the
; checks below accordingly contain a single "; def v[0:1]" marker.
; Lanes 0 and 1 are expected to be combined by v_perm_b32 with the selector
; 0x5040100, and lane 2 to be written with global_store_short at offset 4.
6449define void @v_shuffle_v3f16_v4f16__4_6_6(ptr addrspace(1) inreg %ptr) {
6450; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_6_6:
6451; GFX900:       ; %bb.0:
6452; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6453; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6454; GFX900-NEXT:    ;;#ASMSTART
6455; GFX900-NEXT:    ; def v[0:1]
6456; GFX900-NEXT:    ;;#ASMEND
6457; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6458; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
6459; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6460; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6461; GFX900-NEXT:    s_waitcnt vmcnt(0)
6462; GFX900-NEXT:    s_setpc_b64 s[30:31]
6463;
6464; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_6_6:
6465; GFX90A:       ; %bb.0:
6466; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6467; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6468; GFX90A-NEXT:    ;;#ASMSTART
6469; GFX90A-NEXT:    ; def v[0:1]
6470; GFX90A-NEXT:    ;;#ASMEND
6471; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6472; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
6473; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6474; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6475; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6476; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6477;
6478; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_6_6:
6479; GFX940:       ; %bb.0:
6480; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6481; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6482; GFX940-NEXT:    ;;#ASMSTART
6483; GFX940-NEXT:    ; def v[0:1]
6484; GFX940-NEXT:    ;;#ASMEND
6485; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6486; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
6487; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6488; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6489; GFX940-NEXT:    s_waitcnt vmcnt(0)
6490; GFX940-NEXT:    s_setpc_b64 s[30:31]
6491  %vec0 = call <4 x half> asm "; def $0", "=v"()
6492  %vec1 = call <4 x half> asm "; def $0", "=v"()
6493  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
6494  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6495  ret void
6496}
6497
; Shuffle mask <5, 6, 6>: result lane 0 = %vec1[1], lanes 1 and 2 are both
; %vec1[2]. Every mask index is in the 4-7 range, so only %vec1 is read; the
; checks below accordingly contain a single "; def v[0:1]" marker.
; Lanes 0 and 1 are expected to be packed by v_alignbit_b32 and lane 2 to be
; written with global_store_short at offset 4.
6498define void @v_shuffle_v3f16_v4f16__5_6_6(ptr addrspace(1) inreg %ptr) {
6499; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_6_6:
6500; GFX900:       ; %bb.0:
6501; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6502; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6503; GFX900-NEXT:    ;;#ASMSTART
6504; GFX900-NEXT:    ; def v[0:1]
6505; GFX900-NEXT:    ;;#ASMEND
6506; GFX900-NEXT:    v_alignbit_b32 v0, v1, v0, 16
6507; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6508; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6509; GFX900-NEXT:    s_waitcnt vmcnt(0)
6510; GFX900-NEXT:    s_setpc_b64 s[30:31]
6511;
6512; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_6_6:
6513; GFX90A:       ; %bb.0:
6514; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6515; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6516; GFX90A-NEXT:    ;;#ASMSTART
6517; GFX90A-NEXT:    ; def v[0:1]
6518; GFX90A-NEXT:    ;;#ASMEND
6519; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v0, 16
6520; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6521; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6522; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6523; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6524;
6525; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_6_6:
6526; GFX940:       ; %bb.0:
6527; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6528; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6529; GFX940-NEXT:    ;;#ASMSTART
6530; GFX940-NEXT:    ; def v[0:1]
6531; GFX940-NEXT:    ;;#ASMEND
6532; GFX940-NEXT:    s_nop 0
6533; GFX940-NEXT:    v_alignbit_b32 v0, v1, v0, 16
6534; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6535; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6536; GFX940-NEXT:    s_waitcnt vmcnt(0)
6537; GFX940-NEXT:    s_setpc_b64 s[30:31]
6538  %vec0 = call <4 x half> asm "; def $0", "=v"()
6539  %vec1 = call <4 x half> asm "; def $0", "=v"()
6540  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
6541  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6542  ret void
6543}
6544
; Shuffle mask <6, 6, 6>: all three result lanes are %vec1[2] (a splat).
; Only %vec1 is read, and the checks below contain a single "; def v[0:1]"
; marker. The expected code builds the stored dword with v_perm_b32 using
; the same VGPR for both sources and the selector 0x5040100, and writes
; lane 2 with global_store_short at offset 4.
6545define void @v_shuffle_v3f16_v4f16__6_6_6(ptr addrspace(1) inreg %ptr) {
6546; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_6_6:
6547; GFX900:       ; %bb.0:
6548; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6549; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6550; GFX900-NEXT:    ;;#ASMSTART
6551; GFX900-NEXT:    ; def v[0:1]
6552; GFX900-NEXT:    ;;#ASMEND
6553; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
6554; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
6555; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6556; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6557; GFX900-NEXT:    s_waitcnt vmcnt(0)
6558; GFX900-NEXT:    s_setpc_b64 s[30:31]
6559;
6560; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_6_6:
6561; GFX90A:       ; %bb.0:
6562; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6563; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6564; GFX90A-NEXT:    ;;#ASMSTART
6565; GFX90A-NEXT:    ; def v[0:1]
6566; GFX90A-NEXT:    ;;#ASMEND
6567; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
6568; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
6569; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6570; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6571; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6572; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6573;
6574; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_6_6:
6575; GFX940:       ; %bb.0:
6576; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6577; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6578; GFX940-NEXT:    ;;#ASMSTART
6579; GFX940-NEXT:    ; def v[0:1]
6580; GFX940-NEXT:    ;;#ASMEND
6581; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
6582; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
6583; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6584; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6585; GFX940-NEXT:    s_waitcnt vmcnt(0)
6586; GFX940-NEXT:    s_setpc_b64 s[30:31]
6587  %vec0 = call <4 x half> asm "; def $0", "=v"()
6588  %vec1 = call <4 x half> asm "; def $0", "=v"()
6589  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
6590  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6591  ret void
6592}
6593
; Shuffle mask <7, 6, 6>: result lane 0 = %vec1[3], lanes 1 and 2 are both
; %vec1[2]. Every mask index is in the 4-7 range, so only %vec1 is read; the
; checks below accordingly contain a single "; def v[0:1]" marker.
; Lanes 0 and 1 are expected to come from v_alignbit_b32 with the same VGPR
; as both sources, and lane 2 from global_store_short at offset 4.
6594define void @v_shuffle_v3f16_v4f16__7_6_6(ptr addrspace(1) inreg %ptr) {
6595; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_6:
6596; GFX900:       ; %bb.0:
6597; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6598; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6599; GFX900-NEXT:    ;;#ASMSTART
6600; GFX900-NEXT:    ; def v[0:1]
6601; GFX900-NEXT:    ;;#ASMEND
6602; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
6603; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6604; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6605; GFX900-NEXT:    s_waitcnt vmcnt(0)
6606; GFX900-NEXT:    s_setpc_b64 s[30:31]
6607;
6608; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_6:
6609; GFX90A:       ; %bb.0:
6610; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6611; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6612; GFX90A-NEXT:    ;;#ASMSTART
6613; GFX90A-NEXT:    ; def v[0:1]
6614; GFX90A-NEXT:    ;;#ASMEND
6615; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
6616; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6617; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6618; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6619; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6620;
6621; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_6:
6622; GFX940:       ; %bb.0:
6623; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6624; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6625; GFX940-NEXT:    ;;#ASMSTART
6626; GFX940-NEXT:    ; def v[0:1]
6627; GFX940-NEXT:    ;;#ASMEND
6628; GFX940-NEXT:    s_nop 0
6629; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
6630; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6631; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6632; GFX940-NEXT:    s_waitcnt vmcnt(0)
6633; GFX940-NEXT:    s_setpc_b64 s[30:31]
6634  %vec0 = call <4 x half> asm "; def $0", "=v"()
6635  %vec1 = call <4 x half> asm "; def $0", "=v"()
6636  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
6637  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6638  ret void
6639}
6640
; Shuffle mask <7, poison, 6>: result lane 0 = %vec1[3], lane 1 is poison,
; lane 2 = %vec1[2]. Only %vec1 is read, and the checks below contain a
; single "; def v[0:1]" marker.
; The expected v_alignbit_b32 takes a scalar register (s4, or s0 on gfx940)
; as its first source even though the expected code never writes it;
; presumably this is acceptable because it only feeds the poison lane.
; Lane 2 is expected to be written with global_store_short at offset 4.
6641define void @v_shuffle_v3f16_v4f16__7_u_6(ptr addrspace(1) inreg %ptr) {
6642; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_6:
6643; GFX900:       ; %bb.0:
6644; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6645; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6646; GFX900-NEXT:    ;;#ASMSTART
6647; GFX900-NEXT:    ; def v[0:1]
6648; GFX900-NEXT:    ;;#ASMEND
6649; GFX900-NEXT:    v_alignbit_b32 v0, s4, v1, 16
6650; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6651; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6652; GFX900-NEXT:    s_waitcnt vmcnt(0)
6653; GFX900-NEXT:    s_setpc_b64 s[30:31]
6654;
6655; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_6:
6656; GFX90A:       ; %bb.0:
6657; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6658; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6659; GFX90A-NEXT:    ;;#ASMSTART
6660; GFX90A-NEXT:    ; def v[0:1]
6661; GFX90A-NEXT:    ;;#ASMEND
6662; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v1, 16
6663; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6664; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6665; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6666; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6667;
6668; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_6:
6669; GFX940:       ; %bb.0:
6670; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6671; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6672; GFX940-NEXT:    ;;#ASMSTART
6673; GFX940-NEXT:    ; def v[0:1]
6674; GFX940-NEXT:    ;;#ASMEND
6675; GFX940-NEXT:    s_nop 0
6676; GFX940-NEXT:    v_alignbit_b32 v0, s0, v1, 16
6677; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6678; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6679; GFX940-NEXT:    s_waitcnt vmcnt(0)
6680; GFX940-NEXT:    s_setpc_b64 s[30:31]
6681  %vec0 = call <4 x half> asm "; def $0", "=v"()
6682  %vec1 = call <4 x half> asm "; def $0", "=v"()
6683  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
6684  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6685  ret void
6686}
6687
; Shuffle mask <7, 0, 6>: result lane 0 = %vec1[3], lane 1 = %vec0[0],
; lane 2 = %vec1[2] (mask indices 0-3 select from %vec0, 4-7 from %vec1).
; The checks below expect lanes 0 and 1 to be packed by v_alignbit_b32 and
; lane 2 to be written with global_store_short at offset 4.
; Note that the gfx900 expectations place the second asm def in v[1:2],
; while the gfx90a and gfx940 expectations place it in v[2:3].
6688define void @v_shuffle_v3f16_v4f16__7_0_6(ptr addrspace(1) inreg %ptr) {
6689; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_6:
6690; GFX900:       ; %bb.0:
6691; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6692; GFX900-NEXT:    ;;#ASMSTART
6693; GFX900-NEXT:    ; def v[0:1]
6694; GFX900-NEXT:    ;;#ASMEND
6695; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6696; GFX900-NEXT:    ;;#ASMSTART
6697; GFX900-NEXT:    ; def v[1:2]
6698; GFX900-NEXT:    ;;#ASMEND
6699; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
6700; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
6701; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
6702; GFX900-NEXT:    s_waitcnt vmcnt(0)
6703; GFX900-NEXT:    s_setpc_b64 s[30:31]
6704;
6705; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_6:
6706; GFX90A:       ; %bb.0:
6707; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6708; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6709; GFX90A-NEXT:    ;;#ASMSTART
6710; GFX90A-NEXT:    ; def v[0:1]
6711; GFX90A-NEXT:    ;;#ASMEND
6712; GFX90A-NEXT:    ;;#ASMSTART
6713; GFX90A-NEXT:    ; def v[2:3]
6714; GFX90A-NEXT:    ;;#ASMEND
6715; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
6716; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6717; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6718; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6719; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6720;
6721; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_6:
6722; GFX940:       ; %bb.0:
6723; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6724; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6725; GFX940-NEXT:    ;;#ASMSTART
6726; GFX940-NEXT:    ; def v[0:1]
6727; GFX940-NEXT:    ;;#ASMEND
6728; GFX940-NEXT:    ;;#ASMSTART
6729; GFX940-NEXT:    ; def v[2:3]
6730; GFX940-NEXT:    ;;#ASMEND
6731; GFX940-NEXT:    s_nop 0
6732; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
6733; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6734; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6735; GFX940-NEXT:    s_waitcnt vmcnt(0)
6736; GFX940-NEXT:    s_setpc_b64 s[30:31]
6737  %vec0 = call <4 x half> asm "; def $0", "=v"()
6738  %vec1 = call <4 x half> asm "; def $0", "=v"()
6739  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 6>
6740  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6741  ret void
6742}
6743
; Shuffle mask <7, 1, 6>: mask indices 0-3 select from %vec0 and 4-7 from
; %vec1, so lanes 0 and 2 come from %vec1 and lane 1 from %vec0. The expected
; code builds the first dword with v_perm_b32 (selector 0x7060302), stores it
; at offset 0, and stores the %vec1 register as a short at offset 4.
; In the gfx900 output the second asm def is v[1:2], overlapping v1 of the
; first def; only v0 of the first def is read afterwards.
6744define void @v_shuffle_v3f16_v4f16__7_1_6(ptr addrspace(1) inreg %ptr) {
6745; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_6:
6746; GFX900:       ; %bb.0:
6747; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6748; GFX900-NEXT:    ;;#ASMSTART
6749; GFX900-NEXT:    ; def v[0:1]
6750; GFX900-NEXT:    ;;#ASMEND
6751; GFX900-NEXT:    v_mov_b32_e32 v3, 0
6752; GFX900-NEXT:    ;;#ASMSTART
6753; GFX900-NEXT:    ; def v[1:2]
6754; GFX900-NEXT:    ;;#ASMEND
6755; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6756; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
6757; GFX900-NEXT:    global_store_short v3, v2, s[16:17] offset:4
6758; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
6759; GFX900-NEXT:    s_waitcnt vmcnt(0)
6760; GFX900-NEXT:    s_setpc_b64 s[30:31]
6761;
6762; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_6:
6763; GFX90A:       ; %bb.0:
6764; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6765; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6766; GFX90A-NEXT:    ;;#ASMSTART
6767; GFX90A-NEXT:    ; def v[0:1]
6768; GFX90A-NEXT:    ;;#ASMEND
6769; GFX90A-NEXT:    ;;#ASMSTART
6770; GFX90A-NEXT:    ; def v[2:3]
6771; GFX90A-NEXT:    ;;#ASMEND
6772; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6773; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
6774; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6775; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6776; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6777; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6778;
6779; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_6:
6780; GFX940:       ; %bb.0:
6781; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6782; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6783; GFX940-NEXT:    ;;#ASMSTART
6784; GFX940-NEXT:    ; def v[0:1]
6785; GFX940-NEXT:    ;;#ASMEND
6786; GFX940-NEXT:    ;;#ASMSTART
6787; GFX940-NEXT:    ; def v[2:3]
6788; GFX940-NEXT:    ;;#ASMEND
6789; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6790; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
6791; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6792; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6793; GFX940-NEXT:    s_waitcnt vmcnt(0)
6794; GFX940-NEXT:    s_setpc_b64 s[30:31]
6795  %vec0 = call <4 x half> asm "; def $0", "=v"()
6796  %vec1 = call <4 x half> asm "; def $0", "=v"()
6797  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 6>
6798  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6799  ret void
6800}
6801
; Shuffle mask <7, 2, 6>: lanes 0 and 2 come from %vec1 (mask indices 4-7)
; and lane 1 is element 2 of %vec0. The expected code forms the first dword
; with a single v_alignbit_b32 by 16 over v1 and v3, and stores v3 as a short
; at offset 4. The gfx940 run additionally expects an s_nop 0 before the
; v_alignbit_b32.
6802define void @v_shuffle_v3f16_v4f16__7_2_6(ptr addrspace(1) inreg %ptr) {
6803; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_6:
6804; GFX900:       ; %bb.0:
6805; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6806; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6807; GFX900-NEXT:    ;;#ASMSTART
6808; GFX900-NEXT:    ; def v[0:1]
6809; GFX900-NEXT:    ;;#ASMEND
6810; GFX900-NEXT:    ;;#ASMSTART
6811; GFX900-NEXT:    ; def v[2:3]
6812; GFX900-NEXT:    ;;#ASMEND
6813; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
6814; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6815; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6816; GFX900-NEXT:    s_waitcnt vmcnt(0)
6817; GFX900-NEXT:    s_setpc_b64 s[30:31]
6818;
6819; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_6:
6820; GFX90A:       ; %bb.0:
6821; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6822; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6823; GFX90A-NEXT:    ;;#ASMSTART
6824; GFX90A-NEXT:    ; def v[0:1]
6825; GFX90A-NEXT:    ;;#ASMEND
6826; GFX90A-NEXT:    ;;#ASMSTART
6827; GFX90A-NEXT:    ; def v[2:3]
6828; GFX90A-NEXT:    ;;#ASMEND
6829; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
6830; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6831; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6832; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6833; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6834;
6835; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_6:
6836; GFX940:       ; %bb.0:
6837; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6838; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6839; GFX940-NEXT:    ;;#ASMSTART
6840; GFX940-NEXT:    ; def v[0:1]
6841; GFX940-NEXT:    ;;#ASMEND
6842; GFX940-NEXT:    ;;#ASMSTART
6843; GFX940-NEXT:    ; def v[2:3]
6844; GFX940-NEXT:    ;;#ASMEND
6845; GFX940-NEXT:    s_nop 0
6846; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
6847; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6848; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6849; GFX940-NEXT:    s_waitcnt vmcnt(0)
6850; GFX940-NEXT:    s_setpc_b64 s[30:31]
6851  %vec0 = call <4 x half> asm "; def $0", "=v"()
6852  %vec1 = call <4 x half> asm "; def $0", "=v"()
6853  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 6>
6854  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6855  ret void
6856}
6857
; Shuffle mask <7, 3, 6>: lanes 0 and 2 come from %vec1 (mask indices 4-7)
; and lane 1 is element 3 of %vec0. The expected code combines v1 and v3 with
; v_perm_b32 (selector 0x7060302) for the first dword and stores v3 as a
; short at offset 4.
6858define void @v_shuffle_v3f16_v4f16__7_3_6(ptr addrspace(1) inreg %ptr) {
6859; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_6:
6860; GFX900:       ; %bb.0:
6861; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6862; GFX900-NEXT:    v_mov_b32_e32 v4, 0
6863; GFX900-NEXT:    ;;#ASMSTART
6864; GFX900-NEXT:    ; def v[0:1]
6865; GFX900-NEXT:    ;;#ASMEND
6866; GFX900-NEXT:    ;;#ASMSTART
6867; GFX900-NEXT:    ; def v[2:3]
6868; GFX900-NEXT:    ;;#ASMEND
6869; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6870; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
6871; GFX900-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6872; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
6873; GFX900-NEXT:    s_waitcnt vmcnt(0)
6874; GFX900-NEXT:    s_setpc_b64 s[30:31]
6875;
6876; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_6:
6877; GFX90A:       ; %bb.0:
6878; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6879; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
6880; GFX90A-NEXT:    ;;#ASMSTART
6881; GFX90A-NEXT:    ; def v[0:1]
6882; GFX90A-NEXT:    ;;#ASMEND
6883; GFX90A-NEXT:    ;;#ASMSTART
6884; GFX90A-NEXT:    ; def v[2:3]
6885; GFX90A-NEXT:    ;;#ASMEND
6886; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6887; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
6888; GFX90A-NEXT:    global_store_short v4, v3, s[16:17] offset:4
6889; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
6890; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6891; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6892;
6893; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_6:
6894; GFX940:       ; %bb.0:
6895; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6896; GFX940-NEXT:    v_mov_b32_e32 v4, 0
6897; GFX940-NEXT:    ;;#ASMSTART
6898; GFX940-NEXT:    ; def v[0:1]
6899; GFX940-NEXT:    ;;#ASMEND
6900; GFX940-NEXT:    ;;#ASMSTART
6901; GFX940-NEXT:    ; def v[2:3]
6902; GFX940-NEXT:    ;;#ASMEND
6903; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
6904; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
6905; GFX940-NEXT:    global_store_short v4, v3, s[0:1] offset:4 sc0 sc1
6906; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
6907; GFX940-NEXT:    s_waitcnt vmcnt(0)
6908; GFX940-NEXT:    s_setpc_b64 s[30:31]
6909  %vec0 = call <4 x half> asm "; def $0", "=v"()
6910  %vec1 = call <4 x half> asm "; def $0", "=v"()
6911  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 6>
6912  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6913  ret void
6914}
6915
; Shuffle mask <7, 4, 6>: every mask index is in the 4-7 range, so all three
; lanes come from %vec1 and %vec0 is unused by the shuffle. The expected
; output accordingly contains a single asm def, forms the first dword with
; v_alignbit_b32 by 16 over v0 and v1, and stores v1 as a short at offset 4.
; The gfx940 run additionally expects an s_nop 0 before the v_alignbit_b32.
6916define void @v_shuffle_v3f16_v4f16__7_4_6(ptr addrspace(1) inreg %ptr) {
6917; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_6:
6918; GFX900:       ; %bb.0:
6919; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6920; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6921; GFX900-NEXT:    ;;#ASMSTART
6922; GFX900-NEXT:    ; def v[0:1]
6923; GFX900-NEXT:    ;;#ASMEND
6924; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
6925; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6926; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6927; GFX900-NEXT:    s_waitcnt vmcnt(0)
6928; GFX900-NEXT:    s_setpc_b64 s[30:31]
6929;
6930; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_6:
6931; GFX90A:       ; %bb.0:
6932; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6933; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6934; GFX90A-NEXT:    ;;#ASMSTART
6935; GFX90A-NEXT:    ; def v[0:1]
6936; GFX90A-NEXT:    ;;#ASMEND
6937; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v1, 16
6938; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6939; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6940; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6941; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6942;
6943; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_6:
6944; GFX940:       ; %bb.0:
6945; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6946; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6947; GFX940-NEXT:    ;;#ASMSTART
6948; GFX940-NEXT:    ; def v[0:1]
6949; GFX940-NEXT:    ;;#ASMEND
6950; GFX940-NEXT:    s_nop 0
6951; GFX940-NEXT:    v_alignbit_b32 v0, v0, v1, 16
6952; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
6953; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
6954; GFX940-NEXT:    s_waitcnt vmcnt(0)
6955; GFX940-NEXT:    s_setpc_b64 s[30:31]
6956  %vec0 = call <4 x half> asm "; def $0", "=v"()
6957  %vec1 = call <4 x half> asm "; def $0", "=v"()
6958  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
6959  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
6960  ret void
6961}
6962
; Shuffle mask <7, 5, 6>: every mask index is in the 4-7 range, so all three
; lanes come from %vec1 and %vec0 is unused by the shuffle. The expected
; output contains a single asm def, combines v0 and v1 with v_perm_b32
; (selector 0x7060302) for the first dword, and stores v1 as a short at
; offset 4.
6963define void @v_shuffle_v3f16_v4f16__7_5_6(ptr addrspace(1) inreg %ptr) {
6964; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_6:
6965; GFX900:       ; %bb.0:
6966; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6967; GFX900-NEXT:    v_mov_b32_e32 v2, 0
6968; GFX900-NEXT:    ;;#ASMSTART
6969; GFX900-NEXT:    ; def v[0:1]
6970; GFX900-NEXT:    ;;#ASMEND
6971; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
6972; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
6973; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6974; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
6975; GFX900-NEXT:    s_waitcnt vmcnt(0)
6976; GFX900-NEXT:    s_setpc_b64 s[30:31]
6977;
6978; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_6:
6979; GFX90A:       ; %bb.0:
6980; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6981; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
6982; GFX90A-NEXT:    ;;#ASMSTART
6983; GFX90A-NEXT:    ; def v[0:1]
6984; GFX90A-NEXT:    ;;#ASMEND
6985; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
6986; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
6987; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
6988; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
6989; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6990; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6991;
6992; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_6:
6993; GFX940:       ; %bb.0:
6994; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6995; GFX940-NEXT:    v_mov_b32_e32 v2, 0
6996; GFX940-NEXT:    ;;#ASMSTART
6997; GFX940-NEXT:    ; def v[0:1]
6998; GFX940-NEXT:    ;;#ASMEND
6999; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7000; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
7001; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7002; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7003; GFX940-NEXT:    s_waitcnt vmcnt(0)
7004; GFX940-NEXT:    s_setpc_b64 s[30:31]
7005  %vec0 = call <4 x half> asm "; def $0", "=v"()
7006  %vec1 = call <4 x half> asm "; def $0", "=v"()
7007  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
7008  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7009  ret void
7010}
7011
; Shuffle mask <poison, 7, 7>: lane 0 is poison and lanes 1 and 2 are both
; element 3 of %vec1 (mask index 7); %vec0 is unused by the shuffle. The
; expected output contains a single asm def, builds the first dword with
; v_bfi_b32 under a 0xffff mask, shifts v1 right by 16 for the third lane,
; and stores the dword at offset 0 followed by the short at offset 4.
7012define void @v_shuffle_v3f16_v4f16__u_7_7(ptr addrspace(1) inreg %ptr) {
7013; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_7_7:
7014; GFX900:       ; %bb.0:
7015; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7016; GFX900-NEXT:    ;;#ASMSTART
7017; GFX900-NEXT:    ; def v[0:1]
7018; GFX900-NEXT:    ;;#ASMEND
7019; GFX900-NEXT:    s_mov_b32 s4, 0xffff
7020; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7021; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
7022; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7023; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7024; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7025; GFX900-NEXT:    s_waitcnt vmcnt(0)
7026; GFX900-NEXT:    s_setpc_b64 s[30:31]
7027;
7028; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_7_7:
7029; GFX90A:       ; %bb.0:
7030; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7031; GFX90A-NEXT:    ;;#ASMSTART
7032; GFX90A-NEXT:    ; def v[0:1]
7033; GFX90A-NEXT:    ;;#ASMEND
7034; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
7035; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7036; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
7037; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7038; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7039; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7040; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7041; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7042;
7043; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_7_7:
7044; GFX940:       ; %bb.0:
7045; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7046; GFX940-NEXT:    ;;#ASMSTART
7047; GFX940-NEXT:    ; def v[0:1]
7048; GFX940-NEXT:    ;;#ASMEND
7049; GFX940-NEXT:    s_mov_b32 s2, 0xffff
7050; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7051; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
7052; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7053; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7054; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7055; GFX940-NEXT:    s_waitcnt vmcnt(0)
7056; GFX940-NEXT:    s_setpc_b64 s[30:31]
7057  %vec0 = call <4 x half> asm "; def $0", "=v"()
7058  %vec1 = call <4 x half> asm "; def $0", "=v"()
7059  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 7, i32 7>
7060  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7061  ret void
7062}
7063
; Shuffle mask <0, 7, 7>: lane 0 is element 0 of %vec0 and lanes 1 and 2 are
; both element 3 of %vec1 (mask index 7). The expected code merges v0 with
; the %vec1 register via v_bfi_b32 under a 0xffff mask, shifts the %vec1
; register right by 16 for the third lane, and stores the dword at offset 0
; followed by the short at offset 4.
; In the gfx900 output the second asm def is v[1:2], overlapping v1 of the
; first def; only v0 of the first def is read afterwards. The gfx940 run
; additionally expects an s_nop 0 before the v_bfi_b32.
7064define void @v_shuffle_v3f16_v4f16__0_7_7(ptr addrspace(1) inreg %ptr) {
7065; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_7_7:
7066; GFX900:       ; %bb.0:
7067; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7068; GFX900-NEXT:    ;;#ASMSTART
7069; GFX900-NEXT:    ; def v[0:1]
7070; GFX900-NEXT:    ;;#ASMEND
7071; GFX900-NEXT:    s_mov_b32 s4, 0xffff
7072; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7073; GFX900-NEXT:    ;;#ASMSTART
7074; GFX900-NEXT:    ; def v[1:2]
7075; GFX900-NEXT:    ;;#ASMEND
7076; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v2
7077; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
7078; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
7079; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
7080; GFX900-NEXT:    s_waitcnt vmcnt(0)
7081; GFX900-NEXT:    s_setpc_b64 s[30:31]
7082;
7083; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_7_7:
7084; GFX90A:       ; %bb.0:
7085; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7086; GFX90A-NEXT:    ;;#ASMSTART
7087; GFX90A-NEXT:    ; def v[0:1]
7088; GFX90A-NEXT:    ;;#ASMEND
7089; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
7090; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7091; GFX90A-NEXT:    ;;#ASMSTART
7092; GFX90A-NEXT:    ; def v[2:3]
7093; GFX90A-NEXT:    ;;#ASMEND
7094; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v3
7095; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7096; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7097; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7098; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7099; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7100;
7101; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_7_7:
7102; GFX940:       ; %bb.0:
7103; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7104; GFX940-NEXT:    ;;#ASMSTART
7105; GFX940-NEXT:    ; def v[0:1]
7106; GFX940-NEXT:    ;;#ASMEND
7107; GFX940-NEXT:    s_mov_b32 s2, 0xffff
7108; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7109; GFX940-NEXT:    ;;#ASMSTART
7110; GFX940-NEXT:    ; def v[2:3]
7111; GFX940-NEXT:    ;;#ASMEND
7112; GFX940-NEXT:    s_nop 0
7113; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v3
7114; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7115; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7116; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7117; GFX940-NEXT:    s_waitcnt vmcnt(0)
7118; GFX940-NEXT:    s_setpc_b64 s[30:31]
7119  %vec0 = call <4 x half> asm "; def $0", "=v"()
7120  %vec1 = call <4 x half> asm "; def $0", "=v"()
7121  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 7, i32 7>
7122  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7123  ret void
7124}
7125
; Shuffle mask <1, 7, 7>: lane 0 is element 1 of %vec0 and lanes 1 and 2 are
; both element 3 of %vec1 (mask index 7). The expected code combines the
; %vec1 register and v0 with v_perm_b32 (selector 0x7060302), shifts the
; %vec1 register right by 16 for the third lane, and stores the dword at
; offset 0 followed by the short at offset 4.
; In the gfx900 output the second asm def is v[1:2], overlapping v1 of the
; first def; only v0 of the first def is read afterwards. The gfx940 run
; additionally expects an s_nop 0 before the v_perm_b32.
7126define void @v_shuffle_v3f16_v4f16__1_7_7(ptr addrspace(1) inreg %ptr) {
7127; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_7_7:
7128; GFX900:       ; %bb.0:
7129; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7130; GFX900-NEXT:    ;;#ASMSTART
7131; GFX900-NEXT:    ; def v[0:1]
7132; GFX900-NEXT:    ;;#ASMEND
7133; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7134; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7135; GFX900-NEXT:    ;;#ASMSTART
7136; GFX900-NEXT:    ; def v[1:2]
7137; GFX900-NEXT:    ;;#ASMEND
7138; GFX900-NEXT:    v_perm_b32 v0, v2, v0, s4
7139; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
7140; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
7141; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
7142; GFX900-NEXT:    s_waitcnt vmcnt(0)
7143; GFX900-NEXT:    s_setpc_b64 s[30:31]
7144;
7145; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_7_7:
7146; GFX90A:       ; %bb.0:
7147; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7148; GFX90A-NEXT:    ;;#ASMSTART
7149; GFX90A-NEXT:    ; def v[0:1]
7150; GFX90A-NEXT:    ;;#ASMEND
7151; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7152; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7153; GFX90A-NEXT:    ;;#ASMSTART
7154; GFX90A-NEXT:    ; def v[2:3]
7155; GFX90A-NEXT:    ;;#ASMEND
7156; GFX90A-NEXT:    v_perm_b32 v0, v3, v0, s4
7157; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7158; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7159; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7160; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7161; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7162;
7163; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_7_7:
7164; GFX940:       ; %bb.0:
7165; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7166; GFX940-NEXT:    ;;#ASMSTART
7167; GFX940-NEXT:    ; def v[0:1]
7168; GFX940-NEXT:    ;;#ASMEND
7169; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7170; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7171; GFX940-NEXT:    ;;#ASMSTART
7172; GFX940-NEXT:    ; def v[2:3]
7173; GFX940-NEXT:    ;;#ASMEND
7174; GFX940-NEXT:    s_nop 0
7175; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s2
7176; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7177; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7178; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7179; GFX940-NEXT:    s_waitcnt vmcnt(0)
7180; GFX940-NEXT:    s_setpc_b64 s[30:31]
7181  %vec0 = call <4 x half> asm "; def $0", "=v"()
7182  %vec1 = call <4 x half> asm "; def $0", "=v"()
7183  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 7, i32 7>
7184  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7185  ret void
7186}
7187
; Shuffle mask <2, 7, 7>: lane 0 is element 2 of %vec0 and lanes 1 and 2 are
; both element 3 of %vec1 (mask index 7). The expected code merges v1 and v3
; via v_bfi_b32 under a 0xffff mask and shifts v3 right by 16 for the third
; lane. The gfx940 run additionally expects an s_nop 0 before the v_bfi_b32.
; NOTE(review): the checks here expect the short store (offset 4) before the
; dword store, whereas the neighbouring 0_7_7, 1_7_7 and 3_7_7 tests expect
; dword first. Confirm by regenerating with update_llc_test_checks.py.
7188define void @v_shuffle_v3f16_v4f16__2_7_7(ptr addrspace(1) inreg %ptr) {
7189; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_7_7:
7190; GFX900:       ; %bb.0:
7191; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7192; GFX900-NEXT:    ;;#ASMSTART
7193; GFX900-NEXT:    ; def v[0:1]
7194; GFX900-NEXT:    ;;#ASMEND
7195; GFX900-NEXT:    s_mov_b32 s4, 0xffff
7196; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7197; GFX900-NEXT:    ;;#ASMSTART
7198; GFX900-NEXT:    ; def v[2:3]
7199; GFX900-NEXT:    ;;#ASMEND
7200; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v3
7201; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7202; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7203; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
7204; GFX900-NEXT:    s_waitcnt vmcnt(0)
7205; GFX900-NEXT:    s_setpc_b64 s[30:31]
7206;
7207; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_7_7:
7208; GFX90A:       ; %bb.0:
7209; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7210; GFX90A-NEXT:    ;;#ASMSTART
7211; GFX90A-NEXT:    ; def v[0:1]
7212; GFX90A-NEXT:    ;;#ASMEND
7213; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
7214; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7215; GFX90A-NEXT:    ;;#ASMSTART
7216; GFX90A-NEXT:    ; def v[2:3]
7217; GFX90A-NEXT:    ;;#ASMEND
7218; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v3
7219; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7220; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7221; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7222; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7223; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7224;
7225; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_7_7:
7226; GFX940:       ; %bb.0:
7227; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7228; GFX940-NEXT:    ;;#ASMSTART
7229; GFX940-NEXT:    ; def v[0:1]
7230; GFX940-NEXT:    ;;#ASMEND
7231; GFX940-NEXT:    s_mov_b32 s2, 0xffff
7232; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7233; GFX940-NEXT:    ;;#ASMSTART
7234; GFX940-NEXT:    ; def v[2:3]
7235; GFX940-NEXT:    ;;#ASMEND
7236; GFX940-NEXT:    s_nop 0
7237; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v3
7238; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7239; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7240; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7241; GFX940-NEXT:    s_waitcnt vmcnt(0)
7242; GFX940-NEXT:    s_setpc_b64 s[30:31]
7243  %vec0 = call <4 x half> asm "; def $0", "=v"()
7244  %vec1 = call <4 x half> asm "; def $0", "=v"()
7245  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 7, i32 7>
7246  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7247  ret void
7248}
7249
; Shuffle mask <3, 7, 7>: lane 0 is element 3 of %vec0 and lanes 1 and 2 are
; both element 3 of %vec1 (mask index 7). The expected code combines v3 and
; v1 with v_perm_b32 (selector 0x7060302), shifts v3 right by 16 for the
; third lane, and stores the dword at offset 0 followed by the short at
; offset 4. The gfx940 run additionally expects an s_nop 0 before the
; v_perm_b32.
7250define void @v_shuffle_v3f16_v4f16__3_7_7(ptr addrspace(1) inreg %ptr) {
7251; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_7_7:
7252; GFX900:       ; %bb.0:
7253; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7254; GFX900-NEXT:    ;;#ASMSTART
7255; GFX900-NEXT:    ; def v[0:1]
7256; GFX900-NEXT:    ;;#ASMEND
7257; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7258; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7259; GFX900-NEXT:    ;;#ASMSTART
7260; GFX900-NEXT:    ; def v[2:3]
7261; GFX900-NEXT:    ;;#ASMEND
7262; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
7263; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7264; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
7265; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7266; GFX900-NEXT:    s_waitcnt vmcnt(0)
7267; GFX900-NEXT:    s_setpc_b64 s[30:31]
7268;
7269; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_7_7:
7270; GFX90A:       ; %bb.0:
7271; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7272; GFX90A-NEXT:    ;;#ASMSTART
7273; GFX90A-NEXT:    ; def v[0:1]
7274; GFX90A-NEXT:    ;;#ASMEND
7275; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7276; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7277; GFX90A-NEXT:    ;;#ASMSTART
7278; GFX90A-NEXT:    ; def v[2:3]
7279; GFX90A-NEXT:    ;;#ASMEND
7280; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
7281; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7282; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7283; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7284; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7285; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7286;
7287; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_7_7:
7288; GFX940:       ; %bb.0:
7289; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7290; GFX940-NEXT:    ;;#ASMSTART
7291; GFX940-NEXT:    ; def v[0:1]
7292; GFX940-NEXT:    ;;#ASMEND
7293; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7294; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7295; GFX940-NEXT:    ;;#ASMSTART
7296; GFX940-NEXT:    ; def v[2:3]
7297; GFX940-NEXT:    ;;#ASMEND
7298; GFX940-NEXT:    s_nop 0
7299; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
7300; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7301; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7302; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7303; GFX940-NEXT:    s_waitcnt vmcnt(0)
7304; GFX940-NEXT:    s_setpc_b64 s[30:31]
7305  %vec0 = call <4 x half> asm "; def $0", "=v"()
7306  %vec1 = call <4 x half> asm "; def $0", "=v"()
7307  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 7, i32 7>
7308  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7309  ret void
7310}
7311
; Shuffle mask <4, 7, 7>: every mask index is in the 4-7 range, so all three
; lanes come from %vec1 (element 0, then element 3 twice) and %vec0 is unused
; by the shuffle. The expected output contains a single asm def, merges v0
; and v1 via v_bfi_b32 under a 0xffff mask, shifts v1 right by 16 for the
; third lane, and stores the dword at offset 0 followed by the short at
; offset 4.
7312define void @v_shuffle_v3f16_v4f16__4_7_7(ptr addrspace(1) inreg %ptr) {
7313; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_7_7:
7314; GFX900:       ; %bb.0:
7315; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7316; GFX900-NEXT:    ;;#ASMSTART
7317; GFX900-NEXT:    ; def v[0:1]
7318; GFX900-NEXT:    ;;#ASMEND
7319; GFX900-NEXT:    s_mov_b32 s4, 0xffff
7320; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7321; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
7322; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7323; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7324; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7325; GFX900-NEXT:    s_waitcnt vmcnt(0)
7326; GFX900-NEXT:    s_setpc_b64 s[30:31]
7327;
7328; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_7_7:
7329; GFX90A:       ; %bb.0:
7330; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7331; GFX90A-NEXT:    ;;#ASMSTART
7332; GFX90A-NEXT:    ; def v[0:1]
7333; GFX90A-NEXT:    ;;#ASMEND
7334; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
7335; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7336; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
7337; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7338; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7339; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7340; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7341; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7342;
7343; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_7_7:
7344; GFX940:       ; %bb.0:
7345; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7346; GFX940-NEXT:    ;;#ASMSTART
7347; GFX940-NEXT:    ; def v[0:1]
7348; GFX940-NEXT:    ;;#ASMEND
7349; GFX940-NEXT:    s_mov_b32 s2, 0xffff
7350; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7351; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
7352; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7353; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7354; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7355; GFX940-NEXT:    s_waitcnt vmcnt(0)
7356; GFX940-NEXT:    s_setpc_b64 s[30:31]
7357  %vec0 = call <4 x half> asm "; def $0", "=v"()
7358  %vec1 = call <4 x half> asm "; def $0", "=v"()
7359  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
7360  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7361  ret void
7362}
7363
; Shuffle mask <5, 7, 7>: every mask index is in the 4-7 range, so all three
; lanes come from %vec1 (element 1, then element 3 twice) and %vec0 is unused
; by the shuffle. The expected output contains a single asm def, combines v1
; and v0 with v_perm_b32 (selector 0x7060302), shifts v1 right by 16 for the
; third lane, and stores the dword at offset 0 followed by the short at
; offset 4.
7364define void @v_shuffle_v3f16_v4f16__5_7_7(ptr addrspace(1) inreg %ptr) {
7365; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_7_7:
7366; GFX900:       ; %bb.0:
7367; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7368; GFX900-NEXT:    ;;#ASMSTART
7369; GFX900-NEXT:    ; def v[0:1]
7370; GFX900-NEXT:    ;;#ASMEND
7371; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7372; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7373; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
7374; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7375; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7376; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7377; GFX900-NEXT:    s_waitcnt vmcnt(0)
7378; GFX900-NEXT:    s_setpc_b64 s[30:31]
7379;
7380; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_7_7:
7381; GFX90A:       ; %bb.0:
7382; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7383; GFX90A-NEXT:    ;;#ASMSTART
7384; GFX90A-NEXT:    ; def v[0:1]
7385; GFX90A-NEXT:    ;;#ASMEND
7386; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7387; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7388; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
7389; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7390; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7391; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7392; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7393; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7394;
7395; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_7_7:
7396; GFX940:       ; %bb.0:
7397; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7398; GFX940-NEXT:    ;;#ASMSTART
7399; GFX940-NEXT:    ; def v[0:1]
7400; GFX940-NEXT:    ;;#ASMEND
7401; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7402; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7403; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
7404; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7405; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7406; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7407; GFX940-NEXT:    s_waitcnt vmcnt(0)
7408; GFX940-NEXT:    s_setpc_b64 s[30:31]
7409  %vec0 = call <4 x half> asm "; def $0", "=v"()
7410  %vec1 = call <4 x half> asm "; def $0", "=v"()
7411  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 7, i32 7>
7412  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7413  ret void
7414}
7415
; Shuffle mask <6, 7, 7>: every mask index is in the 4-7 range, so all three
; lanes come from %vec1 (elements 2, 3, 3) and %vec0 is unused by the
; shuffle. The expected output contains a single asm def and no ALU work on
; the data: v1 is stored unchanged as the dword at offset 0 and again via
; global_store_short_d16_hi at offset 4.
7416define void @v_shuffle_v3f16_v4f16__6_7_7(ptr addrspace(1) inreg %ptr) {
7417; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_7_7:
7418; GFX900:       ; %bb.0:
7419; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7420; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7421; GFX900-NEXT:    ;;#ASMSTART
7422; GFX900-NEXT:    ; def v[0:1]
7423; GFX900-NEXT:    ;;#ASMEND
7424; GFX900-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
7425; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
7426; GFX900-NEXT:    s_waitcnt vmcnt(0)
7427; GFX900-NEXT:    s_setpc_b64 s[30:31]
7428;
7429; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_7_7:
7430; GFX90A:       ; %bb.0:
7431; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7432; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7433; GFX90A-NEXT:    ;;#ASMSTART
7434; GFX90A-NEXT:    ; def v[0:1]
7435; GFX90A-NEXT:    ;;#ASMEND
7436; GFX90A-NEXT:    global_store_short_d16_hi v2, v1, s[16:17] offset:4
7437; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
7438; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7439; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7440;
7441; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_7_7:
7442; GFX940:       ; %bb.0:
7443; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7444; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7445; GFX940-NEXT:    ;;#ASMSTART
7446; GFX940-NEXT:    ; def v[0:1]
7447; GFX940-NEXT:    ;;#ASMEND
7448; GFX940-NEXT:    global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1
7449; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
7450; GFX940-NEXT:    s_waitcnt vmcnt(0)
7451; GFX940-NEXT:    s_setpc_b64 s[30:31]
7452  %vec0 = call <4 x half> asm "; def $0", "=v"()
7453  %vec1 = call <4 x half> asm "; def $0", "=v"()
7454  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 7, i32 7>
7455  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7456  ret void
7457}
7458
; Shuffle mask <7, poison, 7>: lanes 0 and 2 are both element 3 of %vec1
; (mask index 7) and lane 1 is poison; %vec0 is unused by the shuffle. The
; expected output contains a single asm def, shifts v1 right by 16 for the
; third lane, builds the first dword with v_alignbit_b32 by 16, and stores
; the dword at offset 0 followed by the short at offset 4.
; NOTE(review): the v_alignbit_b32 reads an SGPR (s4, or s0 on gfx940) that
; the checked sequence never writes; presumably this stands in for the poison
; lane. Confirm against the regenerated output.
7459define void @v_shuffle_v3f16_v4f16__7_u_7(ptr addrspace(1) inreg %ptr) {
7460; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_7:
7461; GFX900:       ; %bb.0:
7462; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7463; GFX900-NEXT:    ;;#ASMSTART
7464; GFX900-NEXT:    ; def v[0:1]
7465; GFX900-NEXT:    ;;#ASMEND
7466; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7467; GFX900-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
7468; GFX900-NEXT:    v_alignbit_b32 v1, s4, v1, 16
7469; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
7470; GFX900-NEXT:    global_store_short v2, v0, s[16:17] offset:4
7471; GFX900-NEXT:    s_waitcnt vmcnt(0)
7472; GFX900-NEXT:    s_setpc_b64 s[30:31]
7473;
7474; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_7:
7475; GFX90A:       ; %bb.0:
7476; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7477; GFX90A-NEXT:    ;;#ASMSTART
7478; GFX90A-NEXT:    ; def v[0:1]
7479; GFX90A-NEXT:    ;;#ASMEND
7480; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7481; GFX90A-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
7482; GFX90A-NEXT:    v_alignbit_b32 v1, s4, v1, 16
7483; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
7484; GFX90A-NEXT:    global_store_short v2, v0, s[16:17] offset:4
7485; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7486; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7487;
7488; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_7:
7489; GFX940:       ; %bb.0:
7490; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7491; GFX940-NEXT:    ;;#ASMSTART
7492; GFX940-NEXT:    ; def v[0:1]
7493; GFX940-NEXT:    ;;#ASMEND
7494; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7495; GFX940-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
7496; GFX940-NEXT:    v_alignbit_b32 v1, s0, v1, 16
7497; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
7498; GFX940-NEXT:    global_store_short v2, v0, s[0:1] offset:4 sc0 sc1
7499; GFX940-NEXT:    s_waitcnt vmcnt(0)
7500; GFX940-NEXT:    s_setpc_b64 s[30:31]
7501  %vec0 = call <4 x half> asm "; def $0", "=v"()
7502  %vec1 = call <4 x half> asm "; def $0", "=v"()
7503  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 7>
7504  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7505  ret void
7506}
7507
; "=v" asm operands, shuffle mask <7, 0, 7>: result lanes 0 and 2 take element 3
; of %vec1 (mask indices 4-7 address the second operand) and lane 1 takes
; element 0 of %vec0, so both asm defs are live. The <3 x half> result is
; stored to %ptr.
7508define void @v_shuffle_v3f16_v4f16__7_0_7(ptr addrspace(1) inreg %ptr) {
7509; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_7:
7510; GFX900:       ; %bb.0:
7511; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7512; GFX900-NEXT:    ;;#ASMSTART
7513; GFX900-NEXT:    ; def v[0:1]
7514; GFX900-NEXT:    ;;#ASMEND
7515; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7516; GFX900-NEXT:    ;;#ASMSTART
7517; GFX900-NEXT:    ; def v[1:2]
7518; GFX900-NEXT:    ;;#ASMEND
7519; GFX900-NEXT:    v_alignbit_b32 v0, v0, v2, 16
7520; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
7521; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
7522; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
7523; GFX900-NEXT:    s_waitcnt vmcnt(0)
7524; GFX900-NEXT:    s_setpc_b64 s[30:31]
7525;
7526; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_7:
7527; GFX90A:       ; %bb.0:
7528; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7529; GFX90A-NEXT:    ;;#ASMSTART
7530; GFX90A-NEXT:    ; def v[0:1]
7531; GFX90A-NEXT:    ;;#ASMEND
7532; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7533; GFX90A-NEXT:    ;;#ASMSTART
7534; GFX90A-NEXT:    ; def v[2:3]
7535; GFX90A-NEXT:    ;;#ASMEND
7536; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v3, 16
7537; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7538; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7539; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7540; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7541; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7542;
7543; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_7:
7544; GFX940:       ; %bb.0:
7545; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7546; GFX940-NEXT:    ;;#ASMSTART
7547; GFX940-NEXT:    ; def v[0:1]
7548; GFX940-NEXT:    ;;#ASMEND
7549; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7550; GFX940-NEXT:    ;;#ASMSTART
7551; GFX940-NEXT:    ; def v[2:3]
7552; GFX940-NEXT:    ;;#ASMEND
7553; GFX940-NEXT:    s_nop 0
7554; GFX940-NEXT:    v_alignbit_b32 v0, v0, v3, 16
7555; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7556; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7557; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7558; GFX940-NEXT:    s_waitcnt vmcnt(0)
7559; GFX940-NEXT:    s_setpc_b64 s[30:31]
7560  %vec0 = call <4 x half> asm "; def $0", "=v"()
7561  %vec1 = call <4 x half> asm "; def $0", "=v"()
7562  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 7>
7563  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7564  ret void
7565}
7566
; "=v" asm operands, shuffle mask <7, 1, 7>: result lanes 0 and 2 take element 3
; of %vec1 (mask indices 4-7 address the second operand) and lane 1 takes
; element 1 of %vec0. The expected output combines the two sources with
; v_perm_b32 using the selector constant 0x7060302. The <3 x half> result is
; stored to %ptr.
7567define void @v_shuffle_v3f16_v4f16__7_1_7(ptr addrspace(1) inreg %ptr) {
7568; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_7:
7569; GFX900:       ; %bb.0:
7570; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7571; GFX900-NEXT:    ;;#ASMSTART
7572; GFX900-NEXT:    ; def v[0:1]
7573; GFX900-NEXT:    ;;#ASMEND
7574; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7575; GFX900-NEXT:    v_mov_b32_e32 v3, 0
7576; GFX900-NEXT:    ;;#ASMSTART
7577; GFX900-NEXT:    ; def v[1:2]
7578; GFX900-NEXT:    ;;#ASMEND
7579; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
7580; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
7581; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
7582; GFX900-NEXT:    global_store_short v3, v1, s[16:17] offset:4
7583; GFX900-NEXT:    s_waitcnt vmcnt(0)
7584; GFX900-NEXT:    s_setpc_b64 s[30:31]
7585;
7586; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_7:
7587; GFX90A:       ; %bb.0:
7588; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7589; GFX90A-NEXT:    ;;#ASMSTART
7590; GFX90A-NEXT:    ; def v[0:1]
7591; GFX90A-NEXT:    ;;#ASMEND
7592; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7593; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7594; GFX90A-NEXT:    ;;#ASMSTART
7595; GFX90A-NEXT:    ; def v[2:3]
7596; GFX90A-NEXT:    ;;#ASMEND
7597; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
7598; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7599; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7600; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7601; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7602; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7603;
7604; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_7:
7605; GFX940:       ; %bb.0:
7606; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7607; GFX940-NEXT:    ;;#ASMSTART
7608; GFX940-NEXT:    ; def v[0:1]
7609; GFX940-NEXT:    ;;#ASMEND
7610; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7611; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7612; GFX940-NEXT:    ;;#ASMSTART
7613; GFX940-NEXT:    ; def v[2:3]
7614; GFX940-NEXT:    ;;#ASMEND
7615; GFX940-NEXT:    s_nop 0
7616; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
7617; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7618; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7619; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7620; GFX940-NEXT:    s_waitcnt vmcnt(0)
7621; GFX940-NEXT:    s_setpc_b64 s[30:31]
7622  %vec0 = call <4 x half> asm "; def $0", "=v"()
7623  %vec1 = call <4 x half> asm "; def $0", "=v"()
7624  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 7>
7625  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7626  ret void
7627}
7628
; "=v" asm operands, shuffle mask <7, 2, 7>: result lanes 0 and 2 take element 3
; of %vec1 (mask indices 4-7 address the second operand) and lane 1 takes
; element 2 of %vec0, so both asm defs are live. The <3 x half> result is
; stored to %ptr.
7629define void @v_shuffle_v3f16_v4f16__7_2_7(ptr addrspace(1) inreg %ptr) {
7630; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_7:
7631; GFX900:       ; %bb.0:
7632; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7633; GFX900-NEXT:    ;;#ASMSTART
7634; GFX900-NEXT:    ; def v[0:1]
7635; GFX900-NEXT:    ;;#ASMEND
7636; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7637; GFX900-NEXT:    ;;#ASMSTART
7638; GFX900-NEXT:    ; def v[2:3]
7639; GFX900-NEXT:    ;;#ASMEND
7640; GFX900-NEXT:    v_alignbit_b32 v0, v1, v3, 16
7641; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7642; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7643; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
7644; GFX900-NEXT:    s_waitcnt vmcnt(0)
7645; GFX900-NEXT:    s_setpc_b64 s[30:31]
7646;
7647; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_7:
7648; GFX90A:       ; %bb.0:
7649; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7650; GFX90A-NEXT:    ;;#ASMSTART
7651; GFX90A-NEXT:    ; def v[0:1]
7652; GFX90A-NEXT:    ;;#ASMEND
7653; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7654; GFX90A-NEXT:    ;;#ASMSTART
7655; GFX90A-NEXT:    ; def v[2:3]
7656; GFX90A-NEXT:    ;;#ASMEND
7657; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v3, 16
7658; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7659; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7660; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7661; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7662; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7663;
7664; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_7:
7665; GFX940:       ; %bb.0:
7666; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7667; GFX940-NEXT:    ;;#ASMSTART
7668; GFX940-NEXT:    ; def v[0:1]
7669; GFX940-NEXT:    ;;#ASMEND
7670; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7671; GFX940-NEXT:    ;;#ASMSTART
7672; GFX940-NEXT:    ; def v[2:3]
7673; GFX940-NEXT:    ;;#ASMEND
7674; GFX940-NEXT:    s_nop 0
7675; GFX940-NEXT:    v_alignbit_b32 v0, v1, v3, 16
7676; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7677; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7678; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7679; GFX940-NEXT:    s_waitcnt vmcnt(0)
7680; GFX940-NEXT:    s_setpc_b64 s[30:31]
7681  %vec0 = call <4 x half> asm "; def $0", "=v"()
7682  %vec1 = call <4 x half> asm "; def $0", "=v"()
7683  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 7>
7684  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7685  ret void
7686}
7687
; "=v" asm operands, shuffle mask <7, 3, 7>: result lanes 0 and 2 take element 3
; of %vec1 (mask indices 4-7 address the second operand) and lane 1 takes
; element 3 of %vec0. The expected output combines the two sources with
; v_perm_b32 using the selector constant 0x7060302. The <3 x half> result is
; stored to %ptr.
7688define void @v_shuffle_v3f16_v4f16__7_3_7(ptr addrspace(1) inreg %ptr) {
7689; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_7:
7690; GFX900:       ; %bb.0:
7691; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7692; GFX900-NEXT:    ;;#ASMSTART
7693; GFX900-NEXT:    ; def v[0:1]
7694; GFX900-NEXT:    ;;#ASMEND
7695; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7696; GFX900-NEXT:    v_mov_b32_e32 v4, 0
7697; GFX900-NEXT:    ;;#ASMSTART
7698; GFX900-NEXT:    ; def v[2:3]
7699; GFX900-NEXT:    ;;#ASMEND
7700; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
7701; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7702; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
7703; GFX900-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7704; GFX900-NEXT:    s_waitcnt vmcnt(0)
7705; GFX900-NEXT:    s_setpc_b64 s[30:31]
7706;
7707; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_7:
7708; GFX90A:       ; %bb.0:
7709; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7710; GFX90A-NEXT:    ;;#ASMSTART
7711; GFX90A-NEXT:    ; def v[0:1]
7712; GFX90A-NEXT:    ;;#ASMEND
7713; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7714; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
7715; GFX90A-NEXT:    ;;#ASMSTART
7716; GFX90A-NEXT:    ; def v[2:3]
7717; GFX90A-NEXT:    ;;#ASMEND
7718; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
7719; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7720; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
7721; GFX90A-NEXT:    global_store_short v4, v1, s[16:17] offset:4
7722; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7723; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7724;
7725; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_7:
7726; GFX940:       ; %bb.0:
7727; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7728; GFX940-NEXT:    ;;#ASMSTART
7729; GFX940-NEXT:    ; def v[0:1]
7730; GFX940-NEXT:    ;;#ASMEND
7731; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7732; GFX940-NEXT:    v_mov_b32_e32 v4, 0
7733; GFX940-NEXT:    ;;#ASMSTART
7734; GFX940-NEXT:    ; def v[2:3]
7735; GFX940-NEXT:    ;;#ASMEND
7736; GFX940-NEXT:    s_nop 0
7737; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
7738; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v3
7739; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
7740; GFX940-NEXT:    global_store_short v4, v1, s[0:1] offset:4 sc0 sc1
7741; GFX940-NEXT:    s_waitcnt vmcnt(0)
7742; GFX940-NEXT:    s_setpc_b64 s[30:31]
7743  %vec0 = call <4 x half> asm "; def $0", "=v"()
7744  %vec1 = call <4 x half> asm "; def $0", "=v"()
7745  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 7>
7746  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7747  ret void
7748}
7749
; "=v" asm operands, shuffle mask <7, 4, 7>: every lane comes from %vec1 (mask
; indices 4-7 address the second operand) - lanes 0 and 2 take its element 3 and
; lane 1 takes its element 0. %vec0 is not referenced by the mask; the expected
; output below contains a single asm def. The <3 x half> result is stored to
; %ptr.
7750define void @v_shuffle_v3f16_v4f16__7_4_7(ptr addrspace(1) inreg %ptr) {
7751; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_7:
7752; GFX900:       ; %bb.0:
7753; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7754; GFX900-NEXT:    ;;#ASMSTART
7755; GFX900-NEXT:    ; def v[0:1]
7756; GFX900-NEXT:    ;;#ASMEND
7757; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7758; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
7759; GFX900-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
7760; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7761; GFX900-NEXT:    global_store_short v2, v3, s[16:17] offset:4
7762; GFX900-NEXT:    s_waitcnt vmcnt(0)
7763; GFX900-NEXT:    s_setpc_b64 s[30:31]
7764;
7765; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_7:
7766; GFX90A:       ; %bb.0:
7767; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7768; GFX90A-NEXT:    ;;#ASMSTART
7769; GFX90A-NEXT:    ; def v[0:1]
7770; GFX90A-NEXT:    ;;#ASMEND
7771; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7772; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v1, 16
7773; GFX90A-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
7774; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7775; GFX90A-NEXT:    global_store_short v2, v3, s[16:17] offset:4
7776; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7777; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7778;
7779; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_7:
7780; GFX940:       ; %bb.0:
7781; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7782; GFX940-NEXT:    ;;#ASMSTART
7783; GFX940-NEXT:    ; def v[0:1]
7784; GFX940-NEXT:    ;;#ASMEND
7785; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7786; GFX940-NEXT:    v_alignbit_b32 v0, v0, v1, 16
7787; GFX940-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
7788; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7789; GFX940-NEXT:    global_store_short v2, v3, s[0:1] offset:4 sc0 sc1
7790; GFX940-NEXT:    s_waitcnt vmcnt(0)
7791; GFX940-NEXT:    s_setpc_b64 s[30:31]
7792  %vec0 = call <4 x half> asm "; def $0", "=v"()
7793  %vec1 = call <4 x half> asm "; def $0", "=v"()
7794  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 7>
7795  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7796  ret void
7797}
7798
; "=v" asm operands, shuffle mask <7, 5, 7>: every lane comes from %vec1 (mask
; indices 4-7 address the second operand) - lanes 0 and 2 take its element 3 and
; lane 1 takes its element 1. %vec0 is not referenced by the mask; the expected
; output below contains a single asm def and a v_perm_b32 with selector
; 0x7060302. The <3 x half> result is stored to %ptr.
7799define void @v_shuffle_v3f16_v4f16__7_5_7(ptr addrspace(1) inreg %ptr) {
7800; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_7:
7801; GFX900:       ; %bb.0:
7802; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7803; GFX900-NEXT:    ;;#ASMSTART
7804; GFX900-NEXT:    ; def v[0:1]
7805; GFX900-NEXT:    ;;#ASMEND
7806; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
7807; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7808; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
7809; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7810; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7811; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7812; GFX900-NEXT:    s_waitcnt vmcnt(0)
7813; GFX900-NEXT:    s_setpc_b64 s[30:31]
7814;
7815; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_7:
7816; GFX90A:       ; %bb.0:
7817; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7818; GFX90A-NEXT:    ;;#ASMSTART
7819; GFX90A-NEXT:    ; def v[0:1]
7820; GFX90A-NEXT:    ;;#ASMEND
7821; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
7822; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7823; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
7824; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7825; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7826; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7827; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7828; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7829;
7830; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_7:
7831; GFX940:       ; %bb.0:
7832; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7833; GFX940-NEXT:    ;;#ASMSTART
7834; GFX940-NEXT:    ; def v[0:1]
7835; GFX940-NEXT:    ;;#ASMEND
7836; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
7837; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7838; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
7839; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7840; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7841; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7842; GFX940-NEXT:    s_waitcnt vmcnt(0)
7843; GFX940-NEXT:    s_setpc_b64 s[30:31]
7844  %vec0 = call <4 x half> asm "; def $0", "=v"()
7845  %vec1 = call <4 x half> asm "; def $0", "=v"()
7846  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
7847  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7848  ret void
7849}
7850
; "=v" asm operands, shuffle mask <7, 6, 7>: every lane comes from %vec1 (mask
; indices 4-7 address the second operand) - lanes 0 and 2 take its element 3 and
; lane 1 takes its element 2. %vec0 is not referenced by the mask; the expected
; output below contains a single asm def. The <3 x half> result is stored to
; %ptr.
7851define void @v_shuffle_v3f16_v4f16__7_6_7(ptr addrspace(1) inreg %ptr) {
7852; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_7:
7853; GFX900:       ; %bb.0:
7854; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7855; GFX900-NEXT:    ;;#ASMSTART
7856; GFX900-NEXT:    ; def v[0:1]
7857; GFX900-NEXT:    ;;#ASMEND
7858; GFX900-NEXT:    v_mov_b32_e32 v2, 0
7859; GFX900-NEXT:    v_alignbit_b32 v0, v1, v1, 16
7860; GFX900-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7861; GFX900-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7862; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
7863; GFX900-NEXT:    s_waitcnt vmcnt(0)
7864; GFX900-NEXT:    s_setpc_b64 s[30:31]
7865;
7866; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_7:
7867; GFX90A:       ; %bb.0:
7868; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7869; GFX90A-NEXT:    ;;#ASMSTART
7870; GFX90A-NEXT:    ; def v[0:1]
7871; GFX90A-NEXT:    ;;#ASMEND
7872; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
7873; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v1, 16
7874; GFX90A-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7875; GFX90A-NEXT:    global_store_short v2, v1, s[16:17] offset:4
7876; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
7877; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7878; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7879;
7880; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_7:
7881; GFX940:       ; %bb.0:
7882; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7883; GFX940-NEXT:    ;;#ASMSTART
7884; GFX940-NEXT:    ; def v[0:1]
7885; GFX940-NEXT:    ;;#ASMEND
7886; GFX940-NEXT:    v_mov_b32_e32 v2, 0
7887; GFX940-NEXT:    v_alignbit_b32 v0, v1, v1, 16
7888; GFX940-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
7889; GFX940-NEXT:    global_store_short v2, v1, s[0:1] offset:4 sc0 sc1
7890; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
7891; GFX940-NEXT:    s_waitcnt vmcnt(0)
7892; GFX940-NEXT:    s_setpc_b64 s[30:31]
7893  %vec0 = call <4 x half> asm "; def $0", "=v"()
7894  %vec1 = call <4 x half> asm "; def $0", "=v"()
7895  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 7>
7896  store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8
7897  ret void
7898}
7899
; "=s" asm operand, shuffle mask entirely poison: no element of %vec0 is needed,
; and the expected output below contains only the asm use. The <3 x half> result
; is widened to <4 x half> (lane 3 poison) and passed to an asm use constrained
; to s[8:9].
7900define void @s_shuffle_v3f16_v4f16__u_u_u() {
7901; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_u_u:
7902; GFX9:       ; %bb.0:
7903; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7904; GFX9-NEXT:    ;;#ASMSTART
7905; GFX9-NEXT:    ; use s[8:9]
7906; GFX9-NEXT:    ;;#ASMEND
7907; GFX9-NEXT:    s_setpc_b64 s[30:31]
7908  %vec0 = call <4 x half> asm "; def $0", "=s"()
7909  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> poison
7910  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7911  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
7912  ret void
7913}
7914
; "=s" asm operand, shuffle mask <0, poison, poison>: result lane 0 takes
; element 0 of %vec0 and the other lanes are undefined. In the expected output
; the asm def is placed directly in s[8:9] with no extra move. The <3 x half>
; result is widened to <4 x half> (lane 3 poison) and passed to an asm use
; constrained to s[8:9].
7915define void @s_shuffle_v3f16_v4f16__0_u_u() {
7916; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_u_u:
7917; GFX900:       ; %bb.0:
7918; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7919; GFX900-NEXT:    ;;#ASMSTART
7920; GFX900-NEXT:    ; def s[8:9]
7921; GFX900-NEXT:    ;;#ASMEND
7922; GFX900-NEXT:    ;;#ASMSTART
7923; GFX900-NEXT:    ; use s[8:9]
7924; GFX900-NEXT:    ;;#ASMEND
7925; GFX900-NEXT:    s_setpc_b64 s[30:31]
7926;
7927; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_u_u:
7928; GFX90A:       ; %bb.0:
7929; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7930; GFX90A-NEXT:    ;;#ASMSTART
7931; GFX90A-NEXT:    ; def s[8:9]
7932; GFX90A-NEXT:    ;;#ASMEND
7933; GFX90A-NEXT:    ;;#ASMSTART
7934; GFX90A-NEXT:    ; use s[8:9]
7935; GFX90A-NEXT:    ;;#ASMEND
7936; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7937;
7938; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_u_u:
7939; GFX940:       ; %bb.0:
7940; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7941; GFX940-NEXT:    ;;#ASMSTART
7942; GFX940-NEXT:    ; def s[8:9]
7943; GFX940-NEXT:    ;;#ASMEND
7944; GFX940-NEXT:    s_nop 0
7945; GFX940-NEXT:    ;;#ASMSTART
7946; GFX940-NEXT:    ; use s[8:9]
7947; GFX940-NEXT:    ;;#ASMEND
7948; GFX940-NEXT:    s_setpc_b64 s[30:31]
7949  %vec0 = call <4 x half> asm "; def $0", "=s"()
7950  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 poison, i32 poison>
7951  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7952  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
7953  ret void
7954}
7955
; "=s" asm operand, shuffle mask <1, poison, poison>: result lane 0 takes
; element 1 of %vec0 and the other lanes are undefined. The expected output
; obtains it with a 16-bit right shift of the first defined register. The
; <3 x half> result is widened to <4 x half> (lane 3 poison) and passed to an
; asm use constrained to s[8:9].
7956define void @s_shuffle_v3f16_v4f16__1_u_u() {
7957; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_u_u:
7958; GFX900:       ; %bb.0:
7959; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7960; GFX900-NEXT:    ;;#ASMSTART
7961; GFX900-NEXT:    ; def s[4:5]
7962; GFX900-NEXT:    ;;#ASMEND
7963; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
7964; GFX900-NEXT:    ;;#ASMSTART
7965; GFX900-NEXT:    ; use s[8:9]
7966; GFX900-NEXT:    ;;#ASMEND
7967; GFX900-NEXT:    s_setpc_b64 s[30:31]
7968;
7969; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_u_u:
7970; GFX90A:       ; %bb.0:
7971; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7972; GFX90A-NEXT:    ;;#ASMSTART
7973; GFX90A-NEXT:    ; def s[4:5]
7974; GFX90A-NEXT:    ;;#ASMEND
7975; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
7976; GFX90A-NEXT:    ;;#ASMSTART
7977; GFX90A-NEXT:    ; use s[8:9]
7978; GFX90A-NEXT:    ;;#ASMEND
7979; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7980;
7981; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_u_u:
7982; GFX940:       ; %bb.0:
7983; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7984; GFX940-NEXT:    ;;#ASMSTART
7985; GFX940-NEXT:    ; def s[0:1]
7986; GFX940-NEXT:    ;;#ASMEND
7987; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
7988; GFX940-NEXT:    ;;#ASMSTART
7989; GFX940-NEXT:    ; use s[8:9]
7990; GFX940-NEXT:    ;;#ASMEND
7991; GFX940-NEXT:    s_setpc_b64 s[30:31]
7992  %vec0 = call <4 x half> asm "; def $0", "=s"()
7993  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 poison, i32 poison>
7994  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
7995  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
7996  ret void
7997}
7998
; "=s" asm operand, shuffle mask <2, poison, poison>: result lane 0 takes
; element 2 of %vec0 and the other lanes are undefined. The expected output
; obtains it by copying the second defined register into s8. The <3 x half>
; result is widened to <4 x half> (lane 3 poison) and passed to an asm use
; constrained to s[8:9].
7999define void @s_shuffle_v3f16_v4f16__2_u_u() {
8000; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_u_u:
8001; GFX900:       ; %bb.0:
8002; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8003; GFX900-NEXT:    ;;#ASMSTART
8004; GFX900-NEXT:    ; def s[4:5]
8005; GFX900-NEXT:    ;;#ASMEND
8006; GFX900-NEXT:    s_mov_b32 s8, s5
8007; GFX900-NEXT:    ;;#ASMSTART
8008; GFX900-NEXT:    ; use s[8:9]
8009; GFX900-NEXT:    ;;#ASMEND
8010; GFX900-NEXT:    s_setpc_b64 s[30:31]
8011;
8012; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_u_u:
8013; GFX90A:       ; %bb.0:
8014; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8015; GFX90A-NEXT:    ;;#ASMSTART
8016; GFX90A-NEXT:    ; def s[4:5]
8017; GFX90A-NEXT:    ;;#ASMEND
8018; GFX90A-NEXT:    s_mov_b32 s8, s5
8019; GFX90A-NEXT:    ;;#ASMSTART
8020; GFX90A-NEXT:    ; use s[8:9]
8021; GFX90A-NEXT:    ;;#ASMEND
8022; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8023;
8024; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_u_u:
8025; GFX940:       ; %bb.0:
8026; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8027; GFX940-NEXT:    ;;#ASMSTART
8028; GFX940-NEXT:    ; def s[0:1]
8029; GFX940-NEXT:    ;;#ASMEND
8030; GFX940-NEXT:    s_mov_b32 s8, s1
8031; GFX940-NEXT:    ;;#ASMSTART
8032; GFX940-NEXT:    ; use s[8:9]
8033; GFX940-NEXT:    ;;#ASMEND
8034; GFX940-NEXT:    s_setpc_b64 s[30:31]
8035  %vec0 = call <4 x half> asm "; def $0", "=s"()
8036  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 poison, i32 poison>
8037  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8038  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8039  ret void
8040}
8041
; "=s" asm operand, shuffle mask <3, poison, poison>: result lane 0 takes
; element 3 of %vec0 and the other lanes are undefined. The expected output
; obtains it with a 16-bit right shift of the second defined register. The
; <3 x half> result is widened to <4 x half> (lane 3 poison) and passed to an
; asm use constrained to s[8:9].
8042define void @s_shuffle_v3f16_v4f16__3_u_u() {
8043; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_u_u:
8044; GFX900:       ; %bb.0:
8045; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8046; GFX900-NEXT:    ;;#ASMSTART
8047; GFX900-NEXT:    ; def s[4:5]
8048; GFX900-NEXT:    ;;#ASMEND
8049; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
8050; GFX900-NEXT:    ;;#ASMSTART
8051; GFX900-NEXT:    ; use s[8:9]
8052; GFX900-NEXT:    ;;#ASMEND
8053; GFX900-NEXT:    s_setpc_b64 s[30:31]
8054;
8055; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_u_u:
8056; GFX90A:       ; %bb.0:
8057; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8058; GFX90A-NEXT:    ;;#ASMSTART
8059; GFX90A-NEXT:    ; def s[4:5]
8060; GFX90A-NEXT:    ;;#ASMEND
8061; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
8062; GFX90A-NEXT:    ;;#ASMSTART
8063; GFX90A-NEXT:    ; use s[8:9]
8064; GFX90A-NEXT:    ;;#ASMEND
8065; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8066;
8067; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_u_u:
8068; GFX940:       ; %bb.0:
8069; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8070; GFX940-NEXT:    ;;#ASMSTART
8071; GFX940-NEXT:    ; def s[0:1]
8072; GFX940-NEXT:    ;;#ASMEND
8073; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
8074; GFX940-NEXT:    ;;#ASMSTART
8075; GFX940-NEXT:    ; use s[8:9]
8076; GFX940-NEXT:    ;;#ASMEND
8077; GFX940-NEXT:    s_setpc_b64 s[30:31]
8078  %vec0 = call <4 x half> asm "; def $0", "=s"()
8079  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 poison, i32 poison>
8080  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8081  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8082  ret void
8083}
8084
; "=s" asm operand, shuffle mask <4, poison, poison>. Mask index 4 addresses
; element 0 of the second shufflevector operand, which is poison in this
; function, so no element of %vec0 is needed and the expected output below
; contains only the asm use. The <3 x half> result is widened to <4 x half>
; (lane 3 poison) and passed to an asm use constrained to s[8:9].
; NOTE(review): the 5/6/7 variants of this test pass a second asm def (%vec1) as
; the second operand; here it is poison, which makes this case equivalent to the
; all-undef one - confirm this is intended.
8085define void @s_shuffle_v3f16_v4f16__4_u_u() {
8086; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_u_u:
8087; GFX9:       ; %bb.0:
8088; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8089; GFX9-NEXT:    ;;#ASMSTART
8090; GFX9-NEXT:    ; use s[8:9]
8091; GFX9-NEXT:    ;;#ASMEND
8092; GFX9-NEXT:    s_setpc_b64 s[30:31]
8093  %vec0 = call <4 x half> asm "; def $0", "=s"()
8094  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 poison, i32 poison>
8095  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8096  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8097  ret void
8098}
8099
; "=s" asm operands, shuffle mask <5, poison, poison>: result lane 0 takes
; element 1 of %vec1 (mask indices 4-7 address the second operand) and the other
; lanes are undefined. %vec0 is not referenced by the mask; the expected output
; below contains a single asm def followed by a 16-bit right shift of its first
; register. The <3 x half> result is widened to <4 x half> (lane 3 poison) and
; passed to an asm use constrained to s[8:9].
8100define void @s_shuffle_v3f16_v4f16__5_u_u() {
8101; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_u_u:
8102; GFX900:       ; %bb.0:
8103; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8104; GFX900-NEXT:    ;;#ASMSTART
8105; GFX900-NEXT:    ; def s[4:5]
8106; GFX900-NEXT:    ;;#ASMEND
8107; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
8108; GFX900-NEXT:    ;;#ASMSTART
8109; GFX900-NEXT:    ; use s[8:9]
8110; GFX900-NEXT:    ;;#ASMEND
8111; GFX900-NEXT:    s_setpc_b64 s[30:31]
8112;
8113; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_u_u:
8114; GFX90A:       ; %bb.0:
8115; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8116; GFX90A-NEXT:    ;;#ASMSTART
8117; GFX90A-NEXT:    ; def s[4:5]
8118; GFX90A-NEXT:    ;;#ASMEND
8119; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
8120; GFX90A-NEXT:    ;;#ASMSTART
8121; GFX90A-NEXT:    ; use s[8:9]
8122; GFX90A-NEXT:    ;;#ASMEND
8123; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8124;
8125; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_u_u:
8126; GFX940:       ; %bb.0:
8127; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8128; GFX940-NEXT:    ;;#ASMSTART
8129; GFX940-NEXT:    ; def s[0:1]
8130; GFX940-NEXT:    ;;#ASMEND
8131; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
8132; GFX940-NEXT:    ;;#ASMSTART
8133; GFX940-NEXT:    ; use s[8:9]
8134; GFX940-NEXT:    ;;#ASMEND
8135; GFX940-NEXT:    s_setpc_b64 s[30:31]
8136  %vec0 = call <4 x half> asm "; def $0", "=s"()
8137  %vec1 = call <4 x half> asm "; def $0", "=s"()
8138  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison>
8139  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8140  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8141  ret void
8142}
8143
; "=s" asm operands, shuffle mask <6, poison, poison>: result lane 0 takes
; element 2 of %vec1 (mask indices 4-7 address the second operand) and the other
; lanes are undefined. %vec0 is not referenced by the mask; the expected output
; below contains a single asm def followed by a copy of its second register
; into s8. The <3 x half> result is widened to <4 x half> (lane 3 poison) and
; passed to an asm use constrained to s[8:9].
8144define void @s_shuffle_v3f16_v4f16__6_u_u() {
8145; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_u_u:
8146; GFX900:       ; %bb.0:
8147; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8148; GFX900-NEXT:    ;;#ASMSTART
8149; GFX900-NEXT:    ; def s[4:5]
8150; GFX900-NEXT:    ;;#ASMEND
8151; GFX900-NEXT:    s_mov_b32 s8, s5
8152; GFX900-NEXT:    ;;#ASMSTART
8153; GFX900-NEXT:    ; use s[8:9]
8154; GFX900-NEXT:    ;;#ASMEND
8155; GFX900-NEXT:    s_setpc_b64 s[30:31]
8156;
8157; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_u_u:
8158; GFX90A:       ; %bb.0:
8159; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8160; GFX90A-NEXT:    ;;#ASMSTART
8161; GFX90A-NEXT:    ; def s[4:5]
8162; GFX90A-NEXT:    ;;#ASMEND
8163; GFX90A-NEXT:    s_mov_b32 s8, s5
8164; GFX90A-NEXT:    ;;#ASMSTART
8165; GFX90A-NEXT:    ; use s[8:9]
8166; GFX90A-NEXT:    ;;#ASMEND
8167; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8168;
8169; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_u_u:
8170; GFX940:       ; %bb.0:
8171; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8172; GFX940-NEXT:    ;;#ASMSTART
8173; GFX940-NEXT:    ; def s[0:1]
8174; GFX940-NEXT:    ;;#ASMEND
8175; GFX940-NEXT:    s_mov_b32 s8, s1
8176; GFX940-NEXT:    ;;#ASMSTART
8177; GFX940-NEXT:    ; use s[8:9]
8178; GFX940-NEXT:    ;;#ASMEND
8179; GFX940-NEXT:    s_setpc_b64 s[30:31]
8180  %vec0 = call <4 x half> asm "; def $0", "=s"()
8181  %vec1 = call <4 x half> asm "; def $0", "=s"()
8182  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison>
8183  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8184  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8185  ret void
8186}
8187
; "=s" asm operands, shuffle mask <7, poison, poison>: result lane 0 takes
; element 3 of %vec1 (mask indices 4-7 address the second operand) and the other
; lanes are undefined. %vec0 is not referenced by the mask; the expected output
; below contains a single asm def followed by a 16-bit right shift of its second
; register. The <3 x half> result is widened to <4 x half> (lane 3 poison) and
; passed to an asm use constrained to s[8:9].
8188define void @s_shuffle_v3f16_v4f16__7_u_u() {
8189; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_u:
8190; GFX900:       ; %bb.0:
8191; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8192; GFX900-NEXT:    ;;#ASMSTART
8193; GFX900-NEXT:    ; def s[4:5]
8194; GFX900-NEXT:    ;;#ASMEND
8195; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
8196; GFX900-NEXT:    ;;#ASMSTART
8197; GFX900-NEXT:    ; use s[8:9]
8198; GFX900-NEXT:    ;;#ASMEND
8199; GFX900-NEXT:    s_setpc_b64 s[30:31]
8200;
8201; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_u:
8202; GFX90A:       ; %bb.0:
8203; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8204; GFX90A-NEXT:    ;;#ASMSTART
8205; GFX90A-NEXT:    ; def s[4:5]
8206; GFX90A-NEXT:    ;;#ASMEND
8207; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
8208; GFX90A-NEXT:    ;;#ASMSTART
8209; GFX90A-NEXT:    ; use s[8:9]
8210; GFX90A-NEXT:    ;;#ASMEND
8211; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8212;
8213; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_u:
8214; GFX940:       ; %bb.0:
8215; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8216; GFX940-NEXT:    ;;#ASMSTART
8217; GFX940-NEXT:    ; def s[0:1]
8218; GFX940-NEXT:    ;;#ASMEND
8219; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
8220; GFX940-NEXT:    ;;#ASMSTART
8221; GFX940-NEXT:    ; use s[8:9]
8222; GFX940-NEXT:    ;;#ASMEND
8223; GFX940-NEXT:    s_setpc_b64 s[30:31]
8224  %vec0 = call <4 x half> asm "; def $0", "=s"()
8225  %vec1 = call <4 x half> asm "; def $0", "=s"()
8226  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison>
8227  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8228  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8229  ret void
8230}
8231
; "=s" asm operands, shuffle mask <7, 0, poison>: result lane 0 takes element 3
; of %vec1 (mask indices 4-7 address the second operand), lane 1 takes element 0
; of %vec0, and lane 2 is undefined, so both asm defs are live. The expected
; output shifts the second def's upper register right by 16 and packs it with
; the first def's lower register via s_pack_ll_b32_b16. The <3 x half> result is
; widened to <4 x half> (lane 3 poison) and passed to an asm use constrained to
; s[8:9].
8232define void @s_shuffle_v3f16_v4f16__7_0_u() {
8233; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_u:
8234; GFX900:       ; %bb.0:
8235; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8236; GFX900-NEXT:    ;;#ASMSTART
8237; GFX900-NEXT:    ; def s[4:5]
8238; GFX900-NEXT:    ;;#ASMEND
8239; GFX900-NEXT:    ;;#ASMSTART
8240; GFX900-NEXT:    ; def s[6:7]
8241; GFX900-NEXT:    ;;#ASMEND
8242; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
8243; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8244; GFX900-NEXT:    ;;#ASMSTART
8245; GFX900-NEXT:    ; use s[8:9]
8246; GFX900-NEXT:    ;;#ASMEND
8247; GFX900-NEXT:    s_setpc_b64 s[30:31]
8248;
8249; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_u:
8250; GFX90A:       ; %bb.0:
8251; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8252; GFX90A-NEXT:    ;;#ASMSTART
8253; GFX90A-NEXT:    ; def s[4:5]
8254; GFX90A-NEXT:    ;;#ASMEND
8255; GFX90A-NEXT:    ;;#ASMSTART
8256; GFX90A-NEXT:    ; def s[6:7]
8257; GFX90A-NEXT:    ;;#ASMEND
8258; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
8259; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8260; GFX90A-NEXT:    ;;#ASMSTART
8261; GFX90A-NEXT:    ; use s[8:9]
8262; GFX90A-NEXT:    ;;#ASMEND
8263; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8264;
8265; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_u:
8266; GFX940:       ; %bb.0:
8267; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8268; GFX940-NEXT:    ;;#ASMSTART
8269; GFX940-NEXT:    ; def s[0:1]
8270; GFX940-NEXT:    ;;#ASMEND
8271; GFX940-NEXT:    ;;#ASMSTART
8272; GFX940-NEXT:    ; def s[2:3]
8273; GFX940-NEXT:    ;;#ASMEND
8274; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
8275; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8276; GFX940-NEXT:    ;;#ASMSTART
8277; GFX940-NEXT:    ; use s[8:9]
8278; GFX940-NEXT:    ;;#ASMEND
8279; GFX940-NEXT:    s_setpc_b64 s[30:31]
8280  %vec0 = call <4 x half> asm "; def $0", "=s"()
8281  %vec1 = call <4 x half> asm "; def $0", "=s"()
8282  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 poison>
8283  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8284  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8285  ret void
8286}
8287
8288define void @s_shuffle_v3f16_v4f16__7_1_u() {
8289; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_u:
8290; GFX900:       ; %bb.0:
8291; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8292; GFX900-NEXT:    ;;#ASMSTART
8293; GFX900-NEXT:    ; def s[4:5]
8294; GFX900-NEXT:    ;;#ASMEND
8295; GFX900-NEXT:    ;;#ASMSTART
8296; GFX900-NEXT:    ; def s[6:7]
8297; GFX900-NEXT:    ;;#ASMEND
8298; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
8299; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
8300; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8301; GFX900-NEXT:    ;;#ASMSTART
8302; GFX900-NEXT:    ; use s[8:9]
8303; GFX900-NEXT:    ;;#ASMEND
8304; GFX900-NEXT:    s_setpc_b64 s[30:31]
8305;
8306; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_u:
8307; GFX90A:       ; %bb.0:
8308; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8309; GFX90A-NEXT:    ;;#ASMSTART
8310; GFX90A-NEXT:    ; def s[4:5]
8311; GFX90A-NEXT:    ;;#ASMEND
8312; GFX90A-NEXT:    ;;#ASMSTART
8313; GFX90A-NEXT:    ; def s[6:7]
8314; GFX90A-NEXT:    ;;#ASMEND
8315; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
8316; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
8317; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8318; GFX90A-NEXT:    ;;#ASMSTART
8319; GFX90A-NEXT:    ; use s[8:9]
8320; GFX90A-NEXT:    ;;#ASMEND
8321; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8322;
8323; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_u:
8324; GFX940:       ; %bb.0:
8325; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8326; GFX940-NEXT:    ;;#ASMSTART
8327; GFX940-NEXT:    ; def s[0:1]
8328; GFX940-NEXT:    ;;#ASMEND
8329; GFX940-NEXT:    ;;#ASMSTART
8330; GFX940-NEXT:    ; def s[2:3]
8331; GFX940-NEXT:    ;;#ASMEND
8332; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
8333; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
8334; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8335; GFX940-NEXT:    ;;#ASMSTART
8336; GFX940-NEXT:    ; use s[8:9]
8337; GFX940-NEXT:    ;;#ASMEND
8338; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
8339  %vec0 = call <4 x half> asm "; def $0", "=s"()
8340  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec0[1], lane 2 = poison.
8341  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 poison>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8342  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8343  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8344  ret void
8345}
8346
8347define void @s_shuffle_v3f16_v4f16__7_2_u() {
8348; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_u:
8349; GFX900:       ; %bb.0:
8350; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8351; GFX900-NEXT:    ;;#ASMSTART
8352; GFX900-NEXT:    ; def s[4:5]
8353; GFX900-NEXT:    ;;#ASMEND
8354; GFX900-NEXT:    ;;#ASMSTART
8355; GFX900-NEXT:    ; def s[6:7]
8356; GFX900-NEXT:    ;;#ASMEND
8357; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
8358; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
8359; GFX900-NEXT:    ;;#ASMSTART
8360; GFX900-NEXT:    ; use s[8:9]
8361; GFX900-NEXT:    ;;#ASMEND
8362; GFX900-NEXT:    s_setpc_b64 s[30:31]
8363;
8364; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_u:
8365; GFX90A:       ; %bb.0:
8366; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8367; GFX90A-NEXT:    ;;#ASMSTART
8368; GFX90A-NEXT:    ; def s[4:5]
8369; GFX90A-NEXT:    ;;#ASMEND
8370; GFX90A-NEXT:    ;;#ASMSTART
8371; GFX90A-NEXT:    ; def s[6:7]
8372; GFX90A-NEXT:    ;;#ASMEND
8373; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
8374; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
8375; GFX90A-NEXT:    ;;#ASMSTART
8376; GFX90A-NEXT:    ; use s[8:9]
8377; GFX90A-NEXT:    ;;#ASMEND
8378; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8379;
8380; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_u:
8381; GFX940:       ; %bb.0:
8382; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8383; GFX940-NEXT:    ;;#ASMSTART
8384; GFX940-NEXT:    ; def s[0:1]
8385; GFX940-NEXT:    ;;#ASMEND
8386; GFX940-NEXT:    ;;#ASMSTART
8387; GFX940-NEXT:    ; def s[2:3]
8388; GFX940-NEXT:    ;;#ASMEND
8389; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
8390; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
8391; GFX940-NEXT:    ;;#ASMSTART
8392; GFX940-NEXT:    ; use s[8:9]
8393; GFX940-NEXT:    ;;#ASMEND
8394; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
8395  %vec0 = call <4 x half> asm "; def $0", "=s"()
8396  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec0[2], lane 2 = poison.
8397  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 poison>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8398  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8399  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8400  ret void
8401}
8402
8403define void @s_shuffle_v3f16_v4f16__7_3_u() {
8404; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_u:
8405; GFX900:       ; %bb.0:
8406; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8407; GFX900-NEXT:    ;;#ASMSTART
8408; GFX900-NEXT:    ; def s[4:5]
8409; GFX900-NEXT:    ;;#ASMEND
8410; GFX900-NEXT:    ;;#ASMSTART
8411; GFX900-NEXT:    ; def s[6:7]
8412; GFX900-NEXT:    ;;#ASMEND
8413; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
8414; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
8415; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8416; GFX900-NEXT:    ;;#ASMSTART
8417; GFX900-NEXT:    ; use s[8:9]
8418; GFX900-NEXT:    ;;#ASMEND
8419; GFX900-NEXT:    s_setpc_b64 s[30:31]
8420;
8421; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_u:
8422; GFX90A:       ; %bb.0:
8423; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8424; GFX90A-NEXT:    ;;#ASMSTART
8425; GFX90A-NEXT:    ; def s[4:5]
8426; GFX90A-NEXT:    ;;#ASMEND
8427; GFX90A-NEXT:    ;;#ASMSTART
8428; GFX90A-NEXT:    ; def s[6:7]
8429; GFX90A-NEXT:    ;;#ASMEND
8430; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
8431; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
8432; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8433; GFX90A-NEXT:    ;;#ASMSTART
8434; GFX90A-NEXT:    ; use s[8:9]
8435; GFX90A-NEXT:    ;;#ASMEND
8436; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8437;
8438; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_u:
8439; GFX940:       ; %bb.0:
8440; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8441; GFX940-NEXT:    ;;#ASMSTART
8442; GFX940-NEXT:    ; def s[0:1]
8443; GFX940-NEXT:    ;;#ASMEND
8444; GFX940-NEXT:    ;;#ASMSTART
8445; GFX940-NEXT:    ; def s[2:3]
8446; GFX940-NEXT:    ;;#ASMEND
8447; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
8448; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
8449; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8450; GFX940-NEXT:    ;;#ASMSTART
8451; GFX940-NEXT:    ; use s[8:9]
8452; GFX940-NEXT:    ;;#ASMEND
8453; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
8454  %vec0 = call <4 x half> asm "; def $0", "=s"()
8455  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec0[3], lane 2 = poison.
8456  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 poison>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8457  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8458  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8459  ret void
8460}
8461
8462define void @s_shuffle_v3f16_v4f16__7_4_u() {
8463; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_u:
8464; GFX900:       ; %bb.0:
8465; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8466; GFX900-NEXT:    ;;#ASMSTART
8467; GFX900-NEXT:    ; def s[4:5]
8468; GFX900-NEXT:    ;;#ASMEND
8469; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
8470; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8471; GFX900-NEXT:    ;;#ASMSTART
8472; GFX900-NEXT:    ; use s[8:9]
8473; GFX900-NEXT:    ;;#ASMEND
8474; GFX900-NEXT:    s_setpc_b64 s[30:31]
8475;
8476; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_u:
8477; GFX90A:       ; %bb.0:
8478; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8479; GFX90A-NEXT:    ;;#ASMSTART
8480; GFX90A-NEXT:    ; def s[4:5]
8481; GFX90A-NEXT:    ;;#ASMEND
8482; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
8483; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8484; GFX90A-NEXT:    ;;#ASMSTART
8485; GFX90A-NEXT:    ; use s[8:9]
8486; GFX90A-NEXT:    ;;#ASMEND
8487; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8488;
8489; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_u:
8490; GFX940:       ; %bb.0:
8491; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8492; GFX940-NEXT:    ;;#ASMSTART
8493; GFX940-NEXT:    ; def s[0:1]
8494; GFX940-NEXT:    ;;#ASMEND
8495; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
8496; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8497; GFX940-NEXT:    ;;#ASMSTART
8498; GFX940-NEXT:    ; use s[8:9]
8499; GFX940-NEXT:    ;;#ASMEND
8500; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
  ; The mask never references %vec0; the expected output above contains a
  ; single def.
8501  %vec0 = call <4 x half> asm "; def $0", "=s"()
8502  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[0], lane 2 = poison.
8503  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 poison>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8504  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8505  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8506  ret void
8507}
8508
8509define void @s_shuffle_v3f16_v4f16__7_5_u() {
8510; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_u:
8511; GFX900:       ; %bb.0:
8512; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8513; GFX900-NEXT:    ;;#ASMSTART
8514; GFX900-NEXT:    ; def s[4:5]
8515; GFX900-NEXT:    ;;#ASMEND
8516; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
8517; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
8518; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8519; GFX900-NEXT:    ;;#ASMSTART
8520; GFX900-NEXT:    ; use s[8:9]
8521; GFX900-NEXT:    ;;#ASMEND
8522; GFX900-NEXT:    s_setpc_b64 s[30:31]
8523;
8524; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_u:
8525; GFX90A:       ; %bb.0:
8526; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8527; GFX90A-NEXT:    ;;#ASMSTART
8528; GFX90A-NEXT:    ; def s[4:5]
8529; GFX90A-NEXT:    ;;#ASMEND
8530; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
8531; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
8532; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
8533; GFX90A-NEXT:    ;;#ASMSTART
8534; GFX90A-NEXT:    ; use s[8:9]
8535; GFX90A-NEXT:    ;;#ASMEND
8536; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8537;
8538; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_u:
8539; GFX940:       ; %bb.0:
8540; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8541; GFX940-NEXT:    ;;#ASMSTART
8542; GFX940-NEXT:    ; def s[0:1]
8543; GFX940-NEXT:    ;;#ASMEND
8544; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
8545; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
8546; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
8547; GFX940-NEXT:    ;;#ASMSTART
8548; GFX940-NEXT:    ; use s[8:9]
8549; GFX940-NEXT:    ;;#ASMEND
8550; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
  ; The mask never references %vec0; the expected output above contains a
  ; single def.
8551  %vec0 = call <4 x half> asm "; def $0", "=s"()
8552  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[1], lane 2 = poison.
8553  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 poison>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8554  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8555  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8556  ret void
8557}
8558
8559define void @s_shuffle_v3f16_v4f16__7_6_u() {
8560; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_u:
8561; GFX900:       ; %bb.0:
8562; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8563; GFX900-NEXT:    ;;#ASMSTART
8564; GFX900-NEXT:    ; def s[4:5]
8565; GFX900-NEXT:    ;;#ASMEND
8566; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
8567; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
8568; GFX900-NEXT:    ;;#ASMSTART
8569; GFX900-NEXT:    ; use s[8:9]
8570; GFX900-NEXT:    ;;#ASMEND
8571; GFX900-NEXT:    s_setpc_b64 s[30:31]
8572;
8573; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_u:
8574; GFX90A:       ; %bb.0:
8575; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8576; GFX90A-NEXT:    ;;#ASMSTART
8577; GFX90A-NEXT:    ; def s[4:5]
8578; GFX90A-NEXT:    ;;#ASMEND
8579; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
8580; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
8581; GFX90A-NEXT:    ;;#ASMSTART
8582; GFX90A-NEXT:    ; use s[8:9]
8583; GFX90A-NEXT:    ;;#ASMEND
8584; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8585;
8586; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_u:
8587; GFX940:       ; %bb.0:
8588; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8589; GFX940-NEXT:    ;;#ASMSTART
8590; GFX940-NEXT:    ; def s[0:1]
8591; GFX940-NEXT:    ;;#ASMEND
8592; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
8593; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
8594; GFX940-NEXT:    ;;#ASMSTART
8595; GFX940-NEXT:    ; use s[8:9]
8596; GFX940-NEXT:    ;;#ASMEND
8597; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
  ; The mask never references %vec0; the expected output above contains a
  ; single def.
8598  %vec0 = call <4 x half> asm "; def $0", "=s"()
8599  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[2], lane 2 = poison.
8600  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 poison>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8601  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8602  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8603  ret void
8604}
8605
8606define void @s_shuffle_v3f16_v4f16__7_7_u() {
8607; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_u:
8608; GFX900:       ; %bb.0:
8609; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8610; GFX900-NEXT:    ;;#ASMSTART
8611; GFX900-NEXT:    ; def s[4:5]
8612; GFX900-NEXT:    ;;#ASMEND
8613; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
8614; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8615; GFX900-NEXT:    ;;#ASMSTART
8616; GFX900-NEXT:    ; use s[8:9]
8617; GFX900-NEXT:    ;;#ASMEND
8618; GFX900-NEXT:    s_setpc_b64 s[30:31]
8619;
8620; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_u:
8621; GFX90A:       ; %bb.0:
8622; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8623; GFX90A-NEXT:    ;;#ASMSTART
8624; GFX90A-NEXT:    ; def s[4:5]
8625; GFX90A-NEXT:    ;;#ASMEND
8626; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
8627; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8628; GFX90A-NEXT:    ;;#ASMSTART
8629; GFX90A-NEXT:    ; use s[8:9]
8630; GFX90A-NEXT:    ;;#ASMEND
8631; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8632;
8633; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_u:
8634; GFX940:       ; %bb.0:
8635; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8636; GFX940-NEXT:    ;;#ASMSTART
8637; GFX940-NEXT:    ; def s[0:1]
8638; GFX940-NEXT:    ;;#ASMEND
8639; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
8640; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
8641; GFX940-NEXT:    ;;#ASMSTART
8642; GFX940-NEXT:    ; use s[8:9]
8643; GFX940-NEXT:    ;;#ASMEND
8644; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
  ; The mask never references %vec0; the expected output above contains a
  ; single def.
8645  %vec0 = call <4 x half> asm "; def $0", "=s"()
8646  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[3], lane 2 = poison.
8647  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 poison>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8648  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8649  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8650  ret void
8651}
8652
8653define void @s_shuffle_v3f16_v4f16__7_7_0() {
8654; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_0:
8655; GFX900:       ; %bb.0:
8656; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8657; GFX900-NEXT:    ;;#ASMSTART
8658; GFX900-NEXT:    ; def s[4:5]
8659; GFX900-NEXT:    ;;#ASMEND
8660; GFX900-NEXT:    ;;#ASMSTART
8661; GFX900-NEXT:    ; def s[6:7]
8662; GFX900-NEXT:    ;;#ASMEND
8663; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
8664; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8665; GFX900-NEXT:    s_mov_b32 s9, s4
8666; GFX900-NEXT:    ;;#ASMSTART
8667; GFX900-NEXT:    ; use s[8:9]
8668; GFX900-NEXT:    ;;#ASMEND
8669; GFX900-NEXT:    s_setpc_b64 s[30:31]
8670;
8671; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_0:
8672; GFX90A:       ; %bb.0:
8673; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8674; GFX90A-NEXT:    ;;#ASMSTART
8675; GFX90A-NEXT:    ; def s[4:5]
8676; GFX90A-NEXT:    ;;#ASMEND
8677; GFX90A-NEXT:    ;;#ASMSTART
8678; GFX90A-NEXT:    ; def s[6:7]
8679; GFX90A-NEXT:    ;;#ASMEND
8680; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
8681; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8682; GFX90A-NEXT:    s_mov_b32 s9, s4
8683; GFX90A-NEXT:    ;;#ASMSTART
8684; GFX90A-NEXT:    ; use s[8:9]
8685; GFX90A-NEXT:    ;;#ASMEND
8686; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8687;
8688; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_0:
8689; GFX940:       ; %bb.0:
8690; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8691; GFX940-NEXT:    ;;#ASMSTART
8692; GFX940-NEXT:    ; def s[0:1]
8693; GFX940-NEXT:    ;;#ASMEND
8694; GFX940-NEXT:    ;;#ASMSTART
8695; GFX940-NEXT:    ; def s[2:3]
8696; GFX940-NEXT:    ;;#ASMEND
8697; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
8698; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
8699; GFX940-NEXT:    s_mov_b32 s9, s0
8700; GFX940-NEXT:    ;;#ASMSTART
8701; GFX940-NEXT:    ; use s[8:9]
8702; GFX940-NEXT:    ;;#ASMEND
8703; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
8704  %vec0 = call <4 x half> asm "; def $0", "=s"()
8705  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[3], lane 2 = %vec0[0].
8706  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 0>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8707  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8708  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8709  ret void
8710}
8711
8712define void @s_shuffle_v3f16_v4f16__7_7_1() {
8713; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_1:
8714; GFX900:       ; %bb.0:
8715; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8716; GFX900-NEXT:    ;;#ASMSTART
8717; GFX900-NEXT:    ; def s[4:5]
8718; GFX900-NEXT:    ;;#ASMEND
8719; GFX900-NEXT:    ;;#ASMSTART
8720; GFX900-NEXT:    ; def s[6:7]
8721; GFX900-NEXT:    ;;#ASMEND
8722; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
8723; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
8724; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8725; GFX900-NEXT:    ;;#ASMSTART
8726; GFX900-NEXT:    ; use s[8:9]
8727; GFX900-NEXT:    ;;#ASMEND
8728; GFX900-NEXT:    s_setpc_b64 s[30:31]
8729;
8730; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_1:
8731; GFX90A:       ; %bb.0:
8732; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8733; GFX90A-NEXT:    ;;#ASMSTART
8734; GFX90A-NEXT:    ; def s[4:5]
8735; GFX90A-NEXT:    ;;#ASMEND
8736; GFX90A-NEXT:    ;;#ASMSTART
8737; GFX90A-NEXT:    ; def s[6:7]
8738; GFX90A-NEXT:    ;;#ASMEND
8739; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
8740; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
8741; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8742; GFX90A-NEXT:    ;;#ASMSTART
8743; GFX90A-NEXT:    ; use s[8:9]
8744; GFX90A-NEXT:    ;;#ASMEND
8745; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8746;
8747; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_1:
8748; GFX940:       ; %bb.0:
8749; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8750; GFX940-NEXT:    ;;#ASMSTART
8751; GFX940-NEXT:    ; def s[0:1]
8752; GFX940-NEXT:    ;;#ASMEND
8753; GFX940-NEXT:    ;;#ASMSTART
8754; GFX940-NEXT:    ; def s[2:3]
8755; GFX940-NEXT:    ;;#ASMEND
8756; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
8757; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
8758; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
8759; GFX940-NEXT:    ;;#ASMSTART
8760; GFX940-NEXT:    ; use s[8:9]
8761; GFX940-NEXT:    ;;#ASMEND
8762; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
8763  %vec0 = call <4 x half> asm "; def $0", "=s"()
8764  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[3], lane 2 = %vec0[1].
8765  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 1>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8766  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8767  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8768  ret void
8769}
8770
8771define void @s_shuffle_v3f16_v4f16__7_7_2() {
8772; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_2:
8773; GFX900:       ; %bb.0:
8774; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8775; GFX900-NEXT:    ;;#ASMSTART
8776; GFX900-NEXT:    ; def s[4:5]
8777; GFX900-NEXT:    ;;#ASMEND
8778; GFX900-NEXT:    ;;#ASMSTART
8779; GFX900-NEXT:    ; def s[8:9]
8780; GFX900-NEXT:    ;;#ASMEND
8781; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
8782; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8783; GFX900-NEXT:    ;;#ASMSTART
8784; GFX900-NEXT:    ; use s[8:9]
8785; GFX900-NEXT:    ;;#ASMEND
8786; GFX900-NEXT:    s_setpc_b64 s[30:31]
8787;
8788; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_2:
8789; GFX90A:       ; %bb.0:
8790; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8791; GFX90A-NEXT:    ;;#ASMSTART
8792; GFX90A-NEXT:    ; def s[4:5]
8793; GFX90A-NEXT:    ;;#ASMEND
8794; GFX90A-NEXT:    ;;#ASMSTART
8795; GFX90A-NEXT:    ; def s[8:9]
8796; GFX90A-NEXT:    ;;#ASMEND
8797; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
8798; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8799; GFX90A-NEXT:    ;;#ASMSTART
8800; GFX90A-NEXT:    ; use s[8:9]
8801; GFX90A-NEXT:    ;;#ASMEND
8802; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8803;
8804; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_2:
8805; GFX940:       ; %bb.0:
8806; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8807; GFX940-NEXT:    ;;#ASMSTART
8808; GFX940-NEXT:    ; def s[0:1]
8809; GFX940-NEXT:    ;;#ASMEND
8810; GFX940-NEXT:    ;;#ASMSTART
8811; GFX940-NEXT:    ; def s[8:9]
8812; GFX940-NEXT:    ;;#ASMEND
8813; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
8814; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
8815; GFX940-NEXT:    ;;#ASMSTART
8816; GFX940-NEXT:    ; use s[8:9]
8817; GFX940-NEXT:    ;;#ASMEND
8818; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
8819  %vec0 = call <4 x half> asm "; def $0", "=s"()
8820  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[3], lane 2 = %vec0[2].
8821  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 2>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8822  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8823  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8824  ret void
8825}
8826
8827define void @s_shuffle_v3f16_v4f16__7_7_3() {
8828; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_3:
8829; GFX900:       ; %bb.0:
8830; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8831; GFX900-NEXT:    ;;#ASMSTART
8832; GFX900-NEXT:    ; def s[4:5]
8833; GFX900-NEXT:    ;;#ASMEND
8834; GFX900-NEXT:    ;;#ASMSTART
8835; GFX900-NEXT:    ; def s[6:7]
8836; GFX900-NEXT:    ;;#ASMEND
8837; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
8838; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
8839; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8840; GFX900-NEXT:    ;;#ASMSTART
8841; GFX900-NEXT:    ; use s[8:9]
8842; GFX900-NEXT:    ;;#ASMEND
8843; GFX900-NEXT:    s_setpc_b64 s[30:31]
8844;
8845; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_3:
8846; GFX90A:       ; %bb.0:
8847; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8848; GFX90A-NEXT:    ;;#ASMSTART
8849; GFX90A-NEXT:    ; def s[4:5]
8850; GFX90A-NEXT:    ;;#ASMEND
8851; GFX90A-NEXT:    ;;#ASMSTART
8852; GFX90A-NEXT:    ; def s[6:7]
8853; GFX90A-NEXT:    ;;#ASMEND
8854; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
8855; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
8856; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8857; GFX90A-NEXT:    ;;#ASMSTART
8858; GFX90A-NEXT:    ; use s[8:9]
8859; GFX90A-NEXT:    ;;#ASMEND
8860; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8861;
8862; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_3:
8863; GFX940:       ; %bb.0:
8864; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8865; GFX940-NEXT:    ;;#ASMSTART
8866; GFX940-NEXT:    ; def s[0:1]
8867; GFX940-NEXT:    ;;#ASMEND
8868; GFX940-NEXT:    ;;#ASMSTART
8869; GFX940-NEXT:    ; def s[2:3]
8870; GFX940-NEXT:    ;;#ASMEND
8871; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
8872; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
8873; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
8874; GFX940-NEXT:    ;;#ASMSTART
8875; GFX940-NEXT:    ; use s[8:9]
8876; GFX940-NEXT:    ;;#ASMEND
8877; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
8878  %vec0 = call <4 x half> asm "; def $0", "=s"()
8879  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[3], lane 2 = %vec0[3].
8880  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 3>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8881  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8882  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8883  ret void
8884}
8885
8886define void @s_shuffle_v3f16_v4f16__7_7_4() {
8887; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_4:
8888; GFX900:       ; %bb.0:
8889; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8890; GFX900-NEXT:    ;;#ASMSTART
8891; GFX900-NEXT:    ; def s[4:5]
8892; GFX900-NEXT:    ;;#ASMEND
8893; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
8894; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8895; GFX900-NEXT:    s_mov_b32 s9, s4
8896; GFX900-NEXT:    ;;#ASMSTART
8897; GFX900-NEXT:    ; use s[8:9]
8898; GFX900-NEXT:    ;;#ASMEND
8899; GFX900-NEXT:    s_setpc_b64 s[30:31]
8900;
8901; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_4:
8902; GFX90A:       ; %bb.0:
8903; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8904; GFX90A-NEXT:    ;;#ASMSTART
8905; GFX90A-NEXT:    ; def s[4:5]
8906; GFX90A-NEXT:    ;;#ASMEND
8907; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
8908; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
8909; GFX90A-NEXT:    s_mov_b32 s9, s4
8910; GFX90A-NEXT:    ;;#ASMSTART
8911; GFX90A-NEXT:    ; use s[8:9]
8912; GFX90A-NEXT:    ;;#ASMEND
8913; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8914;
8915; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_4:
8916; GFX940:       ; %bb.0:
8917; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8918; GFX940-NEXT:    ;;#ASMSTART
8919; GFX940-NEXT:    ; def s[0:1]
8920; GFX940-NEXT:    ;;#ASMEND
8921; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
8922; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
8923; GFX940-NEXT:    s_mov_b32 s9, s0
8924; GFX940-NEXT:    ;;#ASMSTART
8925; GFX940-NEXT:    ; use s[8:9]
8926; GFX940-NEXT:    ;;#ASMEND
8927; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
  ; The mask never references %vec0; the expected output above contains a
  ; single def.
8928  %vec0 = call <4 x half> asm "; def $0", "=s"()
8929  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[3], lane 2 = %vec1[0].
8930  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 4>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8931  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8932  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8933  ret void
8934}
8935
8936define void @s_shuffle_v3f16_v4f16__7_7_5() {
8937; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_5:
8938; GFX900:       ; %bb.0:
8939; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8940; GFX900-NEXT:    ;;#ASMSTART
8941; GFX900-NEXT:    ; def s[4:5]
8942; GFX900-NEXT:    ;;#ASMEND
8943; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
8944; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
8945; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8946; GFX900-NEXT:    ;;#ASMSTART
8947; GFX900-NEXT:    ; use s[8:9]
8948; GFX900-NEXT:    ;;#ASMEND
8949; GFX900-NEXT:    s_setpc_b64 s[30:31]
8950;
8951; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_5:
8952; GFX90A:       ; %bb.0:
8953; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8954; GFX90A-NEXT:    ;;#ASMSTART
8955; GFX90A-NEXT:    ; def s[4:5]
8956; GFX90A-NEXT:    ;;#ASMEND
8957; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
8958; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
8959; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8960; GFX90A-NEXT:    ;;#ASMSTART
8961; GFX90A-NEXT:    ; use s[8:9]
8962; GFX90A-NEXT:    ;;#ASMEND
8963; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8964;
8965; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_5:
8966; GFX940:       ; %bb.0:
8967; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8968; GFX940-NEXT:    ;;#ASMSTART
8969; GFX940-NEXT:    ; def s[0:1]
8970; GFX940-NEXT:    ;;#ASMEND
8971; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
8972; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
8973; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
8974; GFX940-NEXT:    ;;#ASMSTART
8975; GFX940-NEXT:    ; use s[8:9]
8976; GFX940-NEXT:    ;;#ASMEND
8977; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
  ; The mask never references %vec0; the expected output above contains a
  ; single def.
8978  %vec0 = call <4 x half> asm "; def $0", "=s"()
8979  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[3], lane 2 = %vec1[1].
8980  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 5>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
8981  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
8982  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
8983  ret void
8984}
8985
8986define void @s_shuffle_v3f16_v4f16__7_7_6() {
8987; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_6:
8988; GFX900:       ; %bb.0:
8989; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8990; GFX900-NEXT:    ;;#ASMSTART
8991; GFX900-NEXT:    ; def s[8:9]
8992; GFX900-NEXT:    ;;#ASMEND
8993; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
8994; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
8995; GFX900-NEXT:    ;;#ASMSTART
8996; GFX900-NEXT:    ; use s[8:9]
8997; GFX900-NEXT:    ;;#ASMEND
8998; GFX900-NEXT:    s_setpc_b64 s[30:31]
8999;
9000; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_6:
9001; GFX90A:       ; %bb.0:
9002; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9003; GFX90A-NEXT:    ;;#ASMSTART
9004; GFX90A-NEXT:    ; def s[8:9]
9005; GFX90A-NEXT:    ;;#ASMEND
9006; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
9007; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
9008; GFX90A-NEXT:    ;;#ASMSTART
9009; GFX90A-NEXT:    ; use s[8:9]
9010; GFX90A-NEXT:    ;;#ASMEND
9011; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9012;
9013; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_6:
9014; GFX940:       ; %bb.0:
9015; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9016; GFX940-NEXT:    ;;#ASMSTART
9017; GFX940-NEXT:    ; def s[8:9]
9018; GFX940-NEXT:    ;;#ASMEND
9019; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
9020; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
9021; GFX940-NEXT:    ;;#ASMSTART
9022; GFX940-NEXT:    ; use s[8:9]
9023; GFX940-NEXT:    ;;#ASMEND
9024; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
  ; The mask never references %vec0; the expected output above contains a
  ; single def.
9025  %vec0 = call <4 x half> asm "; def $0", "=s"()
9026  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so the result is
  ; lane 0 = %vec1[3], lane 1 = %vec1[3], lane 2 = %vec1[2].
9027  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 6>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
9028  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9029  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9030  ret void
9031}
9032
9033define void @s_shuffle_v3f16_v4f16__7_7_7() {
9034; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_7:
9035; GFX900:       ; %bb.0:
9036; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9037; GFX900-NEXT:    ;;#ASMSTART
9038; GFX900-NEXT:    ; def s[4:5]
9039; GFX900-NEXT:    ;;#ASMEND
9040; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
9041; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
9042; GFX900-NEXT:    ;;#ASMSTART
9043; GFX900-NEXT:    ; use s[8:9]
9044; GFX900-NEXT:    ;;#ASMEND
9045; GFX900-NEXT:    s_setpc_b64 s[30:31]
9046;
9047; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_7:
9048; GFX90A:       ; %bb.0:
9049; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9050; GFX90A-NEXT:    ;;#ASMSTART
9051; GFX90A-NEXT:    ; def s[4:5]
9052; GFX90A-NEXT:    ;;#ASMEND
9053; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
9054; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
9055; GFX90A-NEXT:    ;;#ASMSTART
9056; GFX90A-NEXT:    ; use s[8:9]
9057; GFX90A-NEXT:    ;;#ASMEND
9058; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9059;
9060; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_7:
9061; GFX940:       ; %bb.0:
9062; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9063; GFX940-NEXT:    ;;#ASMSTART
9064; GFX940-NEXT:    ; def s[0:1]
9065; GFX940-NEXT:    ;;#ASMEND
9066; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
9067; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
9068; GFX940-NEXT:    ;;#ASMSTART
9069; GFX940-NEXT:    ; use s[8:9]
9070; GFX940-NEXT:    ;;#ASMEND
9071; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Scalar-register variant: both <4 x half> inputs are produced by "=s" inline
  ; asm and the shuffled result is consumed by inline asm pinned to s[8:9].
  ; The mask never references %vec0; the expected output above contains a
  ; single def.
9072  %vec0 = call <4 x half> asm "; def $0", "=s"()
9073  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from %vec0 and 4-7 from %vec1, so all three lanes
  ; of the result are %vec1[3].
9074  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 7>
  ; Pad the <3 x half> result to <4 x half> (lane 3 poison), the operand type
  ; passed to the "use" asm below.
9075  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9076  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9077  ret void
9078}
9079
; SGPR case, mask <poison, 0, 0>: lane 0 is a don't-care, lanes 1 and 2 are
; element 0 of %vec0 (single-input shuffle, second operand is poison).
; Expected lowering: a left shift by 16 places element 0 in the high half of
; s8 (lane 1); a plain copy of the low source dword into s9 supplies lane 2.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9080define void @s_shuffle_v3f16_v4f16__u_0_0() {
9081; GFX900-LABEL: s_shuffle_v3f16_v4f16__u_0_0:
9082; GFX900:       ; %bb.0:
9083; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9084; GFX900-NEXT:    ;;#ASMSTART
9085; GFX900-NEXT:    ; def s[4:5]
9086; GFX900-NEXT:    ;;#ASMEND
9087; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
9088; GFX900-NEXT:    s_mov_b32 s9, s4
9089; GFX900-NEXT:    ;;#ASMSTART
9090; GFX900-NEXT:    ; use s[8:9]
9091; GFX900-NEXT:    ;;#ASMEND
9092; GFX900-NEXT:    s_setpc_b64 s[30:31]
9093;
9094; GFX90A-LABEL: s_shuffle_v3f16_v4f16__u_0_0:
9095; GFX90A:       ; %bb.0:
9096; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9097; GFX90A-NEXT:    ;;#ASMSTART
9098; GFX90A-NEXT:    ; def s[4:5]
9099; GFX90A-NEXT:    ;;#ASMEND
9100; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
9101; GFX90A-NEXT:    s_mov_b32 s9, s4
9102; GFX90A-NEXT:    ;;#ASMSTART
9103; GFX90A-NEXT:    ; use s[8:9]
9104; GFX90A-NEXT:    ;;#ASMEND
9105; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9106;
9107; GFX940-LABEL: s_shuffle_v3f16_v4f16__u_0_0:
9108; GFX940:       ; %bb.0:
9109; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9110; GFX940-NEXT:    ;;#ASMSTART
9111; GFX940-NEXT:    ; def s[0:1]
9112; GFX940-NEXT:    ;;#ASMEND
9113; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
9114; GFX940-NEXT:    s_mov_b32 s9, s0
9115; GFX940-NEXT:    ;;#ASMSTART
9116; GFX940-NEXT:    ; use s[8:9]
9117; GFX940-NEXT:    ;;#ASMEND
9118; GFX940-NEXT:    s_setpc_b64 s[30:31]
9119  %vec0 = call <4 x half> asm "; def $0", "=s"()
9120  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 0, i32 0>
9121  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9122  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9123  ret void
9124}
9125
; SGPR case, mask zeroinitializer (i.e. <0, 0, 0>): splat of element 0 of
; %vec0 into all three result lanes.
; Expected lowering: pack the low half of the low source dword into both
; halves of s8 (lanes 0 and 1) and copy the low source dword into s9 (lane 2).
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9126define void @s_shuffle_v3f16_v4f16__0_0_0() {
9127; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_0_0:
9128; GFX900:       ; %bb.0:
9129; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9130; GFX900-NEXT:    ;;#ASMSTART
9131; GFX900-NEXT:    ; def s[4:5]
9132; GFX900-NEXT:    ;;#ASMEND
9133; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
9134; GFX900-NEXT:    s_mov_b32 s9, s4
9135; GFX900-NEXT:    ;;#ASMSTART
9136; GFX900-NEXT:    ; use s[8:9]
9137; GFX900-NEXT:    ;;#ASMEND
9138; GFX900-NEXT:    s_setpc_b64 s[30:31]
9139;
9140; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_0_0:
9141; GFX90A:       ; %bb.0:
9142; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9143; GFX90A-NEXT:    ;;#ASMSTART
9144; GFX90A-NEXT:    ; def s[4:5]
9145; GFX90A-NEXT:    ;;#ASMEND
9146; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
9147; GFX90A-NEXT:    s_mov_b32 s9, s4
9148; GFX90A-NEXT:    ;;#ASMSTART
9149; GFX90A-NEXT:    ; use s[8:9]
9150; GFX90A-NEXT:    ;;#ASMEND
9151; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9152;
9153; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_0_0:
9154; GFX940:       ; %bb.0:
9155; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9156; GFX940-NEXT:    ;;#ASMSTART
9157; GFX940-NEXT:    ; def s[0:1]
9158; GFX940-NEXT:    ;;#ASMEND
9159; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
9160; GFX940-NEXT:    s_mov_b32 s9, s0
9161; GFX940-NEXT:    ;;#ASMSTART
9162; GFX940-NEXT:    ; use s[8:9]
9163; GFX940-NEXT:    ;;#ASMEND
9164; GFX940-NEXT:    s_setpc_b64 s[30:31]
9165  %vec0 = call <4 x half> asm "; def $0", "=s"()
9166  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> zeroinitializer
9167  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9168  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9169  ret void
9170}
9171
; SGPR case, mask <1, 0, 0>: lane 0 is element 1 of %vec0, lanes 1 and 2 are
; element 0 of %vec0 (single-input shuffle, second operand is poison).
; Expected lowering: element 1 lives in the high half of the low source dword,
; so it is shifted down by 16 first; the pack then forms {elt1, elt0} in s8
; and the low source dword is copied into s9 for lane 2.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9172define void @s_shuffle_v3f16_v4f16__1_0_0() {
9173; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_0_0:
9174; GFX900:       ; %bb.0:
9175; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9176; GFX900-NEXT:    ;;#ASMSTART
9177; GFX900-NEXT:    ; def s[4:5]
9178; GFX900-NEXT:    ;;#ASMEND
9179; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
9180; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9181; GFX900-NEXT:    s_mov_b32 s9, s4
9182; GFX900-NEXT:    ;;#ASMSTART
9183; GFX900-NEXT:    ; use s[8:9]
9184; GFX900-NEXT:    ;;#ASMEND
9185; GFX900-NEXT:    s_setpc_b64 s[30:31]
9186;
9187; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_0_0:
9188; GFX90A:       ; %bb.0:
9189; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9190; GFX90A-NEXT:    ;;#ASMSTART
9191; GFX90A-NEXT:    ; def s[4:5]
9192; GFX90A-NEXT:    ;;#ASMEND
9193; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
9194; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9195; GFX90A-NEXT:    s_mov_b32 s9, s4
9196; GFX90A-NEXT:    ;;#ASMSTART
9197; GFX90A-NEXT:    ; use s[8:9]
9198; GFX90A-NEXT:    ;;#ASMEND
9199; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9200;
9201; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_0_0:
9202; GFX940:       ; %bb.0:
9203; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9204; GFX940-NEXT:    ;;#ASMSTART
9205; GFX940-NEXT:    ; def s[0:1]
9206; GFX940-NEXT:    ;;#ASMEND
9207; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
9208; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9209; GFX940-NEXT:    s_mov_b32 s9, s0
9210; GFX940-NEXT:    ;;#ASMSTART
9211; GFX940-NEXT:    ; use s[8:9]
9212; GFX940-NEXT:    ;;#ASMEND
9213; GFX940-NEXT:    s_setpc_b64 s[30:31]
9214  %vec0 = call <4 x half> asm "; def $0", "=s"()
9215  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 0, i32 0>
9216  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9217  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9218  ret void
9219}
9220
; SGPR case, mask <2, 0, 0>: lane 0 is element 2 of %vec0, lanes 1 and 2 are
; element 0 of %vec0 (single-input shuffle, second operand is poison).
; Expected lowering: element 2 is already the low half of the upper source
; dword, so no shift is needed; the pack forms {elt2, elt0} in s8 and the low
; source dword is copied into s9 for lane 2.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9221define void @s_shuffle_v3f16_v4f16__2_0_0() {
9222; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_0_0:
9223; GFX900:       ; %bb.0:
9224; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9225; GFX900-NEXT:    ;;#ASMSTART
9226; GFX900-NEXT:    ; def s[4:5]
9227; GFX900-NEXT:    ;;#ASMEND
9228; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9229; GFX900-NEXT:    s_mov_b32 s9, s4
9230; GFX900-NEXT:    ;;#ASMSTART
9231; GFX900-NEXT:    ; use s[8:9]
9232; GFX900-NEXT:    ;;#ASMEND
9233; GFX900-NEXT:    s_setpc_b64 s[30:31]
9234;
9235; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_0_0:
9236; GFX90A:       ; %bb.0:
9237; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9238; GFX90A-NEXT:    ;;#ASMSTART
9239; GFX90A-NEXT:    ; def s[4:5]
9240; GFX90A-NEXT:    ;;#ASMEND
9241; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9242; GFX90A-NEXT:    s_mov_b32 s9, s4
9243; GFX90A-NEXT:    ;;#ASMSTART
9244; GFX90A-NEXT:    ; use s[8:9]
9245; GFX90A-NEXT:    ;;#ASMEND
9246; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9247;
9248; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_0_0:
9249; GFX940:       ; %bb.0:
9250; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9251; GFX940-NEXT:    ;;#ASMSTART
9252; GFX940-NEXT:    ; def s[0:1]
9253; GFX940-NEXT:    ;;#ASMEND
9254; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9255; GFX940-NEXT:    s_mov_b32 s9, s0
9256; GFX940-NEXT:    ;;#ASMSTART
9257; GFX940-NEXT:    ; use s[8:9]
9258; GFX940-NEXT:    ;;#ASMEND
9259; GFX940-NEXT:    s_setpc_b64 s[30:31]
9260  %vec0 = call <4 x half> asm "; def $0", "=s"()
9261  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 0, i32 0>
9262  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9263  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9264  ret void
9265}
9266
; SGPR case, mask <3, 0, 0>: lane 0 is element 3 of %vec0, lanes 1 and 2 are
; element 0 of %vec0 (single-input shuffle, second operand is poison).
; Expected lowering: element 3 is the high half of the upper source dword, so
; it is shifted down by 16 in place; the pack forms {elt3, elt0} in s8 and the
; low source dword is copied into s9 for lane 2.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9267define void @s_shuffle_v3f16_v4f16__3_0_0() {
9268; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_0_0:
9269; GFX900:       ; %bb.0:
9270; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9271; GFX900-NEXT:    ;;#ASMSTART
9272; GFX900-NEXT:    ; def s[4:5]
9273; GFX900-NEXT:    ;;#ASMEND
9274; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
9275; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9276; GFX900-NEXT:    s_mov_b32 s9, s4
9277; GFX900-NEXT:    ;;#ASMSTART
9278; GFX900-NEXT:    ; use s[8:9]
9279; GFX900-NEXT:    ;;#ASMEND
9280; GFX900-NEXT:    s_setpc_b64 s[30:31]
9281;
9282; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_0_0:
9283; GFX90A:       ; %bb.0:
9284; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9285; GFX90A-NEXT:    ;;#ASMSTART
9286; GFX90A-NEXT:    ; def s[4:5]
9287; GFX90A-NEXT:    ;;#ASMEND
9288; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
9289; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9290; GFX90A-NEXT:    s_mov_b32 s9, s4
9291; GFX90A-NEXT:    ;;#ASMSTART
9292; GFX90A-NEXT:    ; use s[8:9]
9293; GFX90A-NEXT:    ;;#ASMEND
9294; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9295;
9296; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_0_0:
9297; GFX940:       ; %bb.0:
9298; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9299; GFX940-NEXT:    ;;#ASMSTART
9300; GFX940-NEXT:    ; def s[0:1]
9301; GFX940-NEXT:    ;;#ASMEND
9302; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
9303; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9304; GFX940-NEXT:    s_mov_b32 s9, s0
9305; GFX940-NEXT:    ;;#ASMSTART
9306; GFX940-NEXT:    ; use s[8:9]
9307; GFX940-NEXT:    ;;#ASMEND
9308; GFX940-NEXT:    s_setpc_b64 s[30:31]
9309  %vec0 = call <4 x half> asm "; def $0", "=s"()
9310  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 0, i32 0>
9311  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9312  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9313  ret void
9314}
9315
; SGPR case, mask <4, 0, 0>: lanes 1 and 2 are element 0 of %vec0. Mask index
; 4 addresses element 0 of the second shufflevector operand, which is poison
; in this function, so lane 0 carries no defined value.
; Expected lowering is therefore the same instruction sequence as the
; __u_0_0 test: shift element 0 into the high half of s8 and copy the low
; source dword into s9.
; NOTE(review): unlike the __5_0_0/__6_0_0/__7_0_0 tests, no %vec1 is defined
; here, so this does not exercise a real two-input shuffle - confirm whether
; that is intended by the test generator.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9316define void @s_shuffle_v3f16_v4f16__4_0_0() {
9317; GFX900-LABEL: s_shuffle_v3f16_v4f16__4_0_0:
9318; GFX900:       ; %bb.0:
9319; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9320; GFX900-NEXT:    ;;#ASMSTART
9321; GFX900-NEXT:    ; def s[4:5]
9322; GFX900-NEXT:    ;;#ASMEND
9323; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
9324; GFX900-NEXT:    s_mov_b32 s9, s4
9325; GFX900-NEXT:    ;;#ASMSTART
9326; GFX900-NEXT:    ; use s[8:9]
9327; GFX900-NEXT:    ;;#ASMEND
9328; GFX900-NEXT:    s_setpc_b64 s[30:31]
9329;
9330; GFX90A-LABEL: s_shuffle_v3f16_v4f16__4_0_0:
9331; GFX90A:       ; %bb.0:
9332; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9333; GFX90A-NEXT:    ;;#ASMSTART
9334; GFX90A-NEXT:    ; def s[4:5]
9335; GFX90A-NEXT:    ;;#ASMEND
9336; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
9337; GFX90A-NEXT:    s_mov_b32 s9, s4
9338; GFX90A-NEXT:    ;;#ASMSTART
9339; GFX90A-NEXT:    ; use s[8:9]
9340; GFX90A-NEXT:    ;;#ASMEND
9341; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9342;
9343; GFX940-LABEL: s_shuffle_v3f16_v4f16__4_0_0:
9344; GFX940:       ; %bb.0:
9345; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9346; GFX940-NEXT:    ;;#ASMSTART
9347; GFX940-NEXT:    ; def s[0:1]
9348; GFX940-NEXT:    ;;#ASMEND
9349; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
9350; GFX940-NEXT:    s_mov_b32 s9, s0
9351; GFX940-NEXT:    ;;#ASMSTART
9352; GFX940-NEXT:    ; use s[8:9]
9353; GFX940-NEXT:    ;;#ASMEND
9354; GFX940-NEXT:    s_setpc_b64 s[30:31]
9355  %vec0 = call <4 x half> asm "; def $0", "=s"()
9356  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 0, i32 0>
9357  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9358  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9359  ret void
9360}
9361
; SGPR two-input case, mask <5, 0, 0>: lane 0 is element 1 of %vec1 (mask
; indices 4-7 address the second operand), lanes 1 and 2 are element 0 of
; %vec0.
; Expected lowering: both inputs are defined; element 1 of the second input is
; the high half of its low dword, so it is shifted down by 16, packed with
; element 0 of the first input into s8, and the first input's low dword is
; copied into s9 for lane 2.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9362define void @s_shuffle_v3f16_v4f16__5_0_0() {
9363; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_0_0:
9364; GFX900:       ; %bb.0:
9365; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9366; GFX900-NEXT:    ;;#ASMSTART
9367; GFX900-NEXT:    ; def s[4:5]
9368; GFX900-NEXT:    ;;#ASMEND
9369; GFX900-NEXT:    ;;#ASMSTART
9370; GFX900-NEXT:    ; def s[6:7]
9371; GFX900-NEXT:    ;;#ASMEND
9372; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
9373; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9374; GFX900-NEXT:    s_mov_b32 s9, s4
9375; GFX900-NEXT:    ;;#ASMSTART
9376; GFX900-NEXT:    ; use s[8:9]
9377; GFX900-NEXT:    ;;#ASMEND
9378; GFX900-NEXT:    s_setpc_b64 s[30:31]
9379;
9380; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_0_0:
9381; GFX90A:       ; %bb.0:
9382; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9383; GFX90A-NEXT:    ;;#ASMSTART
9384; GFX90A-NEXT:    ; def s[4:5]
9385; GFX90A-NEXT:    ;;#ASMEND
9386; GFX90A-NEXT:    ;;#ASMSTART
9387; GFX90A-NEXT:    ; def s[6:7]
9388; GFX90A-NEXT:    ;;#ASMEND
9389; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
9390; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9391; GFX90A-NEXT:    s_mov_b32 s9, s4
9392; GFX90A-NEXT:    ;;#ASMSTART
9393; GFX90A-NEXT:    ; use s[8:9]
9394; GFX90A-NEXT:    ;;#ASMEND
9395; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9396;
9397; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_0_0:
9398; GFX940:       ; %bb.0:
9399; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9400; GFX940-NEXT:    ;;#ASMSTART
9401; GFX940-NEXT:    ; def s[0:1]
9402; GFX940-NEXT:    ;;#ASMEND
9403; GFX940-NEXT:    ;;#ASMSTART
9404; GFX940-NEXT:    ; def s[2:3]
9405; GFX940-NEXT:    ;;#ASMEND
9406; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
9407; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9408; GFX940-NEXT:    s_mov_b32 s9, s0
9409; GFX940-NEXT:    ;;#ASMSTART
9410; GFX940-NEXT:    ; use s[8:9]
9411; GFX940-NEXT:    ;;#ASMEND
9412; GFX940-NEXT:    s_setpc_b64 s[30:31]
9413  %vec0 = call <4 x half> asm "; def $0", "=s"()
9414  %vec1 = call <4 x half> asm "; def $0", "=s"()
9415  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 0, i32 0>
9416  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9417  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9418  ret void
9419}
9420
; SGPR two-input case, mask <6, 0, 0>: lane 0 is element 2 of %vec1 (mask
; indices 4-7 address the second operand), lanes 1 and 2 are element 0 of
; %vec0.
; Expected lowering: element 2 of the second input is already the low half of
; its upper dword, so no shift is emitted; it is packed with element 0 of the
; first input into s8, and the first input's low dword is copied into s9.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9421define void @s_shuffle_v3f16_v4f16__6_0_0() {
9422; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_0_0:
9423; GFX900:       ; %bb.0:
9424; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9425; GFX900-NEXT:    ;;#ASMSTART
9426; GFX900-NEXT:    ; def s[4:5]
9427; GFX900-NEXT:    ;;#ASMEND
9428; GFX900-NEXT:    ;;#ASMSTART
9429; GFX900-NEXT:    ; def s[6:7]
9430; GFX900-NEXT:    ;;#ASMEND
9431; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
9432; GFX900-NEXT:    s_mov_b32 s9, s4
9433; GFX900-NEXT:    ;;#ASMSTART
9434; GFX900-NEXT:    ; use s[8:9]
9435; GFX900-NEXT:    ;;#ASMEND
9436; GFX900-NEXT:    s_setpc_b64 s[30:31]
9437;
9438; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_0_0:
9439; GFX90A:       ; %bb.0:
9440; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9441; GFX90A-NEXT:    ;;#ASMSTART
9442; GFX90A-NEXT:    ; def s[4:5]
9443; GFX90A-NEXT:    ;;#ASMEND
9444; GFX90A-NEXT:    ;;#ASMSTART
9445; GFX90A-NEXT:    ; def s[6:7]
9446; GFX90A-NEXT:    ;;#ASMEND
9447; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
9448; GFX90A-NEXT:    s_mov_b32 s9, s4
9449; GFX90A-NEXT:    ;;#ASMSTART
9450; GFX90A-NEXT:    ; use s[8:9]
9451; GFX90A-NEXT:    ;;#ASMEND
9452; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9453;
9454; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_0_0:
9455; GFX940:       ; %bb.0:
9456; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9457; GFX940-NEXT:    ;;#ASMSTART
9458; GFX940-NEXT:    ; def s[0:1]
9459; GFX940-NEXT:    ;;#ASMEND
9460; GFX940-NEXT:    ;;#ASMSTART
9461; GFX940-NEXT:    ; def s[2:3]
9462; GFX940-NEXT:    ;;#ASMEND
9463; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
9464; GFX940-NEXT:    s_mov_b32 s9, s0
9465; GFX940-NEXT:    ;;#ASMSTART
9466; GFX940-NEXT:    ; use s[8:9]
9467; GFX940-NEXT:    ;;#ASMEND
9468; GFX940-NEXT:    s_setpc_b64 s[30:31]
9469  %vec0 = call <4 x half> asm "; def $0", "=s"()
9470  %vec1 = call <4 x half> asm "; def $0", "=s"()
9471  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 0, i32 0>
9472  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9473  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9474  ret void
9475}
9476
; SGPR two-input case, mask <7, 0, 0>: lane 0 is element 3 of %vec1 (mask
; indices 4-7 address the second operand), lanes 1 and 2 are element 0 of
; %vec0.
; Expected lowering: element 3 of the second input is the high half of its
; upper dword, so it is shifted down by 16, packed with element 0 of the
; first input into s8, and the first input's low dword is copied into s9.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9477define void @s_shuffle_v3f16_v4f16__7_0_0() {
9478; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_0:
9479; GFX900:       ; %bb.0:
9480; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9481; GFX900-NEXT:    ;;#ASMSTART
9482; GFX900-NEXT:    ; def s[4:5]
9483; GFX900-NEXT:    ;;#ASMEND
9484; GFX900-NEXT:    ;;#ASMSTART
9485; GFX900-NEXT:    ; def s[6:7]
9486; GFX900-NEXT:    ;;#ASMEND
9487; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
9488; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9489; GFX900-NEXT:    s_mov_b32 s9, s4
9490; GFX900-NEXT:    ;;#ASMSTART
9491; GFX900-NEXT:    ; use s[8:9]
9492; GFX900-NEXT:    ;;#ASMEND
9493; GFX900-NEXT:    s_setpc_b64 s[30:31]
9494;
9495; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_0:
9496; GFX90A:       ; %bb.0:
9497; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9498; GFX90A-NEXT:    ;;#ASMSTART
9499; GFX90A-NEXT:    ; def s[4:5]
9500; GFX90A-NEXT:    ;;#ASMEND
9501; GFX90A-NEXT:    ;;#ASMSTART
9502; GFX90A-NEXT:    ; def s[6:7]
9503; GFX90A-NEXT:    ;;#ASMEND
9504; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
9505; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
9506; GFX90A-NEXT:    s_mov_b32 s9, s4
9507; GFX90A-NEXT:    ;;#ASMSTART
9508; GFX90A-NEXT:    ; use s[8:9]
9509; GFX90A-NEXT:    ;;#ASMEND
9510; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9511;
9512; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_0:
9513; GFX940:       ; %bb.0:
9514; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9515; GFX940-NEXT:    ;;#ASMSTART
9516; GFX940-NEXT:    ; def s[0:1]
9517; GFX940-NEXT:    ;;#ASMEND
9518; GFX940-NEXT:    ;;#ASMSTART
9519; GFX940-NEXT:    ; def s[2:3]
9520; GFX940-NEXT:    ;;#ASMEND
9521; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
9522; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
9523; GFX940-NEXT:    s_mov_b32 s9, s0
9524; GFX940-NEXT:    ;;#ASMSTART
9525; GFX940-NEXT:    ; use s[8:9]
9526; GFX940-NEXT:    ;;#ASMEND
9527; GFX940-NEXT:    s_setpc_b64 s[30:31]
9528  %vec0 = call <4 x half> asm "; def $0", "=s"()
9529  %vec1 = call <4 x half> asm "; def $0", "=s"()
9530  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 0>
9531  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9532  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9533  ret void
9534}
9535
; SGPR two-input case, mask <7, poison, 0>: lane 0 is element 3 of %vec1,
; lane 1 is a don't-care, lane 2 is element 0 of %vec0.
; Expected lowering: because lane 1 is unconstrained, no pack is emitted; the
; shift right by 16 writes element 3 of the second input directly into s8,
; and the first input's low dword is copied into s9 for lane 2.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9536define void @s_shuffle_v3f16_v4f16__7_u_0() {
9537; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_0:
9538; GFX900:       ; %bb.0:
9539; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9540; GFX900-NEXT:    ;;#ASMSTART
9541; GFX900-NEXT:    ; def s[4:5]
9542; GFX900-NEXT:    ;;#ASMEND
9543; GFX900-NEXT:    ;;#ASMSTART
9544; GFX900-NEXT:    ; def s[6:7]
9545; GFX900-NEXT:    ;;#ASMEND
9546; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
9547; GFX900-NEXT:    s_mov_b32 s9, s4
9548; GFX900-NEXT:    ;;#ASMSTART
9549; GFX900-NEXT:    ; use s[8:9]
9550; GFX900-NEXT:    ;;#ASMEND
9551; GFX900-NEXT:    s_setpc_b64 s[30:31]
9552;
9553; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_0:
9554; GFX90A:       ; %bb.0:
9555; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9556; GFX90A-NEXT:    ;;#ASMSTART
9557; GFX90A-NEXT:    ; def s[4:5]
9558; GFX90A-NEXT:    ;;#ASMEND
9559; GFX90A-NEXT:    ;;#ASMSTART
9560; GFX90A-NEXT:    ; def s[6:7]
9561; GFX90A-NEXT:    ;;#ASMEND
9562; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
9563; GFX90A-NEXT:    s_mov_b32 s9, s4
9564; GFX90A-NEXT:    ;;#ASMSTART
9565; GFX90A-NEXT:    ; use s[8:9]
9566; GFX90A-NEXT:    ;;#ASMEND
9567; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9568;
9569; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_0:
9570; GFX940:       ; %bb.0:
9571; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9572; GFX940-NEXT:    ;;#ASMSTART
9573; GFX940-NEXT:    ; def s[0:1]
9574; GFX940-NEXT:    ;;#ASMEND
9575; GFX940-NEXT:    ;;#ASMSTART
9576; GFX940-NEXT:    ; def s[2:3]
9577; GFX940-NEXT:    ;;#ASMEND
9578; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
9579; GFX940-NEXT:    s_mov_b32 s9, s0
9580; GFX940-NEXT:    ;;#ASMSTART
9581; GFX940-NEXT:    ; use s[8:9]
9582; GFX940-NEXT:    ;;#ASMEND
9583; GFX940-NEXT:    s_setpc_b64 s[30:31]
9584  %vec0 = call <4 x half> asm "; def $0", "=s"()
9585  %vec1 = call <4 x half> asm "; def $0", "=s"()
9586  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 0>
9587  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9588  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9589  ret void
9590}
9591
; SGPR two-input case, mask <7, 1, 0>: lane 0 is element 3 of %vec1, lane 1
; is element 1 of %vec0, lane 2 is element 0 of %vec0.
; Expected lowering: both selected odd elements sit in the high half of their
; dword, so two right shifts by 16 are emitted before the pack that forms
; s8; the first input's low dword is copied into s9 for lane 2.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9592define void @s_shuffle_v3f16_v4f16__7_1_0() {
9593; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_0:
9594; GFX900:       ; %bb.0:
9595; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9596; GFX900-NEXT:    ;;#ASMSTART
9597; GFX900-NEXT:    ; def s[4:5]
9598; GFX900-NEXT:    ;;#ASMEND
9599; GFX900-NEXT:    ;;#ASMSTART
9600; GFX900-NEXT:    ; def s[6:7]
9601; GFX900-NEXT:    ;;#ASMEND
9602; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
9603; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
9604; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9605; GFX900-NEXT:    s_mov_b32 s9, s4
9606; GFX900-NEXT:    ;;#ASMSTART
9607; GFX900-NEXT:    ; use s[8:9]
9608; GFX900-NEXT:    ;;#ASMEND
9609; GFX900-NEXT:    s_setpc_b64 s[30:31]
9610;
9611; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_0:
9612; GFX90A:       ; %bb.0:
9613; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9614; GFX90A-NEXT:    ;;#ASMSTART
9615; GFX90A-NEXT:    ; def s[4:5]
9616; GFX90A-NEXT:    ;;#ASMEND
9617; GFX90A-NEXT:    ;;#ASMSTART
9618; GFX90A-NEXT:    ; def s[6:7]
9619; GFX90A-NEXT:    ;;#ASMEND
9620; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
9621; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
9622; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9623; GFX90A-NEXT:    s_mov_b32 s9, s4
9624; GFX90A-NEXT:    ;;#ASMSTART
9625; GFX90A-NEXT:    ; use s[8:9]
9626; GFX90A-NEXT:    ;;#ASMEND
9627; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9628;
9629; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_0:
9630; GFX940:       ; %bb.0:
9631; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9632; GFX940-NEXT:    ;;#ASMSTART
9633; GFX940-NEXT:    ; def s[0:1]
9634; GFX940-NEXT:    ;;#ASMEND
9635; GFX940-NEXT:    ;;#ASMSTART
9636; GFX940-NEXT:    ; def s[2:3]
9637; GFX940-NEXT:    ;;#ASMEND
9638; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
9639; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
9640; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
9641; GFX940-NEXT:    s_mov_b32 s9, s0
9642; GFX940-NEXT:    ;;#ASMSTART
9643; GFX940-NEXT:    ; use s[8:9]
9644; GFX940-NEXT:    ;;#ASMEND
9645; GFX940-NEXT:    s_setpc_b64 s[30:31]
9646  %vec0 = call <4 x half> asm "; def $0", "=s"()
9647  %vec1 = call <4 x half> asm "; def $0", "=s"()
9648  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 0>
9649  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9650  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9651  ret void
9652}
9653
; SGPR two-input case, mask <7, 2, 0>: lane 0 is element 3 of %vec1, lane 1
; is element 2 of %vec0, lane 2 is element 0 of %vec0.
; Expected lowering: only element 3 of the second input needs a right shift
; by 16; element 2 of the first input is already the low half of its upper
; dword and feeds the pack directly. The first input's low dword is copied
; into s9 for lane 2.
; Note the expected instruction order: the "; def" of the higher-numbered
; register pair and its shift are checked before the "; def" of the
; lower-numbered pair, unlike the sibling tests where both defs come first.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9654define void @s_shuffle_v3f16_v4f16__7_2_0() {
9655; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_0:
9656; GFX900:       ; %bb.0:
9657; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9658; GFX900-NEXT:    ;;#ASMSTART
9659; GFX900-NEXT:    ; def s[6:7]
9660; GFX900-NEXT:    ;;#ASMEND
9661; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
9662; GFX900-NEXT:    ;;#ASMSTART
9663; GFX900-NEXT:    ; def s[4:5]
9664; GFX900-NEXT:    ;;#ASMEND
9665; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9666; GFX900-NEXT:    s_mov_b32 s9, s4
9667; GFX900-NEXT:    ;;#ASMSTART
9668; GFX900-NEXT:    ; use s[8:9]
9669; GFX900-NEXT:    ;;#ASMEND
9670; GFX900-NEXT:    s_setpc_b64 s[30:31]
9671;
9672; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_0:
9673; GFX90A:       ; %bb.0:
9674; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9675; GFX90A-NEXT:    ;;#ASMSTART
9676; GFX90A-NEXT:    ; def s[6:7]
9677; GFX90A-NEXT:    ;;#ASMEND
9678; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
9679; GFX90A-NEXT:    ;;#ASMSTART
9680; GFX90A-NEXT:    ; def s[4:5]
9681; GFX90A-NEXT:    ;;#ASMEND
9682; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9683; GFX90A-NEXT:    s_mov_b32 s9, s4
9684; GFX90A-NEXT:    ;;#ASMSTART
9685; GFX90A-NEXT:    ; use s[8:9]
9686; GFX90A-NEXT:    ;;#ASMEND
9687; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9688;
9689; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_0:
9690; GFX940:       ; %bb.0:
9691; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9692; GFX940-NEXT:    ;;#ASMSTART
9693; GFX940-NEXT:    ; def s[2:3]
9694; GFX940-NEXT:    ;;#ASMEND
9695; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
9696; GFX940-NEXT:    ;;#ASMSTART
9697; GFX940-NEXT:    ; def s[0:1]
9698; GFX940-NEXT:    ;;#ASMEND
9699; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
9700; GFX940-NEXT:    s_mov_b32 s9, s0
9701; GFX940-NEXT:    ;;#ASMSTART
9702; GFX940-NEXT:    ; use s[8:9]
9703; GFX940-NEXT:    ;;#ASMEND
9704; GFX940-NEXT:    s_setpc_b64 s[30:31]
9705  %vec0 = call <4 x half> asm "; def $0", "=s"()
9706  %vec1 = call <4 x half> asm "; def $0", "=s"()
9707  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 0>
9708  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9709  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9710  ret void
9711}
9712
; SGPR two-input case, mask <7, 3, 0>: lane 0 is element 3 of %vec1, lane 1
; is element 3 of %vec0, lane 2 is element 0 of %vec0.
; Expected lowering: element 3 of each input is the high half of that input's
; upper dword, so both upper dwords are shifted right by 16 before the pack
; that forms s8; the first input's low dword is copied into s9 for lane 2.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9713define void @s_shuffle_v3f16_v4f16__7_3_0() {
9714; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_0:
9715; GFX900:       ; %bb.0:
9716; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9717; GFX900-NEXT:    ;;#ASMSTART
9718; GFX900-NEXT:    ; def s[4:5]
9719; GFX900-NEXT:    ;;#ASMEND
9720; GFX900-NEXT:    ;;#ASMSTART
9721; GFX900-NEXT:    ; def s[6:7]
9722; GFX900-NEXT:    ;;#ASMEND
9723; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
9724; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
9725; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9726; GFX900-NEXT:    s_mov_b32 s9, s4
9727; GFX900-NEXT:    ;;#ASMSTART
9728; GFX900-NEXT:    ; use s[8:9]
9729; GFX900-NEXT:    ;;#ASMEND
9730; GFX900-NEXT:    s_setpc_b64 s[30:31]
9731;
9732; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_0:
9733; GFX90A:       ; %bb.0:
9734; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9735; GFX90A-NEXT:    ;;#ASMSTART
9736; GFX90A-NEXT:    ; def s[4:5]
9737; GFX90A-NEXT:    ;;#ASMEND
9738; GFX90A-NEXT:    ;;#ASMSTART
9739; GFX90A-NEXT:    ; def s[6:7]
9740; GFX90A-NEXT:    ;;#ASMEND
9741; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
9742; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
9743; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9744; GFX90A-NEXT:    s_mov_b32 s9, s4
9745; GFX90A-NEXT:    ;;#ASMSTART
9746; GFX90A-NEXT:    ; use s[8:9]
9747; GFX90A-NEXT:    ;;#ASMEND
9748; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9749;
9750; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_0:
9751; GFX940:       ; %bb.0:
9752; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9753; GFX940-NEXT:    ;;#ASMSTART
9754; GFX940-NEXT:    ; def s[0:1]
9755; GFX940-NEXT:    ;;#ASMEND
9756; GFX940-NEXT:    ;;#ASMSTART
9757; GFX940-NEXT:    ; def s[2:3]
9758; GFX940-NEXT:    ;;#ASMEND
9759; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
9760; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
9761; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
9762; GFX940-NEXT:    s_mov_b32 s9, s0
9763; GFX940-NEXT:    ;;#ASMSTART
9764; GFX940-NEXT:    ; use s[8:9]
9765; GFX940-NEXT:    ;;#ASMEND
9766; GFX940-NEXT:    s_setpc_b64 s[30:31]
9767  %vec0 = call <4 x half> asm "; def $0", "=s"()
9768  %vec1 = call <4 x half> asm "; def $0", "=s"()
9769  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 0>
9770  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9771  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9772  ret void
9773}
9774
; SGPR two-input case, mask <7, 4, 0>: lane 0 is element 3 of %vec1, lane 1
; is element 0 of %vec1, lane 2 is element 0 of %vec0.
; Expected lowering: element 3 of the second input is shifted down by 16 and
; packed with the low half of the second input's low dword (element 0, no
; shift needed) into s8; the first input's low dword is copied into s9.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9775define void @s_shuffle_v3f16_v4f16__7_4_0() {
9776; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_0:
9777; GFX900:       ; %bb.0:
9778; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9779; GFX900-NEXT:    ;;#ASMSTART
9780; GFX900-NEXT:    ; def s[4:5]
9781; GFX900-NEXT:    ;;#ASMEND
9782; GFX900-NEXT:    ;;#ASMSTART
9783; GFX900-NEXT:    ; def s[6:7]
9784; GFX900-NEXT:    ;;#ASMEND
9785; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
9786; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
9787; GFX900-NEXT:    s_mov_b32 s9, s4
9788; GFX900-NEXT:    ;;#ASMSTART
9789; GFX900-NEXT:    ; use s[8:9]
9790; GFX900-NEXT:    ;;#ASMEND
9791; GFX900-NEXT:    s_setpc_b64 s[30:31]
9792;
9793; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_0:
9794; GFX90A:       ; %bb.0:
9795; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9796; GFX90A-NEXT:    ;;#ASMSTART
9797; GFX90A-NEXT:    ; def s[4:5]
9798; GFX90A-NEXT:    ;;#ASMEND
9799; GFX90A-NEXT:    ;;#ASMSTART
9800; GFX90A-NEXT:    ; def s[6:7]
9801; GFX90A-NEXT:    ;;#ASMEND
9802; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
9803; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
9804; GFX90A-NEXT:    s_mov_b32 s9, s4
9805; GFX90A-NEXT:    ;;#ASMSTART
9806; GFX90A-NEXT:    ; use s[8:9]
9807; GFX90A-NEXT:    ;;#ASMEND
9808; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9809;
9810; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_0:
9811; GFX940:       ; %bb.0:
9812; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9813; GFX940-NEXT:    ;;#ASMSTART
9814; GFX940-NEXT:    ; def s[0:1]
9815; GFX940-NEXT:    ;;#ASMEND
9816; GFX940-NEXT:    ;;#ASMSTART
9817; GFX940-NEXT:    ; def s[2:3]
9818; GFX940-NEXT:    ;;#ASMEND
9819; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
9820; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s2
9821; GFX940-NEXT:    s_mov_b32 s9, s0
9822; GFX940-NEXT:    ;;#ASMSTART
9823; GFX940-NEXT:    ; use s[8:9]
9824; GFX940-NEXT:    ;;#ASMEND
9825; GFX940-NEXT:    s_setpc_b64 s[30:31]
9826  %vec0 = call <4 x half> asm "; def $0", "=s"()
9827  %vec1 = call <4 x half> asm "; def $0", "=s"()
9828  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 0>
9829  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9830  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9831  ret void
9832}
9833
; SGPR two-input case, mask <7, 5, 0>: lane 0 is element 3 of %vec1, lane 1
; is element 1 of %vec1, lane 2 is element 0 of %vec0.
; Expected lowering: both selected elements are high halves of the second
; input's dwords, so each dword is shifted right by 16 (low dword first, then
; upper dword) before the pack that forms s8; the first input's low dword is
; copied into s9 for lane 2.
; %extend3 widens the result to <4 x half> (poison tail) for the s[8:9] use.
9834define void @s_shuffle_v3f16_v4f16__7_5_0() {
9835; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_0:
9836; GFX900:       ; %bb.0:
9837; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9838; GFX900-NEXT:    ;;#ASMSTART
9839; GFX900-NEXT:    ; def s[4:5]
9840; GFX900-NEXT:    ;;#ASMEND
9841; GFX900-NEXT:    ;;#ASMSTART
9842; GFX900-NEXT:    ; def s[6:7]
9843; GFX900-NEXT:    ;;#ASMEND
9844; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
9845; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
9846; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9847; GFX900-NEXT:    s_mov_b32 s9, s4
9848; GFX900-NEXT:    ;;#ASMSTART
9849; GFX900-NEXT:    ; use s[8:9]
9850; GFX900-NEXT:    ;;#ASMEND
9851; GFX900-NEXT:    s_setpc_b64 s[30:31]
9852;
9853; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_0:
9854; GFX90A:       ; %bb.0:
9855; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9856; GFX90A-NEXT:    ;;#ASMSTART
9857; GFX90A-NEXT:    ; def s[4:5]
9858; GFX90A-NEXT:    ;;#ASMEND
9859; GFX90A-NEXT:    ;;#ASMSTART
9860; GFX90A-NEXT:    ; def s[6:7]
9861; GFX90A-NEXT:    ;;#ASMEND
9862; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
9863; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
9864; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
9865; GFX90A-NEXT:    s_mov_b32 s9, s4
9866; GFX90A-NEXT:    ;;#ASMSTART
9867; GFX90A-NEXT:    ; use s[8:9]
9868; GFX90A-NEXT:    ;;#ASMEND
9869; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9870;
9871; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_0:
9872; GFX940:       ; %bb.0:
9873; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9874; GFX940-NEXT:    ;;#ASMSTART
9875; GFX940-NEXT:    ; def s[0:1]
9876; GFX940-NEXT:    ;;#ASMEND
9877; GFX940-NEXT:    ;;#ASMSTART
9878; GFX940-NEXT:    ; def s[2:3]
9879; GFX940-NEXT:    ;;#ASMEND
9880; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
9881; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
9882; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
9883; GFX940-NEXT:    s_mov_b32 s9, s0
9884; GFX940-NEXT:    ;;#ASMSTART
9885; GFX940-NEXT:    ; use s[8:9]
9886; GFX940-NEXT:    ;;#ASMEND
9887; GFX940-NEXT:    s_setpc_b64 s[30:31]
9888  %vec0 = call <4 x half> asm "; def $0", "=s"()
9889  %vec1 = call <4 x half> asm "; def $0", "=s"()
9890  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 0>
9891  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9892  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9893  ret void
9894}
9895
; Scalar-register case: shuffles two asm-defined <4 x half> values with mask
; <7, 6, 0>. Mask indices 4-7 address %vec1, so the result is
; { %vec1[3], %vec1[2], %vec0[0] }. Elements 7 and 6 are the two halves of the
; same source dword, so the expected output needs only one s_lshr_b32 before
; the s_pack_ll_b32_b16 that swaps them.
9896define void @s_shuffle_v3f16_v4f16__7_6_0() {
9897; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_0:
9898; GFX900:       ; %bb.0:
9899; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9900; GFX900-NEXT:    ;;#ASMSTART
9901; GFX900-NEXT:    ; def s[4:5]
9902; GFX900-NEXT:    ;;#ASMEND
9903; GFX900-NEXT:    ;;#ASMSTART
9904; GFX900-NEXT:    ; def s[6:7]
9905; GFX900-NEXT:    ;;#ASMEND
9906; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
9907; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
9908; GFX900-NEXT:    s_mov_b32 s9, s4
9909; GFX900-NEXT:    ;;#ASMSTART
9910; GFX900-NEXT:    ; use s[8:9]
9911; GFX900-NEXT:    ;;#ASMEND
9912; GFX900-NEXT:    s_setpc_b64 s[30:31]
9913;
9914; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_0:
9915; GFX90A:       ; %bb.0:
9916; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9917; GFX90A-NEXT:    ;;#ASMSTART
9918; GFX90A-NEXT:    ; def s[4:5]
9919; GFX90A-NEXT:    ;;#ASMEND
9920; GFX90A-NEXT:    ;;#ASMSTART
9921; GFX90A-NEXT:    ; def s[6:7]
9922; GFX90A-NEXT:    ;;#ASMEND
9923; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
9924; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
9925; GFX90A-NEXT:    s_mov_b32 s9, s4
9926; GFX90A-NEXT:    ;;#ASMSTART
9927; GFX90A-NEXT:    ; use s[8:9]
9928; GFX90A-NEXT:    ;;#ASMEND
9929; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9930;
9931; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_0:
9932; GFX940:       ; %bb.0:
9933; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9934; GFX940-NEXT:    ;;#ASMSTART
9935; GFX940-NEXT:    ; def s[0:1]
9936; GFX940-NEXT:    ;;#ASMEND
9937; GFX940-NEXT:    ;;#ASMSTART
9938; GFX940-NEXT:    ; def s[2:3]
9939; GFX940-NEXT:    ;;#ASMEND
9940; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
9941; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s3
9942; GFX940-NEXT:    s_mov_b32 s9, s0
9943; GFX940-NEXT:    ;;#ASMSTART
9944; GFX940-NEXT:    ; use s[8:9]
9945; GFX940-NEXT:    ;;#ASMEND
9946; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
9947  %vec0 = call <4 x half> asm "; def $0", "=s"()
9948  %vec1 = call <4 x half> asm "; def $0", "=s"()
9949  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 0>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
9950  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9951  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9952  ret void
9953}
9954
; Scalar-register case with a single asm-defined <4 x half> input and mask
; <poison, 1, 1>: result lane 0 is unconstrained, lanes 1 and 2 both take
; %vec0[1]. One shared check block covers every -mcpu in the RUN lines; it
; expects the input to be defined directly in s[8:9] and a single s_lshr_b32
; to produce s9 from s8.
9955define void @s_shuffle_v3f16_v4f16__u_1_1() {
9956; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_1_1:
9957; GFX9:       ; %bb.0:
9958; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9959; GFX9-NEXT:    ;;#ASMSTART
9960; GFX9-NEXT:    ; def s[8:9]
9961; GFX9-NEXT:    ;;#ASMEND
9962; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
9963; GFX9-NEXT:    ;;#ASMSTART
9964; GFX9-NEXT:    ; use s[8:9]
9965; GFX9-NEXT:    ;;#ASMEND
9966; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; The input comes from inline asm with an "=s" output constraint.
9967  %vec0 = call <4 x half> asm "; def $0", "=s"()
9968  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 1, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
9969  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9970  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9971  ret void
9972}
9973
; Scalar-register case with a single asm-defined <4 x half> input and mask
; <0, 1, 1>: lanes 0 and 1 keep %vec0[0] and %vec0[1] in place, lane 2 repeats
; %vec0[1]. One shared check block covers every -mcpu in the RUN lines; it
; expects the input to be defined directly in s[8:9] and a single s_lshr_b32
; to produce s9 from s8.
9974define void @s_shuffle_v3f16_v4f16__0_1_1() {
9975; GFX9-LABEL: s_shuffle_v3f16_v4f16__0_1_1:
9976; GFX9:       ; %bb.0:
9977; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9978; GFX9-NEXT:    ;;#ASMSTART
9979; GFX9-NEXT:    ; def s[8:9]
9980; GFX9-NEXT:    ;;#ASMEND
9981; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
9982; GFX9-NEXT:    ;;#ASMSTART
9983; GFX9-NEXT:    ; use s[8:9]
9984; GFX9-NEXT:    ;;#ASMEND
9985; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; The input comes from inline asm with an "=s" output constraint.
9986  %vec0 = call <4 x half> asm "; def $0", "=s"()
9987  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
9988  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
9989  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
9990  ret void
9991}
9992
; Scalar-register case with a single asm-defined <4 x half> input and mask
; <1, 1, 1>: every result lane is a copy of %vec0[1]. The expected output
; shifts that element down once into s9 and then packs s9 with itself to form
; s8. The checks record the llc output for each -mcpu in the RUN lines.
9993define void @s_shuffle_v3f16_v4f16__1_1_1() {
9994; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_1_1:
9995; GFX900:       ; %bb.0:
9996; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9997; GFX900-NEXT:    ;;#ASMSTART
9998; GFX900-NEXT:    ; def s[4:5]
9999; GFX900-NEXT:    ;;#ASMEND
10000; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10001; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
10002; GFX900-NEXT:    ;;#ASMSTART
10003; GFX900-NEXT:    ; use s[8:9]
10004; GFX900-NEXT:    ;;#ASMEND
10005; GFX900-NEXT:    s_setpc_b64 s[30:31]
10006;
10007; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_1_1:
10008; GFX90A:       ; %bb.0:
10009; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10010; GFX90A-NEXT:    ;;#ASMSTART
10011; GFX90A-NEXT:    ; def s[4:5]
10012; GFX90A-NEXT:    ;;#ASMEND
10013; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10014; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
10015; GFX90A-NEXT:    ;;#ASMSTART
10016; GFX90A-NEXT:    ; use s[8:9]
10017; GFX90A-NEXT:    ;;#ASMEND
10018; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10019;
10020; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_1_1:
10021; GFX940:       ; %bb.0:
10022; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10023; GFX940-NEXT:    ;;#ASMSTART
10024; GFX940-NEXT:    ; def s[0:1]
10025; GFX940-NEXT:    ;;#ASMEND
10026; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10027; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
10028; GFX940-NEXT:    ;;#ASMSTART
10029; GFX940-NEXT:    ; use s[8:9]
10030; GFX940-NEXT:    ;;#ASMEND
10031; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The input comes from inline asm with an "=s" output constraint.
10032  %vec0 = call <4 x half> asm "; def $0", "=s"()
10033  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 1, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10034  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10035  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10036  ret void
10037}
10038
; Scalar-register case with a single asm-defined <4 x half> input and mask
; <2, 1, 1>: result = { %vec0[2], %vec0[1], %vec0[1] }. The expected output
; shifts element 1 down into s9 and packs the second input dword with s9 to
; form s8. The checks record the llc output for each -mcpu in the RUN lines.
10039define void @s_shuffle_v3f16_v4f16__2_1_1() {
10040; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_1_1:
10041; GFX900:       ; %bb.0:
10042; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10043; GFX900-NEXT:    ;;#ASMSTART
10044; GFX900-NEXT:    ; def s[4:5]
10045; GFX900-NEXT:    ;;#ASMEND
10046; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10047; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
10048; GFX900-NEXT:    ;;#ASMSTART
10049; GFX900-NEXT:    ; use s[8:9]
10050; GFX900-NEXT:    ;;#ASMEND
10051; GFX900-NEXT:    s_setpc_b64 s[30:31]
10052;
10053; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_1_1:
10054; GFX90A:       ; %bb.0:
10055; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10056; GFX90A-NEXT:    ;;#ASMSTART
10057; GFX90A-NEXT:    ; def s[4:5]
10058; GFX90A-NEXT:    ;;#ASMEND
10059; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10060; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
10061; GFX90A-NEXT:    ;;#ASMSTART
10062; GFX90A-NEXT:    ; use s[8:9]
10063; GFX90A-NEXT:    ;;#ASMEND
10064; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10065;
10066; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_1_1:
10067; GFX940:       ; %bb.0:
10068; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10069; GFX940-NEXT:    ;;#ASMSTART
10070; GFX940-NEXT:    ; def s[0:1]
10071; GFX940-NEXT:    ;;#ASMEND
10072; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10073; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
10074; GFX940-NEXT:    ;;#ASMSTART
10075; GFX940-NEXT:    ; use s[8:9]
10076; GFX940-NEXT:    ;;#ASMEND
10077; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The input comes from inline asm with an "=s" output constraint.
10078  %vec0 = call <4 x half> asm "; def $0", "=s"()
10079  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 1, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10080  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10081  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10082  ret void
10083}
10084
; Scalar-register case with a single asm-defined <4 x half> input and mask
; <3, 1, 1>: result = { %vec0[3], %vec0[1], %vec0[1] }. Both selected elements
; are odd-numbered, so the expected output uses two s_lshr_b32 shifts (one per
; input dword) followed by one s_pack_ll_b32_b16. The checks record the llc
; output for each -mcpu in the RUN lines.
10085define void @s_shuffle_v3f16_v4f16__3_1_1() {
10086; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_1_1:
10087; GFX900:       ; %bb.0:
10088; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10089; GFX900-NEXT:    ;;#ASMSTART
10090; GFX900-NEXT:    ; def s[4:5]
10091; GFX900-NEXT:    ;;#ASMEND
10092; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10093; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
10094; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10095; GFX900-NEXT:    ;;#ASMSTART
10096; GFX900-NEXT:    ; use s[8:9]
10097; GFX900-NEXT:    ;;#ASMEND
10098; GFX900-NEXT:    s_setpc_b64 s[30:31]
10099;
10100; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_1_1:
10101; GFX90A:       ; %bb.0:
10102; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10103; GFX90A-NEXT:    ;;#ASMSTART
10104; GFX90A-NEXT:    ; def s[4:5]
10105; GFX90A-NEXT:    ;;#ASMEND
10106; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10107; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
10108; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10109; GFX90A-NEXT:    ;;#ASMSTART
10110; GFX90A-NEXT:    ; use s[8:9]
10111; GFX90A-NEXT:    ;;#ASMEND
10112; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10113;
10114; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_1_1:
10115; GFX940:       ; %bb.0:
10116; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10117; GFX940-NEXT:    ;;#ASMSTART
10118; GFX940-NEXT:    ; def s[0:1]
10119; GFX940-NEXT:    ;;#ASMEND
10120; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10121; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
10122; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10123; GFX940-NEXT:    ;;#ASMSTART
10124; GFX940-NEXT:    ; use s[8:9]
10125; GFX940-NEXT:    ;;#ASMEND
10126; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The input comes from inline asm with an "=s" output constraint.
10127  %vec0 = call <4 x half> asm "; def $0", "=s"()
10128  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 1, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10129  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10130  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10131  ret void
10132}
10133
; Scalar-register case with a single asm-defined <4 x half> input and mask
; <4, 1, 1>. Mask index 4 addresses lane 0 of the second shuffle operand, which
; is poison here, so result lane 0 is unconstrained; lanes 1 and 2 both take
; %vec0[1]. One shared check block covers every -mcpu in the RUN lines; it
; expects the input to be defined directly in s[8:9] and a single s_lshr_b32
; to produce s9 from s8.
10134define void @s_shuffle_v3f16_v4f16__4_1_1() {
10135; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_1_1:
10136; GFX9:       ; %bb.0:
10137; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10138; GFX9-NEXT:    ;;#ASMSTART
10139; GFX9-NEXT:    ; def s[8:9]
10140; GFX9-NEXT:    ;;#ASMEND
10141; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
10142; GFX9-NEXT:    ;;#ASMSTART
10143; GFX9-NEXT:    ; use s[8:9]
10144; GFX9-NEXT:    ;;#ASMEND
10145; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; The input comes from inline asm with an "=s" output constraint.
10146  %vec0 = call <4 x half> asm "; def $0", "=s"()
10147  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 1, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10148  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10149  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10150  ret void
10151}
10152
; Scalar-register case: shuffles two asm-defined <4 x half> values with mask
; <5, 1, 1>. Mask indices 4-7 address %vec1, so the result is
; { %vec1[1], %vec0[1], %vec0[1] }. Both selected elements are odd-numbered,
; so the expected output uses two s_lshr_b32 shifts followed by one
; s_pack_ll_b32_b16. The checks record the llc output for each -mcpu in the
; RUN lines.
10153define void @s_shuffle_v3f16_v4f16__5_1_1() {
10154; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_1_1:
10155; GFX900:       ; %bb.0:
10156; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10157; GFX900-NEXT:    ;;#ASMSTART
10158; GFX900-NEXT:    ; def s[4:5]
10159; GFX900-NEXT:    ;;#ASMEND
10160; GFX900-NEXT:    ;;#ASMSTART
10161; GFX900-NEXT:    ; def s[6:7]
10162; GFX900-NEXT:    ;;#ASMEND
10163; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10164; GFX900-NEXT:    s_lshr_b32 s4, s6, 16
10165; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10166; GFX900-NEXT:    ;;#ASMSTART
10167; GFX900-NEXT:    ; use s[8:9]
10168; GFX900-NEXT:    ;;#ASMEND
10169; GFX900-NEXT:    s_setpc_b64 s[30:31]
10170;
10171; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_1_1:
10172; GFX90A:       ; %bb.0:
10173; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10174; GFX90A-NEXT:    ;;#ASMSTART
10175; GFX90A-NEXT:    ; def s[4:5]
10176; GFX90A-NEXT:    ;;#ASMEND
10177; GFX90A-NEXT:    ;;#ASMSTART
10178; GFX90A-NEXT:    ; def s[6:7]
10179; GFX90A-NEXT:    ;;#ASMEND
10180; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10181; GFX90A-NEXT:    s_lshr_b32 s4, s6, 16
10182; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10183; GFX90A-NEXT:    ;;#ASMSTART
10184; GFX90A-NEXT:    ; use s[8:9]
10185; GFX90A-NEXT:    ;;#ASMEND
10186; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10187;
10188; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_1_1:
10189; GFX940:       ; %bb.0:
10190; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10191; GFX940-NEXT:    ;;#ASMSTART
10192; GFX940-NEXT:    ; def s[0:1]
10193; GFX940-NEXT:    ;;#ASMEND
10194; GFX940-NEXT:    ;;#ASMSTART
10195; GFX940-NEXT:    ; def s[2:3]
10196; GFX940-NEXT:    ;;#ASMEND
10197; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10198; GFX940-NEXT:    s_lshr_b32 s0, s2, 16
10199; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10200; GFX940-NEXT:    ;;#ASMSTART
10201; GFX940-NEXT:    ; use s[8:9]
10202; GFX940-NEXT:    ;;#ASMEND
10203; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
10204  %vec0 = call <4 x half> asm "; def $0", "=s"()
10205  %vec1 = call <4 x half> asm "; def $0", "=s"()
10206  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 1, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10207  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10208  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10209  ret void
10210}
10211
; Scalar-register case: shuffles two asm-defined <4 x half> values with mask
; <6, 1, 1>. Mask indices 4-7 address %vec1, so the result is
; { %vec1[2], %vec0[1], %vec0[1] }. Only element 1 is odd-numbered, so the
; expected output has a single s_lshr_b32; note that the checks expect that
; shift to be emitted between the two asm def blocks. The checks record the
; llc output for each -mcpu in the RUN lines.
10212define void @s_shuffle_v3f16_v4f16__6_1_1() {
10213; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_1_1:
10214; GFX900:       ; %bb.0:
10215; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10216; GFX900-NEXT:    ;;#ASMSTART
10217; GFX900-NEXT:    ; def s[4:5]
10218; GFX900-NEXT:    ;;#ASMEND
10219; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10220; GFX900-NEXT:    ;;#ASMSTART
10221; GFX900-NEXT:    ; def s[6:7]
10222; GFX900-NEXT:    ;;#ASMEND
10223; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s9
10224; GFX900-NEXT:    ;;#ASMSTART
10225; GFX900-NEXT:    ; use s[8:9]
10226; GFX900-NEXT:    ;;#ASMEND
10227; GFX900-NEXT:    s_setpc_b64 s[30:31]
10228;
10229; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_1_1:
10230; GFX90A:       ; %bb.0:
10231; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10232; GFX90A-NEXT:    ;;#ASMSTART
10233; GFX90A-NEXT:    ; def s[4:5]
10234; GFX90A-NEXT:    ;;#ASMEND
10235; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10236; GFX90A-NEXT:    ;;#ASMSTART
10237; GFX90A-NEXT:    ; def s[6:7]
10238; GFX90A-NEXT:    ;;#ASMEND
10239; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s9
10240; GFX90A-NEXT:    ;;#ASMSTART
10241; GFX90A-NEXT:    ; use s[8:9]
10242; GFX90A-NEXT:    ;;#ASMEND
10243; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10244;
10245; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_1_1:
10246; GFX940:       ; %bb.0:
10247; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10248; GFX940-NEXT:    ;;#ASMSTART
10249; GFX940-NEXT:    ; def s[0:1]
10250; GFX940-NEXT:    ;;#ASMEND
10251; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10252; GFX940-NEXT:    ;;#ASMSTART
10253; GFX940-NEXT:    ; def s[2:3]
10254; GFX940-NEXT:    ;;#ASMEND
10255; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s9
10256; GFX940-NEXT:    ;;#ASMSTART
10257; GFX940-NEXT:    ; use s[8:9]
10258; GFX940-NEXT:    ;;#ASMEND
10259; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
10260  %vec0 = call <4 x half> asm "; def $0", "=s"()
10261  %vec1 = call <4 x half> asm "; def $0", "=s"()
10262  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 1, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10263  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10264  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10265  ret void
10266}
10267
; Scalar-register case: shuffles two asm-defined <4 x half> values with mask
; <7, 1, 1>. Mask indices 4-7 address %vec1, so the result is
; { %vec1[3], %vec0[1], %vec0[1] }. Both selected elements are odd-numbered,
; so the expected output uses two s_lshr_b32 shifts followed by one
; s_pack_ll_b32_b16. The checks record the llc output for each -mcpu in the
; RUN lines.
10268define void @s_shuffle_v3f16_v4f16__7_1_1() {
10269; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_1:
10270; GFX900:       ; %bb.0:
10271; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10272; GFX900-NEXT:    ;;#ASMSTART
10273; GFX900-NEXT:    ; def s[4:5]
10274; GFX900-NEXT:    ;;#ASMEND
10275; GFX900-NEXT:    ;;#ASMSTART
10276; GFX900-NEXT:    ; def s[6:7]
10277; GFX900-NEXT:    ;;#ASMEND
10278; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10279; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
10280; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10281; GFX900-NEXT:    ;;#ASMSTART
10282; GFX900-NEXT:    ; use s[8:9]
10283; GFX900-NEXT:    ;;#ASMEND
10284; GFX900-NEXT:    s_setpc_b64 s[30:31]
10285;
10286; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_1:
10287; GFX90A:       ; %bb.0:
10288; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10289; GFX90A-NEXT:    ;;#ASMSTART
10290; GFX90A-NEXT:    ; def s[4:5]
10291; GFX90A-NEXT:    ;;#ASMEND
10292; GFX90A-NEXT:    ;;#ASMSTART
10293; GFX90A-NEXT:    ; def s[6:7]
10294; GFX90A-NEXT:    ;;#ASMEND
10295; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10296; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
10297; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10298; GFX90A-NEXT:    ;;#ASMSTART
10299; GFX90A-NEXT:    ; use s[8:9]
10300; GFX90A-NEXT:    ;;#ASMEND
10301; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10302;
10303; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_1:
10304; GFX940:       ; %bb.0:
10305; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10306; GFX940-NEXT:    ;;#ASMSTART
10307; GFX940-NEXT:    ; def s[0:1]
10308; GFX940-NEXT:    ;;#ASMEND
10309; GFX940-NEXT:    ;;#ASMSTART
10310; GFX940-NEXT:    ; def s[2:3]
10311; GFX940-NEXT:    ;;#ASMEND
10312; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10313; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
10314; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10315; GFX940-NEXT:    ;;#ASMSTART
10316; GFX940-NEXT:    ; use s[8:9]
10317; GFX940-NEXT:    ;;#ASMEND
10318; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
10319  %vec0 = call <4 x half> asm "; def $0", "=s"()
10320  %vec1 = call <4 x half> asm "; def $0", "=s"()
10321  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10322  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10323  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10324  ret void
10325}
10326
; Scalar-register case: shuffles two asm-defined <4 x half> values with mask
; <7, poison, 1>. Mask indices 4-7 address %vec1, so lane 0 is %vec1[3], lane 1
; is unconstrained and lane 2 is %vec0[1]. With lane 1 free, the expected
; output needs no pack instruction: one s_lshr_b32 writes s8 and another
; writes s9. The checks record the llc output for each -mcpu in the RUN lines.
10327define void @s_shuffle_v3f16_v4f16__7_u_1() {
10328; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_1:
10329; GFX900:       ; %bb.0:
10330; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10331; GFX900-NEXT:    ;;#ASMSTART
10332; GFX900-NEXT:    ; def s[4:5]
10333; GFX900-NEXT:    ;;#ASMEND
10334; GFX900-NEXT:    ;;#ASMSTART
10335; GFX900-NEXT:    ; def s[6:7]
10336; GFX900-NEXT:    ;;#ASMEND
10337; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10338; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
10339; GFX900-NEXT:    ;;#ASMSTART
10340; GFX900-NEXT:    ; use s[8:9]
10341; GFX900-NEXT:    ;;#ASMEND
10342; GFX900-NEXT:    s_setpc_b64 s[30:31]
10343;
10344; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_1:
10345; GFX90A:       ; %bb.0:
10346; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10347; GFX90A-NEXT:    ;;#ASMSTART
10348; GFX90A-NEXT:    ; def s[4:5]
10349; GFX90A-NEXT:    ;;#ASMEND
10350; GFX90A-NEXT:    ;;#ASMSTART
10351; GFX90A-NEXT:    ; def s[6:7]
10352; GFX90A-NEXT:    ;;#ASMEND
10353; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10354; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
10355; GFX90A-NEXT:    ;;#ASMSTART
10356; GFX90A-NEXT:    ; use s[8:9]
10357; GFX90A-NEXT:    ;;#ASMEND
10358; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10359;
10360; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_1:
10361; GFX940:       ; %bb.0:
10362; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10363; GFX940-NEXT:    ;;#ASMSTART
10364; GFX940-NEXT:    ; def s[0:1]
10365; GFX940-NEXT:    ;;#ASMEND
10366; GFX940-NEXT:    ;;#ASMSTART
10367; GFX940-NEXT:    ; def s[2:3]
10368; GFX940-NEXT:    ;;#ASMEND
10369; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10370; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
10371; GFX940-NEXT:    ;;#ASMSTART
10372; GFX940-NEXT:    ; use s[8:9]
10373; GFX940-NEXT:    ;;#ASMEND
10374; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
10375  %vec0 = call <4 x half> asm "; def $0", "=s"()
10376  %vec1 = call <4 x half> asm "; def $0", "=s"()
10377  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10378  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10379  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10380  ret void
10381}
10382
; Scalar-register case: shuffles two asm-defined <4 x half> values with mask
; <7, 0, 1>. Mask indices 4-7 address %vec1, so the result is
; { %vec1[3], %vec0[0], %vec0[1] }. The expected output shifts element 7 down,
; packs it with the first dword of the other input to form s8, and uses a
; second s_lshr_b32 to produce s9. The checks record the llc output for each
; -mcpu in the RUN lines.
10383define void @s_shuffle_v3f16_v4f16__7_0_1() {
10384; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_1:
10385; GFX900:       ; %bb.0:
10386; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10387; GFX900-NEXT:    ;;#ASMSTART
10388; GFX900-NEXT:    ; def s[4:5]
10389; GFX900-NEXT:    ;;#ASMEND
10390; GFX900-NEXT:    ;;#ASMSTART
10391; GFX900-NEXT:    ; def s[6:7]
10392; GFX900-NEXT:    ;;#ASMEND
10393; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
10394; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
10395; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10396; GFX900-NEXT:    ;;#ASMSTART
10397; GFX900-NEXT:    ; use s[8:9]
10398; GFX900-NEXT:    ;;#ASMEND
10399; GFX900-NEXT:    s_setpc_b64 s[30:31]
10400;
10401; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_1:
10402; GFX90A:       ; %bb.0:
10403; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10404; GFX90A-NEXT:    ;;#ASMSTART
10405; GFX90A-NEXT:    ; def s[4:5]
10406; GFX90A-NEXT:    ;;#ASMEND
10407; GFX90A-NEXT:    ;;#ASMSTART
10408; GFX90A-NEXT:    ; def s[6:7]
10409; GFX90A-NEXT:    ;;#ASMEND
10410; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
10411; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
10412; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10413; GFX90A-NEXT:    ;;#ASMSTART
10414; GFX90A-NEXT:    ; use s[8:9]
10415; GFX90A-NEXT:    ;;#ASMEND
10416; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10417;
10418; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_1:
10419; GFX940:       ; %bb.0:
10420; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10421; GFX940-NEXT:    ;;#ASMSTART
10422; GFX940-NEXT:    ; def s[0:1]
10423; GFX940-NEXT:    ;;#ASMEND
10424; GFX940-NEXT:    ;;#ASMSTART
10425; GFX940-NEXT:    ; def s[2:3]
10426; GFX940-NEXT:    ;;#ASMEND
10427; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
10428; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
10429; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10430; GFX940-NEXT:    ;;#ASMSTART
10431; GFX940-NEXT:    ; use s[8:9]
10432; GFX940-NEXT:    ;;#ASMEND
10433; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
10434  %vec0 = call <4 x half> asm "; def $0", "=s"()
10435  %vec1 = call <4 x half> asm "; def $0", "=s"()
10436  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10437  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10438  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10439  ret void
10440}
10441
; Scalar-register case: shuffles two asm-defined <4 x half> values with mask
; <7, 2, 1>. Mask indices 4-7 address %vec1, so the result is
; { %vec1[3], %vec0[2], %vec0[1] }. Note that the checks expect the two asm
; def blocks in the emitted code with the higher-numbered register pair first
; and a shift scheduled between them. The checks record the llc output for
; each -mcpu in the RUN lines.
10442define void @s_shuffle_v3f16_v4f16__7_2_1() {
10443; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_1:
10444; GFX900:       ; %bb.0:
10445; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10446; GFX900-NEXT:    ;;#ASMSTART
10447; GFX900-NEXT:    ; def s[6:7]
10448; GFX900-NEXT:    ;;#ASMEND
10449; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
10450; GFX900-NEXT:    ;;#ASMSTART
10451; GFX900-NEXT:    ; def s[4:5]
10452; GFX900-NEXT:    ;;#ASMEND
10453; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
10454; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10455; GFX900-NEXT:    ;;#ASMSTART
10456; GFX900-NEXT:    ; use s[8:9]
10457; GFX900-NEXT:    ;;#ASMEND
10458; GFX900-NEXT:    s_setpc_b64 s[30:31]
10459;
10460; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_1:
10461; GFX90A:       ; %bb.0:
10462; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10463; GFX90A-NEXT:    ;;#ASMSTART
10464; GFX90A-NEXT:    ; def s[6:7]
10465; GFX90A-NEXT:    ;;#ASMEND
10466; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
10467; GFX90A-NEXT:    ;;#ASMSTART
10468; GFX90A-NEXT:    ; def s[4:5]
10469; GFX90A-NEXT:    ;;#ASMEND
10470; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
10471; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10472; GFX90A-NEXT:    ;;#ASMSTART
10473; GFX90A-NEXT:    ; use s[8:9]
10474; GFX90A-NEXT:    ;;#ASMEND
10475; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10476;
10477; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_1:
10478; GFX940:       ; %bb.0:
10479; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10480; GFX940-NEXT:    ;;#ASMSTART
10481; GFX940-NEXT:    ; def s[2:3]
10482; GFX940-NEXT:    ;;#ASMEND
10483; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
10484; GFX940-NEXT:    ;;#ASMSTART
10485; GFX940-NEXT:    ; def s[0:1]
10486; GFX940-NEXT:    ;;#ASMEND
10487; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
10488; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10489; GFX940-NEXT:    ;;#ASMSTART
10490; GFX940-NEXT:    ; use s[8:9]
10491; GFX940-NEXT:    ;;#ASMEND
10492; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
10493  %vec0 = call <4 x half> asm "; def $0", "=s"()
10494  %vec1 = call <4 x half> asm "; def $0", "=s"()
10495  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10496  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10497  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10498  ret void
10499}
10500
; Scalar-register case: shuffles two asm-defined <4 x half> values with mask
; <7, 3, 1>. Mask indices 4-7 address %vec1, so the result is
; { %vec1[3], %vec0[3], %vec0[1] }. All three selected elements are
; odd-numbered, so the expected output contains three s_lshr_b32 shifts and
; one s_pack_ll_b32_b16. The checks record the llc output for each -mcpu in
; the RUN lines.
10501define void @s_shuffle_v3f16_v4f16__7_3_1() {
10502; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_1:
10503; GFX900:       ; %bb.0:
10504; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10505; GFX900-NEXT:    ;;#ASMSTART
10506; GFX900-NEXT:    ; def s[4:5]
10507; GFX900-NEXT:    ;;#ASMEND
10508; GFX900-NEXT:    ;;#ASMSTART
10509; GFX900-NEXT:    ; def s[6:7]
10510; GFX900-NEXT:    ;;#ASMEND
10511; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
10512; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
10513; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
10514; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10515; GFX900-NEXT:    ;;#ASMSTART
10516; GFX900-NEXT:    ; use s[8:9]
10517; GFX900-NEXT:    ;;#ASMEND
10518; GFX900-NEXT:    s_setpc_b64 s[30:31]
10519;
10520; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_1:
10521; GFX90A:       ; %bb.0:
10522; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10523; GFX90A-NEXT:    ;;#ASMSTART
10524; GFX90A-NEXT:    ; def s[4:5]
10525; GFX90A-NEXT:    ;;#ASMEND
10526; GFX90A-NEXT:    ;;#ASMSTART
10527; GFX90A-NEXT:    ; def s[6:7]
10528; GFX90A-NEXT:    ;;#ASMEND
10529; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
10530; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
10531; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
10532; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10533; GFX90A-NEXT:    ;;#ASMSTART
10534; GFX90A-NEXT:    ; use s[8:9]
10535; GFX90A-NEXT:    ;;#ASMEND
10536; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10537;
10538; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_1:
10539; GFX940:       ; %bb.0:
10540; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10541; GFX940-NEXT:    ;;#ASMSTART
10542; GFX940-NEXT:    ; def s[0:1]
10543; GFX940-NEXT:    ;;#ASMEND
10544; GFX940-NEXT:    ;;#ASMSTART
10545; GFX940-NEXT:    ; def s[2:3]
10546; GFX940-NEXT:    ;;#ASMEND
10547; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
10548; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
10549; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
10550; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10551; GFX940-NEXT:    ;;#ASMSTART
10552; GFX940-NEXT:    ; use s[8:9]
10553; GFX940-NEXT:    ;;#ASMEND
10554; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
10555  %vec0 = call <4 x half> asm "; def $0", "=s"()
10556  %vec1 = call <4 x half> asm "; def $0", "=s"()
10557  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10558  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10559  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10560  ret void
10561}
10562
; Scalar-register case: shuffles two asm-defined <4 x half> values with mask
; <7, 4, 1>. Mask indices 4-7 address %vec1, so the result is
; { %vec1[3], %vec1[0], %vec0[1] }. The expected output shifts element 7 down,
; packs it with the first dword of the same input to form s8, and uses a
; second s_lshr_b32 on the other input to produce s9. The checks record the
; llc output for each -mcpu in the RUN lines.
10563define void @s_shuffle_v3f16_v4f16__7_4_1() {
10564; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_1:
10565; GFX900:       ; %bb.0:
10566; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10567; GFX900-NEXT:    ;;#ASMSTART
10568; GFX900-NEXT:    ; def s[4:5]
10569; GFX900-NEXT:    ;;#ASMEND
10570; GFX900-NEXT:    ;;#ASMSTART
10571; GFX900-NEXT:    ; def s[6:7]
10572; GFX900-NEXT:    ;;#ASMEND
10573; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
10574; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
10575; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10576; GFX900-NEXT:    ;;#ASMSTART
10577; GFX900-NEXT:    ; use s[8:9]
10578; GFX900-NEXT:    ;;#ASMEND
10579; GFX900-NEXT:    s_setpc_b64 s[30:31]
10580;
10581; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_1:
10582; GFX90A:       ; %bb.0:
10583; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10584; GFX90A-NEXT:    ;;#ASMSTART
10585; GFX90A-NEXT:    ; def s[4:5]
10586; GFX90A-NEXT:    ;;#ASMEND
10587; GFX90A-NEXT:    ;;#ASMSTART
10588; GFX90A-NEXT:    ; def s[6:7]
10589; GFX90A-NEXT:    ;;#ASMEND
10590; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
10591; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
10592; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10593; GFX90A-NEXT:    ;;#ASMSTART
10594; GFX90A-NEXT:    ; use s[8:9]
10595; GFX90A-NEXT:    ;;#ASMEND
10596; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10597;
10598; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_1:
10599; GFX940:       ; %bb.0:
10600; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10601; GFX940-NEXT:    ;;#ASMSTART
10602; GFX940-NEXT:    ; def s[0:1]
10603; GFX940-NEXT:    ;;#ASMEND
10604; GFX940-NEXT:    ;;#ASMSTART
10605; GFX940-NEXT:    ; def s[2:3]
10606; GFX940-NEXT:    ;;#ASMEND
10607; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
10608; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s2
10609; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10610; GFX940-NEXT:    ;;#ASMSTART
10611; GFX940-NEXT:    ; use s[8:9]
10612; GFX940-NEXT:    ;;#ASMEND
10613; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs come from inline asm with an "=s" output constraint.
10614  %vec0 = call <4 x half> asm "; def $0", "=s"()
10615  %vec1 = call <4 x half> asm "; def $0", "=s"()
10616  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 1>
  ; Pad to four lanes (lane 3 is poison) before binding to the s[8:9] operand.
10617  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10618  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10619  ret void
10620}
10621
; Scalar-register shuffle test: result lanes are %vec1[3], %vec1[1], %vec0[1]
; (mask <7, 5, 1>). Every selected element is an odd (high-half) element, so
; the expected code contains three s_lshr_b32 by 16 and one s_pack_ll_b32_b16
; that forms s8; s9 receives the third lane.
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the registers assigned to the two asm-defined inputs.
10622define void @s_shuffle_v3f16_v4f16__7_5_1() {
10623; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_1:
10624; GFX900:       ; %bb.0:
10625; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10626; GFX900-NEXT:    ;;#ASMSTART
10627; GFX900-NEXT:    ; def s[4:5]
10628; GFX900-NEXT:    ;;#ASMEND
10629; GFX900-NEXT:    ;;#ASMSTART
10630; GFX900-NEXT:    ; def s[6:7]
10631; GFX900-NEXT:    ;;#ASMEND
10632; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
10633; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
10634; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
10635; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10636; GFX900-NEXT:    ;;#ASMSTART
10637; GFX900-NEXT:    ; use s[8:9]
10638; GFX900-NEXT:    ;;#ASMEND
10639; GFX900-NEXT:    s_setpc_b64 s[30:31]
10640;
10641; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_1:
10642; GFX90A:       ; %bb.0:
10643; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10644; GFX90A-NEXT:    ;;#ASMSTART
10645; GFX90A-NEXT:    ; def s[4:5]
10646; GFX90A-NEXT:    ;;#ASMEND
10647; GFX90A-NEXT:    ;;#ASMSTART
10648; GFX90A-NEXT:    ; def s[6:7]
10649; GFX90A-NEXT:    ;;#ASMEND
10650; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
10651; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
10652; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
10653; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10654; GFX90A-NEXT:    ;;#ASMSTART
10655; GFX90A-NEXT:    ; use s[8:9]
10656; GFX90A-NEXT:    ;;#ASMEND
10657; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10658;
10659; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_1:
10660; GFX940:       ; %bb.0:
10661; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10662; GFX940-NEXT:    ;;#ASMSTART
10663; GFX940-NEXT:    ; def s[0:1]
10664; GFX940-NEXT:    ;;#ASMEND
10665; GFX940-NEXT:    ;;#ASMSTART
10666; GFX940-NEXT:    ; def s[2:3]
10667; GFX940-NEXT:    ;;#ASMEND
10668; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
10669; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
10670; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
10671; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10672; GFX940-NEXT:    ;;#ASMSTART
10673; GFX940-NEXT:    ; use s[8:9]
10674; GFX940-NEXT:    ;;#ASMEND
10675; GFX940-NEXT:    s_setpc_b64 s[30:31]
10676  %vec0 = call <4 x half> asm "; def $0", "=s"()
10677  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
10678  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 1>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
10679  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10680  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10681  ret void
10682}
10683
; Scalar-register shuffle test: result lanes are %vec1[3], %vec1[2], %vec0[1]
; (mask <7, 6, 1>). Lanes 0 and 1 are the two halves of one dword of %vec1 in
; swapped order, so the expected code shifts that dword right by 16 and packs
; the shifted and unshifted values into s8; a second shift produces s9.
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the registers assigned to the two asm-defined inputs.
10684define void @s_shuffle_v3f16_v4f16__7_6_1() {
10685; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_1:
10686; GFX900:       ; %bb.0:
10687; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10688; GFX900-NEXT:    ;;#ASMSTART
10689; GFX900-NEXT:    ; def s[4:5]
10690; GFX900-NEXT:    ;;#ASMEND
10691; GFX900-NEXT:    ;;#ASMSTART
10692; GFX900-NEXT:    ; def s[6:7]
10693; GFX900-NEXT:    ;;#ASMEND
10694; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
10695; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
10696; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
10697; GFX900-NEXT:    ;;#ASMSTART
10698; GFX900-NEXT:    ; use s[8:9]
10699; GFX900-NEXT:    ;;#ASMEND
10700; GFX900-NEXT:    s_setpc_b64 s[30:31]
10701;
10702; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_1:
10703; GFX90A:       ; %bb.0:
10704; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10705; GFX90A-NEXT:    ;;#ASMSTART
10706; GFX90A-NEXT:    ; def s[4:5]
10707; GFX90A-NEXT:    ;;#ASMEND
10708; GFX90A-NEXT:    ;;#ASMSTART
10709; GFX90A-NEXT:    ; def s[6:7]
10710; GFX90A-NEXT:    ;;#ASMEND
10711; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
10712; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
10713; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
10714; GFX90A-NEXT:    ;;#ASMSTART
10715; GFX90A-NEXT:    ; use s[8:9]
10716; GFX90A-NEXT:    ;;#ASMEND
10717; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10718;
10719; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_1:
10720; GFX940:       ; %bb.0:
10721; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10722; GFX940-NEXT:    ;;#ASMSTART
10723; GFX940-NEXT:    ; def s[0:1]
10724; GFX940-NEXT:    ;;#ASMEND
10725; GFX940-NEXT:    ;;#ASMSTART
10726; GFX940-NEXT:    ; def s[2:3]
10727; GFX940-NEXT:    ;;#ASMEND
10728; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
10729; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s3
10730; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
10731; GFX940-NEXT:    ;;#ASMSTART
10732; GFX940-NEXT:    ; use s[8:9]
10733; GFX940-NEXT:    ;;#ASMEND
10734; GFX940-NEXT:    s_setpc_b64 s[30:31]
10735  %vec0 = call <4 x half> asm "; def $0", "=s"()
10736  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
10737  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 1>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
10738  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10739  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10740  ret void
10741}
10742
; Scalar-register shuffle test with an undefined first lane: result lanes are
; poison, %vec0[2], %vec0[2] (mask <poison, 2, 2>).
; The expected code is a single s_lshl_b32 by 16 that writes s8; no
; instruction writes s9 between the asm def and the asm use.
; One shared check prefix (GFX9) covers every RUN configuration.
10743define void @s_shuffle_v3f16_v4f16__u_2_2() {
10744; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_2_2:
10745; GFX9:       ; %bb.0:
10746; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10747; GFX9-NEXT:    ;;#ASMSTART
10748; GFX9-NEXT:    ; def s[8:9]
10749; GFX9-NEXT:    ;;#ASMEND
10750; GFX9-NEXT:    s_lshl_b32 s8, s9, 16
10751; GFX9-NEXT:    ;;#ASMSTART
10752; GFX9-NEXT:    ; use s[8:9]
10753; GFX9-NEXT:    ;;#ASMEND
10754; GFX9-NEXT:    s_setpc_b64 s[30:31]
10755  %vec0 = call <4 x half> asm "; def $0", "=s"()
  ; Second operand is poison; only %vec0 lanes are selected.
10756  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 2, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
10757  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10758  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10759  ret void
10760}
10761
; Scalar-register shuffle test: result lanes are %vec0[0], %vec0[2], %vec0[2]
; (mask <0, 2, 2>). Both elements packed into s8 are even (low-half) elements,
; so the expected code is a single s_pack_ll_b32_b16 with no shifts; no
; instruction writes s9 between the asm def and the asm use.
; One shared check prefix (GFX9) covers every RUN configuration.
10762define void @s_shuffle_v3f16_v4f16__0_2_2() {
10763; GFX9-LABEL: s_shuffle_v3f16_v4f16__0_2_2:
10764; GFX9:       ; %bb.0:
10765; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10766; GFX9-NEXT:    ;;#ASMSTART
10767; GFX9-NEXT:    ; def s[8:9]
10768; GFX9-NEXT:    ;;#ASMEND
10769; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s8, s9
10770; GFX9-NEXT:    ;;#ASMSTART
10771; GFX9-NEXT:    ; use s[8:9]
10772; GFX9-NEXT:    ;;#ASMEND
10773; GFX9-NEXT:    s_setpc_b64 s[30:31]
10774  %vec0 = call <4 x half> asm "; def $0", "=s"()
  ; Second operand is poison; only %vec0 lanes are selected.
10775  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 2, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
10776  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10777  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10778  ret void
10779}
10780
; Scalar-register shuffle test: result lanes are %vec0[1], %vec0[2], %vec0[2]
; (mask <1, 2, 2>). Lane 0 is an odd (high-half) element, so the expected code
; shifts s8 right by 16 into a scratch register before the s_pack_ll_b32_b16;
; no instruction writes s9 between the asm def and the asm use.
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the scratch register (s0 instead of s4).
10781define void @s_shuffle_v3f16_v4f16__1_2_2() {
10782; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_2_2:
10783; GFX900:       ; %bb.0:
10784; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10785; GFX900-NEXT:    ;;#ASMSTART
10786; GFX900-NEXT:    ; def s[8:9]
10787; GFX900-NEXT:    ;;#ASMEND
10788; GFX900-NEXT:    s_lshr_b32 s4, s8, 16
10789; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10790; GFX900-NEXT:    ;;#ASMSTART
10791; GFX900-NEXT:    ; use s[8:9]
10792; GFX900-NEXT:    ;;#ASMEND
10793; GFX900-NEXT:    s_setpc_b64 s[30:31]
10794;
10795; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_2_2:
10796; GFX90A:       ; %bb.0:
10797; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10798; GFX90A-NEXT:    ;;#ASMSTART
10799; GFX90A-NEXT:    ; def s[8:9]
10800; GFX90A-NEXT:    ;;#ASMEND
10801; GFX90A-NEXT:    s_lshr_b32 s4, s8, 16
10802; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10803; GFX90A-NEXT:    ;;#ASMSTART
10804; GFX90A-NEXT:    ; use s[8:9]
10805; GFX90A-NEXT:    ;;#ASMEND
10806; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10807;
10808; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_2_2:
10809; GFX940:       ; %bb.0:
10810; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10811; GFX940-NEXT:    ;;#ASMSTART
10812; GFX940-NEXT:    ; def s[8:9]
10813; GFX940-NEXT:    ;;#ASMEND
10814; GFX940-NEXT:    s_lshr_b32 s0, s8, 16
10815; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10816; GFX940-NEXT:    ;;#ASMSTART
10817; GFX940-NEXT:    ; use s[8:9]
10818; GFX940-NEXT:    ;;#ASMEND
10819; GFX940-NEXT:    s_setpc_b64 s[30:31]
10820  %vec0 = call <4 x half> asm "; def $0", "=s"()
  ; Second operand is poison; only %vec0 lanes are selected.
10821  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 2, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
10822  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10823  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10824  ret void
10825}
10826
; Scalar-register shuffle test: all three result lanes are %vec0[2]
; (mask <2, 2, 2>, a splat of element 2).
; The expected code is a single s_pack_ll_b32_b16 with s9 as both sources; no
; instruction writes s9 between the asm def and the asm use.
; One shared check prefix (GFX9) covers every RUN configuration.
10827define void @s_shuffle_v3f16_v4f16__2_2_2() {
10828; GFX9-LABEL: s_shuffle_v3f16_v4f16__2_2_2:
10829; GFX9:       ; %bb.0:
10830; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10831; GFX9-NEXT:    ;;#ASMSTART
10832; GFX9-NEXT:    ; def s[8:9]
10833; GFX9-NEXT:    ;;#ASMEND
10834; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
10835; GFX9-NEXT:    ;;#ASMSTART
10836; GFX9-NEXT:    ; use s[8:9]
10837; GFX9-NEXT:    ;;#ASMEND
10838; GFX9-NEXT:    s_setpc_b64 s[30:31]
10839  %vec0 = call <4 x half> asm "; def $0", "=s"()
  ; Second operand is poison; only %vec0 lanes are selected.
10840  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 2, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
10841  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10842  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10843  ret void
10844}
10845
; Scalar-register shuffle test: result lanes are %vec0[3], %vec0[2], %vec0[2]
; (mask <3, 2, 2>). Lanes 0 and 1 are the two halves of s9 in swapped order,
; so the expected code shifts s9 right by 16 into a scratch register and packs
; that with s9 into s8; s9 itself is not rewritten.
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the scratch register (s0 instead of s4).
10846define void @s_shuffle_v3f16_v4f16__3_2_2() {
10847; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_2_2:
10848; GFX900:       ; %bb.0:
10849; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10850; GFX900-NEXT:    ;;#ASMSTART
10851; GFX900-NEXT:    ; def s[8:9]
10852; GFX900-NEXT:    ;;#ASMEND
10853; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
10854; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10855; GFX900-NEXT:    ;;#ASMSTART
10856; GFX900-NEXT:    ; use s[8:9]
10857; GFX900-NEXT:    ;;#ASMEND
10858; GFX900-NEXT:    s_setpc_b64 s[30:31]
10859;
10860; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_2_2:
10861; GFX90A:       ; %bb.0:
10862; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10863; GFX90A-NEXT:    ;;#ASMSTART
10864; GFX90A-NEXT:    ; def s[8:9]
10865; GFX90A-NEXT:    ;;#ASMEND
10866; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
10867; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10868; GFX90A-NEXT:    ;;#ASMSTART
10869; GFX90A-NEXT:    ; use s[8:9]
10870; GFX90A-NEXT:    ;;#ASMEND
10871; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10872;
10873; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_2_2:
10874; GFX940:       ; %bb.0:
10875; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10876; GFX940-NEXT:    ;;#ASMSTART
10877; GFX940-NEXT:    ; def s[8:9]
10878; GFX940-NEXT:    ;;#ASMEND
10879; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
10880; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10881; GFX940-NEXT:    ;;#ASMSTART
10882; GFX940-NEXT:    ; use s[8:9]
10883; GFX940-NEXT:    ;;#ASMEND
10884; GFX940-NEXT:    s_setpc_b64 s[30:31]
10885  %vec0 = call <4 x half> asm "; def $0", "=s"()
  ; Second operand is poison; only %vec0 lanes are selected.
10886  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 2, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
10887  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10888  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10889  ret void
10890}
10891
; Scalar-register shuffle test with mask <4, 2, 2>. The second shuffle operand
; is poison here, so mask index 4 (its element 0) leaves lane 0 undefined;
; lanes 1 and 2 are both %vec0[2].
; The expected code is a single s_lshl_b32 by 16 that writes s8; no
; instruction writes s9 between the asm def and the asm use.
; One shared check prefix (GFX9) covers every RUN configuration.
10892define void @s_shuffle_v3f16_v4f16__4_2_2() {
10893; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_2_2:
10894; GFX9:       ; %bb.0:
10895; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10896; GFX9-NEXT:    ;;#ASMSTART
10897; GFX9-NEXT:    ; def s[8:9]
10898; GFX9-NEXT:    ;;#ASMEND
10899; GFX9-NEXT:    s_lshl_b32 s8, s9, 16
10900; GFX9-NEXT:    ;;#ASMSTART
10901; GFX9-NEXT:    ; use s[8:9]
10902; GFX9-NEXT:    ;;#ASMEND
10903; GFX9-NEXT:    s_setpc_b64 s[30:31]
10904  %vec0 = call <4 x half> asm "; def $0", "=s"()
  ; Index 4 names element 0 of the poison operand; 2 names %vec0[2].
10905  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 2, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
10906  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10907  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10908  ret void
10909}
10910
; Scalar-register shuffle test: result lanes are %vec1[1], %vec0[2], %vec0[2]
; (mask <5, 2, 2>). Lane 0 is an odd (high-half) element, so the expected code
; has one s_lshr_b32 by 16 followed by an s_pack_ll_b32_b16 that forms s8; no
; instruction writes s9 after the asm def that lands in s[8:9].
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the register pair of the other asm def (s[0:1] vs s[4:5]).
10911define void @s_shuffle_v3f16_v4f16__5_2_2() {
10912; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_2_2:
10913; GFX900:       ; %bb.0:
10914; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10915; GFX900-NEXT:    ;;#ASMSTART
10916; GFX900-NEXT:    ; def s[4:5]
10917; GFX900-NEXT:    ;;#ASMEND
10918; GFX900-NEXT:    ;;#ASMSTART
10919; GFX900-NEXT:    ; def s[8:9]
10920; GFX900-NEXT:    ;;#ASMEND
10921; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
10922; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10923; GFX900-NEXT:    ;;#ASMSTART
10924; GFX900-NEXT:    ; use s[8:9]
10925; GFX900-NEXT:    ;;#ASMEND
10926; GFX900-NEXT:    s_setpc_b64 s[30:31]
10927;
10928; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_2_2:
10929; GFX90A:       ; %bb.0:
10930; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10931; GFX90A-NEXT:    ;;#ASMSTART
10932; GFX90A-NEXT:    ; def s[4:5]
10933; GFX90A-NEXT:    ;;#ASMEND
10934; GFX90A-NEXT:    ;;#ASMSTART
10935; GFX90A-NEXT:    ; def s[8:9]
10936; GFX90A-NEXT:    ;;#ASMEND
10937; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
10938; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
10939; GFX90A-NEXT:    ;;#ASMSTART
10940; GFX90A-NEXT:    ; use s[8:9]
10941; GFX90A-NEXT:    ;;#ASMEND
10942; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10943;
10944; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_2_2:
10945; GFX940:       ; %bb.0:
10946; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10947; GFX940-NEXT:    ;;#ASMSTART
10948; GFX940-NEXT:    ; def s[0:1]
10949; GFX940-NEXT:    ;;#ASMEND
10950; GFX940-NEXT:    ;;#ASMSTART
10951; GFX940-NEXT:    ; def s[8:9]
10952; GFX940-NEXT:    ;;#ASMEND
10953; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
10954; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
10955; GFX940-NEXT:    ;;#ASMSTART
10956; GFX940-NEXT:    ; use s[8:9]
10957; GFX940-NEXT:    ;;#ASMEND
10958; GFX940-NEXT:    s_setpc_b64 s[30:31]
10959  %vec0 = call <4 x half> asm "; def $0", "=s"()
10960  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
10961  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 2, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
10962  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
10963  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
10964  ret void
10965}
10966
; Scalar-register shuffle test: result lanes are %vec1[2], %vec0[2], %vec0[2]
; (mask <6, 2, 2>). Both elements packed into s8 are even (low-half) elements,
; so the expected code is a single s_pack_ll_b32_b16 with no shifts; no
; instruction writes s9 after the asm def that lands in s[8:9].
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the register pair of the other asm def (s[0:1] vs s[4:5]).
10967define void @s_shuffle_v3f16_v4f16__6_2_2() {
10968; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_2_2:
10969; GFX900:       ; %bb.0:
10970; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10971; GFX900-NEXT:    ;;#ASMSTART
10972; GFX900-NEXT:    ; def s[8:9]
10973; GFX900-NEXT:    ;;#ASMEND
10974; GFX900-NEXT:    ;;#ASMSTART
10975; GFX900-NEXT:    ; def s[4:5]
10976; GFX900-NEXT:    ;;#ASMEND
10977; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
10978; GFX900-NEXT:    ;;#ASMSTART
10979; GFX900-NEXT:    ; use s[8:9]
10980; GFX900-NEXT:    ;;#ASMEND
10981; GFX900-NEXT:    s_setpc_b64 s[30:31]
10982;
10983; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_2_2:
10984; GFX90A:       ; %bb.0:
10985; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10986; GFX90A-NEXT:    ;;#ASMSTART
10987; GFX90A-NEXT:    ; def s[8:9]
10988; GFX90A-NEXT:    ;;#ASMEND
10989; GFX90A-NEXT:    ;;#ASMSTART
10990; GFX90A-NEXT:    ; def s[4:5]
10991; GFX90A-NEXT:    ;;#ASMEND
10992; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
10993; GFX90A-NEXT:    ;;#ASMSTART
10994; GFX90A-NEXT:    ; use s[8:9]
10995; GFX90A-NEXT:    ;;#ASMEND
10996; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10997;
10998; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_2_2:
10999; GFX940:       ; %bb.0:
11000; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11001; GFX940-NEXT:    ;;#ASMSTART
11002; GFX940-NEXT:    ; def s[8:9]
11003; GFX940-NEXT:    ;;#ASMEND
11004; GFX940-NEXT:    ;;#ASMSTART
11005; GFX940-NEXT:    ; def s[0:1]
11006; GFX940-NEXT:    ;;#ASMEND
11007; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
11008; GFX940-NEXT:    ;;#ASMSTART
11009; GFX940-NEXT:    ; use s[8:9]
11010; GFX940-NEXT:    ;;#ASMEND
11011; GFX940-NEXT:    s_setpc_b64 s[30:31]
11012  %vec0 = call <4 x half> asm "; def $0", "=s"()
11013  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
11014  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 2, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
11015  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11016  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11017  ret void
11018}
11019
; Scalar-register shuffle test: result lanes are %vec1[3], %vec0[2], %vec0[2]
; (mask <7, 2, 2>). Lane 0 is an odd (high-half) element, so the expected code
; has one s_lshr_b32 by 16 followed by an s_pack_ll_b32_b16 that forms s8; no
; instruction writes s9 after the asm def that lands in s[8:9].
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the register pair of the other asm def (s[0:1] vs s[4:5]).
11020define void @s_shuffle_v3f16_v4f16__7_2_2() {
11021; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_2:
11022; GFX900:       ; %bb.0:
11023; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11024; GFX900-NEXT:    ;;#ASMSTART
11025; GFX900-NEXT:    ; def s[4:5]
11026; GFX900-NEXT:    ;;#ASMEND
11027; GFX900-NEXT:    ;;#ASMSTART
11028; GFX900-NEXT:    ; def s[8:9]
11029; GFX900-NEXT:    ;;#ASMEND
11030; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
11031; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11032; GFX900-NEXT:    ;;#ASMSTART
11033; GFX900-NEXT:    ; use s[8:9]
11034; GFX900-NEXT:    ;;#ASMEND
11035; GFX900-NEXT:    s_setpc_b64 s[30:31]
11036;
11037; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_2:
11038; GFX90A:       ; %bb.0:
11039; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11040; GFX90A-NEXT:    ;;#ASMSTART
11041; GFX90A-NEXT:    ; def s[4:5]
11042; GFX90A-NEXT:    ;;#ASMEND
11043; GFX90A-NEXT:    ;;#ASMSTART
11044; GFX90A-NEXT:    ; def s[8:9]
11045; GFX90A-NEXT:    ;;#ASMEND
11046; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
11047; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11048; GFX90A-NEXT:    ;;#ASMSTART
11049; GFX90A-NEXT:    ; use s[8:9]
11050; GFX90A-NEXT:    ;;#ASMEND
11051; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11052;
11053; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_2:
11054; GFX940:       ; %bb.0:
11055; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11056; GFX940-NEXT:    ;;#ASMSTART
11057; GFX940-NEXT:    ; def s[0:1]
11058; GFX940-NEXT:    ;;#ASMEND
11059; GFX940-NEXT:    ;;#ASMSTART
11060; GFX940-NEXT:    ; def s[8:9]
11061; GFX940-NEXT:    ;;#ASMEND
11062; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
11063; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
11064; GFX940-NEXT:    ;;#ASMSTART
11065; GFX940-NEXT:    ; use s[8:9]
11066; GFX940-NEXT:    ;;#ASMEND
11067; GFX940-NEXT:    s_setpc_b64 s[30:31]
11068  %vec0 = call <4 x half> asm "; def $0", "=s"()
11069  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
11070  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
11071  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11072  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11073  ret void
11074}
11075
; Scalar-register shuffle test with an undefined middle lane: result lanes are
; %vec1[3], poison, %vec0[2] (mask <7, poison, 2>).
; With lane 1 unconstrained, the expected code needs no pack: a single
; s_lshr_b32 by 16 writes s8, and no instruction writes s9 after the asm def
; that lands in s[8:9].
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the register pair of the other asm def (s[0:1] vs s[4:5]).
11076define void @s_shuffle_v3f16_v4f16__7_u_2() {
11077; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_2:
11078; GFX900:       ; %bb.0:
11079; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11080; GFX900-NEXT:    ;;#ASMSTART
11081; GFX900-NEXT:    ; def s[8:9]
11082; GFX900-NEXT:    ;;#ASMEND
11083; GFX900-NEXT:    ;;#ASMSTART
11084; GFX900-NEXT:    ; def s[4:5]
11085; GFX900-NEXT:    ;;#ASMEND
11086; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
11087; GFX900-NEXT:    ;;#ASMSTART
11088; GFX900-NEXT:    ; use s[8:9]
11089; GFX900-NEXT:    ;;#ASMEND
11090; GFX900-NEXT:    s_setpc_b64 s[30:31]
11091;
11092; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_2:
11093; GFX90A:       ; %bb.0:
11094; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11095; GFX90A-NEXT:    ;;#ASMSTART
11096; GFX90A-NEXT:    ; def s[8:9]
11097; GFX90A-NEXT:    ;;#ASMEND
11098; GFX90A-NEXT:    ;;#ASMSTART
11099; GFX90A-NEXT:    ; def s[4:5]
11100; GFX90A-NEXT:    ;;#ASMEND
11101; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
11102; GFX90A-NEXT:    ;;#ASMSTART
11103; GFX90A-NEXT:    ; use s[8:9]
11104; GFX90A-NEXT:    ;;#ASMEND
11105; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11106;
11107; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_2:
11108; GFX940:       ; %bb.0:
11109; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11110; GFX940-NEXT:    ;;#ASMSTART
11111; GFX940-NEXT:    ; def s[8:9]
11112; GFX940-NEXT:    ;;#ASMEND
11113; GFX940-NEXT:    ;;#ASMSTART
11114; GFX940-NEXT:    ; def s[0:1]
11115; GFX940-NEXT:    ;;#ASMEND
11116; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
11117; GFX940-NEXT:    ;;#ASMSTART
11118; GFX940-NEXT:    ; use s[8:9]
11119; GFX940-NEXT:    ;;#ASMEND
11120; GFX940-NEXT:    s_setpc_b64 s[30:31]
11121  %vec0 = call <4 x half> asm "; def $0", "=s"()
11122  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
11123  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
11124  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11125  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11126  ret void
11127}
11128
; Scalar-register shuffle test: result lanes are %vec1[3], %vec0[0], %vec0[2]
; (mask <7, 0, 2>). Only lane 0 is an odd (high-half) element, so the expected
; code has one s_lshr_b32 by 16 followed by an s_pack_ll_b32_b16 that forms
; s8; no instruction writes s9 after the asm def that lands in s[8:9].
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the register pair of the other asm def (s[0:1] vs s[4:5]).
11129define void @s_shuffle_v3f16_v4f16__7_0_2() {
11130; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_2:
11131; GFX900:       ; %bb.0:
11132; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11133; GFX900-NEXT:    ;;#ASMSTART
11134; GFX900-NEXT:    ; def s[4:5]
11135; GFX900-NEXT:    ;;#ASMEND
11136; GFX900-NEXT:    ;;#ASMSTART
11137; GFX900-NEXT:    ; def s[8:9]
11138; GFX900-NEXT:    ;;#ASMEND
11139; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
11140; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s8
11141; GFX900-NEXT:    ;;#ASMSTART
11142; GFX900-NEXT:    ; use s[8:9]
11143; GFX900-NEXT:    ;;#ASMEND
11144; GFX900-NEXT:    s_setpc_b64 s[30:31]
11145;
11146; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_2:
11147; GFX90A:       ; %bb.0:
11148; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11149; GFX90A-NEXT:    ;;#ASMSTART
11150; GFX90A-NEXT:    ; def s[4:5]
11151; GFX90A-NEXT:    ;;#ASMEND
11152; GFX90A-NEXT:    ;;#ASMSTART
11153; GFX90A-NEXT:    ; def s[8:9]
11154; GFX90A-NEXT:    ;;#ASMEND
11155; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
11156; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s8
11157; GFX90A-NEXT:    ;;#ASMSTART
11158; GFX90A-NEXT:    ; use s[8:9]
11159; GFX90A-NEXT:    ;;#ASMEND
11160; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11161;
11162; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_2:
11163; GFX940:       ; %bb.0:
11164; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11165; GFX940-NEXT:    ;;#ASMSTART
11166; GFX940-NEXT:    ; def s[0:1]
11167; GFX940-NEXT:    ;;#ASMEND
11168; GFX940-NEXT:    ;;#ASMSTART
11169; GFX940-NEXT:    ; def s[8:9]
11170; GFX940-NEXT:    ;;#ASMEND
11171; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
11172; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s8
11173; GFX940-NEXT:    ;;#ASMSTART
11174; GFX940-NEXT:    ; use s[8:9]
11175; GFX940-NEXT:    ;;#ASMEND
11176; GFX940-NEXT:    s_setpc_b64 s[30:31]
11177  %vec0 = call <4 x half> asm "; def $0", "=s"()
11178  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
11179  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
11180  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11181  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11182  ret void
11183}
11184
; Scalar-register shuffle test: result lanes are %vec1[3], %vec0[1], %vec0[2]
; (mask <7, 1, 2>). Lanes 0 and 1 are both odd (high-half) elements, so the
; expected code has two s_lshr_b32 by 16 feeding one s_pack_ll_b32_b16 that
; forms s8; no instruction writes s9 after the asm def that lands in s[8:9].
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the register pair of the other asm def (s[0:1] vs s[4:5]).
11185define void @s_shuffle_v3f16_v4f16__7_1_2() {
11186; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_2:
11187; GFX900:       ; %bb.0:
11188; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11189; GFX900-NEXT:    ;;#ASMSTART
11190; GFX900-NEXT:    ; def s[4:5]
11191; GFX900-NEXT:    ;;#ASMEND
11192; GFX900-NEXT:    ;;#ASMSTART
11193; GFX900-NEXT:    ; def s[8:9]
11194; GFX900-NEXT:    ;;#ASMEND
11195; GFX900-NEXT:    s_lshr_b32 s4, s8, 16
11196; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
11197; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11198; GFX900-NEXT:    ;;#ASMSTART
11199; GFX900-NEXT:    ; use s[8:9]
11200; GFX900-NEXT:    ;;#ASMEND
11201; GFX900-NEXT:    s_setpc_b64 s[30:31]
11202;
11203; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_2:
11204; GFX90A:       ; %bb.0:
11205; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11206; GFX90A-NEXT:    ;;#ASMSTART
11207; GFX90A-NEXT:    ; def s[4:5]
11208; GFX90A-NEXT:    ;;#ASMEND
11209; GFX90A-NEXT:    ;;#ASMSTART
11210; GFX90A-NEXT:    ; def s[8:9]
11211; GFX90A-NEXT:    ;;#ASMEND
11212; GFX90A-NEXT:    s_lshr_b32 s4, s8, 16
11213; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
11214; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11215; GFX90A-NEXT:    ;;#ASMSTART
11216; GFX90A-NEXT:    ; use s[8:9]
11217; GFX90A-NEXT:    ;;#ASMEND
11218; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11219;
11220; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_2:
11221; GFX940:       ; %bb.0:
11222; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11223; GFX940-NEXT:    ;;#ASMSTART
11224; GFX940-NEXT:    ; def s[0:1]
11225; GFX940-NEXT:    ;;#ASMEND
11226; GFX940-NEXT:    ;;#ASMSTART
11227; GFX940-NEXT:    ; def s[8:9]
11228; GFX940-NEXT:    ;;#ASMEND
11229; GFX940-NEXT:    s_lshr_b32 s0, s8, 16
11230; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
11231; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
11232; GFX940-NEXT:    ;;#ASMSTART
11233; GFX940-NEXT:    ; use s[8:9]
11234; GFX940-NEXT:    ;;#ASMEND
11235; GFX940-NEXT:    s_setpc_b64 s[30:31]
11236  %vec0 = call <4 x half> asm "; def $0", "=s"()
11237  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
11238  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
11239  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11240  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11241  ret void
11242}
11243
; Scalar-register shuffle test: result lanes are %vec1[3], %vec0[3], %vec0[2]
; (mask <7, 3, 2>). Lanes 0 and 1 are both odd (high-half) elements, so the
; expected code has two s_lshr_b32 by 16 feeding one s_pack_ll_b32_b16 that
; forms s8; s9 is read by one of the shifts but never rewritten.
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the register pair of the other asm def (s[0:1] vs s[4:5]).
11244define void @s_shuffle_v3f16_v4f16__7_3_2() {
11245; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_2:
11246; GFX900:       ; %bb.0:
11247; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11248; GFX900-NEXT:    ;;#ASMSTART
11249; GFX900-NEXT:    ; def s[4:5]
11250; GFX900-NEXT:    ;;#ASMEND
11251; GFX900-NEXT:    ;;#ASMSTART
11252; GFX900-NEXT:    ; def s[8:9]
11253; GFX900-NEXT:    ;;#ASMEND
11254; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
11255; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
11256; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11257; GFX900-NEXT:    ;;#ASMSTART
11258; GFX900-NEXT:    ; use s[8:9]
11259; GFX900-NEXT:    ;;#ASMEND
11260; GFX900-NEXT:    s_setpc_b64 s[30:31]
11261;
11262; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_2:
11263; GFX90A:       ; %bb.0:
11264; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11265; GFX90A-NEXT:    ;;#ASMSTART
11266; GFX90A-NEXT:    ; def s[4:5]
11267; GFX90A-NEXT:    ;;#ASMEND
11268; GFX90A-NEXT:    ;;#ASMSTART
11269; GFX90A-NEXT:    ; def s[8:9]
11270; GFX90A-NEXT:    ;;#ASMEND
11271; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
11272; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
11273; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11274; GFX90A-NEXT:    ;;#ASMSTART
11275; GFX90A-NEXT:    ; use s[8:9]
11276; GFX90A-NEXT:    ;;#ASMEND
11277; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11278;
11279; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_2:
11280; GFX940:       ; %bb.0:
11281; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11282; GFX940-NEXT:    ;;#ASMSTART
11283; GFX940-NEXT:    ; def s[0:1]
11284; GFX940-NEXT:    ;;#ASMEND
11285; GFX940-NEXT:    ;;#ASMSTART
11286; GFX940-NEXT:    ; def s[8:9]
11287; GFX940-NEXT:    ;;#ASMEND
11288; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
11289; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
11290; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
11291; GFX940-NEXT:    ;;#ASMSTART
11292; GFX940-NEXT:    ; use s[8:9]
11293; GFX940-NEXT:    ;;#ASMEND
11294; GFX940-NEXT:    s_setpc_b64 s[30:31]
11295  %vec0 = call <4 x half> asm "; def $0", "=s"()
11296  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
11297  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
11298  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11299  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11300  ret void
11301}
11302
; Scalar-register shuffle test: result lanes are %vec1[3], %vec1[0], %vec0[2]
; (mask <7, 4, 2>). Only lane 0 is an odd (high-half) element, so the expected
; code has one s_lshr_b32 by 16 followed by an s_pack_ll_b32_b16 that forms
; s8; no instruction writes s9 after the asm def that lands in s[8:9].
; Each target has its own assertions; the gfx940 ones differ from gfx900 and
; gfx90a only in the register pair of the other asm def (s[0:1] vs s[4:5]).
11303define void @s_shuffle_v3f16_v4f16__7_4_2() {
11304; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_2:
11305; GFX900:       ; %bb.0:
11306; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11307; GFX900-NEXT:    ;;#ASMSTART
11308; GFX900-NEXT:    ; def s[4:5]
11309; GFX900-NEXT:    ;;#ASMEND
11310; GFX900-NEXT:    ;;#ASMSTART
11311; GFX900-NEXT:    ; def s[8:9]
11312; GFX900-NEXT:    ;;#ASMEND
11313; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
11314; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11315; GFX900-NEXT:    ;;#ASMSTART
11316; GFX900-NEXT:    ; use s[8:9]
11317; GFX900-NEXT:    ;;#ASMEND
11318; GFX900-NEXT:    s_setpc_b64 s[30:31]
11319;
11320; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_2:
11321; GFX90A:       ; %bb.0:
11322; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11323; GFX90A-NEXT:    ;;#ASMSTART
11324; GFX90A-NEXT:    ; def s[4:5]
11325; GFX90A-NEXT:    ;;#ASMEND
11326; GFX90A-NEXT:    ;;#ASMSTART
11327; GFX90A-NEXT:    ; def s[8:9]
11328; GFX90A-NEXT:    ;;#ASMEND
11329; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
11330; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11331; GFX90A-NEXT:    ;;#ASMSTART
11332; GFX90A-NEXT:    ; use s[8:9]
11333; GFX90A-NEXT:    ;;#ASMEND
11334; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11335;
11336; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_2:
11337; GFX940:       ; %bb.0:
11338; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11339; GFX940-NEXT:    ;;#ASMSTART
11340; GFX940-NEXT:    ; def s[0:1]
11341; GFX940-NEXT:    ;;#ASMEND
11342; GFX940-NEXT:    ;;#ASMSTART
11343; GFX940-NEXT:    ; def s[8:9]
11344; GFX940-NEXT:    ;;#ASMEND
11345; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
11346; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
11347; GFX940-NEXT:    ;;#ASMSTART
11348; GFX940-NEXT:    ; use s[8:9]
11349; GFX940-NEXT:    ;;#ASMEND
11350; GFX940-NEXT:    s_setpc_b64 s[30:31]
11351  %vec0 = call <4 x half> asm "; def $0", "=s"()
11352  %vec1 = call <4 x half> asm "; def $0", "=s"()
  ; Mask indices 0-3 select from the first operand, 4-7 from the second.
11353  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 2>
  ; Widen to <4 x half> (lane 3 poison) for the asm use pinned to s[8:9].
11354  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11355  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11356  ret void
11357}
11358
11359define void @s_shuffle_v3f16_v4f16__7_5_2() {
11360; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_2:
11361; GFX900:       ; %bb.0:
11362; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11363; GFX900-NEXT:    ;;#ASMSTART
11364; GFX900-NEXT:    ; def s[4:5]
11365; GFX900-NEXT:    ;;#ASMEND
11366; GFX900-NEXT:    ;;#ASMSTART
11367; GFX900-NEXT:    ; def s[8:9]
11368; GFX900-NEXT:    ;;#ASMEND
11369; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
11370; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
11371; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11372; GFX900-NEXT:    ;;#ASMSTART
11373; GFX900-NEXT:    ; use s[8:9]
11374; GFX900-NEXT:    ;;#ASMEND
11375; GFX900-NEXT:    s_setpc_b64 s[30:31]
11376;
11377; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_2:
11378; GFX90A:       ; %bb.0:
11379; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11380; GFX90A-NEXT:    ;;#ASMSTART
11381; GFX90A-NEXT:    ; def s[4:5]
11382; GFX90A-NEXT:    ;;#ASMEND
11383; GFX90A-NEXT:    ;;#ASMSTART
11384; GFX90A-NEXT:    ; def s[8:9]
11385; GFX90A-NEXT:    ;;#ASMEND
11386; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
11387; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
11388; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
11389; GFX90A-NEXT:    ;;#ASMSTART
11390; GFX90A-NEXT:    ; use s[8:9]
11391; GFX90A-NEXT:    ;;#ASMEND
11392; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11393;
11394; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_2:
11395; GFX940:       ; %bb.0:
11396; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11397; GFX940-NEXT:    ;;#ASMSTART
11398; GFX940-NEXT:    ; def s[0:1]
11399; GFX940-NEXT:    ;;#ASMEND
11400; GFX940-NEXT:    ;;#ASMSTART
11401; GFX940-NEXT:    ; def s[8:9]
11402; GFX940-NEXT:    ;;#ASMEND
11403; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
11404; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
11405; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
11406; GFX940-NEXT:    ;;#ASMSTART
11407; GFX940-NEXT:    ; use s[8:9]
11408; GFX940-NEXT:    ;;#ASMEND
11409; GFX940-NEXT:    s_setpc_b64 s[30:31]
11410  %vec0 = call <4 x half> asm "; def $0", "=s"()
11411  %vec1 = call <4 x half> asm "; def $0", "=s"()
11412  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 2>
11413  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11414  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11415  ret void
11416}
11417
; SGPR case, mask <7, 6, 2>: lanes 3 and 2 of %vec1 followed by lane 2 of
; %vec0 (mask indices 4-7 address the second shuffle operand).
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11418define void @s_shuffle_v3f16_v4f16__7_6_2() {
11419; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_2:
11420; GFX900:       ; %bb.0:
11421; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11422; GFX900-NEXT:    ;;#ASMSTART
11423; GFX900-NEXT:    ; def s[4:5]
11424; GFX900-NEXT:    ;;#ASMEND
11425; GFX900-NEXT:    ;;#ASMSTART
11426; GFX900-NEXT:    ; def s[8:9]
11427; GFX900-NEXT:    ;;#ASMEND
11428; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
11429; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
11430; GFX900-NEXT:    ;;#ASMSTART
11431; GFX900-NEXT:    ; use s[8:9]
11432; GFX900-NEXT:    ;;#ASMEND
11433; GFX900-NEXT:    s_setpc_b64 s[30:31]
11434;
11435; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_2:
11436; GFX90A:       ; %bb.0:
11437; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11438; GFX90A-NEXT:    ;;#ASMSTART
11439; GFX90A-NEXT:    ; def s[4:5]
11440; GFX90A-NEXT:    ;;#ASMEND
11441; GFX90A-NEXT:    ;;#ASMSTART
11442; GFX90A-NEXT:    ; def s[8:9]
11443; GFX90A-NEXT:    ;;#ASMEND
11444; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
11445; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
11446; GFX90A-NEXT:    ;;#ASMSTART
11447; GFX90A-NEXT:    ; use s[8:9]
11448; GFX90A-NEXT:    ;;#ASMEND
11449; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11450;
11451; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_2:
11452; GFX940:       ; %bb.0:
11453; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11454; GFX940-NEXT:    ;;#ASMSTART
11455; GFX940-NEXT:    ; def s[0:1]
11456; GFX940-NEXT:    ;;#ASMEND
11457; GFX940-NEXT:    ;;#ASMSTART
11458; GFX940-NEXT:    ; def s[8:9]
11459; GFX940-NEXT:    ;;#ASMEND
11460; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
11461; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
11462; GFX940-NEXT:    ;;#ASMSTART
11463; GFX940-NEXT:    ; use s[8:9]
11464; GFX940-NEXT:    ;;#ASMEND
11465; GFX940-NEXT:    s_setpc_b64 s[30:31]
11466  %vec0 = call <4 x half> asm "; def $0", "=s"()
11467  %vec1 = call <4 x half> asm "; def $0", "=s"()
11468  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 2>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11469  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11470  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11471  ret void
11472}
11473
; SGPR case, mask <poison, 3, 3>: result lane 0 is unconstrained; lanes 1 and
; 2 are both lane 3 of %vec0. The second shuffle operand is poison.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11474define void @s_shuffle_v3f16_v4f16__u_3_3() {
11475; GFX900-LABEL: s_shuffle_v3f16_v4f16__u_3_3:
11476; GFX900:       ; %bb.0:
11477; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11478; GFX900-NEXT:    ;;#ASMSTART
11479; GFX900-NEXT:    ; def s[4:5]
11480; GFX900-NEXT:    ;;#ASMEND
11481; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11482; GFX900-NEXT:    s_mov_b32 s8, s5
11483; GFX900-NEXT:    ;;#ASMSTART
11484; GFX900-NEXT:    ; use s[8:9]
11485; GFX900-NEXT:    ;;#ASMEND
11486; GFX900-NEXT:    s_setpc_b64 s[30:31]
11487;
11488; GFX90A-LABEL: s_shuffle_v3f16_v4f16__u_3_3:
11489; GFX90A:       ; %bb.0:
11490; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11491; GFX90A-NEXT:    ;;#ASMSTART
11492; GFX90A-NEXT:    ; def s[4:5]
11493; GFX90A-NEXT:    ;;#ASMEND
11494; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11495; GFX90A-NEXT:    s_mov_b32 s8, s5
11496; GFX90A-NEXT:    ;;#ASMSTART
11497; GFX90A-NEXT:    ; use s[8:9]
11498; GFX90A-NEXT:    ;;#ASMEND
11499; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11500;
11501; GFX940-LABEL: s_shuffle_v3f16_v4f16__u_3_3:
11502; GFX940:       ; %bb.0:
11503; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11504; GFX940-NEXT:    ;;#ASMSTART
11505; GFX940-NEXT:    ; def s[0:1]
11506; GFX940-NEXT:    ;;#ASMEND
11507; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11508; GFX940-NEXT:    s_mov_b32 s8, s1
11509; GFX940-NEXT:    ;;#ASMSTART
11510; GFX940-NEXT:    ; use s[8:9]
11511; GFX940-NEXT:    ;;#ASMEND
11512; GFX940-NEXT:    s_setpc_b64 s[30:31]
11513  %vec0 = call <4 x half> asm "; def $0", "=s"()
11514  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 3, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11515  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11516  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11517  ret void
11518}
11519
; SGPR case, mask <0, 3, 3>: lane 0 of %vec0, then lane 3 of %vec0 twice.
; The second shuffle operand is poison.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11520define void @s_shuffle_v3f16_v4f16__0_3_3() {
11521; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_3_3:
11522; GFX900:       ; %bb.0:
11523; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11524; GFX900-NEXT:    ;;#ASMSTART
11525; GFX900-NEXT:    ; def s[4:5]
11526; GFX900-NEXT:    ;;#ASMEND
11527; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11528; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11529; GFX900-NEXT:    ;;#ASMSTART
11530; GFX900-NEXT:    ; use s[8:9]
11531; GFX900-NEXT:    ;;#ASMEND
11532; GFX900-NEXT:    s_setpc_b64 s[30:31]
11533;
11534; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_3_3:
11535; GFX90A:       ; %bb.0:
11536; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11537; GFX90A-NEXT:    ;;#ASMSTART
11538; GFX90A-NEXT:    ; def s[4:5]
11539; GFX90A-NEXT:    ;;#ASMEND
11540; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11541; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11542; GFX90A-NEXT:    ;;#ASMSTART
11543; GFX90A-NEXT:    ; use s[8:9]
11544; GFX90A-NEXT:    ;;#ASMEND
11545; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11546;
11547; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_3_3:
11548; GFX940:       ; %bb.0:
11549; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11550; GFX940-NEXT:    ;;#ASMSTART
11551; GFX940-NEXT:    ; def s[0:1]
11552; GFX940-NEXT:    ;;#ASMEND
11553; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11554; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
11555; GFX940-NEXT:    ;;#ASMSTART
11556; GFX940-NEXT:    ; use s[8:9]
11557; GFX940-NEXT:    ;;#ASMEND
11558; GFX940-NEXT:    s_setpc_b64 s[30:31]
11559  %vec0 = call <4 x half> asm "; def $0", "=s"()
11560  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 3, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11561  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11562  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11563  ret void
11564}
11565
; SGPR case, mask <1, 3, 3>: lane 1 of %vec0, then lane 3 of %vec0 twice.
; The second shuffle operand is poison.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11566define void @s_shuffle_v3f16_v4f16__1_3_3() {
11567; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_3_3:
11568; GFX900:       ; %bb.0:
11569; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11570; GFX900-NEXT:    ;;#ASMSTART
11571; GFX900-NEXT:    ; def s[4:5]
11572; GFX900-NEXT:    ;;#ASMEND
11573; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11574; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
11575; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11576; GFX900-NEXT:    ;;#ASMSTART
11577; GFX900-NEXT:    ; use s[8:9]
11578; GFX900-NEXT:    ;;#ASMEND
11579; GFX900-NEXT:    s_setpc_b64 s[30:31]
11580;
11581; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_3_3:
11582; GFX90A:       ; %bb.0:
11583; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11584; GFX90A-NEXT:    ;;#ASMSTART
11585; GFX90A-NEXT:    ; def s[4:5]
11586; GFX90A-NEXT:    ;;#ASMEND
11587; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11588; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
11589; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11590; GFX90A-NEXT:    ;;#ASMSTART
11591; GFX90A-NEXT:    ; use s[8:9]
11592; GFX90A-NEXT:    ;;#ASMEND
11593; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11594;
11595; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_3_3:
11596; GFX940:       ; %bb.0:
11597; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11598; GFX940-NEXT:    ;;#ASMSTART
11599; GFX940-NEXT:    ; def s[0:1]
11600; GFX940-NEXT:    ;;#ASMEND
11601; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11602; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
11603; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
11604; GFX940-NEXT:    ;;#ASMSTART
11605; GFX940-NEXT:    ; use s[8:9]
11606; GFX940-NEXT:    ;;#ASMEND
11607; GFX940-NEXT:    s_setpc_b64 s[30:31]
11608  %vec0 = call <4 x half> asm "; def $0", "=s"()
11609  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 3, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11610  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11611  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11612  ret void
11613}
11614
; SGPR case, mask <2, 3, 3>: lanes 2 and 3 of %vec0, then lane 3 again.
; The second shuffle operand is poison.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11615define void @s_shuffle_v3f16_v4f16__2_3_3() {
11616; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_3_3:
11617; GFX900:       ; %bb.0:
11618; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11619; GFX900-NEXT:    ;;#ASMSTART
11620; GFX900-NEXT:    ; def s[4:5]
11621; GFX900-NEXT:    ;;#ASMEND
11622; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11623; GFX900-NEXT:    s_mov_b32 s8, s5
11624; GFX900-NEXT:    ;;#ASMSTART
11625; GFX900-NEXT:    ; use s[8:9]
11626; GFX900-NEXT:    ;;#ASMEND
11627; GFX900-NEXT:    s_setpc_b64 s[30:31]
11628;
11629; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_3_3:
11630; GFX90A:       ; %bb.0:
11631; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11632; GFX90A-NEXT:    ;;#ASMSTART
11633; GFX90A-NEXT:    ; def s[4:5]
11634; GFX90A-NEXT:    ;;#ASMEND
11635; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11636; GFX90A-NEXT:    s_mov_b32 s8, s5
11637; GFX90A-NEXT:    ;;#ASMSTART
11638; GFX90A-NEXT:    ; use s[8:9]
11639; GFX90A-NEXT:    ;;#ASMEND
11640; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11641;
11642; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_3_3:
11643; GFX940:       ; %bb.0:
11644; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11645; GFX940-NEXT:    ;;#ASMSTART
11646; GFX940-NEXT:    ; def s[0:1]
11647; GFX940-NEXT:    ;;#ASMEND
11648; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11649; GFX940-NEXT:    s_mov_b32 s8, s1
11650; GFX940-NEXT:    ;;#ASMSTART
11651; GFX940-NEXT:    ; use s[8:9]
11652; GFX940-NEXT:    ;;#ASMEND
11653; GFX940-NEXT:    s_setpc_b64 s[30:31]
11654  %vec0 = call <4 x half> asm "; def $0", "=s"()
11655  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 3, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11656  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11657  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11658  ret void
11659}
11660
; SGPR case, mask <3, 3, 3>: all three result lanes are lane 3 of %vec0.
; The second shuffle operand is poison.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11661define void @s_shuffle_v3f16_v4f16__3_3_3() {
11662; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_3_3:
11663; GFX900:       ; %bb.0:
11664; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11665; GFX900-NEXT:    ;;#ASMSTART
11666; GFX900-NEXT:    ; def s[4:5]
11667; GFX900-NEXT:    ;;#ASMEND
11668; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11669; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
11670; GFX900-NEXT:    ;;#ASMSTART
11671; GFX900-NEXT:    ; use s[8:9]
11672; GFX900-NEXT:    ;;#ASMEND
11673; GFX900-NEXT:    s_setpc_b64 s[30:31]
11674;
11675; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_3_3:
11676; GFX90A:       ; %bb.0:
11677; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11678; GFX90A-NEXT:    ;;#ASMSTART
11679; GFX90A-NEXT:    ; def s[4:5]
11680; GFX90A-NEXT:    ;;#ASMEND
11681; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11682; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
11683; GFX90A-NEXT:    ;;#ASMSTART
11684; GFX90A-NEXT:    ; use s[8:9]
11685; GFX90A-NEXT:    ;;#ASMEND
11686; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11687;
11688; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_3_3:
11689; GFX940:       ; %bb.0:
11690; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11691; GFX940-NEXT:    ;;#ASMSTART
11692; GFX940-NEXT:    ; def s[0:1]
11693; GFX940-NEXT:    ;;#ASMEND
11694; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11695; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
11696; GFX940-NEXT:    ;;#ASMSTART
11697; GFX940-NEXT:    ; use s[8:9]
11698; GFX940-NEXT:    ;;#ASMEND
11699; GFX940-NEXT:    s_setpc_b64 s[30:31]
11700  %vec0 = call <4 x half> asm "; def $0", "=s"()
11701  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 3, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11702  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11703  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11704  ret void
11705}
11706
; SGPR case, mask <4, 3, 3>: index 4 addresses lane 0 of the second shuffle
; operand, which is poison here, so result lane 0 is unconstrained; lanes 1
; and 2 are both lane 3 of %vec0.
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11707define void @s_shuffle_v3f16_v4f16__4_3_3() {
11708; GFX900-LABEL: s_shuffle_v3f16_v4f16__4_3_3:
11709; GFX900:       ; %bb.0:
11710; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11711; GFX900-NEXT:    ;;#ASMSTART
11712; GFX900-NEXT:    ; def s[4:5]
11713; GFX900-NEXT:    ;;#ASMEND
11714; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11715; GFX900-NEXT:    s_mov_b32 s8, s5
11716; GFX900-NEXT:    ;;#ASMSTART
11717; GFX900-NEXT:    ; use s[8:9]
11718; GFX900-NEXT:    ;;#ASMEND
11719; GFX900-NEXT:    s_setpc_b64 s[30:31]
11720;
11721; GFX90A-LABEL: s_shuffle_v3f16_v4f16__4_3_3:
11722; GFX90A:       ; %bb.0:
11723; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11724; GFX90A-NEXT:    ;;#ASMSTART
11725; GFX90A-NEXT:    ; def s[4:5]
11726; GFX90A-NEXT:    ;;#ASMEND
11727; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11728; GFX90A-NEXT:    s_mov_b32 s8, s5
11729; GFX90A-NEXT:    ;;#ASMSTART
11730; GFX90A-NEXT:    ; use s[8:9]
11731; GFX90A-NEXT:    ;;#ASMEND
11732; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11733;
11734; GFX940-LABEL: s_shuffle_v3f16_v4f16__4_3_3:
11735; GFX940:       ; %bb.0:
11736; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11737; GFX940-NEXT:    ;;#ASMSTART
11738; GFX940-NEXT:    ; def s[0:1]
11739; GFX940-NEXT:    ;;#ASMEND
11740; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11741; GFX940-NEXT:    s_mov_b32 s8, s1
11742; GFX940-NEXT:    ;;#ASMSTART
11743; GFX940-NEXT:    ; use s[8:9]
11744; GFX940-NEXT:    ;;#ASMEND
11745; GFX940-NEXT:    s_setpc_b64 s[30:31]
11746  %vec0 = call <4 x half> asm "; def $0", "=s"()
11747  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 3, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11748  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11749  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11750  ret void
11751}
11752
; SGPR case, mask <5, 3, 3>: lane 1 of %vec1, then lane 3 of %vec0 twice
; (mask indices 4-7 address the second shuffle operand).
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11753define void @s_shuffle_v3f16_v4f16__5_3_3() {
11754; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_3_3:
11755; GFX900:       ; %bb.0:
11756; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11757; GFX900-NEXT:    ;;#ASMSTART
11758; GFX900-NEXT:    ; def s[4:5]
11759; GFX900-NEXT:    ;;#ASMEND
11760; GFX900-NEXT:    ;;#ASMSTART
11761; GFX900-NEXT:    ; def s[6:7]
11762; GFX900-NEXT:    ;;#ASMEND
11763; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11764; GFX900-NEXT:    s_lshr_b32 s4, s6, 16
11765; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11766; GFX900-NEXT:    ;;#ASMSTART
11767; GFX900-NEXT:    ; use s[8:9]
11768; GFX900-NEXT:    ;;#ASMEND
11769; GFX900-NEXT:    s_setpc_b64 s[30:31]
11770;
11771; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_3_3:
11772; GFX90A:       ; %bb.0:
11773; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11774; GFX90A-NEXT:    ;;#ASMSTART
11775; GFX90A-NEXT:    ; def s[4:5]
11776; GFX90A-NEXT:    ;;#ASMEND
11777; GFX90A-NEXT:    ;;#ASMSTART
11778; GFX90A-NEXT:    ; def s[6:7]
11779; GFX90A-NEXT:    ;;#ASMEND
11780; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11781; GFX90A-NEXT:    s_lshr_b32 s4, s6, 16
11782; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11783; GFX90A-NEXT:    ;;#ASMSTART
11784; GFX90A-NEXT:    ; use s[8:9]
11785; GFX90A-NEXT:    ;;#ASMEND
11786; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11787;
11788; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_3_3:
11789; GFX940:       ; %bb.0:
11790; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11791; GFX940-NEXT:    ;;#ASMSTART
11792; GFX940-NEXT:    ; def s[0:1]
11793; GFX940-NEXT:    ;;#ASMEND
11794; GFX940-NEXT:    ;;#ASMSTART
11795; GFX940-NEXT:    ; def s[2:3]
11796; GFX940-NEXT:    ;;#ASMEND
11797; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11798; GFX940-NEXT:    s_lshr_b32 s0, s2, 16
11799; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
11800; GFX940-NEXT:    ;;#ASMSTART
11801; GFX940-NEXT:    ; use s[8:9]
11802; GFX940-NEXT:    ;;#ASMEND
11803; GFX940-NEXT:    s_setpc_b64 s[30:31]
11804  %vec0 = call <4 x half> asm "; def $0", "=s"()
11805  %vec1 = call <4 x half> asm "; def $0", "=s"()
11806  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 3, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11807  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11808  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11809  ret void
11810}
11811
; SGPR case, mask <6, 3, 3>: lane 2 of %vec1, then lane 3 of %vec0 twice
; (mask indices 4-7 address the second shuffle operand).
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11812define void @s_shuffle_v3f16_v4f16__6_3_3() {
11813; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_3_3:
11814; GFX900:       ; %bb.0:
11815; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11816; GFX900-NEXT:    ;;#ASMSTART
11817; GFX900-NEXT:    ; def s[4:5]
11818; GFX900-NEXT:    ;;#ASMEND
11819; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11820; GFX900-NEXT:    ;;#ASMSTART
11821; GFX900-NEXT:    ; def s[6:7]
11822; GFX900-NEXT:    ;;#ASMEND
11823; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s9
11824; GFX900-NEXT:    ;;#ASMSTART
11825; GFX900-NEXT:    ; use s[8:9]
11826; GFX900-NEXT:    ;;#ASMEND
11827; GFX900-NEXT:    s_setpc_b64 s[30:31]
11828;
11829; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_3_3:
11830; GFX90A:       ; %bb.0:
11831; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11832; GFX90A-NEXT:    ;;#ASMSTART
11833; GFX90A-NEXT:    ; def s[4:5]
11834; GFX90A-NEXT:    ;;#ASMEND
11835; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11836; GFX90A-NEXT:    ;;#ASMSTART
11837; GFX90A-NEXT:    ; def s[6:7]
11838; GFX90A-NEXT:    ;;#ASMEND
11839; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s9
11840; GFX90A-NEXT:    ;;#ASMSTART
11841; GFX90A-NEXT:    ; use s[8:9]
11842; GFX90A-NEXT:    ;;#ASMEND
11843; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11844;
11845; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_3_3:
11846; GFX940:       ; %bb.0:
11847; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11848; GFX940-NEXT:    ;;#ASMSTART
11849; GFX940-NEXT:    ; def s[0:1]
11850; GFX940-NEXT:    ;;#ASMEND
11851; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11852; GFX940-NEXT:    ;;#ASMSTART
11853; GFX940-NEXT:    ; def s[2:3]
11854; GFX940-NEXT:    ;;#ASMEND
11855; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s9
11856; GFX940-NEXT:    ;;#ASMSTART
11857; GFX940-NEXT:    ; use s[8:9]
11858; GFX940-NEXT:    ;;#ASMEND
11859; GFX940-NEXT:    s_setpc_b64 s[30:31]
11860  %vec0 = call <4 x half> asm "; def $0", "=s"()
11861  %vec1 = call <4 x half> asm "; def $0", "=s"()
11862  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 3, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11863  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11864  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11865  ret void
11866}
11867
; SGPR case, mask <7, 3, 3>: lane 3 of %vec1, then lane 3 of %vec0 twice
; (mask indices 4-7 address the second shuffle operand).
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11868define void @s_shuffle_v3f16_v4f16__7_3_3() {
11869; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_3:
11870; GFX900:       ; %bb.0:
11871; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11872; GFX900-NEXT:    ;;#ASMSTART
11873; GFX900-NEXT:    ; def s[4:5]
11874; GFX900-NEXT:    ;;#ASMEND
11875; GFX900-NEXT:    ;;#ASMSTART
11876; GFX900-NEXT:    ; def s[6:7]
11877; GFX900-NEXT:    ;;#ASMEND
11878; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11879; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
11880; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11881; GFX900-NEXT:    ;;#ASMSTART
11882; GFX900-NEXT:    ; use s[8:9]
11883; GFX900-NEXT:    ;;#ASMEND
11884; GFX900-NEXT:    s_setpc_b64 s[30:31]
11885;
11886; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_3:
11887; GFX90A:       ; %bb.0:
11888; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11889; GFX90A-NEXT:    ;;#ASMSTART
11890; GFX90A-NEXT:    ; def s[4:5]
11891; GFX90A-NEXT:    ;;#ASMEND
11892; GFX90A-NEXT:    ;;#ASMSTART
11893; GFX90A-NEXT:    ; def s[6:7]
11894; GFX90A-NEXT:    ;;#ASMEND
11895; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11896; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
11897; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
11898; GFX90A-NEXT:    ;;#ASMSTART
11899; GFX90A-NEXT:    ; use s[8:9]
11900; GFX90A-NEXT:    ;;#ASMEND
11901; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11902;
11903; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_3:
11904; GFX940:       ; %bb.0:
11905; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11906; GFX940-NEXT:    ;;#ASMSTART
11907; GFX940-NEXT:    ; def s[0:1]
11908; GFX940-NEXT:    ;;#ASMEND
11909; GFX940-NEXT:    ;;#ASMSTART
11910; GFX940-NEXT:    ; def s[2:3]
11911; GFX940-NEXT:    ;;#ASMEND
11912; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11913; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
11914; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
11915; GFX940-NEXT:    ;;#ASMSTART
11916; GFX940-NEXT:    ; use s[8:9]
11917; GFX940-NEXT:    ;;#ASMEND
11918; GFX940-NEXT:    s_setpc_b64 s[30:31]
11919  %vec0 = call <4 x half> asm "; def $0", "=s"()
11920  %vec1 = call <4 x half> asm "; def $0", "=s"()
11921  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11922  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11923  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11924  ret void
11925}
11926
; SGPR case, mask <7, poison, 3>: lane 3 of %vec1, an unconstrained lane 1,
; then lane 3 of %vec0 (mask indices 4-7 address the second shuffle operand).
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11927define void @s_shuffle_v3f16_v4f16__7_u_3() {
11928; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_3:
11929; GFX900:       ; %bb.0:
11930; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11931; GFX900-NEXT:    ;;#ASMSTART
11932; GFX900-NEXT:    ; def s[4:5]
11933; GFX900-NEXT:    ;;#ASMEND
11934; GFX900-NEXT:    ;;#ASMSTART
11935; GFX900-NEXT:    ; def s[6:7]
11936; GFX900-NEXT:    ;;#ASMEND
11937; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11938; GFX900-NEXT:    s_lshr_b32 s8, s7, 16
11939; GFX900-NEXT:    ;;#ASMSTART
11940; GFX900-NEXT:    ; use s[8:9]
11941; GFX900-NEXT:    ;;#ASMEND
11942; GFX900-NEXT:    s_setpc_b64 s[30:31]
11943;
11944; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_3:
11945; GFX90A:       ; %bb.0:
11946; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11947; GFX90A-NEXT:    ;;#ASMSTART
11948; GFX90A-NEXT:    ; def s[4:5]
11949; GFX90A-NEXT:    ;;#ASMEND
11950; GFX90A-NEXT:    ;;#ASMSTART
11951; GFX90A-NEXT:    ; def s[6:7]
11952; GFX90A-NEXT:    ;;#ASMEND
11953; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
11954; GFX90A-NEXT:    s_lshr_b32 s8, s7, 16
11955; GFX90A-NEXT:    ;;#ASMSTART
11956; GFX90A-NEXT:    ; use s[8:9]
11957; GFX90A-NEXT:    ;;#ASMEND
11958; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11959;
11960; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_3:
11961; GFX940:       ; %bb.0:
11962; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11963; GFX940-NEXT:    ;;#ASMSTART
11964; GFX940-NEXT:    ; def s[0:1]
11965; GFX940-NEXT:    ;;#ASMEND
11966; GFX940-NEXT:    ;;#ASMSTART
11967; GFX940-NEXT:    ; def s[2:3]
11968; GFX940-NEXT:    ;;#ASMEND
11969; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
11970; GFX940-NEXT:    s_lshr_b32 s8, s3, 16
11971; GFX940-NEXT:    ;;#ASMSTART
11972; GFX940-NEXT:    ; use s[8:9]
11973; GFX940-NEXT:    ;;#ASMEND
11974; GFX940-NEXT:    s_setpc_b64 s[30:31]
11975  %vec0 = call <4 x half> asm "; def $0", "=s"()
11976  %vec1 = call <4 x half> asm "; def $0", "=s"()
11977  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
11978  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
11979  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
11980  ret void
11981}
11982
; SGPR case, mask <7, 0, 3>: lane 3 of %vec1, then lanes 0 and 3 of %vec0
; (mask indices 4-7 address the second shuffle operand).
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
11983define void @s_shuffle_v3f16_v4f16__7_0_3() {
11984; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_3:
11985; GFX900:       ; %bb.0:
11986; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11987; GFX900-NEXT:    ;;#ASMSTART
11988; GFX900-NEXT:    ; def s[6:7]
11989; GFX900-NEXT:    ;;#ASMEND
11990; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
11991; GFX900-NEXT:    ;;#ASMSTART
11992; GFX900-NEXT:    ; def s[4:5]
11993; GFX900-NEXT:    ;;#ASMEND
11994; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
11995; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
11996; GFX900-NEXT:    ;;#ASMSTART
11997; GFX900-NEXT:    ; use s[8:9]
11998; GFX900-NEXT:    ;;#ASMEND
11999; GFX900-NEXT:    s_setpc_b64 s[30:31]
12000;
12001; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_3:
12002; GFX90A:       ; %bb.0:
12003; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12004; GFX90A-NEXT:    ;;#ASMSTART
12005; GFX90A-NEXT:    ; def s[6:7]
12006; GFX90A-NEXT:    ;;#ASMEND
12007; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
12008; GFX90A-NEXT:    ;;#ASMSTART
12009; GFX90A-NEXT:    ; def s[4:5]
12010; GFX90A-NEXT:    ;;#ASMEND
12011; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
12012; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
12013; GFX90A-NEXT:    ;;#ASMSTART
12014; GFX90A-NEXT:    ; use s[8:9]
12015; GFX90A-NEXT:    ;;#ASMEND
12016; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12017;
12018; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_3:
12019; GFX940:       ; %bb.0:
12020; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12021; GFX940-NEXT:    ;;#ASMSTART
12022; GFX940-NEXT:    ; def s[2:3]
12023; GFX940-NEXT:    ;;#ASMEND
12024; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
12025; GFX940-NEXT:    ;;#ASMSTART
12026; GFX940-NEXT:    ; def s[0:1]
12027; GFX940-NEXT:    ;;#ASMEND
12028; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s0
12029; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
12030; GFX940-NEXT:    ;;#ASMSTART
12031; GFX940-NEXT:    ; use s[8:9]
12032; GFX940-NEXT:    ;;#ASMEND
12033; GFX940-NEXT:    s_setpc_b64 s[30:31]
12034  %vec0 = call <4 x half> asm "; def $0", "=s"()
12035  %vec1 = call <4 x half> asm "; def $0", "=s"()
12036  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
12037  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12038  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12039  ret void
12040}
12041
; SGPR case, mask <7, 1, 3>: lane 3 of %vec1, then lanes 1 and 3 of %vec0
; (mask indices 4-7 address the second shuffle operand).
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
12042define void @s_shuffle_v3f16_v4f16__7_1_3() {
12043; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_3:
12044; GFX900:       ; %bb.0:
12045; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12046; GFX900-NEXT:    ;;#ASMSTART
12047; GFX900-NEXT:    ; def s[4:5]
12048; GFX900-NEXT:    ;;#ASMEND
12049; GFX900-NEXT:    ;;#ASMSTART
12050; GFX900-NEXT:    ; def s[6:7]
12051; GFX900-NEXT:    ;;#ASMEND
12052; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
12053; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
12054; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
12055; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
12056; GFX900-NEXT:    ;;#ASMSTART
12057; GFX900-NEXT:    ; use s[8:9]
12058; GFX900-NEXT:    ;;#ASMEND
12059; GFX900-NEXT:    s_setpc_b64 s[30:31]
12060;
12061; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_3:
12062; GFX90A:       ; %bb.0:
12063; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12064; GFX90A-NEXT:    ;;#ASMSTART
12065; GFX90A-NEXT:    ; def s[4:5]
12066; GFX90A-NEXT:    ;;#ASMEND
12067; GFX90A-NEXT:    ;;#ASMSTART
12068; GFX90A-NEXT:    ; def s[6:7]
12069; GFX90A-NEXT:    ;;#ASMEND
12070; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
12071; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
12072; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
12073; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
12074; GFX90A-NEXT:    ;;#ASMSTART
12075; GFX90A-NEXT:    ; use s[8:9]
12076; GFX90A-NEXT:    ;;#ASMEND
12077; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12078;
12079; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_3:
12080; GFX940:       ; %bb.0:
12081; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12082; GFX940-NEXT:    ;;#ASMSTART
12083; GFX940-NEXT:    ; def s[0:1]
12084; GFX940-NEXT:    ;;#ASMEND
12085; GFX940-NEXT:    ;;#ASMSTART
12086; GFX940-NEXT:    ; def s[2:3]
12087; GFX940-NEXT:    ;;#ASMEND
12088; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
12089; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
12090; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s0
12091; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
12092; GFX940-NEXT:    ;;#ASMSTART
12093; GFX940-NEXT:    ; use s[8:9]
12094; GFX940-NEXT:    ;;#ASMEND
12095; GFX940-NEXT:    s_setpc_b64 s[30:31]
12096  %vec0 = call <4 x half> asm "; def $0", "=s"()
12097  %vec1 = call <4 x half> asm "; def $0", "=s"()
12098  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
12099  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12100  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12101  ret void
12102}
12103
; SGPR case, mask <7, 2, 3>: lane 3 of %vec1, then lanes 2 and 3 of %vec0
; (mask indices 4-7 address the second shuffle operand).
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
12104define void @s_shuffle_v3f16_v4f16__7_2_3() {
12105; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_3:
12106; GFX900:       ; %bb.0:
12107; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12108; GFX900-NEXT:    ;;#ASMSTART
12109; GFX900-NEXT:    ; def s[4:5]
12110; GFX900-NEXT:    ;;#ASMEND
12111; GFX900-NEXT:    ;;#ASMSTART
12112; GFX900-NEXT:    ; def s[6:7]
12113; GFX900-NEXT:    ;;#ASMEND
12114; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
12115; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12116; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
12117; GFX900-NEXT:    ;;#ASMSTART
12118; GFX900-NEXT:    ; use s[8:9]
12119; GFX900-NEXT:    ;;#ASMEND
12120; GFX900-NEXT:    s_setpc_b64 s[30:31]
12121;
12122; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_3:
12123; GFX90A:       ; %bb.0:
12124; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12125; GFX90A-NEXT:    ;;#ASMSTART
12126; GFX90A-NEXT:    ; def s[4:5]
12127; GFX90A-NEXT:    ;;#ASMEND
12128; GFX90A-NEXT:    ;;#ASMSTART
12129; GFX90A-NEXT:    ; def s[6:7]
12130; GFX90A-NEXT:    ;;#ASMEND
12131; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
12132; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12133; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
12134; GFX90A-NEXT:    ;;#ASMSTART
12135; GFX90A-NEXT:    ; use s[8:9]
12136; GFX90A-NEXT:    ;;#ASMEND
12137; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12138;
12139; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_3:
12140; GFX940:       ; %bb.0:
12141; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12142; GFX940-NEXT:    ;;#ASMSTART
12143; GFX940-NEXT:    ; def s[0:1]
12144; GFX940-NEXT:    ;;#ASMEND
12145; GFX940-NEXT:    ;;#ASMSTART
12146; GFX940-NEXT:    ; def s[2:3]
12147; GFX940-NEXT:    ;;#ASMEND
12148; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
12149; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
12150; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
12151; GFX940-NEXT:    ;;#ASMSTART
12152; GFX940-NEXT:    ; use s[8:9]
12153; GFX940-NEXT:    ;;#ASMEND
12154; GFX940-NEXT:    s_setpc_b64 s[30:31]
12155  %vec0 = call <4 x half> asm "; def $0", "=s"()
12156  %vec1 = call <4 x half> asm "; def $0", "=s"()
12157  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
12158  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12159  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12160  ret void
12161}
12162
; SGPR case, mask <7, 4, 3>: lanes 3 and 0 of %vec1, then lane 3 of %vec0
; (mask indices 4-7 address the second shuffle operand).
; The assertion lines below are autogenerated; regenerate, do not hand-edit.
12163define void @s_shuffle_v3f16_v4f16__7_4_3() {
12164; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_3:
12165; GFX900:       ; %bb.0:
12166; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12167; GFX900-NEXT:    ;;#ASMSTART
12168; GFX900-NEXT:    ; def s[4:5]
12169; GFX900-NEXT:    ;;#ASMEND
12170; GFX900-NEXT:    ;;#ASMSTART
12171; GFX900-NEXT:    ; def s[6:7]
12172; GFX900-NEXT:    ;;#ASMEND
12173; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
12174; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s6
12175; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
12176; GFX900-NEXT:    ;;#ASMSTART
12177; GFX900-NEXT:    ; use s[8:9]
12178; GFX900-NEXT:    ;;#ASMEND
12179; GFX900-NEXT:    s_setpc_b64 s[30:31]
12180;
12181; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_3:
12182; GFX90A:       ; %bb.0:
12183; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12184; GFX90A-NEXT:    ;;#ASMSTART
12185; GFX90A-NEXT:    ; def s[4:5]
12186; GFX90A-NEXT:    ;;#ASMEND
12187; GFX90A-NEXT:    ;;#ASMSTART
12188; GFX90A-NEXT:    ; def s[6:7]
12189; GFX90A-NEXT:    ;;#ASMEND
12190; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
12191; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s6
12192; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
12193; GFX90A-NEXT:    ;;#ASMSTART
12194; GFX90A-NEXT:    ; use s[8:9]
12195; GFX90A-NEXT:    ;;#ASMEND
12196; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12197;
12198; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_3:
12199; GFX940:       ; %bb.0:
12200; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12201; GFX940-NEXT:    ;;#ASMSTART
12202; GFX940-NEXT:    ; def s[0:1]
12203; GFX940-NEXT:    ;;#ASMEND
12204; GFX940-NEXT:    ;;#ASMSTART
12205; GFX940-NEXT:    ; def s[2:3]
12206; GFX940-NEXT:    ;;#ASMEND
12207; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
12208; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s2
12209; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
12210; GFX940-NEXT:    ;;#ASMSTART
12211; GFX940-NEXT:    ; use s[8:9]
12212; GFX940-NEXT:    ;;#ASMEND
12213; GFX940-NEXT:    s_setpc_b64 s[30:31]
12214  %vec0 = call <4 x half> asm "; def $0", "=s"()
12215  %vec1 = call <4 x half> asm "; def $0", "=s"()
12216  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 3>
  ; Widen to <4 x half> (lane 3 poison) so the value can be tied to s[8:9].
12217  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12218  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12219  ret void
12220}
12221
; SGPR variant of shuffle mask <7, 5, 3>: result lane 0 is %vec1[3], lane 1 is
; %vec1[1] and lane 2 is %vec0[3]. All three selected elements are the high
; halves of their dwords, so the expected code uses one 16-bit right shift per
; lane, packs lanes 0 and 1 into s8 and leaves lane 2 in s9.
12222define void @s_shuffle_v3f16_v4f16__7_5_3() {
12223; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_3:
12224; GFX900:       ; %bb.0:
12225; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12226; GFX900-NEXT:    ;;#ASMSTART
12227; GFX900-NEXT:    ; def s[4:5]
12228; GFX900-NEXT:    ;;#ASMEND
12229; GFX900-NEXT:    ;;#ASMSTART
12230; GFX900-NEXT:    ; def s[6:7]
12231; GFX900-NEXT:    ;;#ASMEND
12232; GFX900-NEXT:    s_lshr_b32 s4, s6, 16
12233; GFX900-NEXT:    s_lshr_b32 s6, s7, 16
12234; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
12235; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
12236; GFX900-NEXT:    ;;#ASMSTART
12237; GFX900-NEXT:    ; use s[8:9]
12238; GFX900-NEXT:    ;;#ASMEND
12239; GFX900-NEXT:    s_setpc_b64 s[30:31]
12240;
12241; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_3:
12242; GFX90A:       ; %bb.0:
12243; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12244; GFX90A-NEXT:    ;;#ASMSTART
12245; GFX90A-NEXT:    ; def s[4:5]
12246; GFX90A-NEXT:    ;;#ASMEND
12247; GFX90A-NEXT:    ;;#ASMSTART
12248; GFX90A-NEXT:    ; def s[6:7]
12249; GFX90A-NEXT:    ;;#ASMEND
12250; GFX90A-NEXT:    s_lshr_b32 s4, s6, 16
12251; GFX90A-NEXT:    s_lshr_b32 s6, s7, 16
12252; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s4
12253; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
12254; GFX90A-NEXT:    ;;#ASMSTART
12255; GFX90A-NEXT:    ; use s[8:9]
12256; GFX90A-NEXT:    ;;#ASMEND
12257; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12258;
12259; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_3:
12260; GFX940:       ; %bb.0:
12261; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12262; GFX940-NEXT:    ;;#ASMSTART
12263; GFX940-NEXT:    ; def s[0:1]
12264; GFX940-NEXT:    ;;#ASMEND
12265; GFX940-NEXT:    ;;#ASMSTART
12266; GFX940-NEXT:    ; def s[2:3]
12267; GFX940-NEXT:    ;;#ASMEND
12268; GFX940-NEXT:    s_lshr_b32 s0, s2, 16
12269; GFX940-NEXT:    s_lshr_b32 s2, s3, 16
12270; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s0
12271; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
12272; GFX940-NEXT:    ;;#ASMSTART
12273; GFX940-NEXT:    ; use s[8:9]
12274; GFX940-NEXT:    ;;#ASMEND
12275; GFX940-NEXT:    s_setpc_b64 s[30:31]
12276  %vec0 = call <4 x half> asm "; def $0", "=s"()
12277  %vec1 = call <4 x half> asm "; def $0", "=s"()
12278  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 3>
12279  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12280  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12281  ret void
12282}
12283
; SGPR variant of shuffle mask <7, 6, 3>: result lane 0 is %vec1[3], lane 1 is
; %vec1[2] and lane 2 is %vec0[3]. Lanes 0 and 1 are the two halves of
; %vec1's second dword in swapped order, so the expected code shifts that
; dword right by 16 and packs the shifted value with the original into s8.
; Lane 2 is the high half of %vec0's second dword, shifted into s9.
12284define void @s_shuffle_v3f16_v4f16__7_6_3() {
12285; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_3:
12286; GFX900:       ; %bb.0:
12287; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12288; GFX900-NEXT:    ;;#ASMSTART
12289; GFX900-NEXT:    ; def s[4:5]
12290; GFX900-NEXT:    ;;#ASMEND
12291; GFX900-NEXT:    ;;#ASMSTART
12292; GFX900-NEXT:    ; def s[6:7]
12293; GFX900-NEXT:    ;;#ASMEND
12294; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
12295; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
12296; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
12297; GFX900-NEXT:    ;;#ASMSTART
12298; GFX900-NEXT:    ; use s[8:9]
12299; GFX900-NEXT:    ;;#ASMEND
12300; GFX900-NEXT:    s_setpc_b64 s[30:31]
12301;
12302; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_3:
12303; GFX90A:       ; %bb.0:
12304; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12305; GFX90A-NEXT:    ;;#ASMSTART
12306; GFX90A-NEXT:    ; def s[4:5]
12307; GFX90A-NEXT:    ;;#ASMEND
12308; GFX90A-NEXT:    ;;#ASMSTART
12309; GFX90A-NEXT:    ; def s[6:7]
12310; GFX90A-NEXT:    ;;#ASMEND
12311; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
12312; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
12313; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
12314; GFX90A-NEXT:    ;;#ASMSTART
12315; GFX90A-NEXT:    ; use s[8:9]
12316; GFX90A-NEXT:    ;;#ASMEND
12317; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12318;
12319; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_3:
12320; GFX940:       ; %bb.0:
12321; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12322; GFX940-NEXT:    ;;#ASMSTART
12323; GFX940-NEXT:    ; def s[0:1]
12324; GFX940-NEXT:    ;;#ASMEND
12325; GFX940-NEXT:    ;;#ASMSTART
12326; GFX940-NEXT:    ; def s[2:3]
12327; GFX940-NEXT:    ;;#ASMEND
12328; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
12329; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
12330; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
12331; GFX940-NEXT:    ;;#ASMSTART
12332; GFX940-NEXT:    ; use s[8:9]
12333; GFX940-NEXT:    ;;#ASMEND
12334; GFX940-NEXT:    s_setpc_b64 s[30:31]
12335  %vec0 = call <4 x half> asm "; def $0", "=s"()
12336  %vec1 = call <4 x half> asm "; def $0", "=s"()
12337  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 3>
12338  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12339  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12340  ret void
12341}
12342
; SGPR variant of shuffle mask <poison, 4, 4> with a poison second operand.
; Index 4 selects element 0 of the second operand, which is poison here, so no
; result lane depends on %vec0. The expected output therefore contains no
; "def" asm block at all, only the "use" of s[8:9].
; Review note: the "_4_4" in the name suggests lanes taken from a real second
; vector; confirm that the poison operand is what the generator intends.
12343define void @s_shuffle_v3f16_v4f16__u_4_4() {
12344; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_4_4:
12345; GFX9:       ; %bb.0:
12346; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12347; GFX9-NEXT:    ;;#ASMSTART
12348; GFX9-NEXT:    ; use s[8:9]
12349; GFX9-NEXT:    ;;#ASMEND
12350; GFX9-NEXT:    s_setpc_b64 s[30:31]
12351  %vec0 = call <4 x half> asm "; def $0", "=s"()
12352  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 4, i32 4>
12353  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12354  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12355  ret void
12356}
12357
; SGPR variant of shuffle mask <0, 4, 4> with a poison second operand: only
; result lane 0 (%vec0[0]) is defined, lanes 1 and 2 select poison. The
; expected output has the "def" asm write s[8:9] directly, with no shift, pack
; or copy before the "use". The gfx940 expectation additionally has an
; s_nop 0 between the two asm blocks.
; Review note: index 4 refers to the poison operand here; confirm that this is
; what the generator intends for the "_4_4" cases.
12358define void @s_shuffle_v3f16_v4f16__0_4_4() {
12359; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_4_4:
12360; GFX900:       ; %bb.0:
12361; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12362; GFX900-NEXT:    ;;#ASMSTART
12363; GFX900-NEXT:    ; def s[8:9]
12364; GFX900-NEXT:    ;;#ASMEND
12365; GFX900-NEXT:    ;;#ASMSTART
12366; GFX900-NEXT:    ; use s[8:9]
12367; GFX900-NEXT:    ;;#ASMEND
12368; GFX900-NEXT:    s_setpc_b64 s[30:31]
12369;
12370; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_4_4:
12371; GFX90A:       ; %bb.0:
12372; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12373; GFX90A-NEXT:    ;;#ASMSTART
12374; GFX90A-NEXT:    ; def s[8:9]
12375; GFX90A-NEXT:    ;;#ASMEND
12376; GFX90A-NEXT:    ;;#ASMSTART
12377; GFX90A-NEXT:    ; use s[8:9]
12378; GFX90A-NEXT:    ;;#ASMEND
12379; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12380;
12381; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_4_4:
12382; GFX940:       ; %bb.0:
12383; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12384; GFX940-NEXT:    ;;#ASMSTART
12385; GFX940-NEXT:    ; def s[8:9]
12386; GFX940-NEXT:    ;;#ASMEND
12387; GFX940-NEXT:    s_nop 0
12388; GFX940-NEXT:    ;;#ASMSTART
12389; GFX940-NEXT:    ; use s[8:9]
12390; GFX940-NEXT:    ;;#ASMEND
12391; GFX940-NEXT:    s_setpc_b64 s[30:31]
12392  %vec0 = call <4 x half> asm "; def $0", "=s"()
12393  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 4, i32 4>
12394  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12395  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12396  ret void
12397}
12398
; SGPR variant of shuffle mask <1, 4, 4> with a poison second operand: only
; result lane 0 (%vec0[1], the high half of the first dword) is defined. The
; expected code is a single 16-bit right shift of that dword into s8; s9 is
; never written because lane 2 selects poison.
; Review note: index 4 refers to the poison operand here; confirm that this is
; what the generator intends for the "_4_4" cases.
12399define void @s_shuffle_v3f16_v4f16__1_4_4() {
12400; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_4_4:
12401; GFX900:       ; %bb.0:
12402; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12403; GFX900-NEXT:    ;;#ASMSTART
12404; GFX900-NEXT:    ; def s[4:5]
12405; GFX900-NEXT:    ;;#ASMEND
12406; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
12407; GFX900-NEXT:    ;;#ASMSTART
12408; GFX900-NEXT:    ; use s[8:9]
12409; GFX900-NEXT:    ;;#ASMEND
12410; GFX900-NEXT:    s_setpc_b64 s[30:31]
12411;
12412; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_4_4:
12413; GFX90A:       ; %bb.0:
12414; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12415; GFX90A-NEXT:    ;;#ASMSTART
12416; GFX90A-NEXT:    ; def s[4:5]
12417; GFX90A-NEXT:    ;;#ASMEND
12418; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
12419; GFX90A-NEXT:    ;;#ASMSTART
12420; GFX90A-NEXT:    ; use s[8:9]
12421; GFX90A-NEXT:    ;;#ASMEND
12422; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12423;
12424; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_4_4:
12425; GFX940:       ; %bb.0:
12426; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12427; GFX940-NEXT:    ;;#ASMSTART
12428; GFX940-NEXT:    ; def s[0:1]
12429; GFX940-NEXT:    ;;#ASMEND
12430; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
12431; GFX940-NEXT:    ;;#ASMSTART
12432; GFX940-NEXT:    ; use s[8:9]
12433; GFX940-NEXT:    ;;#ASMEND
12434; GFX940-NEXT:    s_setpc_b64 s[30:31]
12435  %vec0 = call <4 x half> asm "; def $0", "=s"()
12436  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 4, i32 4>
12437  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12438  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12439  ret void
12440}
12441
; SGPR variant of shuffle mask <2, 4, 4> with a poison second operand: only
; result lane 0 (%vec0[2], the low half of the second dword) is defined. The
; expected code is a plain copy of that dword into s8; s9 is never written
; because lane 2 selects poison.
; Review note: index 4 refers to the poison operand here; confirm that this is
; what the generator intends for the "_4_4" cases.
12442define void @s_shuffle_v3f16_v4f16__2_4_4() {
12443; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_4_4:
12444; GFX900:       ; %bb.0:
12445; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12446; GFX900-NEXT:    ;;#ASMSTART
12447; GFX900-NEXT:    ; def s[4:5]
12448; GFX900-NEXT:    ;;#ASMEND
12449; GFX900-NEXT:    s_mov_b32 s8, s5
12450; GFX900-NEXT:    ;;#ASMSTART
12451; GFX900-NEXT:    ; use s[8:9]
12452; GFX900-NEXT:    ;;#ASMEND
12453; GFX900-NEXT:    s_setpc_b64 s[30:31]
12454;
12455; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_4_4:
12456; GFX90A:       ; %bb.0:
12457; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12458; GFX90A-NEXT:    ;;#ASMSTART
12459; GFX90A-NEXT:    ; def s[4:5]
12460; GFX90A-NEXT:    ;;#ASMEND
12461; GFX90A-NEXT:    s_mov_b32 s8, s5
12462; GFX90A-NEXT:    ;;#ASMSTART
12463; GFX90A-NEXT:    ; use s[8:9]
12464; GFX90A-NEXT:    ;;#ASMEND
12465; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12466;
12467; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_4_4:
12468; GFX940:       ; %bb.0:
12469; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12470; GFX940-NEXT:    ;;#ASMSTART
12471; GFX940-NEXT:    ; def s[0:1]
12472; GFX940-NEXT:    ;;#ASMEND
12473; GFX940-NEXT:    s_mov_b32 s8, s1
12474; GFX940-NEXT:    ;;#ASMSTART
12475; GFX940-NEXT:    ; use s[8:9]
12476; GFX940-NEXT:    ;;#ASMEND
12477; GFX940-NEXT:    s_setpc_b64 s[30:31]
12478  %vec0 = call <4 x half> asm "; def $0", "=s"()
12479  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 4, i32 4>
12480  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12481  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12482  ret void
12483}
12484
; SGPR variant of shuffle mask <3, 4, 4> with a poison second operand: only
; result lane 0 (%vec0[3], the high half of the second dword) is defined. The
; expected code is a single 16-bit right shift of that dword into s8; s9 is
; never written because lane 2 selects poison.
; Review note: index 4 refers to the poison operand here; confirm that this is
; what the generator intends for the "_4_4" cases.
12485define void @s_shuffle_v3f16_v4f16__3_4_4() {
12486; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_4_4:
12487; GFX900:       ; %bb.0:
12488; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12489; GFX900-NEXT:    ;;#ASMSTART
12490; GFX900-NEXT:    ; def s[4:5]
12491; GFX900-NEXT:    ;;#ASMEND
12492; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
12493; GFX900-NEXT:    ;;#ASMSTART
12494; GFX900-NEXT:    ; use s[8:9]
12495; GFX900-NEXT:    ;;#ASMEND
12496; GFX900-NEXT:    s_setpc_b64 s[30:31]
12497;
12498; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_4_4:
12499; GFX90A:       ; %bb.0:
12500; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12501; GFX90A-NEXT:    ;;#ASMSTART
12502; GFX90A-NEXT:    ; def s[4:5]
12503; GFX90A-NEXT:    ;;#ASMEND
12504; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
12505; GFX90A-NEXT:    ;;#ASMSTART
12506; GFX90A-NEXT:    ; use s[8:9]
12507; GFX90A-NEXT:    ;;#ASMEND
12508; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12509;
12510; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_4_4:
12511; GFX940:       ; %bb.0:
12512; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12513; GFX940-NEXT:    ;;#ASMSTART
12514; GFX940-NEXT:    ; def s[0:1]
12515; GFX940-NEXT:    ;;#ASMEND
12516; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
12517; GFX940-NEXT:    ;;#ASMSTART
12518; GFX940-NEXT:    ; use s[8:9]
12519; GFX940-NEXT:    ;;#ASMEND
12520; GFX940-NEXT:    s_setpc_b64 s[30:31]
12521  %vec0 = call <4 x half> asm "; def $0", "=s"()
12522  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 4, i32 4>
12523  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12524  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12525  ret void
12526}
12527
; SGPR variant of shuffle mask <4, 4, 4> with a poison second operand. Every
; lane selects element 0 of the poison operand, so the result does not depend
; on %vec0. The expected output therefore contains no "def" asm block, only
; the "use" of s[8:9].
; Review note: index 4 refers to the poison operand here; confirm that this is
; what the generator intends for the "_4_4" cases.
12528define void @s_shuffle_v3f16_v4f16__4_4_4() {
12529; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_4_4:
12530; GFX9:       ; %bb.0:
12531; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12532; GFX9-NEXT:    ;;#ASMSTART
12533; GFX9-NEXT:    ; use s[8:9]
12534; GFX9-NEXT:    ;;#ASMEND
12535; GFX9-NEXT:    s_setpc_b64 s[30:31]
12536  %vec0 = call <4 x half> asm "; def $0", "=s"()
12537  %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 4, i32 4>
12538  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12539  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12540  ret void
12541}
12542
; SGPR variant of shuffle mask <5, 4, 4>: result lane 0 is %vec1[1] and lanes
; 1 and 2 are both %vec1[0]. Every index is >= 4, so %vec0 is not referenced
; by the shuffle and the expected output contains only one "def" asm block.
; The expected code swaps the halves of the first dword into s8 (shift plus
; pack) and copies the same dword into s9 for lane 2.
12543define void @s_shuffle_v3f16_v4f16__5_4_4() {
12544; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_4_4:
12545; GFX900:       ; %bb.0:
12546; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12547; GFX900-NEXT:    ;;#ASMSTART
12548; GFX900-NEXT:    ; def s[4:5]
12549; GFX900-NEXT:    ;;#ASMEND
12550; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
12551; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12552; GFX900-NEXT:    s_mov_b32 s9, s4
12553; GFX900-NEXT:    ;;#ASMSTART
12554; GFX900-NEXT:    ; use s[8:9]
12555; GFX900-NEXT:    ;;#ASMEND
12556; GFX900-NEXT:    s_setpc_b64 s[30:31]
12557;
12558; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_4_4:
12559; GFX90A:       ; %bb.0:
12560; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12561; GFX90A-NEXT:    ;;#ASMSTART
12562; GFX90A-NEXT:    ; def s[4:5]
12563; GFX90A-NEXT:    ;;#ASMEND
12564; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
12565; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12566; GFX90A-NEXT:    s_mov_b32 s9, s4
12567; GFX90A-NEXT:    ;;#ASMSTART
12568; GFX90A-NEXT:    ; use s[8:9]
12569; GFX90A-NEXT:    ;;#ASMEND
12570; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12571;
12572; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_4_4:
12573; GFX940:       ; %bb.0:
12574; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12575; GFX940-NEXT:    ;;#ASMSTART
12576; GFX940-NEXT:    ; def s[0:1]
12577; GFX940-NEXT:    ;;#ASMEND
12578; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
12579; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12580; GFX940-NEXT:    s_mov_b32 s9, s0
12581; GFX940-NEXT:    ;;#ASMSTART
12582; GFX940-NEXT:    ; use s[8:9]
12583; GFX940-NEXT:    ;;#ASMEND
12584; GFX940-NEXT:    s_setpc_b64 s[30:31]
12585  %vec0 = call <4 x half> asm "; def $0", "=s"()
12586  %vec1 = call <4 x half> asm "; def $0", "=s"()
12587  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 4, i32 4>
12588  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12589  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12590  ret void
12591}
12592
; SGPR variant of shuffle mask <6, 4, 4>: result lane 0 is %vec1[2] and lanes
; 1 and 2 are both %vec1[0]. Every index is >= 4, so %vec0 is not referenced
; by the shuffle and the expected output contains only one "def" asm block.
; Both packed elements are low halves, so the expected code needs no shift:
; it packs the two dwords' low halves into s8 and copies the first dword into
; s9 for lane 2.
12593define void @s_shuffle_v3f16_v4f16__6_4_4() {
12594; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_4_4:
12595; GFX900:       ; %bb.0:
12596; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12597; GFX900-NEXT:    ;;#ASMSTART
12598; GFX900-NEXT:    ; def s[4:5]
12599; GFX900-NEXT:    ;;#ASMEND
12600; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12601; GFX900-NEXT:    s_mov_b32 s9, s4
12602; GFX900-NEXT:    ;;#ASMSTART
12603; GFX900-NEXT:    ; use s[8:9]
12604; GFX900-NEXT:    ;;#ASMEND
12605; GFX900-NEXT:    s_setpc_b64 s[30:31]
12606;
12607; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_4_4:
12608; GFX90A:       ; %bb.0:
12609; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12610; GFX90A-NEXT:    ;;#ASMSTART
12611; GFX90A-NEXT:    ; def s[4:5]
12612; GFX90A-NEXT:    ;;#ASMEND
12613; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12614; GFX90A-NEXT:    s_mov_b32 s9, s4
12615; GFX90A-NEXT:    ;;#ASMSTART
12616; GFX90A-NEXT:    ; use s[8:9]
12617; GFX90A-NEXT:    ;;#ASMEND
12618; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12619;
12620; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_4_4:
12621; GFX940:       ; %bb.0:
12622; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12623; GFX940-NEXT:    ;;#ASMSTART
12624; GFX940-NEXT:    ; def s[0:1]
12625; GFX940-NEXT:    ;;#ASMEND
12626; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12627; GFX940-NEXT:    s_mov_b32 s9, s0
12628; GFX940-NEXT:    ;;#ASMSTART
12629; GFX940-NEXT:    ; use s[8:9]
12630; GFX940-NEXT:    ;;#ASMEND
12631; GFX940-NEXT:    s_setpc_b64 s[30:31]
12632  %vec0 = call <4 x half> asm "; def $0", "=s"()
12633  %vec1 = call <4 x half> asm "; def $0", "=s"()
12634  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 4, i32 4>
12635  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12636  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12637  ret void
12638}
12639
; SGPR variant of shuffle mask <7, 4, 4>: result lane 0 is %vec1[3] and lanes
; 1 and 2 are both %vec1[0]. Every index is >= 4, so %vec0 is not referenced
; by the shuffle and the expected output contains only one "def" asm block.
; The expected code shifts the second dword right by 16 to get lane 0, packs
; it with the first dword's low half into s8, and copies the first dword into
; s9 for lane 2.
12640define void @s_shuffle_v3f16_v4f16__7_4_4() {
12641; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_4:
12642; GFX900:       ; %bb.0:
12643; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12644; GFX900-NEXT:    ;;#ASMSTART
12645; GFX900-NEXT:    ; def s[4:5]
12646; GFX900-NEXT:    ;;#ASMEND
12647; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
12648; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12649; GFX900-NEXT:    s_mov_b32 s9, s4
12650; GFX900-NEXT:    ;;#ASMSTART
12651; GFX900-NEXT:    ; use s[8:9]
12652; GFX900-NEXT:    ;;#ASMEND
12653; GFX900-NEXT:    s_setpc_b64 s[30:31]
12654;
12655; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_4:
12656; GFX90A:       ; %bb.0:
12657; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12658; GFX90A-NEXT:    ;;#ASMSTART
12659; GFX90A-NEXT:    ; def s[4:5]
12660; GFX90A-NEXT:    ;;#ASMEND
12661; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
12662; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12663; GFX90A-NEXT:    s_mov_b32 s9, s4
12664; GFX90A-NEXT:    ;;#ASMSTART
12665; GFX90A-NEXT:    ; use s[8:9]
12666; GFX90A-NEXT:    ;;#ASMEND
12667; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12668;
12669; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_4:
12670; GFX940:       ; %bb.0:
12671; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12672; GFX940-NEXT:    ;;#ASMSTART
12673; GFX940-NEXT:    ; def s[0:1]
12674; GFX940-NEXT:    ;;#ASMEND
12675; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
12676; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12677; GFX940-NEXT:    s_mov_b32 s9, s0
12678; GFX940-NEXT:    ;;#ASMSTART
12679; GFX940-NEXT:    ; use s[8:9]
12680; GFX940-NEXT:    ;;#ASMEND
12681; GFX940-NEXT:    s_setpc_b64 s[30:31]
12682  %vec0 = call <4 x half> asm "; def $0", "=s"()
12683  %vec1 = call <4 x half> asm "; def $0", "=s"()
12684  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 4>
12685  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12686  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12687  ret void
12688}
12689
; SGPR variant of shuffle mask <7, poison, 4>: result lane 0 is %vec1[3],
; lane 1 is poison and lane 2 is %vec1[0]. %vec0 is not referenced by the
; shuffle, so the expected output contains only one "def" asm block. With
; lane 1 undefined no pack is expected: lane 0 is produced by a 16-bit right
; shift into s8 and lane 2 by a copy of the first dword into s9.
12690define void @s_shuffle_v3f16_v4f16__7_u_4() {
12691; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_4:
12692; GFX900:       ; %bb.0:
12693; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12694; GFX900-NEXT:    ;;#ASMSTART
12695; GFX900-NEXT:    ; def s[4:5]
12696; GFX900-NEXT:    ;;#ASMEND
12697; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
12698; GFX900-NEXT:    s_mov_b32 s9, s4
12699; GFX900-NEXT:    ;;#ASMSTART
12700; GFX900-NEXT:    ; use s[8:9]
12701; GFX900-NEXT:    ;;#ASMEND
12702; GFX900-NEXT:    s_setpc_b64 s[30:31]
12703;
12704; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_4:
12705; GFX90A:       ; %bb.0:
12706; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12707; GFX90A-NEXT:    ;;#ASMSTART
12708; GFX90A-NEXT:    ; def s[4:5]
12709; GFX90A-NEXT:    ;;#ASMEND
12710; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
12711; GFX90A-NEXT:    s_mov_b32 s9, s4
12712; GFX90A-NEXT:    ;;#ASMSTART
12713; GFX90A-NEXT:    ; use s[8:9]
12714; GFX90A-NEXT:    ;;#ASMEND
12715; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12716;
12717; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_4:
12718; GFX940:       ; %bb.0:
12719; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12720; GFX940-NEXT:    ;;#ASMSTART
12721; GFX940-NEXT:    ; def s[0:1]
12722; GFX940-NEXT:    ;;#ASMEND
12723; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
12724; GFX940-NEXT:    s_mov_b32 s9, s0
12725; GFX940-NEXT:    ;;#ASMSTART
12726; GFX940-NEXT:    ; use s[8:9]
12727; GFX940-NEXT:    ;;#ASMEND
12728; GFX940-NEXT:    s_setpc_b64 s[30:31]
12729  %vec0 = call <4 x half> asm "; def $0", "=s"()
12730  %vec1 = call <4 x half> asm "; def $0", "=s"()
12731  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 4>
12732  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12733  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12734  ret void
12735}
12736
; SGPR variant of shuffle mask <7, 0, 4>: result lane 0 is %vec1[3], lane 1 is
; %vec0[0] and lane 2 is %vec1[0]. Both inputs are live, so two "def" asm
; blocks are expected. The expected code shifts %vec1's second dword right by
; 16 for lane 0, packs it with the low half of %vec0's first dword into s8,
; and copies %vec1's first dword into s9 for lane 2.
12737define void @s_shuffle_v3f16_v4f16__7_0_4() {
12738; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_4:
12739; GFX900:       ; %bb.0:
12740; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12741; GFX900-NEXT:    ;;#ASMSTART
12742; GFX900-NEXT:    ; def s[4:5]
12743; GFX900-NEXT:    ;;#ASMEND
12744; GFX900-NEXT:    ;;#ASMSTART
12745; GFX900-NEXT:    ; def s[6:7]
12746; GFX900-NEXT:    ;;#ASMEND
12747; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
12748; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12749; GFX900-NEXT:    s_mov_b32 s9, s6
12750; GFX900-NEXT:    ;;#ASMSTART
12751; GFX900-NEXT:    ; use s[8:9]
12752; GFX900-NEXT:    ;;#ASMEND
12753; GFX900-NEXT:    s_setpc_b64 s[30:31]
12754;
12755; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_4:
12756; GFX90A:       ; %bb.0:
12757; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12758; GFX90A-NEXT:    ;;#ASMSTART
12759; GFX90A-NEXT:    ; def s[4:5]
12760; GFX90A-NEXT:    ;;#ASMEND
12761; GFX90A-NEXT:    ;;#ASMSTART
12762; GFX90A-NEXT:    ; def s[6:7]
12763; GFX90A-NEXT:    ;;#ASMEND
12764; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
12765; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12766; GFX90A-NEXT:    s_mov_b32 s9, s6
12767; GFX90A-NEXT:    ;;#ASMSTART
12768; GFX90A-NEXT:    ; use s[8:9]
12769; GFX90A-NEXT:    ;;#ASMEND
12770; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12771;
12772; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_4:
12773; GFX940:       ; %bb.0:
12774; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12775; GFX940-NEXT:    ;;#ASMSTART
12776; GFX940-NEXT:    ; def s[0:1]
12777; GFX940-NEXT:    ;;#ASMEND
12778; GFX940-NEXT:    ;;#ASMSTART
12779; GFX940-NEXT:    ; def s[2:3]
12780; GFX940-NEXT:    ;;#ASMEND
12781; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
12782; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12783; GFX940-NEXT:    s_mov_b32 s9, s2
12784; GFX940-NEXT:    ;;#ASMSTART
12785; GFX940-NEXT:    ; use s[8:9]
12786; GFX940-NEXT:    ;;#ASMEND
12787; GFX940-NEXT:    s_setpc_b64 s[30:31]
12788  %vec0 = call <4 x half> asm "; def $0", "=s"()
12789  %vec1 = call <4 x half> asm "; def $0", "=s"()
12790  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 4>
12791  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12792  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12793  ret void
12794}
12795
; SGPR variant of shuffle mask <7, 1, 4>: result lane 0 is %vec1[3], lane 1 is
; %vec0[1] and lane 2 is %vec1[0]. Lanes 0 and 1 are both high halves, so the
; expected code uses two 16-bit right shifts before packing them into s8.
; Lane 2 is a copy of %vec1's first dword into s9.
12796define void @s_shuffle_v3f16_v4f16__7_1_4() {
12797; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_4:
12798; GFX900:       ; %bb.0:
12799; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12800; GFX900-NEXT:    ;;#ASMSTART
12801; GFX900-NEXT:    ; def s[4:5]
12802; GFX900-NEXT:    ;;#ASMEND
12803; GFX900-NEXT:    ;;#ASMSTART
12804; GFX900-NEXT:    ; def s[6:7]
12805; GFX900-NEXT:    ;;#ASMEND
12806; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
12807; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
12808; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12809; GFX900-NEXT:    s_mov_b32 s9, s6
12810; GFX900-NEXT:    ;;#ASMSTART
12811; GFX900-NEXT:    ; use s[8:9]
12812; GFX900-NEXT:    ;;#ASMEND
12813; GFX900-NEXT:    s_setpc_b64 s[30:31]
12814;
12815; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_4:
12816; GFX90A:       ; %bb.0:
12817; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12818; GFX90A-NEXT:    ;;#ASMSTART
12819; GFX90A-NEXT:    ; def s[4:5]
12820; GFX90A-NEXT:    ;;#ASMEND
12821; GFX90A-NEXT:    ;;#ASMSTART
12822; GFX90A-NEXT:    ; def s[6:7]
12823; GFX90A-NEXT:    ;;#ASMEND
12824; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
12825; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
12826; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12827; GFX90A-NEXT:    s_mov_b32 s9, s6
12828; GFX90A-NEXT:    ;;#ASMSTART
12829; GFX90A-NEXT:    ; use s[8:9]
12830; GFX90A-NEXT:    ;;#ASMEND
12831; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12832;
12833; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_4:
12834; GFX940:       ; %bb.0:
12835; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12836; GFX940-NEXT:    ;;#ASMSTART
12837; GFX940-NEXT:    ; def s[0:1]
12838; GFX940-NEXT:    ;;#ASMEND
12839; GFX940-NEXT:    ;;#ASMSTART
12840; GFX940-NEXT:    ; def s[2:3]
12841; GFX940-NEXT:    ;;#ASMEND
12842; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
12843; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
12844; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12845; GFX940-NEXT:    s_mov_b32 s9, s2
12846; GFX940-NEXT:    ;;#ASMSTART
12847; GFX940-NEXT:    ; use s[8:9]
12848; GFX940-NEXT:    ;;#ASMEND
12849; GFX940-NEXT:    s_setpc_b64 s[30:31]
12850  %vec0 = call <4 x half> asm "; def $0", "=s"()
12851  %vec1 = call <4 x half> asm "; def $0", "=s"()
12852  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 4>
12853  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12854  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12855  ret void
12856}
12857
; SGPR variant of shuffle mask <7, 2, 4>: result lane 0 is %vec1[3], lane 1 is
; %vec0[2] and lane 2 is %vec1[0]. Lane 1 is already a low half, so the
; expected code needs only one 16-bit right shift (for lane 0) before packing
; into s8. Lane 2 is a copy of %vec1's first dword into s9.
12858define void @s_shuffle_v3f16_v4f16__7_2_4() {
12859; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_4:
12860; GFX900:       ; %bb.0:
12861; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12862; GFX900-NEXT:    ;;#ASMSTART
12863; GFX900-NEXT:    ; def s[4:5]
12864; GFX900-NEXT:    ;;#ASMEND
12865; GFX900-NEXT:    ;;#ASMSTART
12866; GFX900-NEXT:    ; def s[6:7]
12867; GFX900-NEXT:    ;;#ASMEND
12868; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
12869; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12870; GFX900-NEXT:    s_mov_b32 s9, s6
12871; GFX900-NEXT:    ;;#ASMSTART
12872; GFX900-NEXT:    ; use s[8:9]
12873; GFX900-NEXT:    ;;#ASMEND
12874; GFX900-NEXT:    s_setpc_b64 s[30:31]
12875;
12876; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_4:
12877; GFX90A:       ; %bb.0:
12878; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12879; GFX90A-NEXT:    ;;#ASMSTART
12880; GFX90A-NEXT:    ; def s[4:5]
12881; GFX90A-NEXT:    ;;#ASMEND
12882; GFX90A-NEXT:    ;;#ASMSTART
12883; GFX90A-NEXT:    ; def s[6:7]
12884; GFX90A-NEXT:    ;;#ASMEND
12885; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
12886; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
12887; GFX90A-NEXT:    s_mov_b32 s9, s6
12888; GFX90A-NEXT:    ;;#ASMSTART
12889; GFX90A-NEXT:    ; use s[8:9]
12890; GFX90A-NEXT:    ;;#ASMEND
12891; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12892;
12893; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_4:
12894; GFX940:       ; %bb.0:
12895; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12896; GFX940-NEXT:    ;;#ASMSTART
12897; GFX940-NEXT:    ; def s[0:1]
12898; GFX940-NEXT:    ;;#ASMEND
12899; GFX940-NEXT:    ;;#ASMSTART
12900; GFX940-NEXT:    ; def s[2:3]
12901; GFX940-NEXT:    ;;#ASMEND
12902; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
12903; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
12904; GFX940-NEXT:    s_mov_b32 s9, s2
12905; GFX940-NEXT:    ;;#ASMSTART
12906; GFX940-NEXT:    ; use s[8:9]
12907; GFX940-NEXT:    ;;#ASMEND
12908; GFX940-NEXT:    s_setpc_b64 s[30:31]
12909  %vec0 = call <4 x half> asm "; def $0", "=s"()
12910  %vec1 = call <4 x half> asm "; def $0", "=s"()
12911  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 4>
12912  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12913  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12914  ret void
12915}
12916
; SGPR variant of shuffle mask <7, 3, 4>: result lane 0 is %vec1[3], lane 1 is
; %vec0[3] and lane 2 is %vec1[0]. Lanes 0 and 1 are the high halves of each
; input's second dword, so the expected code uses two 16-bit right shifts
; before packing them into s8. Lane 2 is a copy of %vec1's first dword into
; s9.
12917define void @s_shuffle_v3f16_v4f16__7_3_4() {
12918; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_4:
12919; GFX900:       ; %bb.0:
12920; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12921; GFX900-NEXT:    ;;#ASMSTART
12922; GFX900-NEXT:    ; def s[4:5]
12923; GFX900-NEXT:    ;;#ASMEND
12924; GFX900-NEXT:    ;;#ASMSTART
12925; GFX900-NEXT:    ; def s[6:7]
12926; GFX900-NEXT:    ;;#ASMEND
12927; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
12928; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
12929; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12930; GFX900-NEXT:    s_mov_b32 s9, s6
12931; GFX900-NEXT:    ;;#ASMSTART
12932; GFX900-NEXT:    ; use s[8:9]
12933; GFX900-NEXT:    ;;#ASMEND
12934; GFX900-NEXT:    s_setpc_b64 s[30:31]
12935;
12936; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_4:
12937; GFX90A:       ; %bb.0:
12938; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12939; GFX90A-NEXT:    ;;#ASMSTART
12940; GFX90A-NEXT:    ; def s[4:5]
12941; GFX90A-NEXT:    ;;#ASMEND
12942; GFX90A-NEXT:    ;;#ASMSTART
12943; GFX90A-NEXT:    ; def s[6:7]
12944; GFX90A-NEXT:    ;;#ASMEND
12945; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
12946; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
12947; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
12948; GFX90A-NEXT:    s_mov_b32 s9, s6
12949; GFX90A-NEXT:    ;;#ASMSTART
12950; GFX90A-NEXT:    ; use s[8:9]
12951; GFX90A-NEXT:    ;;#ASMEND
12952; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12953;
12954; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_4:
12955; GFX940:       ; %bb.0:
12956; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12957; GFX940-NEXT:    ;;#ASMSTART
12958; GFX940-NEXT:    ; def s[0:1]
12959; GFX940-NEXT:    ;;#ASMEND
12960; GFX940-NEXT:    ;;#ASMSTART
12961; GFX940-NEXT:    ; def s[2:3]
12962; GFX940-NEXT:    ;;#ASMEND
12963; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
12964; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
12965; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
12966; GFX940-NEXT:    s_mov_b32 s9, s2
12967; GFX940-NEXT:    ;;#ASMSTART
12968; GFX940-NEXT:    ; use s[8:9]
12969; GFX940-NEXT:    ;;#ASMEND
12970; GFX940-NEXT:    s_setpc_b64 s[30:31]
12971  %vec0 = call <4 x half> asm "; def $0", "=s"()
12972  %vec1 = call <4 x half> asm "; def $0", "=s"()
12973  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 4>
12974  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
12975  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
12976  ret void
12977}
12978
; SGPR shuffle with mask <7, 5, 4>. Mask indices 4-7 select from %vec1, so the
; result is <%vec1[3], %vec1[1], %vec1[0]> and %vec0 is never read (the expected
; output contains a single def). The <3 x half> result is widened with a poison
; 4th lane and passed to an inline asm that pins it to s[8:9].
12979define void @s_shuffle_v3f16_v4f16__7_5_4() {
12980; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_4:
12981; GFX900:       ; %bb.0:
12982; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12983; GFX900-NEXT:    ;;#ASMSTART
12984; GFX900-NEXT:    ; def s[4:5]
12985; GFX900-NEXT:    ;;#ASMEND
12986; GFX900-NEXT:    s_lshr_b32 s6, s4, 16
12987; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
12988; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
12989; GFX900-NEXT:    s_mov_b32 s9, s4
12990; GFX900-NEXT:    ;;#ASMSTART
12991; GFX900-NEXT:    ; use s[8:9]
12992; GFX900-NEXT:    ;;#ASMEND
12993; GFX900-NEXT:    s_setpc_b64 s[30:31]
12994;
12995; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_4:
12996; GFX90A:       ; %bb.0:
12997; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12998; GFX90A-NEXT:    ;;#ASMSTART
12999; GFX90A-NEXT:    ; def s[4:5]
13000; GFX90A-NEXT:    ;;#ASMEND
13001; GFX90A-NEXT:    s_lshr_b32 s6, s4, 16
13002; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
13003; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s6
13004; GFX90A-NEXT:    s_mov_b32 s9, s4
13005; GFX90A-NEXT:    ;;#ASMSTART
13006; GFX90A-NEXT:    ; use s[8:9]
13007; GFX90A-NEXT:    ;;#ASMEND
13008; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13009;
13010; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_4:
13011; GFX940:       ; %bb.0:
13012; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13013; GFX940-NEXT:    ;;#ASMSTART
13014; GFX940-NEXT:    ; def s[0:1]
13015; GFX940-NEXT:    ;;#ASMEND
13016; GFX940-NEXT:    s_lshr_b32 s2, s0, 16
13017; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
13018; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s2
13019; GFX940-NEXT:    s_mov_b32 s9, s0
13020; GFX940-NEXT:    ;;#ASMSTART
13021; GFX940-NEXT:    ; use s[8:9]
13022; GFX940-NEXT:    ;;#ASMEND
13023; GFX940-NEXT:    s_setpc_b64 s[30:31]
13024  %vec0 = call <4 x half> asm "; def $0", "=s"()
13025  %vec1 = call <4 x half> asm "; def $0", "=s"()
13026  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 4>
13027  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13028  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13029  ret void
13030}
13031
; SGPR shuffle with mask <7, 6, 4>. All three indices fall in the 4-7 range, so
; the result is <%vec1[3], %vec1[2], %vec1[0]> and %vec0 is never read (the
; expected output contains a single def). The <3 x half> result is widened with
; a poison 4th lane and passed to an inline asm that pins it to s[8:9].
13032define void @s_shuffle_v3f16_v4f16__7_6_4() {
13033; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_4:
13034; GFX900:       ; %bb.0:
13035; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13036; GFX900-NEXT:    ;;#ASMSTART
13037; GFX900-NEXT:    ; def s[4:5]
13038; GFX900-NEXT:    ;;#ASMEND
13039; GFX900-NEXT:    s_lshr_b32 s6, s5, 16
13040; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
13041; GFX900-NEXT:    s_mov_b32 s9, s4
13042; GFX900-NEXT:    ;;#ASMSTART
13043; GFX900-NEXT:    ; use s[8:9]
13044; GFX900-NEXT:    ;;#ASMEND
13045; GFX900-NEXT:    s_setpc_b64 s[30:31]
13046;
13047; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_4:
13048; GFX90A:       ; %bb.0:
13049; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13050; GFX90A-NEXT:    ;;#ASMSTART
13051; GFX90A-NEXT:    ; def s[4:5]
13052; GFX90A-NEXT:    ;;#ASMEND
13053; GFX90A-NEXT:    s_lshr_b32 s6, s5, 16
13054; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
13055; GFX90A-NEXT:    s_mov_b32 s9, s4
13056; GFX90A-NEXT:    ;;#ASMSTART
13057; GFX90A-NEXT:    ; use s[8:9]
13058; GFX90A-NEXT:    ;;#ASMEND
13059; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13060;
13061; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_4:
13062; GFX940:       ; %bb.0:
13063; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13064; GFX940-NEXT:    ;;#ASMSTART
13065; GFX940-NEXT:    ; def s[0:1]
13066; GFX940-NEXT:    ;;#ASMEND
13067; GFX940-NEXT:    s_lshr_b32 s2, s1, 16
13068; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
13069; GFX940-NEXT:    s_mov_b32 s9, s0
13070; GFX940-NEXT:    ;;#ASMSTART
13071; GFX940-NEXT:    ; use s[8:9]
13072; GFX940-NEXT:    ;;#ASMEND
13073; GFX940-NEXT:    s_setpc_b64 s[30:31]
13074  %vec0 = call <4 x half> asm "; def $0", "=s"()
13075  %vec1 = call <4 x half> asm "; def $0", "=s"()
13076  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 4>
13077  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13078  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13079  ret void
13080}
13081
; SGPR shuffle with mask <poison, 5, 5>. Lane 0 is unconstrained; lanes 1 and 2
; both take %vec1[1], so %vec0 is never read. The expected output is the same on
; all three targets: the input is defined directly in s[8:9] and a single shift
; copies the high half of s8 into s9. The <3 x half> result is widened with a
; poison 4th lane and passed to an inline asm that pins it to s[8:9].
13082define void @s_shuffle_v3f16_v4f16__u_5_5() {
13083; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_5_5:
13084; GFX9:       ; %bb.0:
13085; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13086; GFX9-NEXT:    ;;#ASMSTART
13087; GFX9-NEXT:    ; def s[8:9]
13088; GFX9-NEXT:    ;;#ASMEND
13089; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
13090; GFX9-NEXT:    ;;#ASMSTART
13091; GFX9-NEXT:    ; use s[8:9]
13092; GFX9-NEXT:    ;;#ASMEND
13093; GFX9-NEXT:    s_setpc_b64 s[30:31]
13094  %vec0 = call <4 x half> asm "; def $0", "=s"()
13095  %vec1 = call <4 x half> asm "; def $0", "=s"()
13096  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 5, i32 5>
13097  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13098  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13099  ret void
13100}
13101
; SGPR shuffle with mask <0, 5, 5>. Index 0 selects from %vec0 and index 5 from
; %vec1, so the result is <%vec0[0], %vec1[1], %vec1[1]> and both inputs are
; read (two defs in the expected output). The <3 x half> result is widened with
; a poison 4th lane and passed to an inline asm that pins it to s[8:9].
13102define void @s_shuffle_v3f16_v4f16__0_5_5() {
13103; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_5_5:
13104; GFX900:       ; %bb.0:
13105; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13106; GFX900-NEXT:    ;;#ASMSTART
13107; GFX900-NEXT:    ; def s[6:7]
13108; GFX900-NEXT:    ;;#ASMEND
13109; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13110; GFX900-NEXT:    ;;#ASMSTART
13111; GFX900-NEXT:    ; def s[4:5]
13112; GFX900-NEXT:    ;;#ASMEND
13113; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13114; GFX900-NEXT:    ;;#ASMSTART
13115; GFX900-NEXT:    ; use s[8:9]
13116; GFX900-NEXT:    ;;#ASMEND
13117; GFX900-NEXT:    s_setpc_b64 s[30:31]
13118;
13119; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_5_5:
13120; GFX90A:       ; %bb.0:
13121; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13122; GFX90A-NEXT:    ;;#ASMSTART
13123; GFX90A-NEXT:    ; def s[6:7]
13124; GFX90A-NEXT:    ;;#ASMEND
13125; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13126; GFX90A-NEXT:    ;;#ASMSTART
13127; GFX90A-NEXT:    ; def s[4:5]
13128; GFX90A-NEXT:    ;;#ASMEND
13129; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13130; GFX90A-NEXT:    ;;#ASMSTART
13131; GFX90A-NEXT:    ; use s[8:9]
13132; GFX90A-NEXT:    ;;#ASMEND
13133; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13134;
13135; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_5_5:
13136; GFX940:       ; %bb.0:
13137; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13138; GFX940-NEXT:    ;;#ASMSTART
13139; GFX940-NEXT:    ; def s[2:3]
13140; GFX940-NEXT:    ;;#ASMEND
13141; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13142; GFX940-NEXT:    ;;#ASMSTART
13143; GFX940-NEXT:    ; def s[0:1]
13144; GFX940-NEXT:    ;;#ASMEND
13145; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
13146; GFX940-NEXT:    ;;#ASMSTART
13147; GFX940-NEXT:    ; use s[8:9]
13148; GFX940-NEXT:    ;;#ASMEND
13149; GFX940-NEXT:    s_setpc_b64 s[30:31]
13150  %vec0 = call <4 x half> asm "; def $0", "=s"()
13151  %vec1 = call <4 x half> asm "; def $0", "=s"()
13152  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 5, i32 5>
13153  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13154  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13155  ret void
13156}
13157
; SGPR shuffle with mask <1, 5, 5>. Index 1 selects from %vec0 and index 5 from
; %vec1, so the result is <%vec0[1], %vec1[1], %vec1[1]> and both inputs are
; read (two defs in the expected output). The <3 x half> result is widened with
; a poison 4th lane and passed to an inline asm that pins it to s[8:9].
13158define void @s_shuffle_v3f16_v4f16__1_5_5() {
13159; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_5_5:
13160; GFX900:       ; %bb.0:
13161; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13162; GFX900-NEXT:    ;;#ASMSTART
13163; GFX900-NEXT:    ; def s[4:5]
13164; GFX900-NEXT:    ;;#ASMEND
13165; GFX900-NEXT:    ;;#ASMSTART
13166; GFX900-NEXT:    ; def s[6:7]
13167; GFX900-NEXT:    ;;#ASMEND
13168; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13169; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
13170; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13171; GFX900-NEXT:    ;;#ASMSTART
13172; GFX900-NEXT:    ; use s[8:9]
13173; GFX900-NEXT:    ;;#ASMEND
13174; GFX900-NEXT:    s_setpc_b64 s[30:31]
13175;
13176; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_5_5:
13177; GFX90A:       ; %bb.0:
13178; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13179; GFX90A-NEXT:    ;;#ASMSTART
13180; GFX90A-NEXT:    ; def s[4:5]
13181; GFX90A-NEXT:    ;;#ASMEND
13182; GFX90A-NEXT:    ;;#ASMSTART
13183; GFX90A-NEXT:    ; def s[6:7]
13184; GFX90A-NEXT:    ;;#ASMEND
13185; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13186; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
13187; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13188; GFX90A-NEXT:    ;;#ASMSTART
13189; GFX90A-NEXT:    ; use s[8:9]
13190; GFX90A-NEXT:    ;;#ASMEND
13191; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13192;
13193; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_5_5:
13194; GFX940:       ; %bb.0:
13195; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13196; GFX940-NEXT:    ;;#ASMSTART
13197; GFX940-NEXT:    ; def s[0:1]
13198; GFX940-NEXT:    ;;#ASMEND
13199; GFX940-NEXT:    ;;#ASMSTART
13200; GFX940-NEXT:    ; def s[2:3]
13201; GFX940-NEXT:    ;;#ASMEND
13202; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13203; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
13204; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
13205; GFX940-NEXT:    ;;#ASMSTART
13206; GFX940-NEXT:    ; use s[8:9]
13207; GFX940-NEXT:    ;;#ASMEND
13208; GFX940-NEXT:    s_setpc_b64 s[30:31]
13209  %vec0 = call <4 x half> asm "; def $0", "=s"()
13210  %vec1 = call <4 x half> asm "; def $0", "=s"()
13211  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 5, i32 5>
13212  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13213  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13214  ret void
13215}
13216
; SGPR shuffle with mask <2, 5, 5>. Index 2 selects from %vec0 and index 5 from
; %vec1, so the result is <%vec0[2], %vec1[1], %vec1[1]> and both inputs are
; read (two defs in the expected output). The <3 x half> result is widened with
; a poison 4th lane and passed to an inline asm that pins it to s[8:9].
13217define void @s_shuffle_v3f16_v4f16__2_5_5() {
13218; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_5_5:
13219; GFX900:       ; %bb.0:
13220; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13221; GFX900-NEXT:    ;;#ASMSTART
13222; GFX900-NEXT:    ; def s[6:7]
13223; GFX900-NEXT:    ;;#ASMEND
13224; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13225; GFX900-NEXT:    ;;#ASMSTART
13226; GFX900-NEXT:    ; def s[4:5]
13227; GFX900-NEXT:    ;;#ASMEND
13228; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
13229; GFX900-NEXT:    ;;#ASMSTART
13230; GFX900-NEXT:    ; use s[8:9]
13231; GFX900-NEXT:    ;;#ASMEND
13232; GFX900-NEXT:    s_setpc_b64 s[30:31]
13233;
13234; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_5_5:
13235; GFX90A:       ; %bb.0:
13236; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13237; GFX90A-NEXT:    ;;#ASMSTART
13238; GFX90A-NEXT:    ; def s[6:7]
13239; GFX90A-NEXT:    ;;#ASMEND
13240; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13241; GFX90A-NEXT:    ;;#ASMSTART
13242; GFX90A-NEXT:    ; def s[4:5]
13243; GFX90A-NEXT:    ;;#ASMEND
13244; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
13245; GFX90A-NEXT:    ;;#ASMSTART
13246; GFX90A-NEXT:    ; use s[8:9]
13247; GFX90A-NEXT:    ;;#ASMEND
13248; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13249;
13250; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_5_5:
13251; GFX940:       ; %bb.0:
13252; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13253; GFX940-NEXT:    ;;#ASMSTART
13254; GFX940-NEXT:    ; def s[2:3]
13255; GFX940-NEXT:    ;;#ASMEND
13256; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13257; GFX940-NEXT:    ;;#ASMSTART
13258; GFX940-NEXT:    ; def s[0:1]
13259; GFX940-NEXT:    ;;#ASMEND
13260; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
13261; GFX940-NEXT:    ;;#ASMSTART
13262; GFX940-NEXT:    ; use s[8:9]
13263; GFX940-NEXT:    ;;#ASMEND
13264; GFX940-NEXT:    s_setpc_b64 s[30:31]
13265  %vec0 = call <4 x half> asm "; def $0", "=s"()
13266  %vec1 = call <4 x half> asm "; def $0", "=s"()
13267  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 5, i32 5>
13268  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13269  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13270  ret void
13271}
13272
; SGPR shuffle with mask <3, 5, 5>. Index 3 selects from %vec0 and index 5 from
; %vec1, so the result is <%vec0[3], %vec1[1], %vec1[1]> and both inputs are
; read (two defs in the expected output). The <3 x half> result is widened with
; a poison 4th lane and passed to an inline asm that pins it to s[8:9].
13273define void @s_shuffle_v3f16_v4f16__3_5_5() {
13274; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_5_5:
13275; GFX900:       ; %bb.0:
13276; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13277; GFX900-NEXT:    ;;#ASMSTART
13278; GFX900-NEXT:    ; def s[4:5]
13279; GFX900-NEXT:    ;;#ASMEND
13280; GFX900-NEXT:    ;;#ASMSTART
13281; GFX900-NEXT:    ; def s[6:7]
13282; GFX900-NEXT:    ;;#ASMEND
13283; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13284; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
13285; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13286; GFX900-NEXT:    ;;#ASMSTART
13287; GFX900-NEXT:    ; use s[8:9]
13288; GFX900-NEXT:    ;;#ASMEND
13289; GFX900-NEXT:    s_setpc_b64 s[30:31]
13290;
13291; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_5_5:
13292; GFX90A:       ; %bb.0:
13293; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13294; GFX90A-NEXT:    ;;#ASMSTART
13295; GFX90A-NEXT:    ; def s[4:5]
13296; GFX90A-NEXT:    ;;#ASMEND
13297; GFX90A-NEXT:    ;;#ASMSTART
13298; GFX90A-NEXT:    ; def s[6:7]
13299; GFX90A-NEXT:    ;;#ASMEND
13300; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13301; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
13302; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13303; GFX90A-NEXT:    ;;#ASMSTART
13304; GFX90A-NEXT:    ; use s[8:9]
13305; GFX90A-NEXT:    ;;#ASMEND
13306; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13307;
13308; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_5_5:
13309; GFX940:       ; %bb.0:
13310; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13311; GFX940-NEXT:    ;;#ASMSTART
13312; GFX940-NEXT:    ; def s[0:1]
13313; GFX940-NEXT:    ;;#ASMEND
13314; GFX940-NEXT:    ;;#ASMSTART
13315; GFX940-NEXT:    ; def s[2:3]
13316; GFX940-NEXT:    ;;#ASMEND
13317; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13318; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
13319; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
13320; GFX940-NEXT:    ;;#ASMSTART
13321; GFX940-NEXT:    ; use s[8:9]
13322; GFX940-NEXT:    ;;#ASMEND
13323; GFX940-NEXT:    s_setpc_b64 s[30:31]
13324  %vec0 = call <4 x half> asm "; def $0", "=s"()
13325  %vec1 = call <4 x half> asm "; def $0", "=s"()
13326  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 5, i32 5>
13327  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13328  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13329  ret void
13330}
13331
; SGPR shuffle with mask <4, 5, 5>. All indices fall in the 4-7 range, so the
; result is <%vec1[0], %vec1[1], %vec1[1]> and %vec0 is never read. Lanes 0-1
; are the low dword of %vec1 unchanged, so the expected output (the same on all
; three targets) defines the input directly in s[8:9] and needs only one shift
; to place %vec1[1] in s9. The <3 x half> result is widened with a poison 4th
; lane and passed to an inline asm that pins it to s[8:9].
13332define void @s_shuffle_v3f16_v4f16__4_5_5() {
13333; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_5_5:
13334; GFX9:       ; %bb.0:
13335; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13336; GFX9-NEXT:    ;;#ASMSTART
13337; GFX9-NEXT:    ; def s[8:9]
13338; GFX9-NEXT:    ;;#ASMEND
13339; GFX9-NEXT:    s_lshr_b32 s9, s8, 16
13340; GFX9-NEXT:    ;;#ASMSTART
13341; GFX9-NEXT:    ; use s[8:9]
13342; GFX9-NEXT:    ;;#ASMEND
13343; GFX9-NEXT:    s_setpc_b64 s[30:31]
13344  %vec0 = call <4 x half> asm "; def $0", "=s"()
13345  %vec1 = call <4 x half> asm "; def $0", "=s"()
13346  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 5, i32 5>
13347  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13348  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13349  ret void
13350}
13351
; SGPR shuffle with mask <5, 5, 5>: a splat of %vec1[1] into all three lanes.
; %vec0 is never read (the expected output contains a single def). The
; <3 x half> result is widened with a poison 4th lane and passed to an inline
; asm that pins it to s[8:9].
13352define void @s_shuffle_v3f16_v4f16__5_5_5() {
13353; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_5_5:
13354; GFX900:       ; %bb.0:
13355; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13356; GFX900-NEXT:    ;;#ASMSTART
13357; GFX900-NEXT:    ; def s[4:5]
13358; GFX900-NEXT:    ;;#ASMEND
13359; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13360; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
13361; GFX900-NEXT:    ;;#ASMSTART
13362; GFX900-NEXT:    ; use s[8:9]
13363; GFX900-NEXT:    ;;#ASMEND
13364; GFX900-NEXT:    s_setpc_b64 s[30:31]
13365;
13366; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_5_5:
13367; GFX90A:       ; %bb.0:
13368; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13369; GFX90A-NEXT:    ;;#ASMSTART
13370; GFX90A-NEXT:    ; def s[4:5]
13371; GFX90A-NEXT:    ;;#ASMEND
13372; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13373; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
13374; GFX90A-NEXT:    ;;#ASMSTART
13375; GFX90A-NEXT:    ; use s[8:9]
13376; GFX90A-NEXT:    ;;#ASMEND
13377; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13378;
13379; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_5_5:
13380; GFX940:       ; %bb.0:
13381; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13382; GFX940-NEXT:    ;;#ASMSTART
13383; GFX940-NEXT:    ; def s[0:1]
13384; GFX940-NEXT:    ;;#ASMEND
13385; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13386; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
13387; GFX940-NEXT:    ;;#ASMSTART
13388; GFX940-NEXT:    ; use s[8:9]
13389; GFX940-NEXT:    ;;#ASMEND
13390; GFX940-NEXT:    s_setpc_b64 s[30:31]
13391  %vec0 = call <4 x half> asm "; def $0", "=s"()
13392  %vec1 = call <4 x half> asm "; def $0", "=s"()
13393  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 5, i32 5>
13394  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13395  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13396  ret void
13397}
13398
; SGPR shuffle with mask <6, 5, 5>. All indices fall in the 4-7 range, so the
; result is <%vec1[2], %vec1[1], %vec1[1]> and %vec0 is never read (the expected
; output contains a single def). The <3 x half> result is widened with a poison
; 4th lane and passed to an inline asm that pins it to s[8:9].
13399define void @s_shuffle_v3f16_v4f16__6_5_5() {
13400; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_5_5:
13401; GFX900:       ; %bb.0:
13402; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13403; GFX900-NEXT:    ;;#ASMSTART
13404; GFX900-NEXT:    ; def s[4:5]
13405; GFX900-NEXT:    ;;#ASMEND
13406; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13407; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
13408; GFX900-NEXT:    ;;#ASMSTART
13409; GFX900-NEXT:    ; use s[8:9]
13410; GFX900-NEXT:    ;;#ASMEND
13411; GFX900-NEXT:    s_setpc_b64 s[30:31]
13412;
13413; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_5_5:
13414; GFX90A:       ; %bb.0:
13415; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13416; GFX90A-NEXT:    ;;#ASMSTART
13417; GFX90A-NEXT:    ; def s[4:5]
13418; GFX90A-NEXT:    ;;#ASMEND
13419; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13420; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
13421; GFX90A-NEXT:    ;;#ASMSTART
13422; GFX90A-NEXT:    ; use s[8:9]
13423; GFX90A-NEXT:    ;;#ASMEND
13424; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13425;
13426; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_5_5:
13427; GFX940:       ; %bb.0:
13428; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13429; GFX940-NEXT:    ;;#ASMSTART
13430; GFX940-NEXT:    ; def s[0:1]
13431; GFX940-NEXT:    ;;#ASMEND
13432; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13433; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
13434; GFX940-NEXT:    ;;#ASMSTART
13435; GFX940-NEXT:    ; use s[8:9]
13436; GFX940-NEXT:    ;;#ASMEND
13437; GFX940-NEXT:    s_setpc_b64 s[30:31]
13438  %vec0 = call <4 x half> asm "; def $0", "=s"()
13439  %vec1 = call <4 x half> asm "; def $0", "=s"()
13440  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 5, i32 5>
13441  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13442  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13443  ret void
13444}
13445
; SGPR shuffle with mask <7, 5, 5>. All indices fall in the 4-7 range, so the
; result is <%vec1[3], %vec1[1], %vec1[1]> and %vec0 is never read (the expected
; output contains a single def). The <3 x half> result is widened with a poison
; 4th lane and passed to an inline asm that pins it to s[8:9].
13446define void @s_shuffle_v3f16_v4f16__7_5_5() {
13447; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_5:
13448; GFX900:       ; %bb.0:
13449; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13450; GFX900-NEXT:    ;;#ASMSTART
13451; GFX900-NEXT:    ; def s[4:5]
13452; GFX900-NEXT:    ;;#ASMEND
13453; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13454; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
13455; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13456; GFX900-NEXT:    ;;#ASMSTART
13457; GFX900-NEXT:    ; use s[8:9]
13458; GFX900-NEXT:    ;;#ASMEND
13459; GFX900-NEXT:    s_setpc_b64 s[30:31]
13460;
13461; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_5:
13462; GFX90A:       ; %bb.0:
13463; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13464; GFX90A-NEXT:    ;;#ASMSTART
13465; GFX90A-NEXT:    ; def s[4:5]
13466; GFX90A-NEXT:    ;;#ASMEND
13467; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13468; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
13469; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13470; GFX90A-NEXT:    ;;#ASMSTART
13471; GFX90A-NEXT:    ; use s[8:9]
13472; GFX90A-NEXT:    ;;#ASMEND
13473; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13474;
13475; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_5:
13476; GFX940:       ; %bb.0:
13477; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13478; GFX940-NEXT:    ;;#ASMSTART
13479; GFX940-NEXT:    ; def s[0:1]
13480; GFX940-NEXT:    ;;#ASMEND
13481; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13482; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
13483; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
13484; GFX940-NEXT:    ;;#ASMSTART
13485; GFX940-NEXT:    ; use s[8:9]
13486; GFX940-NEXT:    ;;#ASMEND
13487; GFX940-NEXT:    s_setpc_b64 s[30:31]
13488  %vec0 = call <4 x half> asm "; def $0", "=s"()
13489  %vec1 = call <4 x half> asm "; def $0", "=s"()
13490  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 5>
13491  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13492  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13493  ret void
13494}
13495
; SGPR shuffle with mask <7, poison, 5>. Lane 0 is %vec1[3], lane 1 is
; unconstrained and lane 2 is %vec1[1]; %vec0 is never read (the expected output
; contains a single def). With lane 1 free, the expected output needs only two
; shifts and no pack. The <3 x half> result is widened with a poison 4th lane
; and passed to an inline asm that pins it to s[8:9].
13496define void @s_shuffle_v3f16_v4f16__7_u_5() {
13497; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_5:
13498; GFX900:       ; %bb.0:
13499; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13500; GFX900-NEXT:    ;;#ASMSTART
13501; GFX900-NEXT:    ; def s[4:5]
13502; GFX900-NEXT:    ;;#ASMEND
13503; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13504; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
13505; GFX900-NEXT:    ;;#ASMSTART
13506; GFX900-NEXT:    ; use s[8:9]
13507; GFX900-NEXT:    ;;#ASMEND
13508; GFX900-NEXT:    s_setpc_b64 s[30:31]
13509;
13510; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_5:
13511; GFX90A:       ; %bb.0:
13512; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13513; GFX90A-NEXT:    ;;#ASMSTART
13514; GFX90A-NEXT:    ; def s[4:5]
13515; GFX90A-NEXT:    ;;#ASMEND
13516; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13517; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
13518; GFX90A-NEXT:    ;;#ASMSTART
13519; GFX90A-NEXT:    ; use s[8:9]
13520; GFX90A-NEXT:    ;;#ASMEND
13521; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13522;
13523; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_5:
13524; GFX940:       ; %bb.0:
13525; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13526; GFX940-NEXT:    ;;#ASMSTART
13527; GFX940-NEXT:    ; def s[0:1]
13528; GFX940-NEXT:    ;;#ASMEND
13529; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13530; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
13531; GFX940-NEXT:    ;;#ASMSTART
13532; GFX940-NEXT:    ; use s[8:9]
13533; GFX940-NEXT:    ;;#ASMEND
13534; GFX940-NEXT:    s_setpc_b64 s[30:31]
13535  %vec0 = call <4 x half> asm "; def $0", "=s"()
13536  %vec1 = call <4 x half> asm "; def $0", "=s"()
13537  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 5>
13538  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13539  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13540  ret void
13541}
13542
; SGPR shuffle with mask <7, 0, 5>. Indices 7 and 5 select from %vec1 and index
; 0 from %vec0, so the result is <%vec1[3], %vec0[0], %vec1[1]> and both inputs
; are read (two defs in the expected output). The <3 x half> result is widened
; with a poison 4th lane and passed to an inline asm that pins it to s[8:9].
13543define void @s_shuffle_v3f16_v4f16__7_0_5() {
13544; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_5:
13545; GFX900:       ; %bb.0:
13546; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13547; GFX900-NEXT:    ;;#ASMSTART
13548; GFX900-NEXT:    ; def s[4:5]
13549; GFX900-NEXT:    ;;#ASMEND
13550; GFX900-NEXT:    ;;#ASMSTART
13551; GFX900-NEXT:    ; def s[6:7]
13552; GFX900-NEXT:    ;;#ASMEND
13553; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
13554; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13555; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13556; GFX900-NEXT:    ;;#ASMSTART
13557; GFX900-NEXT:    ; use s[8:9]
13558; GFX900-NEXT:    ;;#ASMEND
13559; GFX900-NEXT:    s_setpc_b64 s[30:31]
13560;
13561; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_5:
13562; GFX90A:       ; %bb.0:
13563; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13564; GFX90A-NEXT:    ;;#ASMSTART
13565; GFX90A-NEXT:    ; def s[4:5]
13566; GFX90A-NEXT:    ;;#ASMEND
13567; GFX90A-NEXT:    ;;#ASMSTART
13568; GFX90A-NEXT:    ; def s[6:7]
13569; GFX90A-NEXT:    ;;#ASMEND
13570; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
13571; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13572; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13573; GFX90A-NEXT:    ;;#ASMSTART
13574; GFX90A-NEXT:    ; use s[8:9]
13575; GFX90A-NEXT:    ;;#ASMEND
13576; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13577;
13578; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_5:
13579; GFX940:       ; %bb.0:
13580; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13581; GFX940-NEXT:    ;;#ASMSTART
13582; GFX940-NEXT:    ; def s[0:1]
13583; GFX940-NEXT:    ;;#ASMEND
13584; GFX940-NEXT:    ;;#ASMSTART
13585; GFX940-NEXT:    ; def s[2:3]
13586; GFX940-NEXT:    ;;#ASMEND
13587; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
13588; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13589; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13590; GFX940-NEXT:    ;;#ASMSTART
13591; GFX940-NEXT:    ; use s[8:9]
13592; GFX940-NEXT:    ;;#ASMEND
13593; GFX940-NEXT:    s_setpc_b64 s[30:31]
13594  %vec0 = call <4 x half> asm "; def $0", "=s"()
13595  %vec1 = call <4 x half> asm "; def $0", "=s"()
13596  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 5>
13597  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13598  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13599  ret void
13600}
13601
; SGPR shuffle with mask <7, 1, 5>. Indices 7 and 5 select from %vec1 and index
; 1 from %vec0, so the result is <%vec1[3], %vec0[1], %vec1[1]> and both inputs
; are read (two defs in the expected output). The <3 x half> result is widened
; with a poison 4th lane and passed to an inline asm that pins it to s[8:9].
13602define void @s_shuffle_v3f16_v4f16__7_1_5() {
13603; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_5:
13604; GFX900:       ; %bb.0:
13605; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13606; GFX900-NEXT:    ;;#ASMSTART
13607; GFX900-NEXT:    ; def s[4:5]
13608; GFX900-NEXT:    ;;#ASMEND
13609; GFX900-NEXT:    ;;#ASMSTART
13610; GFX900-NEXT:    ; def s[6:7]
13611; GFX900-NEXT:    ;;#ASMEND
13612; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
13613; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
13614; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13615; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13616; GFX900-NEXT:    ;;#ASMSTART
13617; GFX900-NEXT:    ; use s[8:9]
13618; GFX900-NEXT:    ;;#ASMEND
13619; GFX900-NEXT:    s_setpc_b64 s[30:31]
13620;
13621; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_5:
13622; GFX90A:       ; %bb.0:
13623; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13624; GFX90A-NEXT:    ;;#ASMSTART
13625; GFX90A-NEXT:    ; def s[4:5]
13626; GFX90A-NEXT:    ;;#ASMEND
13627; GFX90A-NEXT:    ;;#ASMSTART
13628; GFX90A-NEXT:    ; def s[6:7]
13629; GFX90A-NEXT:    ;;#ASMEND
13630; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
13631; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
13632; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13633; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13634; GFX90A-NEXT:    ;;#ASMSTART
13635; GFX90A-NEXT:    ; use s[8:9]
13636; GFX90A-NEXT:    ;;#ASMEND
13637; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13638;
13639; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_5:
13640; GFX940:       ; %bb.0:
13641; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13642; GFX940-NEXT:    ;;#ASMSTART
13643; GFX940-NEXT:    ; def s[0:1]
13644; GFX940-NEXT:    ;;#ASMEND
13645; GFX940-NEXT:    ;;#ASMSTART
13646; GFX940-NEXT:    ; def s[2:3]
13647; GFX940-NEXT:    ;;#ASMEND
13648; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
13649; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
13650; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13651; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13652; GFX940-NEXT:    ;;#ASMSTART
13653; GFX940-NEXT:    ; use s[8:9]
13654; GFX940-NEXT:    ;;#ASMEND
13655; GFX940-NEXT:    s_setpc_b64 s[30:31]
13656  %vec0 = call <4 x half> asm "; def $0", "=s"()
13657  %vec1 = call <4 x half> asm "; def $0", "=s"()
13658  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 5>
13659  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13660  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13661  ret void
13662}
13663
; SGPR shuffle with mask <7, 2, 5>. Indices 7 and 5 select from %vec1 and index
; 2 from %vec0, so the result is <%vec1[3], %vec0[2], %vec1[1]> and both inputs
; are read (two defs in the expected output). The <3 x half> result is widened
; with a poison 4th lane and passed to an inline asm that pins it to s[8:9].
13664define void @s_shuffle_v3f16_v4f16__7_2_5() {
13665; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_5:
13666; GFX900:       ; %bb.0:
13667; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13668; GFX900-NEXT:    ;;#ASMSTART
13669; GFX900-NEXT:    ; def s[4:5]
13670; GFX900-NEXT:    ;;#ASMEND
13671; GFX900-NEXT:    ;;#ASMSTART
13672; GFX900-NEXT:    ; def s[6:7]
13673; GFX900-NEXT:    ;;#ASMEND
13674; GFX900-NEXT:    s_lshr_b32 s4, s7, 16
13675; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
13676; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13677; GFX900-NEXT:    ;;#ASMSTART
13678; GFX900-NEXT:    ; use s[8:9]
13679; GFX900-NEXT:    ;;#ASMEND
13680; GFX900-NEXT:    s_setpc_b64 s[30:31]
13681;
13682; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_5:
13683; GFX90A:       ; %bb.0:
13684; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13685; GFX90A-NEXT:    ;;#ASMSTART
13686; GFX90A-NEXT:    ; def s[4:5]
13687; GFX90A-NEXT:    ;;#ASMEND
13688; GFX90A-NEXT:    ;;#ASMSTART
13689; GFX90A-NEXT:    ; def s[6:7]
13690; GFX90A-NEXT:    ;;#ASMEND
13691; GFX90A-NEXT:    s_lshr_b32 s4, s7, 16
13692; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
13693; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13694; GFX90A-NEXT:    ;;#ASMSTART
13695; GFX90A-NEXT:    ; use s[8:9]
13696; GFX90A-NEXT:    ;;#ASMEND
13697; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13698;
13699; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_5:
13700; GFX940:       ; %bb.0:
13701; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13702; GFX940-NEXT:    ;;#ASMSTART
13703; GFX940-NEXT:    ; def s[0:1]
13704; GFX940-NEXT:    ;;#ASMEND
13705; GFX940-NEXT:    ;;#ASMSTART
13706; GFX940-NEXT:    ; def s[2:3]
13707; GFX940-NEXT:    ;;#ASMEND
13708; GFX940-NEXT:    s_lshr_b32 s0, s3, 16
13709; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
13710; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13711; GFX940-NEXT:    ;;#ASMSTART
13712; GFX940-NEXT:    ; use s[8:9]
13713; GFX940-NEXT:    ;;#ASMEND
13714; GFX940-NEXT:    s_setpc_b64 s[30:31]
13715  %vec0 = call <4 x half> asm "; def $0", "=s"()
13716  %vec1 = call <4 x half> asm "; def $0", "=s"()
13717  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 5>
13718  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13719  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13720  ret void
13721}
13722
; SGPR shuffle with mask <7, 3, 5>. Indices 7 and 5 select from %vec1 and index
; 3 from %vec0, so the result is <%vec1[3], %vec0[3], %vec1[1]> and both inputs
; are read (two defs in the expected output). The <3 x half> result is widened
; with a poison 4th lane and passed to an inline asm that pins it to s[8:9].
13723define void @s_shuffle_v3f16_v4f16__7_3_5() {
13724; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_5:
13725; GFX900:       ; %bb.0:
13726; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13727; GFX900-NEXT:    ;;#ASMSTART
13728; GFX900-NEXT:    ; def s[4:5]
13729; GFX900-NEXT:    ;;#ASMEND
13730; GFX900-NEXT:    ;;#ASMSTART
13731; GFX900-NEXT:    ; def s[6:7]
13732; GFX900-NEXT:    ;;#ASMEND
13733; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
13734; GFX900-NEXT:    s_lshr_b32 s5, s7, 16
13735; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13736; GFX900-NEXT:    s_lshr_b32 s9, s6, 16
13737; GFX900-NEXT:    ;;#ASMSTART
13738; GFX900-NEXT:    ; use s[8:9]
13739; GFX900-NEXT:    ;;#ASMEND
13740; GFX900-NEXT:    s_setpc_b64 s[30:31]
13741;
13742; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_5:
13743; GFX90A:       ; %bb.0:
13744; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13745; GFX90A-NEXT:    ;;#ASMSTART
13746; GFX90A-NEXT:    ; def s[4:5]
13747; GFX90A-NEXT:    ;;#ASMEND
13748; GFX90A-NEXT:    ;;#ASMSTART
13749; GFX90A-NEXT:    ; def s[6:7]
13750; GFX90A-NEXT:    ;;#ASMEND
13751; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
13752; GFX90A-NEXT:    s_lshr_b32 s5, s7, 16
13753; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13754; GFX90A-NEXT:    s_lshr_b32 s9, s6, 16
13755; GFX90A-NEXT:    ;;#ASMSTART
13756; GFX90A-NEXT:    ; use s[8:9]
13757; GFX90A-NEXT:    ;;#ASMEND
13758; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13759;
13760; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_5:
13761; GFX940:       ; %bb.0:
13762; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13763; GFX940-NEXT:    ;;#ASMSTART
13764; GFX940-NEXT:    ; def s[0:1]
13765; GFX940-NEXT:    ;;#ASMEND
13766; GFX940-NEXT:    ;;#ASMSTART
13767; GFX940-NEXT:    ; def s[2:3]
13768; GFX940-NEXT:    ;;#ASMEND
13769; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
13770; GFX940-NEXT:    s_lshr_b32 s1, s3, 16
13771; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13772; GFX940-NEXT:    s_lshr_b32 s9, s2, 16
13773; GFX940-NEXT:    ;;#ASMSTART
13774; GFX940-NEXT:    ; use s[8:9]
13775; GFX940-NEXT:    ;;#ASMEND
13776; GFX940-NEXT:    s_setpc_b64 s[30:31]
13777  %vec0 = call <4 x half> asm "; def $0", "=s"()
13778  %vec1 = call <4 x half> asm "; def $0", "=s"()
13779  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 5>
13780  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13781  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13782  ret void
13783}
13784
; SGPR shuffle, mask <7, 4, 5>: lanes are %vec1[3], %vec1[0], %vec1[1]
; (mask indices 4-7 address %vec1). No lane reads %vec0, and the expected
; output below contains a single def. The padded <4 x half> result is
; consumed by inline asm pinned to s[8:9].
13785define void @s_shuffle_v3f16_v4f16__7_4_5() {
13786; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_5:
13787; GFX900:       ; %bb.0:
13788; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13789; GFX900-NEXT:    ;;#ASMSTART
13790; GFX900-NEXT:    ; def s[4:5]
13791; GFX900-NEXT:    ;;#ASMEND
13792; GFX900-NEXT:    s_lshr_b32 s5, s5, 16
13793; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13794; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13795; GFX900-NEXT:    ;;#ASMSTART
13796; GFX900-NEXT:    ; use s[8:9]
13797; GFX900-NEXT:    ;;#ASMEND
13798; GFX900-NEXT:    s_setpc_b64 s[30:31]
13799;
13800; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_5:
13801; GFX90A:       ; %bb.0:
13802; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13803; GFX90A-NEXT:    ;;#ASMSTART
13804; GFX90A-NEXT:    ; def s[4:5]
13805; GFX90A-NEXT:    ;;#ASMEND
13806; GFX90A-NEXT:    s_lshr_b32 s5, s5, 16
13807; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
13808; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13809; GFX90A-NEXT:    ;;#ASMSTART
13810; GFX90A-NEXT:    ; use s[8:9]
13811; GFX90A-NEXT:    ;;#ASMEND
13812; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13813;
13814; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_5:
13815; GFX940:       ; %bb.0:
13816; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13817; GFX940-NEXT:    ;;#ASMSTART
13818; GFX940-NEXT:    ; def s[0:1]
13819; GFX940-NEXT:    ;;#ASMEND
13820; GFX940-NEXT:    s_lshr_b32 s1, s1, 16
13821; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
13822; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13823; GFX940-NEXT:    ;;#ASMSTART
13824; GFX940-NEXT:    ; use s[8:9]
13825; GFX940-NEXT:    ;;#ASMEND
13826; GFX940-NEXT:    s_setpc_b64 s[30:31]
13827  %vec0 = call <4 x half> asm "; def $0", "=s"()
13828  %vec1 = call <4 x half> asm "; def $0", "=s"()
13829  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 5>
13830  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13831  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13832  ret void
13833}
13834
; SGPR shuffle, mask <7, 6, 5>: lanes are %vec1[3], %vec1[2], %vec1[1]
; (mask indices 4-7 address %vec1). No lane reads %vec0, and the expected
; output below contains a single def. The padded <4 x half> result is
; consumed by inline asm pinned to s[8:9].
13835define void @s_shuffle_v3f16_v4f16__7_6_5() {
13836; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_5:
13837; GFX900:       ; %bb.0:
13838; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13839; GFX900-NEXT:    ;;#ASMSTART
13840; GFX900-NEXT:    ; def s[4:5]
13841; GFX900-NEXT:    ;;#ASMEND
13842; GFX900-NEXT:    s_lshr_b32 s6, s5, 16
13843; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
13844; GFX900-NEXT:    s_lshr_b32 s9, s4, 16
13845; GFX900-NEXT:    ;;#ASMSTART
13846; GFX900-NEXT:    ; use s[8:9]
13847; GFX900-NEXT:    ;;#ASMEND
13848; GFX900-NEXT:    s_setpc_b64 s[30:31]
13849;
13850; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_5:
13851; GFX90A:       ; %bb.0:
13852; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13853; GFX90A-NEXT:    ;;#ASMSTART
13854; GFX90A-NEXT:    ; def s[4:5]
13855; GFX90A-NEXT:    ;;#ASMEND
13856; GFX90A-NEXT:    s_lshr_b32 s6, s5, 16
13857; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s6, s5
13858; GFX90A-NEXT:    s_lshr_b32 s9, s4, 16
13859; GFX90A-NEXT:    ;;#ASMSTART
13860; GFX90A-NEXT:    ; use s[8:9]
13861; GFX90A-NEXT:    ;;#ASMEND
13862; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13863;
13864; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_5:
13865; GFX940:       ; %bb.0:
13866; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13867; GFX940-NEXT:    ;;#ASMSTART
13868; GFX940-NEXT:    ; def s[0:1]
13869; GFX940-NEXT:    ;;#ASMEND
13870; GFX940-NEXT:    s_lshr_b32 s2, s1, 16
13871; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s2, s1
13872; GFX940-NEXT:    s_lshr_b32 s9, s0, 16
13873; GFX940-NEXT:    ;;#ASMSTART
13874; GFX940-NEXT:    ; use s[8:9]
13875; GFX940-NEXT:    ;;#ASMEND
13876; GFX940-NEXT:    s_setpc_b64 s[30:31]
13877  %vec0 = call <4 x half> asm "; def $0", "=s"()
13878  %vec1 = call <4 x half> asm "; def $0", "=s"()
13879  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 5>
13880  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13881  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13882  ret void
13883}
13884
; SGPR shuffle, mask <poison, 6, 6>: lane 0 is unspecified, lanes 1 and 2 are
; both %vec1[2] (mask indices 4-7 address %vec1). No lane reads %vec0, and the
; expected output below contains a single def. One set of checks under the
; shared prefix covers all three RUN lines. The padded <4 x half> result is
; consumed by inline asm pinned to s[8:9].
13885define void @s_shuffle_v3f16_v4f16__u_6_6() {
13886; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_6_6:
13887; GFX9:       ; %bb.0:
13888; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13889; GFX9-NEXT:    ;;#ASMSTART
13890; GFX9-NEXT:    ; def s[8:9]
13891; GFX9-NEXT:    ;;#ASMEND
13892; GFX9-NEXT:    s_lshl_b32 s8, s9, 16
13893; GFX9-NEXT:    ;;#ASMSTART
13894; GFX9-NEXT:    ; use s[8:9]
13895; GFX9-NEXT:    ;;#ASMEND
13896; GFX9-NEXT:    s_setpc_b64 s[30:31]
13897  %vec0 = call <4 x half> asm "; def $0", "=s"()
13898  %vec1 = call <4 x half> asm "; def $0", "=s"()
13899  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 6, i32 6>
13900  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13901  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13902  ret void
13903}
13904
; SGPR shuffle, mask <0, 6, 6>: lanes are %vec0[0], %vec1[2], %vec1[2]
; (mask indices 4-7 address %vec1). The <3 x half> result is padded to
; <4 x half> with a poison lane and consumed by inline asm pinned to s[8:9].
13905define void @s_shuffle_v3f16_v4f16__0_6_6() {
13906; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_6_6:
13907; GFX900:       ; %bb.0:
13908; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13909; GFX900-NEXT:    ;;#ASMSTART
13910; GFX900-NEXT:    ; def s[8:9]
13911; GFX900-NEXT:    ;;#ASMEND
13912; GFX900-NEXT:    ;;#ASMSTART
13913; GFX900-NEXT:    ; def s[4:5]
13914; GFX900-NEXT:    ;;#ASMEND
13915; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13916; GFX900-NEXT:    ;;#ASMSTART
13917; GFX900-NEXT:    ; use s[8:9]
13918; GFX900-NEXT:    ;;#ASMEND
13919; GFX900-NEXT:    s_setpc_b64 s[30:31]
13920;
13921; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_6_6:
13922; GFX90A:       ; %bb.0:
13923; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13924; GFX90A-NEXT:    ;;#ASMSTART
13925; GFX90A-NEXT:    ; def s[8:9]
13926; GFX90A-NEXT:    ;;#ASMEND
13927; GFX90A-NEXT:    ;;#ASMSTART
13928; GFX90A-NEXT:    ; def s[4:5]
13929; GFX90A-NEXT:    ;;#ASMEND
13930; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13931; GFX90A-NEXT:    ;;#ASMSTART
13932; GFX90A-NEXT:    ; use s[8:9]
13933; GFX90A-NEXT:    ;;#ASMEND
13934; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13935;
13936; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_6_6:
13937; GFX940:       ; %bb.0:
13938; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13939; GFX940-NEXT:    ;;#ASMSTART
13940; GFX940-NEXT:    ; def s[8:9]
13941; GFX940-NEXT:    ;;#ASMEND
13942; GFX940-NEXT:    ;;#ASMSTART
13943; GFX940-NEXT:    ; def s[0:1]
13944; GFX940-NEXT:    ;;#ASMEND
13945; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
13946; GFX940-NEXT:    ;;#ASMSTART
13947; GFX940-NEXT:    ; use s[8:9]
13948; GFX940-NEXT:    ;;#ASMEND
13949; GFX940-NEXT:    s_setpc_b64 s[30:31]
13950  %vec0 = call <4 x half> asm "; def $0", "=s"()
13951  %vec1 = call <4 x half> asm "; def $0", "=s"()
13952  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 6, i32 6>
13953  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
13954  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
13955  ret void
13956}
13957
; SGPR shuffle, mask <1, 6, 6>: lanes are %vec0[1], %vec1[2], %vec1[2]
; (mask indices 4-7 address %vec1). The <3 x half> result is padded to
; <4 x half> with a poison lane and consumed by inline asm pinned to s[8:9].
13958define void @s_shuffle_v3f16_v4f16__1_6_6() {
13959; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_6_6:
13960; GFX900:       ; %bb.0:
13961; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13962; GFX900-NEXT:    ;;#ASMSTART
13963; GFX900-NEXT:    ; def s[4:5]
13964; GFX900-NEXT:    ;;#ASMEND
13965; GFX900-NEXT:    ;;#ASMSTART
13966; GFX900-NEXT:    ; def s[8:9]
13967; GFX900-NEXT:    ;;#ASMEND
13968; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
13969; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13970; GFX900-NEXT:    ;;#ASMSTART
13971; GFX900-NEXT:    ; use s[8:9]
13972; GFX900-NEXT:    ;;#ASMEND
13973; GFX900-NEXT:    s_setpc_b64 s[30:31]
13974;
13975; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_6_6:
13976; GFX90A:       ; %bb.0:
13977; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13978; GFX90A-NEXT:    ;;#ASMSTART
13979; GFX90A-NEXT:    ; def s[4:5]
13980; GFX90A-NEXT:    ;;#ASMEND
13981; GFX90A-NEXT:    ;;#ASMSTART
13982; GFX90A-NEXT:    ; def s[8:9]
13983; GFX90A-NEXT:    ;;#ASMEND
13984; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
13985; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
13986; GFX90A-NEXT:    ;;#ASMSTART
13987; GFX90A-NEXT:    ; use s[8:9]
13988; GFX90A-NEXT:    ;;#ASMEND
13989; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13990;
13991; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_6_6:
13992; GFX940:       ; %bb.0:
13993; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13994; GFX940-NEXT:    ;;#ASMSTART
13995; GFX940-NEXT:    ; def s[0:1]
13996; GFX940-NEXT:    ;;#ASMEND
13997; GFX940-NEXT:    ;;#ASMSTART
13998; GFX940-NEXT:    ; def s[8:9]
13999; GFX940-NEXT:    ;;#ASMEND
14000; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
14001; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14002; GFX940-NEXT:    ;;#ASMSTART
14003; GFX940-NEXT:    ; use s[8:9]
14004; GFX940-NEXT:    ;;#ASMEND
14005; GFX940-NEXT:    s_setpc_b64 s[30:31]
14006  %vec0 = call <4 x half> asm "; def $0", "=s"()
14007  %vec1 = call <4 x half> asm "; def $0", "=s"()
14008  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 6, i32 6>
14009  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14010  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14011  ret void
14012}
14013
; SGPR shuffle, mask <2, 6, 6>: lanes are %vec0[2], %vec1[2], %vec1[2]
; (mask indices 4-7 address %vec1). The <3 x half> result is padded to
; <4 x half> with a poison lane and consumed by inline asm pinned to s[8:9].
14014define void @s_shuffle_v3f16_v4f16__2_6_6() {
14015; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_6_6:
14016; GFX900:       ; %bb.0:
14017; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14018; GFX900-NEXT:    ;;#ASMSTART
14019; GFX900-NEXT:    ; def s[8:9]
14020; GFX900-NEXT:    ;;#ASMEND
14021; GFX900-NEXT:    ;;#ASMSTART
14022; GFX900-NEXT:    ; def s[4:5]
14023; GFX900-NEXT:    ;;#ASMEND
14024; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
14025; GFX900-NEXT:    ;;#ASMSTART
14026; GFX900-NEXT:    ; use s[8:9]
14027; GFX900-NEXT:    ;;#ASMEND
14028; GFX900-NEXT:    s_setpc_b64 s[30:31]
14029;
14030; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_6_6:
14031; GFX90A:       ; %bb.0:
14032; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14033; GFX90A-NEXT:    ;;#ASMSTART
14034; GFX90A-NEXT:    ; def s[8:9]
14035; GFX90A-NEXT:    ;;#ASMEND
14036; GFX90A-NEXT:    ;;#ASMSTART
14037; GFX90A-NEXT:    ; def s[4:5]
14038; GFX90A-NEXT:    ;;#ASMEND
14039; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
14040; GFX90A-NEXT:    ;;#ASMSTART
14041; GFX90A-NEXT:    ; use s[8:9]
14042; GFX90A-NEXT:    ;;#ASMEND
14043; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14044;
14045; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_6_6:
14046; GFX940:       ; %bb.0:
14047; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14048; GFX940-NEXT:    ;;#ASMSTART
14049; GFX940-NEXT:    ; def s[8:9]
14050; GFX940-NEXT:    ;;#ASMEND
14051; GFX940-NEXT:    ;;#ASMSTART
14052; GFX940-NEXT:    ; def s[0:1]
14053; GFX940-NEXT:    ;;#ASMEND
14054; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
14055; GFX940-NEXT:    ;;#ASMSTART
14056; GFX940-NEXT:    ; use s[8:9]
14057; GFX940-NEXT:    ;;#ASMEND
14058; GFX940-NEXT:    s_setpc_b64 s[30:31]
14059  %vec0 = call <4 x half> asm "; def $0", "=s"()
14060  %vec1 = call <4 x half> asm "; def $0", "=s"()
14061  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 6, i32 6>
14062  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14063  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14064  ret void
14065}
14066
; SGPR shuffle, mask <3, 6, 6>: lanes are %vec0[3], %vec1[2], %vec1[2]
; (mask indices 4-7 address %vec1). The <3 x half> result is padded to
; <4 x half> with a poison lane and consumed by inline asm pinned to s[8:9].
14067define void @s_shuffle_v3f16_v4f16__3_6_6() {
14068; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_6_6:
14069; GFX900:       ; %bb.0:
14070; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14071; GFX900-NEXT:    ;;#ASMSTART
14072; GFX900-NEXT:    ; def s[4:5]
14073; GFX900-NEXT:    ;;#ASMEND
14074; GFX900-NEXT:    ;;#ASMSTART
14075; GFX900-NEXT:    ; def s[8:9]
14076; GFX900-NEXT:    ;;#ASMEND
14077; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
14078; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14079; GFX900-NEXT:    ;;#ASMSTART
14080; GFX900-NEXT:    ; use s[8:9]
14081; GFX900-NEXT:    ;;#ASMEND
14082; GFX900-NEXT:    s_setpc_b64 s[30:31]
14083;
14084; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_6_6:
14085; GFX90A:       ; %bb.0:
14086; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14087; GFX90A-NEXT:    ;;#ASMSTART
14088; GFX90A-NEXT:    ; def s[4:5]
14089; GFX90A-NEXT:    ;;#ASMEND
14090; GFX90A-NEXT:    ;;#ASMSTART
14091; GFX90A-NEXT:    ; def s[8:9]
14092; GFX90A-NEXT:    ;;#ASMEND
14093; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
14094; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14095; GFX90A-NEXT:    ;;#ASMSTART
14096; GFX90A-NEXT:    ; use s[8:9]
14097; GFX90A-NEXT:    ;;#ASMEND
14098; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14099;
14100; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_6_6:
14101; GFX940:       ; %bb.0:
14102; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14103; GFX940-NEXT:    ;;#ASMSTART
14104; GFX940-NEXT:    ; def s[0:1]
14105; GFX940-NEXT:    ;;#ASMEND
14106; GFX940-NEXT:    ;;#ASMSTART
14107; GFX940-NEXT:    ; def s[8:9]
14108; GFX940-NEXT:    ;;#ASMEND
14109; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
14110; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14111; GFX940-NEXT:    ;;#ASMSTART
14112; GFX940-NEXT:    ; use s[8:9]
14113; GFX940-NEXT:    ;;#ASMEND
14114; GFX940-NEXT:    s_setpc_b64 s[30:31]
14115  %vec0 = call <4 x half> asm "; def $0", "=s"()
14116  %vec1 = call <4 x half> asm "; def $0", "=s"()
14117  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 6, i32 6>
14118  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14119  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14120  ret void
14121}
14122
; SGPR shuffle, mask <4, 6, 6>: lanes are %vec1[0], %vec1[2], %vec1[2]
; (mask indices 4-7 address %vec1). No lane reads %vec0, and the expected
; output below contains a single def. One set of checks under the shared
; prefix covers all three RUN lines. The padded <4 x half> result is consumed
; by inline asm pinned to s[8:9].
14123define void @s_shuffle_v3f16_v4f16__4_6_6() {
14124; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_6_6:
14125; GFX9:       ; %bb.0:
14126; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14127; GFX9-NEXT:    ;;#ASMSTART
14128; GFX9-NEXT:    ; def s[8:9]
14129; GFX9-NEXT:    ;;#ASMEND
14130; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s8, s9
14131; GFX9-NEXT:    ;;#ASMSTART
14132; GFX9-NEXT:    ; use s[8:9]
14133; GFX9-NEXT:    ;;#ASMEND
14134; GFX9-NEXT:    s_setpc_b64 s[30:31]
14135  %vec0 = call <4 x half> asm "; def $0", "=s"()
14136  %vec1 = call <4 x half> asm "; def $0", "=s"()
14137  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 6, i32 6>
14138  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14139  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14140  ret void
14141}
14142
; SGPR shuffle, mask <5, 6, 6>: lanes are %vec1[1], %vec1[2], %vec1[2]
; (mask indices 4-7 address %vec1). No lane reads %vec0, and the expected
; output below contains a single def. The padded <4 x half> result is
; consumed by inline asm pinned to s[8:9].
14143define void @s_shuffle_v3f16_v4f16__5_6_6() {
14144; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_6_6:
14145; GFX900:       ; %bb.0:
14146; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14147; GFX900-NEXT:    ;;#ASMSTART
14148; GFX900-NEXT:    ; def s[8:9]
14149; GFX900-NEXT:    ;;#ASMEND
14150; GFX900-NEXT:    s_lshr_b32 s4, s8, 16
14151; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14152; GFX900-NEXT:    ;;#ASMSTART
14153; GFX900-NEXT:    ; use s[8:9]
14154; GFX900-NEXT:    ;;#ASMEND
14155; GFX900-NEXT:    s_setpc_b64 s[30:31]
14156;
14157; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_6_6:
14158; GFX90A:       ; %bb.0:
14159; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14160; GFX90A-NEXT:    ;;#ASMSTART
14161; GFX90A-NEXT:    ; def s[8:9]
14162; GFX90A-NEXT:    ;;#ASMEND
14163; GFX90A-NEXT:    s_lshr_b32 s4, s8, 16
14164; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14165; GFX90A-NEXT:    ;;#ASMSTART
14166; GFX90A-NEXT:    ; use s[8:9]
14167; GFX90A-NEXT:    ;;#ASMEND
14168; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14169;
14170; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_6_6:
14171; GFX940:       ; %bb.0:
14172; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14173; GFX940-NEXT:    ;;#ASMSTART
14174; GFX940-NEXT:    ; def s[8:9]
14175; GFX940-NEXT:    ;;#ASMEND
14176; GFX940-NEXT:    s_lshr_b32 s0, s8, 16
14177; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14178; GFX940-NEXT:    ;;#ASMSTART
14179; GFX940-NEXT:    ; use s[8:9]
14180; GFX940-NEXT:    ;;#ASMEND
14181; GFX940-NEXT:    s_setpc_b64 s[30:31]
14182  %vec0 = call <4 x half> asm "; def $0", "=s"()
14183  %vec1 = call <4 x half> asm "; def $0", "=s"()
14184  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 6, i32 6>
14185  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14186  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14187  ret void
14188}
14189
; SGPR shuffle, mask <6, 6, 6>: all three lanes are %vec1[2] (mask indices
; 4-7 address %vec1). No lane reads %vec0, and the expected output below
; contains a single def. One set of checks under the shared prefix covers all
; three RUN lines. The padded <4 x half> result is consumed by inline asm
; pinned to s[8:9].
14190define void @s_shuffle_v3f16_v4f16__6_6_6() {
14191; GFX9-LABEL: s_shuffle_v3f16_v4f16__6_6_6:
14192; GFX9:       ; %bb.0:
14193; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14194; GFX9-NEXT:    ;;#ASMSTART
14195; GFX9-NEXT:    ; def s[8:9]
14196; GFX9-NEXT:    ;;#ASMEND
14197; GFX9-NEXT:    s_pack_ll_b32_b16 s8, s9, s9
14198; GFX9-NEXT:    ;;#ASMSTART
14199; GFX9-NEXT:    ; use s[8:9]
14200; GFX9-NEXT:    ;;#ASMEND
14201; GFX9-NEXT:    s_setpc_b64 s[30:31]
14202  %vec0 = call <4 x half> asm "; def $0", "=s"()
14203  %vec1 = call <4 x half> asm "; def $0", "=s"()
14204  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 6, i32 6>
14205  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14206  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14207  ret void
14208}
14209
; SGPR shuffle, mask <7, 6, 6>: lanes are %vec1[3], %vec1[2], %vec1[2]
; (mask indices 4-7 address %vec1). No lane reads %vec0, and the expected
; output below contains a single def. The padded <4 x half> result is
; consumed by inline asm pinned to s[8:9].
14210define void @s_shuffle_v3f16_v4f16__7_6_6() {
14211; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_6:
14212; GFX900:       ; %bb.0:
14213; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14214; GFX900-NEXT:    ;;#ASMSTART
14215; GFX900-NEXT:    ; def s[8:9]
14216; GFX900-NEXT:    ;;#ASMEND
14217; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
14218; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14219; GFX900-NEXT:    ;;#ASMSTART
14220; GFX900-NEXT:    ; use s[8:9]
14221; GFX900-NEXT:    ;;#ASMEND
14222; GFX900-NEXT:    s_setpc_b64 s[30:31]
14223;
14224; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_6:
14225; GFX90A:       ; %bb.0:
14226; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14227; GFX90A-NEXT:    ;;#ASMSTART
14228; GFX90A-NEXT:    ; def s[8:9]
14229; GFX90A-NEXT:    ;;#ASMEND
14230; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
14231; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14232; GFX90A-NEXT:    ;;#ASMSTART
14233; GFX90A-NEXT:    ; use s[8:9]
14234; GFX90A-NEXT:    ;;#ASMEND
14235; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14236;
14237; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_6:
14238; GFX940:       ; %bb.0:
14239; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14240; GFX940-NEXT:    ;;#ASMSTART
14241; GFX940-NEXT:    ; def s[8:9]
14242; GFX940-NEXT:    ;;#ASMEND
14243; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
14244; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14245; GFX940-NEXT:    ;;#ASMSTART
14246; GFX940-NEXT:    ; use s[8:9]
14247; GFX940-NEXT:    ;;#ASMEND
14248; GFX940-NEXT:    s_setpc_b64 s[30:31]
14249  %vec0 = call <4 x half> asm "; def $0", "=s"()
14250  %vec1 = call <4 x half> asm "; def $0", "=s"()
14251  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 6>
14252  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14253  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14254  ret void
14255}
14256
; SGPR shuffle, mask <7, poison, 6>: lane 0 is %vec1[3], lane 1 is
; unspecified, lane 2 is %vec1[2] (mask indices 4-7 address %vec1). No lane
; reads %vec0, and the expected output below contains a single def. One set
; of checks under the shared prefix covers all three RUN lines. The padded
; <4 x half> result is consumed by inline asm pinned to s[8:9].
14257define void @s_shuffle_v3f16_v4f16__7_u_6() {
14258; GFX9-LABEL: s_shuffle_v3f16_v4f16__7_u_6:
14259; GFX9:       ; %bb.0:
14260; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14261; GFX9-NEXT:    ;;#ASMSTART
14262; GFX9-NEXT:    ; def s[8:9]
14263; GFX9-NEXT:    ;;#ASMEND
14264; GFX9-NEXT:    s_lshr_b32 s8, s9, 16
14265; GFX9-NEXT:    ;;#ASMSTART
14266; GFX9-NEXT:    ; use s[8:9]
14267; GFX9-NEXT:    ;;#ASMEND
14268; GFX9-NEXT:    s_setpc_b64 s[30:31]
14269  %vec0 = call <4 x half> asm "; def $0", "=s"()
14270  %vec1 = call <4 x half> asm "; def $0", "=s"()
14271  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 6>
14272  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14273  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14274  ret void
14275}
14276
; SGPR shuffle, mask <7, 0, 6>: lanes are %vec1[3], %vec0[0], %vec1[2]
; (mask indices 4-7 address %vec1). The <3 x half> result is padded to
; <4 x half> with a poison lane and consumed by inline asm pinned to s[8:9].
14277define void @s_shuffle_v3f16_v4f16__7_0_6() {
14278; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_6:
14279; GFX900:       ; %bb.0:
14280; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14281; GFX900-NEXT:    ;;#ASMSTART
14282; GFX900-NEXT:    ; def s[4:5]
14283; GFX900-NEXT:    ;;#ASMEND
14284; GFX900-NEXT:    ;;#ASMSTART
14285; GFX900-NEXT:    ; def s[8:9]
14286; GFX900-NEXT:    ;;#ASMEND
14287; GFX900-NEXT:    s_lshr_b32 s5, s9, 16
14288; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14289; GFX900-NEXT:    ;;#ASMSTART
14290; GFX900-NEXT:    ; use s[8:9]
14291; GFX900-NEXT:    ;;#ASMEND
14292; GFX900-NEXT:    s_setpc_b64 s[30:31]
14293;
14294; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_6:
14295; GFX90A:       ; %bb.0:
14296; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14297; GFX90A-NEXT:    ;;#ASMSTART
14298; GFX90A-NEXT:    ; def s[4:5]
14299; GFX90A-NEXT:    ;;#ASMEND
14300; GFX90A-NEXT:    ;;#ASMSTART
14301; GFX90A-NEXT:    ; def s[8:9]
14302; GFX90A-NEXT:    ;;#ASMEND
14303; GFX90A-NEXT:    s_lshr_b32 s5, s9, 16
14304; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14305; GFX90A-NEXT:    ;;#ASMSTART
14306; GFX90A-NEXT:    ; use s[8:9]
14307; GFX90A-NEXT:    ;;#ASMEND
14308; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14309;
14310; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_6:
14311; GFX940:       ; %bb.0:
14312; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14313; GFX940-NEXT:    ;;#ASMSTART
14314; GFX940-NEXT:    ; def s[0:1]
14315; GFX940-NEXT:    ;;#ASMEND
14316; GFX940-NEXT:    ;;#ASMSTART
14317; GFX940-NEXT:    ; def s[8:9]
14318; GFX940-NEXT:    ;;#ASMEND
14319; GFX940-NEXT:    s_lshr_b32 s1, s9, 16
14320; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
14321; GFX940-NEXT:    ;;#ASMSTART
14322; GFX940-NEXT:    ; use s[8:9]
14323; GFX940-NEXT:    ;;#ASMEND
14324; GFX940-NEXT:    s_setpc_b64 s[30:31]
14325  %vec0 = call <4 x half> asm "; def $0", "=s"()
14326  %vec1 = call <4 x half> asm "; def $0", "=s"()
14327  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 6>
14328  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14329  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14330  ret void
14331}
14332
; SGPR shuffle, mask <7, 1, 6>: lanes are %vec1[3], %vec0[1], %vec1[2]
; (mask indices 4-7 address %vec1). The <3 x half> result is padded to
; <4 x half> with a poison lane and consumed by inline asm pinned to s[8:9].
14333define void @s_shuffle_v3f16_v4f16__7_1_6() {
14334; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_6:
14335; GFX900:       ; %bb.0:
14336; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14337; GFX900-NEXT:    ;;#ASMSTART
14338; GFX900-NEXT:    ; def s[4:5]
14339; GFX900-NEXT:    ;;#ASMEND
14340; GFX900-NEXT:    ;;#ASMSTART
14341; GFX900-NEXT:    ; def s[8:9]
14342; GFX900-NEXT:    ;;#ASMEND
14343; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
14344; GFX900-NEXT:    s_lshr_b32 s5, s9, 16
14345; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14346; GFX900-NEXT:    ;;#ASMSTART
14347; GFX900-NEXT:    ; use s[8:9]
14348; GFX900-NEXT:    ;;#ASMEND
14349; GFX900-NEXT:    s_setpc_b64 s[30:31]
14350;
14351; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_6:
14352; GFX90A:       ; %bb.0:
14353; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14354; GFX90A-NEXT:    ;;#ASMSTART
14355; GFX90A-NEXT:    ; def s[4:5]
14356; GFX90A-NEXT:    ;;#ASMEND
14357; GFX90A-NEXT:    ;;#ASMSTART
14358; GFX90A-NEXT:    ; def s[8:9]
14359; GFX90A-NEXT:    ;;#ASMEND
14360; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
14361; GFX90A-NEXT:    s_lshr_b32 s5, s9, 16
14362; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14363; GFX90A-NEXT:    ;;#ASMSTART
14364; GFX90A-NEXT:    ; use s[8:9]
14365; GFX90A-NEXT:    ;;#ASMEND
14366; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14367;
14368; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_6:
14369; GFX940:       ; %bb.0:
14370; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14371; GFX940-NEXT:    ;;#ASMSTART
14372; GFX940-NEXT:    ; def s[0:1]
14373; GFX940-NEXT:    ;;#ASMEND
14374; GFX940-NEXT:    ;;#ASMSTART
14375; GFX940-NEXT:    ; def s[8:9]
14376; GFX940-NEXT:    ;;#ASMEND
14377; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
14378; GFX940-NEXT:    s_lshr_b32 s1, s9, 16
14379; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
14380; GFX940-NEXT:    ;;#ASMSTART
14381; GFX940-NEXT:    ; use s[8:9]
14382; GFX940-NEXT:    ;;#ASMEND
14383; GFX940-NEXT:    s_setpc_b64 s[30:31]
14384  %vec0 = call <4 x half> asm "; def $0", "=s"()
14385  %vec1 = call <4 x half> asm "; def $0", "=s"()
14386  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 6>
14387  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14388  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14389  ret void
14390}
14391
; SGPR shuffle, mask <7, 2, 6>: lanes are %vec1[3], %vec0[2], %vec1[2]
; (mask indices 4-7 address %vec1). The <3 x half> result is padded to
; <4 x half> with a poison lane and consumed by inline asm pinned to s[8:9].
14392define void @s_shuffle_v3f16_v4f16__7_2_6() {
14393; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_6:
14394; GFX900:       ; %bb.0:
14395; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14396; GFX900-NEXT:    ;;#ASMSTART
14397; GFX900-NEXT:    ; def s[4:5]
14398; GFX900-NEXT:    ;;#ASMEND
14399; GFX900-NEXT:    ;;#ASMSTART
14400; GFX900-NEXT:    ; def s[8:9]
14401; GFX900-NEXT:    ;;#ASMEND
14402; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
14403; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
14404; GFX900-NEXT:    ;;#ASMSTART
14405; GFX900-NEXT:    ; use s[8:9]
14406; GFX900-NEXT:    ;;#ASMEND
14407; GFX900-NEXT:    s_setpc_b64 s[30:31]
14408;
14409; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_6:
14410; GFX90A:       ; %bb.0:
14411; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14412; GFX90A-NEXT:    ;;#ASMSTART
14413; GFX90A-NEXT:    ; def s[4:5]
14414; GFX90A-NEXT:    ;;#ASMEND
14415; GFX90A-NEXT:    ;;#ASMSTART
14416; GFX90A-NEXT:    ; def s[8:9]
14417; GFX90A-NEXT:    ;;#ASMEND
14418; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
14419; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
14420; GFX90A-NEXT:    ;;#ASMSTART
14421; GFX90A-NEXT:    ; use s[8:9]
14422; GFX90A-NEXT:    ;;#ASMEND
14423; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14424;
14425; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_6:
14426; GFX940:       ; %bb.0:
14427; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14428; GFX940-NEXT:    ;;#ASMSTART
14429; GFX940-NEXT:    ; def s[0:1]
14430; GFX940-NEXT:    ;;#ASMEND
14431; GFX940-NEXT:    ;;#ASMSTART
14432; GFX940-NEXT:    ; def s[8:9]
14433; GFX940-NEXT:    ;;#ASMEND
14434; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
14435; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
14436; GFX940-NEXT:    ;;#ASMSTART
14437; GFX940-NEXT:    ; use s[8:9]
14438; GFX940-NEXT:    ;;#ASMEND
14439; GFX940-NEXT:    s_setpc_b64 s[30:31]
14440  %vec0 = call <4 x half> asm "; def $0", "=s"()
14441  %vec1 = call <4 x half> asm "; def $0", "=s"()
14442  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 6>
14443  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14444  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14445  ret void
14446}
14447
; SGPR shuffle, mask <7, 3, 6>: lanes are %vec1[3], %vec0[3], %vec1[2]
; (mask indices 4-7 address %vec1). The <3 x half> result is padded to
; <4 x half> with a poison lane and consumed by inline asm pinned to s[8:9].
14448define void @s_shuffle_v3f16_v4f16__7_3_6() {
14449; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_6:
14450; GFX900:       ; %bb.0:
14451; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14452; GFX900-NEXT:    ;;#ASMSTART
14453; GFX900-NEXT:    ; def s[4:5]
14454; GFX900-NEXT:    ;;#ASMEND
14455; GFX900-NEXT:    ;;#ASMSTART
14456; GFX900-NEXT:    ; def s[8:9]
14457; GFX900-NEXT:    ;;#ASMEND
14458; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
14459; GFX900-NEXT:    s_lshr_b32 s5, s9, 16
14460; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14461; GFX900-NEXT:    ;;#ASMSTART
14462; GFX900-NEXT:    ; use s[8:9]
14463; GFX900-NEXT:    ;;#ASMEND
14464; GFX900-NEXT:    s_setpc_b64 s[30:31]
14465;
14466; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_6:
14467; GFX90A:       ; %bb.0:
14468; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14469; GFX90A-NEXT:    ;;#ASMSTART
14470; GFX90A-NEXT:    ; def s[4:5]
14471; GFX90A-NEXT:    ;;#ASMEND
14472; GFX90A-NEXT:    ;;#ASMSTART
14473; GFX90A-NEXT:    ; def s[8:9]
14474; GFX90A-NEXT:    ;;#ASMEND
14475; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
14476; GFX90A-NEXT:    s_lshr_b32 s5, s9, 16
14477; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14478; GFX90A-NEXT:    ;;#ASMSTART
14479; GFX90A-NEXT:    ; use s[8:9]
14480; GFX90A-NEXT:    ;;#ASMEND
14481; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14482;
14483; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_6:
14484; GFX940:       ; %bb.0:
14485; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14486; GFX940-NEXT:    ;;#ASMSTART
14487; GFX940-NEXT:    ; def s[0:1]
14488; GFX940-NEXT:    ;;#ASMEND
14489; GFX940-NEXT:    ;;#ASMSTART
14490; GFX940-NEXT:    ; def s[8:9]
14491; GFX940-NEXT:    ;;#ASMEND
14492; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
14493; GFX940-NEXT:    s_lshr_b32 s1, s9, 16
14494; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
14495; GFX940-NEXT:    ;;#ASMSTART
14496; GFX940-NEXT:    ; use s[8:9]
14497; GFX940-NEXT:    ;;#ASMEND
14498; GFX940-NEXT:    s_setpc_b64 s[30:31]
14499  %vec0 = call <4 x half> asm "; def $0", "=s"()
14500  %vec1 = call <4 x half> asm "; def $0", "=s"()
14501  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 6>
14502  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14503  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14504  ret void
14505}
14506
; SGPR shuffle, mask <7, 4, 6>: lanes are %vec1[3], %vec1[0], %vec1[2]
; (mask indices 4-7 address %vec1). No lane reads %vec0, and the expected
; output below contains a single def. The padded <4 x half> result is
; consumed by inline asm pinned to s[8:9].
14507define void @s_shuffle_v3f16_v4f16__7_4_6() {
14508; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_6:
14509; GFX900:       ; %bb.0:
14510; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14511; GFX900-NEXT:    ;;#ASMSTART
14512; GFX900-NEXT:    ; def s[8:9]
14513; GFX900-NEXT:    ;;#ASMEND
14514; GFX900-NEXT:    s_lshr_b32 s4, s9, 16
14515; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s8
14516; GFX900-NEXT:    ;;#ASMSTART
14517; GFX900-NEXT:    ; use s[8:9]
14518; GFX900-NEXT:    ;;#ASMEND
14519; GFX900-NEXT:    s_setpc_b64 s[30:31]
14520;
14521; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_6:
14522; GFX90A:       ; %bb.0:
14523; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14524; GFX90A-NEXT:    ;;#ASMSTART
14525; GFX90A-NEXT:    ; def s[8:9]
14526; GFX90A-NEXT:    ;;#ASMEND
14527; GFX90A-NEXT:    s_lshr_b32 s4, s9, 16
14528; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s8
14529; GFX90A-NEXT:    ;;#ASMSTART
14530; GFX90A-NEXT:    ; use s[8:9]
14531; GFX90A-NEXT:    ;;#ASMEND
14532; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14533;
14534; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_6:
14535; GFX940:       ; %bb.0:
14536; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14537; GFX940-NEXT:    ;;#ASMSTART
14538; GFX940-NEXT:    ; def s[8:9]
14539; GFX940-NEXT:    ;;#ASMEND
14540; GFX940-NEXT:    s_lshr_b32 s0, s9, 16
14541; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s8
14542; GFX940-NEXT:    ;;#ASMSTART
14543; GFX940-NEXT:    ; use s[8:9]
14544; GFX940-NEXT:    ;;#ASMEND
14545; GFX940-NEXT:    s_setpc_b64 s[30:31]
14546  %vec0 = call <4 x half> asm "; def $0", "=s"()
14547  %vec1 = call <4 x half> asm "; def $0", "=s"()
14548  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 6>
14549  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14550  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14551  ret void
14552}
14553
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <7, 5, 6>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so the lanes are
; %vec1[3], %vec1[1], %vec1[2]; %vec0 is not referenced by the mask and the
; checked output contains a single def.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
14554define void @s_shuffle_v3f16_v4f16__7_5_6() {
14555; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_6:
14556; GFX900:       ; %bb.0:
14557; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14558; GFX900-NEXT:    ;;#ASMSTART
14559; GFX900-NEXT:    ; def s[8:9]
14560; GFX900-NEXT:    ;;#ASMEND
14561; GFX900-NEXT:    s_lshr_b32 s4, s8, 16
14562; GFX900-NEXT:    s_lshr_b32 s5, s9, 16
14563; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14564; GFX900-NEXT:    ;;#ASMSTART
14565; GFX900-NEXT:    ; use s[8:9]
14566; GFX900-NEXT:    ;;#ASMEND
14567; GFX900-NEXT:    s_setpc_b64 s[30:31]
14568;
14569; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_6:
14570; GFX90A:       ; %bb.0:
14571; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14572; GFX90A-NEXT:    ;;#ASMSTART
14573; GFX90A-NEXT:    ; def s[8:9]
14574; GFX90A-NEXT:    ;;#ASMEND
14575; GFX90A-NEXT:    s_lshr_b32 s4, s8, 16
14576; GFX90A-NEXT:    s_lshr_b32 s5, s9, 16
14577; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
14578; GFX90A-NEXT:    ;;#ASMSTART
14579; GFX90A-NEXT:    ; use s[8:9]
14580; GFX90A-NEXT:    ;;#ASMEND
14581; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14582;
14583; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_6:
14584; GFX940:       ; %bb.0:
14585; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14586; GFX940-NEXT:    ;;#ASMSTART
14587; GFX940-NEXT:    ; def s[8:9]
14588; GFX940-NEXT:    ;;#ASMEND
14589; GFX940-NEXT:    s_lshr_b32 s0, s8, 16
14590; GFX940-NEXT:    s_lshr_b32 s1, s9, 16
14591; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
14592; GFX940-NEXT:    ;;#ASMSTART
14593; GFX940-NEXT:    ; use s[8:9]
14594; GFX940-NEXT:    ;;#ASMEND
14595; GFX940-NEXT:    s_setpc_b64 s[30:31]
14596  %vec0 = call <4 x half> asm "; def $0", "=s"()
14597  %vec1 = call <4 x half> asm "; def $0", "=s"()
14598  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 6>
14599  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14600  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14601  ret void
14602}
14603
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask
; <poison, 7, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lane 0 is left
; unspecified and lanes 1 and 2 are both %vec1[3]; %vec0 is not referenced by
; the mask and the checked output contains a single def.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
14604define void @s_shuffle_v3f16_v4f16__u_7_7() {
14605; GFX900-LABEL: s_shuffle_v3f16_v4f16__u_7_7:
14606; GFX900:       ; %bb.0:
14607; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14608; GFX900-NEXT:    ;;#ASMSTART
14609; GFX900-NEXT:    ; def s[4:5]
14610; GFX900-NEXT:    ;;#ASMEND
14611; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
14612; GFX900-NEXT:    s_mov_b32 s8, s5
14613; GFX900-NEXT:    ;;#ASMSTART
14614; GFX900-NEXT:    ; use s[8:9]
14615; GFX900-NEXT:    ;;#ASMEND
14616; GFX900-NEXT:    s_setpc_b64 s[30:31]
14617;
14618; GFX90A-LABEL: s_shuffle_v3f16_v4f16__u_7_7:
14619; GFX90A:       ; %bb.0:
14620; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14621; GFX90A-NEXT:    ;;#ASMSTART
14622; GFX90A-NEXT:    ; def s[4:5]
14623; GFX90A-NEXT:    ;;#ASMEND
14624; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
14625; GFX90A-NEXT:    s_mov_b32 s8, s5
14626; GFX90A-NEXT:    ;;#ASMSTART
14627; GFX90A-NEXT:    ; use s[8:9]
14628; GFX90A-NEXT:    ;;#ASMEND
14629; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14630;
14631; GFX940-LABEL: s_shuffle_v3f16_v4f16__u_7_7:
14632; GFX940:       ; %bb.0:
14633; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14634; GFX940-NEXT:    ;;#ASMSTART
14635; GFX940-NEXT:    ; def s[0:1]
14636; GFX940-NEXT:    ;;#ASMEND
14637; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
14638; GFX940-NEXT:    s_mov_b32 s8, s1
14639; GFX940-NEXT:    ;;#ASMSTART
14640; GFX940-NEXT:    ; use s[8:9]
14641; GFX940-NEXT:    ;;#ASMEND
14642; GFX940-NEXT:    s_setpc_b64 s[30:31]
14643  %vec0 = call <4 x half> asm "; def $0", "=s"()
14644  %vec1 = call <4 x half> asm "; def $0", "=s"()
14645  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 7, i32 7>
14646  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14647  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14648  ret void
14649}
14650
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <0, 7, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lane 0 is %vec0[0]
; and lanes 1 and 2 are both %vec1[3]; both inputs are referenced and the
; checked output contains two defs.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
14651define void @s_shuffle_v3f16_v4f16__0_7_7() {
14652; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_7_7:
14653; GFX900:       ; %bb.0:
14654; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14655; GFX900-NEXT:    ;;#ASMSTART
14656; GFX900-NEXT:    ; def s[6:7]
14657; GFX900-NEXT:    ;;#ASMEND
14658; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14659; GFX900-NEXT:    ;;#ASMSTART
14660; GFX900-NEXT:    ; def s[4:5]
14661; GFX900-NEXT:    ;;#ASMEND
14662; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14663; GFX900-NEXT:    ;;#ASMSTART
14664; GFX900-NEXT:    ; use s[8:9]
14665; GFX900-NEXT:    ;;#ASMEND
14666; GFX900-NEXT:    s_setpc_b64 s[30:31]
14667;
14668; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_7_7:
14669; GFX90A:       ; %bb.0:
14670; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14671; GFX90A-NEXT:    ;;#ASMSTART
14672; GFX90A-NEXT:    ; def s[6:7]
14673; GFX90A-NEXT:    ;;#ASMEND
14674; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14675; GFX90A-NEXT:    ;;#ASMSTART
14676; GFX90A-NEXT:    ; def s[4:5]
14677; GFX90A-NEXT:    ;;#ASMEND
14678; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14679; GFX90A-NEXT:    ;;#ASMSTART
14680; GFX90A-NEXT:    ; use s[8:9]
14681; GFX90A-NEXT:    ;;#ASMEND
14682; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14683;
14684; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_7_7:
14685; GFX940:       ; %bb.0:
14686; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14687; GFX940-NEXT:    ;;#ASMSTART
14688; GFX940-NEXT:    ; def s[2:3]
14689; GFX940-NEXT:    ;;#ASMEND
14690; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14691; GFX940-NEXT:    ;;#ASMSTART
14692; GFX940-NEXT:    ; def s[0:1]
14693; GFX940-NEXT:    ;;#ASMEND
14694; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14695; GFX940-NEXT:    ;;#ASMSTART
14696; GFX940-NEXT:    ; use s[8:9]
14697; GFX940-NEXT:    ;;#ASMEND
14698; GFX940-NEXT:    s_setpc_b64 s[30:31]
14699  %vec0 = call <4 x half> asm "; def $0", "=s"()
14700  %vec1 = call <4 x half> asm "; def $0", "=s"()
14701  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 7, i32 7>
14702  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14703  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14704  ret void
14705}
14706
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <1, 7, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lane 0 is %vec0[1]
; and lanes 1 and 2 are both %vec1[3]; both inputs are referenced and the
; checked output contains two defs.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
14707define void @s_shuffle_v3f16_v4f16__1_7_7() {
14708; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_7_7:
14709; GFX900:       ; %bb.0:
14710; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14711; GFX900-NEXT:    ;;#ASMSTART
14712; GFX900-NEXT:    ; def s[4:5]
14713; GFX900-NEXT:    ;;#ASMEND
14714; GFX900-NEXT:    ;;#ASMSTART
14715; GFX900-NEXT:    ; def s[6:7]
14716; GFX900-NEXT:    ;;#ASMEND
14717; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14718; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
14719; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14720; GFX900-NEXT:    ;;#ASMSTART
14721; GFX900-NEXT:    ; use s[8:9]
14722; GFX900-NEXT:    ;;#ASMEND
14723; GFX900-NEXT:    s_setpc_b64 s[30:31]
14724;
14725; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_7_7:
14726; GFX90A:       ; %bb.0:
14727; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14728; GFX90A-NEXT:    ;;#ASMSTART
14729; GFX90A-NEXT:    ; def s[4:5]
14730; GFX90A-NEXT:    ;;#ASMEND
14731; GFX90A-NEXT:    ;;#ASMSTART
14732; GFX90A-NEXT:    ; def s[6:7]
14733; GFX90A-NEXT:    ;;#ASMEND
14734; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14735; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
14736; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14737; GFX90A-NEXT:    ;;#ASMSTART
14738; GFX90A-NEXT:    ; use s[8:9]
14739; GFX90A-NEXT:    ;;#ASMEND
14740; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14741;
14742; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_7_7:
14743; GFX940:       ; %bb.0:
14744; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14745; GFX940-NEXT:    ;;#ASMSTART
14746; GFX940-NEXT:    ; def s[0:1]
14747; GFX940-NEXT:    ;;#ASMEND
14748; GFX940-NEXT:    ;;#ASMSTART
14749; GFX940-NEXT:    ; def s[2:3]
14750; GFX940-NEXT:    ;;#ASMEND
14751; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14752; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
14753; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14754; GFX940-NEXT:    ;;#ASMSTART
14755; GFX940-NEXT:    ; use s[8:9]
14756; GFX940-NEXT:    ;;#ASMEND
14757; GFX940-NEXT:    s_setpc_b64 s[30:31]
14758  %vec0 = call <4 x half> asm "; def $0", "=s"()
14759  %vec1 = call <4 x half> asm "; def $0", "=s"()
14760  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 7, i32 7>
14761  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14762  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14763  ret void
14764}
14765
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <2, 7, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lane 0 is %vec0[2]
; and lanes 1 and 2 are both %vec1[3]; both inputs are referenced and the
; checked output contains two defs.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
14766define void @s_shuffle_v3f16_v4f16__2_7_7() {
14767; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_7_7:
14768; GFX900:       ; %bb.0:
14769; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14770; GFX900-NEXT:    ;;#ASMSTART
14771; GFX900-NEXT:    ; def s[6:7]
14772; GFX900-NEXT:    ;;#ASMEND
14773; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14774; GFX900-NEXT:    ;;#ASMSTART
14775; GFX900-NEXT:    ; def s[4:5]
14776; GFX900-NEXT:    ;;#ASMEND
14777; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
14778; GFX900-NEXT:    ;;#ASMSTART
14779; GFX900-NEXT:    ; use s[8:9]
14780; GFX900-NEXT:    ;;#ASMEND
14781; GFX900-NEXT:    s_setpc_b64 s[30:31]
14782;
14783; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_7_7:
14784; GFX90A:       ; %bb.0:
14785; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14786; GFX90A-NEXT:    ;;#ASMSTART
14787; GFX90A-NEXT:    ; def s[6:7]
14788; GFX90A-NEXT:    ;;#ASMEND
14789; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14790; GFX90A-NEXT:    ;;#ASMSTART
14791; GFX90A-NEXT:    ; def s[4:5]
14792; GFX90A-NEXT:    ;;#ASMEND
14793; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s9
14794; GFX90A-NEXT:    ;;#ASMSTART
14795; GFX90A-NEXT:    ; use s[8:9]
14796; GFX90A-NEXT:    ;;#ASMEND
14797; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14798;
14799; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_7_7:
14800; GFX940:       ; %bb.0:
14801; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14802; GFX940-NEXT:    ;;#ASMSTART
14803; GFX940-NEXT:    ; def s[2:3]
14804; GFX940-NEXT:    ;;#ASMEND
14805; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14806; GFX940-NEXT:    ;;#ASMSTART
14807; GFX940-NEXT:    ; def s[0:1]
14808; GFX940-NEXT:    ;;#ASMEND
14809; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s9
14810; GFX940-NEXT:    ;;#ASMSTART
14811; GFX940-NEXT:    ; use s[8:9]
14812; GFX940-NEXT:    ;;#ASMEND
14813; GFX940-NEXT:    s_setpc_b64 s[30:31]
14814  %vec0 = call <4 x half> asm "; def $0", "=s"()
14815  %vec1 = call <4 x half> asm "; def $0", "=s"()
14816  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 7, i32 7>
14817  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14818  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14819  ret void
14820}
14821
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <3, 7, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lane 0 is %vec0[3]
; and lanes 1 and 2 are both %vec1[3]; both inputs are referenced and the
; checked output contains two defs.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
14822define void @s_shuffle_v3f16_v4f16__3_7_7() {
14823; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_7_7:
14824; GFX900:       ; %bb.0:
14825; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14826; GFX900-NEXT:    ;;#ASMSTART
14827; GFX900-NEXT:    ; def s[4:5]
14828; GFX900-NEXT:    ;;#ASMEND
14829; GFX900-NEXT:    ;;#ASMSTART
14830; GFX900-NEXT:    ; def s[6:7]
14831; GFX900-NEXT:    ;;#ASMEND
14832; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
14833; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
14834; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14835; GFX900-NEXT:    ;;#ASMSTART
14836; GFX900-NEXT:    ; use s[8:9]
14837; GFX900-NEXT:    ;;#ASMEND
14838; GFX900-NEXT:    s_setpc_b64 s[30:31]
14839;
14840; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_7_7:
14841; GFX90A:       ; %bb.0:
14842; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14843; GFX90A-NEXT:    ;;#ASMSTART
14844; GFX90A-NEXT:    ; def s[4:5]
14845; GFX90A-NEXT:    ;;#ASMEND
14846; GFX90A-NEXT:    ;;#ASMSTART
14847; GFX90A-NEXT:    ; def s[6:7]
14848; GFX90A-NEXT:    ;;#ASMEND
14849; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
14850; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
14851; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14852; GFX90A-NEXT:    ;;#ASMSTART
14853; GFX90A-NEXT:    ; use s[8:9]
14854; GFX90A-NEXT:    ;;#ASMEND
14855; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14856;
14857; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_7_7:
14858; GFX940:       ; %bb.0:
14859; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14860; GFX940-NEXT:    ;;#ASMSTART
14861; GFX940-NEXT:    ; def s[0:1]
14862; GFX940-NEXT:    ;;#ASMEND
14863; GFX940-NEXT:    ;;#ASMSTART
14864; GFX940-NEXT:    ; def s[2:3]
14865; GFX940-NEXT:    ;;#ASMEND
14866; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
14867; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
14868; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14869; GFX940-NEXT:    ;;#ASMSTART
14870; GFX940-NEXT:    ; use s[8:9]
14871; GFX940-NEXT:    ;;#ASMEND
14872; GFX940-NEXT:    s_setpc_b64 s[30:31]
14873  %vec0 = call <4 x half> asm "; def $0", "=s"()
14874  %vec1 = call <4 x half> asm "; def $0", "=s"()
14875  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 7, i32 7>
14876  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14877  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14878  ret void
14879}
14880
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <4, 7, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lane 0 is %vec1[0]
; and lanes 1 and 2 are both %vec1[3]; %vec0 is not referenced by the mask and
; the checked output contains a single def.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
14881define void @s_shuffle_v3f16_v4f16__4_7_7() {
14882; GFX900-LABEL: s_shuffle_v3f16_v4f16__4_7_7:
14883; GFX900:       ; %bb.0:
14884; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14885; GFX900-NEXT:    ;;#ASMSTART
14886; GFX900-NEXT:    ; def s[4:5]
14887; GFX900-NEXT:    ;;#ASMEND
14888; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
14889; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14890; GFX900-NEXT:    ;;#ASMSTART
14891; GFX900-NEXT:    ; use s[8:9]
14892; GFX900-NEXT:    ;;#ASMEND
14893; GFX900-NEXT:    s_setpc_b64 s[30:31]
14894;
14895; GFX90A-LABEL: s_shuffle_v3f16_v4f16__4_7_7:
14896; GFX90A:       ; %bb.0:
14897; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14898; GFX90A-NEXT:    ;;#ASMSTART
14899; GFX90A-NEXT:    ; def s[4:5]
14900; GFX90A-NEXT:    ;;#ASMEND
14901; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
14902; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14903; GFX90A-NEXT:    ;;#ASMSTART
14904; GFX90A-NEXT:    ; use s[8:9]
14905; GFX90A-NEXT:    ;;#ASMEND
14906; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14907;
14908; GFX940-LABEL: s_shuffle_v3f16_v4f16__4_7_7:
14909; GFX940:       ; %bb.0:
14910; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14911; GFX940-NEXT:    ;;#ASMSTART
14912; GFX940-NEXT:    ; def s[0:1]
14913; GFX940-NEXT:    ;;#ASMEND
14914; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
14915; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14916; GFX940-NEXT:    ;;#ASMSTART
14917; GFX940-NEXT:    ; use s[8:9]
14918; GFX940-NEXT:    ;;#ASMEND
14919; GFX940-NEXT:    s_setpc_b64 s[30:31]
14920  %vec0 = call <4 x half> asm "; def $0", "=s"()
14921  %vec1 = call <4 x half> asm "; def $0", "=s"()
14922  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 7, i32 7>
14923  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14924  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14925  ret void
14926}
14927
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <5, 7, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lane 0 is %vec1[1]
; and lanes 1 and 2 are both %vec1[3]; %vec0 is not referenced by the mask and
; the checked output contains a single def.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
14928define void @s_shuffle_v3f16_v4f16__5_7_7() {
14929; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_7_7:
14930; GFX900:       ; %bb.0:
14931; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14932; GFX900-NEXT:    ;;#ASMSTART
14933; GFX900-NEXT:    ; def s[4:5]
14934; GFX900-NEXT:    ;;#ASMEND
14935; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
14936; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
14937; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14938; GFX900-NEXT:    ;;#ASMSTART
14939; GFX900-NEXT:    ; use s[8:9]
14940; GFX900-NEXT:    ;;#ASMEND
14941; GFX900-NEXT:    s_setpc_b64 s[30:31]
14942;
14943; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_7_7:
14944; GFX90A:       ; %bb.0:
14945; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14946; GFX90A-NEXT:    ;;#ASMSTART
14947; GFX90A-NEXT:    ; def s[4:5]
14948; GFX90A-NEXT:    ;;#ASMEND
14949; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
14950; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
14951; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s9
14952; GFX90A-NEXT:    ;;#ASMSTART
14953; GFX90A-NEXT:    ; use s[8:9]
14954; GFX90A-NEXT:    ;;#ASMEND
14955; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14956;
14957; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_7_7:
14958; GFX940:       ; %bb.0:
14959; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14960; GFX940-NEXT:    ;;#ASMSTART
14961; GFX940-NEXT:    ; def s[0:1]
14962; GFX940-NEXT:    ;;#ASMEND
14963; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
14964; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
14965; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s9
14966; GFX940-NEXT:    ;;#ASMSTART
14967; GFX940-NEXT:    ; use s[8:9]
14968; GFX940-NEXT:    ;;#ASMEND
14969; GFX940-NEXT:    s_setpc_b64 s[30:31]
14970  %vec0 = call <4 x half> asm "; def $0", "=s"()
14971  %vec1 = call <4 x half> asm "; def $0", "=s"()
14972  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 7, i32 7>
14973  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
14974  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
14975  ret void
14976}
14977
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <6, 7, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lane 0 is %vec1[2]
; and lanes 1 and 2 are both %vec1[3]; %vec0 is not referenced by the mask and
; the checked output contains a single def. Lanes 0-1 are the last two
; elements of %vec1 in order, which the checks show as a plain s_mov_b32.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
14978define void @s_shuffle_v3f16_v4f16__6_7_7() {
14979; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_7_7:
14980; GFX900:       ; %bb.0:
14981; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14982; GFX900-NEXT:    ;;#ASMSTART
14983; GFX900-NEXT:    ; def s[4:5]
14984; GFX900-NEXT:    ;;#ASMEND
14985; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
14986; GFX900-NEXT:    s_mov_b32 s8, s5
14987; GFX900-NEXT:    ;;#ASMSTART
14988; GFX900-NEXT:    ; use s[8:9]
14989; GFX900-NEXT:    ;;#ASMEND
14990; GFX900-NEXT:    s_setpc_b64 s[30:31]
14991;
14992; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_7_7:
14993; GFX90A:       ; %bb.0:
14994; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14995; GFX90A-NEXT:    ;;#ASMSTART
14996; GFX90A-NEXT:    ; def s[4:5]
14997; GFX90A-NEXT:    ;;#ASMEND
14998; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
14999; GFX90A-NEXT:    s_mov_b32 s8, s5
15000; GFX90A-NEXT:    ;;#ASMSTART
15001; GFX90A-NEXT:    ; use s[8:9]
15002; GFX90A-NEXT:    ;;#ASMEND
15003; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15004;
15005; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_7_7:
15006; GFX940:       ; %bb.0:
15007; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15008; GFX940-NEXT:    ;;#ASMSTART
15009; GFX940-NEXT:    ; def s[0:1]
15010; GFX940-NEXT:    ;;#ASMEND
15011; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
15012; GFX940-NEXT:    s_mov_b32 s8, s1
15013; GFX940-NEXT:    ;;#ASMSTART
15014; GFX940-NEXT:    ; use s[8:9]
15015; GFX940-NEXT:    ;;#ASMEND
15016; GFX940-NEXT:    s_setpc_b64 s[30:31]
15017  %vec0 = call <4 x half> asm "; def $0", "=s"()
15018  %vec1 = call <4 x half> asm "; def $0", "=s"()
15019  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 7, i32 7>
15020  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15021  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
15022  ret void
15023}
15024
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask
; <7, poison, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lanes 0 and 2 are
; both %vec1[3] and lane 1 is left unspecified; %vec0 is not referenced by the
; mask and the checked output contains a single def.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
15025define void @s_shuffle_v3f16_v4f16__7_u_7() {
15026; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_7:
15027; GFX900:       ; %bb.0:
15028; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15029; GFX900-NEXT:    ;;#ASMSTART
15030; GFX900-NEXT:    ; def s[4:5]
15031; GFX900-NEXT:    ;;#ASMEND
15032; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
15033; GFX900-NEXT:    s_mov_b32 s9, s8
15034; GFX900-NEXT:    ;;#ASMSTART
15035; GFX900-NEXT:    ; use s[8:9]
15036; GFX900-NEXT:    ;;#ASMEND
15037; GFX900-NEXT:    s_setpc_b64 s[30:31]
15038;
15039; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_7:
15040; GFX90A:       ; %bb.0:
15041; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15042; GFX90A-NEXT:    ;;#ASMSTART
15043; GFX90A-NEXT:    ; def s[4:5]
15044; GFX90A-NEXT:    ;;#ASMEND
15045; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
15046; GFX90A-NEXT:    s_mov_b32 s9, s8
15047; GFX90A-NEXT:    ;;#ASMSTART
15048; GFX90A-NEXT:    ; use s[8:9]
15049; GFX90A-NEXT:    ;;#ASMEND
15050; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15051;
15052; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_7:
15053; GFX940:       ; %bb.0:
15054; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15055; GFX940-NEXT:    ;;#ASMSTART
15056; GFX940-NEXT:    ; def s[0:1]
15057; GFX940-NEXT:    ;;#ASMEND
15058; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
15059; GFX940-NEXT:    s_mov_b32 s9, s8
15060; GFX940-NEXT:    ;;#ASMSTART
15061; GFX940-NEXT:    ; use s[8:9]
15062; GFX940-NEXT:    ;;#ASMEND
15063; GFX940-NEXT:    s_setpc_b64 s[30:31]
15064  %vec0 = call <4 x half> asm "; def $0", "=s"()
15065  %vec1 = call <4 x half> asm "; def $0", "=s"()
15066  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 7>
15067  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15068  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
15069  ret void
15070}
15071
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <7, 0, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lanes 0 and 2 are
; both %vec1[3] and lane 1 is %vec0[0]; both inputs are referenced and the
; checked output contains two defs.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
15072define void @s_shuffle_v3f16_v4f16__7_0_7() {
15073; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_7:
15074; GFX900:       ; %bb.0:
15075; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15076; GFX900-NEXT:    ;;#ASMSTART
15077; GFX900-NEXT:    ; def s[6:7]
15078; GFX900-NEXT:    ;;#ASMEND
15079; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
15080; GFX900-NEXT:    ;;#ASMSTART
15081; GFX900-NEXT:    ; def s[4:5]
15082; GFX900-NEXT:    ;;#ASMEND
15083; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15084; GFX900-NEXT:    ;;#ASMSTART
15085; GFX900-NEXT:    ; use s[8:9]
15086; GFX900-NEXT:    ;;#ASMEND
15087; GFX900-NEXT:    s_setpc_b64 s[30:31]
15088;
15089; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_7:
15090; GFX90A:       ; %bb.0:
15091; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15092; GFX90A-NEXT:    ;;#ASMSTART
15093; GFX90A-NEXT:    ; def s[6:7]
15094; GFX90A-NEXT:    ;;#ASMEND
15095; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
15096; GFX90A-NEXT:    ;;#ASMSTART
15097; GFX90A-NEXT:    ; def s[4:5]
15098; GFX90A-NEXT:    ;;#ASMEND
15099; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15100; GFX90A-NEXT:    ;;#ASMSTART
15101; GFX90A-NEXT:    ; use s[8:9]
15102; GFX90A-NEXT:    ;;#ASMEND
15103; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15104;
15105; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_7:
15106; GFX940:       ; %bb.0:
15107; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15108; GFX940-NEXT:    ;;#ASMSTART
15109; GFX940-NEXT:    ; def s[2:3]
15110; GFX940-NEXT:    ;;#ASMEND
15111; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
15112; GFX940-NEXT:    ;;#ASMSTART
15113; GFX940-NEXT:    ; def s[0:1]
15114; GFX940-NEXT:    ;;#ASMEND
15115; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
15116; GFX940-NEXT:    ;;#ASMSTART
15117; GFX940-NEXT:    ; use s[8:9]
15118; GFX940-NEXT:    ;;#ASMEND
15119; GFX940-NEXT:    s_setpc_b64 s[30:31]
15120  %vec0 = call <4 x half> asm "; def $0", "=s"()
15121  %vec1 = call <4 x half> asm "; def $0", "=s"()
15122  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 7>
15123  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15124  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
15125  ret void
15126}
15127
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <7, 1, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lanes 0 and 2 are
; both %vec1[3] and lane 1 is %vec0[1]; both inputs are referenced and the
; checked output contains two defs.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
15128define void @s_shuffle_v3f16_v4f16__7_1_7() {
15129; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_7:
15130; GFX900:       ; %bb.0:
15131; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15132; GFX900-NEXT:    ;;#ASMSTART
15133; GFX900-NEXT:    ; def s[4:5]
15134; GFX900-NEXT:    ;;#ASMEND
15135; GFX900-NEXT:    ;;#ASMSTART
15136; GFX900-NEXT:    ; def s[6:7]
15137; GFX900-NEXT:    ;;#ASMEND
15138; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
15139; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
15140; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15141; GFX900-NEXT:    ;;#ASMSTART
15142; GFX900-NEXT:    ; use s[8:9]
15143; GFX900-NEXT:    ;;#ASMEND
15144; GFX900-NEXT:    s_setpc_b64 s[30:31]
15145;
15146; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_7:
15147; GFX90A:       ; %bb.0:
15148; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15149; GFX90A-NEXT:    ;;#ASMSTART
15150; GFX90A-NEXT:    ; def s[4:5]
15151; GFX90A-NEXT:    ;;#ASMEND
15152; GFX90A-NEXT:    ;;#ASMSTART
15153; GFX90A-NEXT:    ; def s[6:7]
15154; GFX90A-NEXT:    ;;#ASMEND
15155; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
15156; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
15157; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15158; GFX90A-NEXT:    ;;#ASMSTART
15159; GFX90A-NEXT:    ; use s[8:9]
15160; GFX90A-NEXT:    ;;#ASMEND
15161; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15162;
15163; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_7:
15164; GFX940:       ; %bb.0:
15165; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15166; GFX940-NEXT:    ;;#ASMSTART
15167; GFX940-NEXT:    ; def s[0:1]
15168; GFX940-NEXT:    ;;#ASMEND
15169; GFX940-NEXT:    ;;#ASMSTART
15170; GFX940-NEXT:    ; def s[2:3]
15171; GFX940-NEXT:    ;;#ASMEND
15172; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
15173; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
15174; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
15175; GFX940-NEXT:    ;;#ASMSTART
15176; GFX940-NEXT:    ; use s[8:9]
15177; GFX940-NEXT:    ;;#ASMEND
15178; GFX940-NEXT:    s_setpc_b64 s[30:31]
15179  %vec0 = call <4 x half> asm "; def $0", "=s"()
15180  %vec1 = call <4 x half> asm "; def $0", "=s"()
15181  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 7>
15182  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15183  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
15184  ret void
15185}
15186
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <7, 2, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lanes 0 and 2 are
; both %vec1[3] and lane 1 is %vec0[2]; both inputs are referenced and the
; checked output contains two defs.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
15187define void @s_shuffle_v3f16_v4f16__7_2_7() {
15188; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_7:
15189; GFX900:       ; %bb.0:
15190; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15191; GFX900-NEXT:    ;;#ASMSTART
15192; GFX900-NEXT:    ; def s[6:7]
15193; GFX900-NEXT:    ;;#ASMEND
15194; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
15195; GFX900-NEXT:    ;;#ASMSTART
15196; GFX900-NEXT:    ; def s[4:5]
15197; GFX900-NEXT:    ;;#ASMEND
15198; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
15199; GFX900-NEXT:    ;;#ASMSTART
15200; GFX900-NEXT:    ; use s[8:9]
15201; GFX900-NEXT:    ;;#ASMEND
15202; GFX900-NEXT:    s_setpc_b64 s[30:31]
15203;
15204; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_7:
15205; GFX90A:       ; %bb.0:
15206; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15207; GFX90A-NEXT:    ;;#ASMSTART
15208; GFX90A-NEXT:    ; def s[6:7]
15209; GFX90A-NEXT:    ;;#ASMEND
15210; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
15211; GFX90A-NEXT:    ;;#ASMSTART
15212; GFX90A-NEXT:    ; def s[4:5]
15213; GFX90A-NEXT:    ;;#ASMEND
15214; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
15215; GFX90A-NEXT:    ;;#ASMSTART
15216; GFX90A-NEXT:    ; use s[8:9]
15217; GFX90A-NEXT:    ;;#ASMEND
15218; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15219;
15220; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_7:
15221; GFX940:       ; %bb.0:
15222; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15223; GFX940-NEXT:    ;;#ASMSTART
15224; GFX940-NEXT:    ; def s[2:3]
15225; GFX940-NEXT:    ;;#ASMEND
15226; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
15227; GFX940-NEXT:    ;;#ASMSTART
15228; GFX940-NEXT:    ; def s[0:1]
15229; GFX940-NEXT:    ;;#ASMEND
15230; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s1
15231; GFX940-NEXT:    ;;#ASMSTART
15232; GFX940-NEXT:    ; use s[8:9]
15233; GFX940-NEXT:    ;;#ASMEND
15234; GFX940-NEXT:    s_setpc_b64 s[30:31]
15235  %vec0 = call <4 x half> asm "; def $0", "=s"()
15236  %vec1 = call <4 x half> asm "; def $0", "=s"()
15237  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 7>
15238  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15239  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
15240  ret void
15241}
15242
; Shuffle of two "=s"-constrained <4 x half> inline-asm values with mask <7, 3, 7>.
; Mask indices 0-3 address %vec0 and 4-7 address %vec1, so lanes 0 and 2 are
; both %vec1[3] and lane 1 is %vec0[3]; both inputs are referenced and the
; checked output contains two defs.
; The <3 x half> result is widened with a poison lane to <4 x half> and handed
; to the "use" inline asm in s[8:9].
15243define void @s_shuffle_v3f16_v4f16__7_3_7() {
15244; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_7:
15245; GFX900:       ; %bb.0:
15246; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15247; GFX900-NEXT:    ;;#ASMSTART
15248; GFX900-NEXT:    ; def s[4:5]
15249; GFX900-NEXT:    ;;#ASMEND
15250; GFX900-NEXT:    ;;#ASMSTART
15251; GFX900-NEXT:    ; def s[6:7]
15252; GFX900-NEXT:    ;;#ASMEND
15253; GFX900-NEXT:    s_lshr_b32 s4, s5, 16
15254; GFX900-NEXT:    s_lshr_b32 s9, s7, 16
15255; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15256; GFX900-NEXT:    ;;#ASMSTART
15257; GFX900-NEXT:    ; use s[8:9]
15258; GFX900-NEXT:    ;;#ASMEND
15259; GFX900-NEXT:    s_setpc_b64 s[30:31]
15260;
15261; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_7:
15262; GFX90A:       ; %bb.0:
15263; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15264; GFX90A-NEXT:    ;;#ASMSTART
15265; GFX90A-NEXT:    ; def s[4:5]
15266; GFX90A-NEXT:    ;;#ASMEND
15267; GFX90A-NEXT:    ;;#ASMSTART
15268; GFX90A-NEXT:    ; def s[6:7]
15269; GFX90A-NEXT:    ;;#ASMEND
15270; GFX90A-NEXT:    s_lshr_b32 s4, s5, 16
15271; GFX90A-NEXT:    s_lshr_b32 s9, s7, 16
15272; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15273; GFX90A-NEXT:    ;;#ASMSTART
15274; GFX90A-NEXT:    ; use s[8:9]
15275; GFX90A-NEXT:    ;;#ASMEND
15276; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15277;
15278; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_7:
15279; GFX940:       ; %bb.0:
15280; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15281; GFX940-NEXT:    ;;#ASMSTART
15282; GFX940-NEXT:    ; def s[0:1]
15283; GFX940-NEXT:    ;;#ASMEND
15284; GFX940-NEXT:    ;;#ASMSTART
15285; GFX940-NEXT:    ; def s[2:3]
15286; GFX940-NEXT:    ;;#ASMEND
15287; GFX940-NEXT:    s_lshr_b32 s0, s1, 16
15288; GFX940-NEXT:    s_lshr_b32 s9, s3, 16
15289; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
15290; GFX940-NEXT:    ;;#ASMSTART
15291; GFX940-NEXT:    ; use s[8:9]
15292; GFX940-NEXT:    ;;#ASMEND
15293; GFX940-NEXT:    s_setpc_b64 s[30:31]
15294  %vec0 = call <4 x half> asm "; def $0", "=s"()
15295  %vec1 = call <4 x half> asm "; def $0", "=s"()
15296  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 7>
15297  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15298  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
15299  ret void
15300}
15301
; Scalar-register test of a <3 x half> shufflevector with mask <7, 4, 7>. All
; three indices are >= 4, so every result lane comes from %vec1: lanes 0 and 2
; select its element 3 and lane 1 selects its element 0. The expected output
; contains a single "; def" block (presumably the unused, non-sideeffect def of
; %vec0 is dropped -- confirm against llc if this matters), one 16-bit right
; shift of the second def register, and s_pack_ll_b32_b16 combining the shifted
; value with the unshifted first def register.
15302define void @s_shuffle_v3f16_v4f16__7_4_7() {
15303; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_7:
15304; GFX900:       ; %bb.0:
15305; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15306; GFX900-NEXT:    ;;#ASMSTART
15307; GFX900-NEXT:    ; def s[4:5]
15308; GFX900-NEXT:    ;;#ASMEND
15309; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
15310; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15311; GFX900-NEXT:    ;;#ASMSTART
15312; GFX900-NEXT:    ; use s[8:9]
15313; GFX900-NEXT:    ;;#ASMEND
15314; GFX900-NEXT:    s_setpc_b64 s[30:31]
15315;
15316; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_7:
15317; GFX90A:       ; %bb.0:
15318; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15319; GFX90A-NEXT:    ;;#ASMSTART
15320; GFX90A-NEXT:    ; def s[4:5]
15321; GFX90A-NEXT:    ;;#ASMEND
15322; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
15323; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15324; GFX90A-NEXT:    ;;#ASMSTART
15325; GFX90A-NEXT:    ; use s[8:9]
15326; GFX90A-NEXT:    ;;#ASMEND
15327; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15328;
15329; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_7:
15330; GFX940:       ; %bb.0:
15331; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15332; GFX940-NEXT:    ;;#ASMSTART
15333; GFX940-NEXT:    ; def s[0:1]
15334; GFX940-NEXT:    ;;#ASMEND
15335; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
15336; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
15337; GFX940-NEXT:    ;;#ASMSTART
15338; GFX940-NEXT:    ; use s[8:9]
15339; GFX940-NEXT:    ;;#ASMEND
15340; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from opaque inline asm with an "=s" output constraint;
; %vec0 is never selected by the mask below.
15341  %vec0 = call <4 x half> asm "; def $0", "=s"()
15342  %vec1 = call <4 x half> asm "; def $0", "=s"()
15343  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 7>
; Widen the 3-element result to <4 x half> (lane 3 is poison) so it can be
; handed to the side-effecting asm use that is pinned to s[8:9].
15344  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15345  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
15346  ret void
15347}
15348
; Scalar-register test of a <3 x half> shufflevector with mask <7, 5, 7>. All
; three indices are >= 4, so every result lane comes from %vec1: lanes 0 and 2
; select its element 3 and lane 1 selects its element 1. The expected output
; contains a single "; def" block, a 16-bit right shift of each register of
; that def pair, and s_pack_ll_b32_b16 combining the two shifted values.
15349define void @s_shuffle_v3f16_v4f16__7_5_7() {
15350; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_7:
15351; GFX900:       ; %bb.0:
15352; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15353; GFX900-NEXT:    ;;#ASMSTART
15354; GFX900-NEXT:    ; def s[4:5]
15355; GFX900-NEXT:    ;;#ASMEND
15356; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
15357; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
15358; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15359; GFX900-NEXT:    ;;#ASMSTART
15360; GFX900-NEXT:    ; use s[8:9]
15361; GFX900-NEXT:    ;;#ASMEND
15362; GFX900-NEXT:    s_setpc_b64 s[30:31]
15363;
15364; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_7:
15365; GFX90A:       ; %bb.0:
15366; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15367; GFX90A-NEXT:    ;;#ASMSTART
15368; GFX90A-NEXT:    ; def s[4:5]
15369; GFX90A-NEXT:    ;;#ASMEND
15370; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
15371; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
15372; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s4
15373; GFX90A-NEXT:    ;;#ASMSTART
15374; GFX90A-NEXT:    ; use s[8:9]
15375; GFX90A-NEXT:    ;;#ASMEND
15376; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15377;
15378; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_7:
15379; GFX940:       ; %bb.0:
15380; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15381; GFX940-NEXT:    ;;#ASMSTART
15382; GFX940-NEXT:    ; def s[0:1]
15383; GFX940-NEXT:    ;;#ASMEND
15384; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
15385; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
15386; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s0
15387; GFX940-NEXT:    ;;#ASMSTART
15388; GFX940-NEXT:    ; use s[8:9]
15389; GFX940-NEXT:    ;;#ASMEND
15390; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from opaque inline asm with an "=s" output constraint;
; %vec0 is never selected by the mask below.
15391  %vec0 = call <4 x half> asm "; def $0", "=s"()
15392  %vec1 = call <4 x half> asm "; def $0", "=s"()
15393  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 7>
; Widen the 3-element result to <4 x half> (lane 3 is poison) so it can be
; handed to the side-effecting asm use that is pinned to s[8:9].
15394  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15395  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
15396  ret void
15397}
15398
; Scalar-register test of a <3 x half> shufflevector with mask <7, 6, 7>. All
; three indices are >= 4, so every result lane comes from %vec1: lanes 0 and 2
; select its element 3 and lane 1 selects its element 2. The expected output
; contains a single "; def" block and touches only the second register of that
; def pair: one 16-bit right shift of it, then s_pack_ll_b32_b16 combining the
; shifted value with the unshifted register.
15399define void @s_shuffle_v3f16_v4f16__7_6_7() {
15400; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_7:
15401; GFX900:       ; %bb.0:
15402; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15403; GFX900-NEXT:    ;;#ASMSTART
15404; GFX900-NEXT:    ; def s[4:5]
15405; GFX900-NEXT:    ;;#ASMEND
15406; GFX900-NEXT:    s_lshr_b32 s9, s5, 16
15407; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
15408; GFX900-NEXT:    ;;#ASMSTART
15409; GFX900-NEXT:    ; use s[8:9]
15410; GFX900-NEXT:    ;;#ASMEND
15411; GFX900-NEXT:    s_setpc_b64 s[30:31]
15412;
15413; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_7:
15414; GFX90A:       ; %bb.0:
15415; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15416; GFX90A-NEXT:    ;;#ASMSTART
15417; GFX90A-NEXT:    ; def s[4:5]
15418; GFX90A-NEXT:    ;;#ASMEND
15419; GFX90A-NEXT:    s_lshr_b32 s9, s5, 16
15420; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s9, s5
15421; GFX90A-NEXT:    ;;#ASMSTART
15422; GFX90A-NEXT:    ; use s[8:9]
15423; GFX90A-NEXT:    ;;#ASMEND
15424; GFX90A-NEXT:    s_setpc_b64 s[30:31]
15425;
15426; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_7:
15427; GFX940:       ; %bb.0:
15428; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15429; GFX940-NEXT:    ;;#ASMSTART
15430; GFX940-NEXT:    ; def s[0:1]
15431; GFX940-NEXT:    ;;#ASMEND
15432; GFX940-NEXT:    s_lshr_b32 s9, s1, 16
15433; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s9, s1
15434; GFX940-NEXT:    ;;#ASMSTART
15435; GFX940-NEXT:    ; use s[8:9]
15436; GFX940-NEXT:    ;;#ASMEND
15437; GFX940-NEXT:    s_setpc_b64 s[30:31]
; Both inputs come from opaque inline asm with an "=s" output constraint;
; %vec0 is never selected by the mask below.
15438  %vec0 = call <4 x half> asm "; def $0", "=s"()
15439  %vec1 = call <4 x half> asm "; def $0", "=s"()
15440  %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 7>
; Widen the 3-element result to <4 x half> (lane 3 is poison) so it can be
; handed to the side-effecting asm use that is pinned to s[8:9].
15441  %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
15442  call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3)
15443  ret void
15444}
15445;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
15446; GFX90APLUS: {{.*}}
15447