xref: /llvm-project/llvm/test/CodeGen/AMDGPU/shufflevector.v2f16.v3f16.ll (revision 585858aeb6247b3892218edb9d353c63f1c33186)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
5
6
; Shuffle of a <3 x half> source into <2 x half> where the whole mask is
; poison, so neither result lane names a source element.
; The shared GFX9 checks expect only the entry s_waitcnt and the return;
; no inline-asm def and no store appear in the expected output.
7define void @v_shuffle_v2f16_v3f16__u_u(ptr addrspace(1) inreg %ptr) {
8; GFX9-LABEL: v_shuffle_v2f16_v3f16__u_u:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    s_setpc_b64 s[30:31]
12  %vec0 = call <4 x half> asm "; def $0", "=v"()
13  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
14  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> poison
15  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
16  ret void
17}
18
; Mask <0, poison>: lane 0 takes element 0 of %extract3 (the low three
; elements of the asm-defined <4 x half>); lane 1 is poison.
; All three targets' checks expect v0, the first register of the asm-defined
; pair, to be stored unchanged by a single global_store_dword.
19define void @v_shuffle_v2f16_v3f16__0_u(ptr addrspace(1) inreg %ptr) {
20; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_u:
21; GFX900:       ; %bb.0:
22; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GFX900-NEXT:    v_mov_b32_e32 v2, 0
24; GFX900-NEXT:    ;;#ASMSTART
25; GFX900-NEXT:    ; def v[0:1]
26; GFX900-NEXT:    ;;#ASMEND
27; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
28; GFX900-NEXT:    s_waitcnt vmcnt(0)
29; GFX900-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_u:
32; GFX90A:       ; %bb.0:
33; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
35; GFX90A-NEXT:    ;;#ASMSTART
36; GFX90A-NEXT:    ; def v[0:1]
37; GFX90A-NEXT:    ;;#ASMEND
38; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
39; GFX90A-NEXT:    s_waitcnt vmcnt(0)
40; GFX90A-NEXT:    s_setpc_b64 s[30:31]
41;
42; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_u:
43; GFX940:       ; %bb.0:
44; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GFX940-NEXT:    v_mov_b32_e32 v2, 0
46; GFX940-NEXT:    ;;#ASMSTART
47; GFX940-NEXT:    ; def v[0:1]
48; GFX940-NEXT:    ;;#ASMEND
49; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
50; GFX940-NEXT:    s_waitcnt vmcnt(0)
51; GFX940-NEXT:    s_setpc_b64 s[30:31]
52  %vec0 = call <4 x half> asm "; def $0", "=v"()
53  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
54  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 poison>
55  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
56  ret void
57}
58
; Mask <1, poison>: lane 0 takes element 1 of %extract3; lane 1 is poison.
; All three targets' checks expect a v_alignbit_b32 by 16 with v0 as the
; second source ahead of the store; the first source is an SGPR that the
; expected output never initializes (s4, or s0 on GFX940).
59define void @v_shuffle_v2f16_v3f16__1_u(ptr addrspace(1) inreg %ptr) {
60; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_u:
61; GFX900:       ; %bb.0:
62; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
63; GFX900-NEXT:    ;;#ASMSTART
64; GFX900-NEXT:    ; def v[0:1]
65; GFX900-NEXT:    ;;#ASMEND
66; GFX900-NEXT:    v_mov_b32_e32 v2, 0
67; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
68; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
69; GFX900-NEXT:    s_waitcnt vmcnt(0)
70; GFX900-NEXT:    s_setpc_b64 s[30:31]
71;
72; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_u:
73; GFX90A:       ; %bb.0:
74; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX90A-NEXT:    ;;#ASMSTART
76; GFX90A-NEXT:    ; def v[0:1]
77; GFX90A-NEXT:    ;;#ASMEND
78; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
79; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
80; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
81; GFX90A-NEXT:    s_waitcnt vmcnt(0)
82; GFX90A-NEXT:    s_setpc_b64 s[30:31]
83;
84; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_u:
85; GFX940:       ; %bb.0:
86; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87; GFX940-NEXT:    ;;#ASMSTART
88; GFX940-NEXT:    ; def v[0:1]
89; GFX940-NEXT:    ;;#ASMEND
90; GFX940-NEXT:    v_mov_b32_e32 v2, 0
91; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
92; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
93; GFX940-NEXT:    s_waitcnt vmcnt(0)
94; GFX940-NEXT:    s_setpc_b64 s[30:31]
95  %vec0 = call <4 x half> asm "; def $0", "=v"()
96  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
97  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 poison>
98  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
99  ret void
100}
101
; Mask <2, poison>: lane 0 takes element 2 of %extract3; lane 1 is poison.
; All three targets' checks expect v1, the second register of the
; asm-defined pair, to be stored unchanged by a single global_store_dword.
102define void @v_shuffle_v2f16_v3f16__2_u(ptr addrspace(1) inreg %ptr) {
103; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_u:
104; GFX900:       ; %bb.0:
105; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
106; GFX900-NEXT:    v_mov_b32_e32 v2, 0
107; GFX900-NEXT:    ;;#ASMSTART
108; GFX900-NEXT:    ; def v[0:1]
109; GFX900-NEXT:    ;;#ASMEND
110; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
111; GFX900-NEXT:    s_waitcnt vmcnt(0)
112; GFX900-NEXT:    s_setpc_b64 s[30:31]
113;
114; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_u:
115; GFX90A:       ; %bb.0:
116; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
118; GFX90A-NEXT:    ;;#ASMSTART
119; GFX90A-NEXT:    ; def v[0:1]
120; GFX90A-NEXT:    ;;#ASMEND
121; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
122; GFX90A-NEXT:    s_waitcnt vmcnt(0)
123; GFX90A-NEXT:    s_setpc_b64 s[30:31]
124;
125; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_u:
126; GFX940:       ; %bb.0:
127; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; GFX940-NEXT:    v_mov_b32_e32 v2, 0
129; GFX940-NEXT:    ;;#ASMSTART
130; GFX940-NEXT:    ; def v[0:1]
131; GFX940-NEXT:    ;;#ASMEND
132; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
133; GFX940-NEXT:    s_waitcnt vmcnt(0)
134; GFX940-NEXT:    s_setpc_b64 s[30:31]
135  %vec0 = call <4 x half> asm "; def $0", "=v"()
136  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
137  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 poison>
138  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
139  ret void
140}
141
; Mask <3, poison>: with three-element operands, index 3 names element 0 of
; the second shufflevector operand, which is poison here; lane 1 is poison.
; The shared GFX9 checks expect only the entry s_waitcnt and the return;
; no inline-asm def and no store appear in the expected output.
142define void @v_shuffle_v2f16_v3f16__3_u(ptr addrspace(1) inreg %ptr) {
143; GFX9-LABEL: v_shuffle_v2f16_v3f16__3_u:
144; GFX9:       ; %bb.0:
145; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146; GFX9-NEXT:    s_setpc_b64 s[30:31]
147  %vec0 = call <4 x half> asm "; def $0", "=v"()
148  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
149  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 poison>
150  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
151  ret void
152}
153
; Two-source form. Mask <4, poison>: index 4 names element 1 of the second
; operand (%extract31, taken from %vec1); lane 1 is poison.
; No mask index selects from %extract3, and all three targets' checks expect
; a single inline-asm def followed by a v_alignbit_b32 by 16 on v0.
154define void @v_shuffle_v2f16_v3f16__4_u(ptr addrspace(1) inreg %ptr) {
155; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_u:
156; GFX900:       ; %bb.0:
157; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
158; GFX900-NEXT:    ;;#ASMSTART
159; GFX900-NEXT:    ; def v[0:1]
160; GFX900-NEXT:    ;;#ASMEND
161; GFX900-NEXT:    v_mov_b32_e32 v2, 0
162; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
163; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
164; GFX900-NEXT:    s_waitcnt vmcnt(0)
165; GFX900-NEXT:    s_setpc_b64 s[30:31]
166;
167; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_u:
168; GFX90A:       ; %bb.0:
169; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; GFX90A-NEXT:    ;;#ASMSTART
171; GFX90A-NEXT:    ; def v[0:1]
172; GFX90A-NEXT:    ;;#ASMEND
173; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
174; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
175; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
176; GFX90A-NEXT:    s_waitcnt vmcnt(0)
177; GFX90A-NEXT:    s_setpc_b64 s[30:31]
178;
179; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_u:
180; GFX940:       ; %bb.0:
181; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182; GFX940-NEXT:    ;;#ASMSTART
183; GFX940-NEXT:    ; def v[0:1]
184; GFX940-NEXT:    ;;#ASMEND
185; GFX940-NEXT:    v_mov_b32_e32 v2, 0
186; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
187; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
188; GFX940-NEXT:    s_waitcnt vmcnt(0)
189; GFX940-NEXT:    s_setpc_b64 s[30:31]
190  %vec0 = call <4 x half> asm "; def $0", "=v"()
191  %vec1 = call <4 x half> asm "; def $0", "=v"()
192  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
193  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
194  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 poison>
195  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
196  ret void
197}
198
; Two-source form. Mask <5, poison>: index 5 names element 2 of the second
; operand (%extract31, taken from %vec1); lane 1 is poison.
; No mask index selects from %extract3, and all three targets' checks expect
; a single inline-asm def whose second register (v1) is stored unchanged.
199define void @v_shuffle_v2f16_v3f16__5_u(ptr addrspace(1) inreg %ptr) {
200; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_u:
201; GFX900:       ; %bb.0:
202; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203; GFX900-NEXT:    v_mov_b32_e32 v2, 0
204; GFX900-NEXT:    ;;#ASMSTART
205; GFX900-NEXT:    ; def v[0:1]
206; GFX900-NEXT:    ;;#ASMEND
207; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
208; GFX900-NEXT:    s_waitcnt vmcnt(0)
209; GFX900-NEXT:    s_setpc_b64 s[30:31]
210;
211; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_u:
212; GFX90A:       ; %bb.0:
213; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
215; GFX90A-NEXT:    ;;#ASMSTART
216; GFX90A-NEXT:    ; def v[0:1]
217; GFX90A-NEXT:    ;;#ASMEND
218; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
219; GFX90A-NEXT:    s_waitcnt vmcnt(0)
220; GFX90A-NEXT:    s_setpc_b64 s[30:31]
221;
222; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_u:
223; GFX940:       ; %bb.0:
224; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225; GFX940-NEXT:    v_mov_b32_e32 v2, 0
226; GFX940-NEXT:    ;;#ASMSTART
227; GFX940-NEXT:    ; def v[0:1]
228; GFX940-NEXT:    ;;#ASMEND
229; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
230; GFX940-NEXT:    s_waitcnt vmcnt(0)
231; GFX940-NEXT:    s_setpc_b64 s[30:31]
232  %vec0 = call <4 x half> asm "; def $0", "=v"()
233  %vec1 = call <4 x half> asm "; def $0", "=v"()
234  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
235  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
236  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 poison>
237  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
238  ret void
239}
240
; Two-source form. Mask <5, 0>: lane 0 takes element 2 of the second operand
; (%extract31), lane 1 takes element 0 of the first operand (%extract3).
; All three targets' checks expect two inline-asm defs combined by a
; v_perm_b32 whose selector 0x5040100 is first moved into an SGPR.
; The GFX940 checks additionally expect an s_nop 0 before the v_perm_b32.
241define void @v_shuffle_v2f16_v3f16__5_0(ptr addrspace(1) inreg %ptr) {
242; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_0:
243; GFX900:       ; %bb.0:
244; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; GFX900-NEXT:    ;;#ASMSTART
246; GFX900-NEXT:    ; def v[0:1]
247; GFX900-NEXT:    ;;#ASMEND
248; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
249; GFX900-NEXT:    v_mov_b32_e32 v3, 0
250; GFX900-NEXT:    ;;#ASMSTART
251; GFX900-NEXT:    ; def v[1:2]
252; GFX900-NEXT:    ;;#ASMEND
253; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
254; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
255; GFX900-NEXT:    s_waitcnt vmcnt(0)
256; GFX900-NEXT:    s_setpc_b64 s[30:31]
257;
258; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_0:
259; GFX90A:       ; %bb.0:
260; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
261; GFX90A-NEXT:    ;;#ASMSTART
262; GFX90A-NEXT:    ; def v[0:1]
263; GFX90A-NEXT:    ;;#ASMEND
264; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
265; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
266; GFX90A-NEXT:    ;;#ASMSTART
267; GFX90A-NEXT:    ; def v[2:3]
268; GFX90A-NEXT:    ;;#ASMEND
269; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
270; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
271; GFX90A-NEXT:    s_waitcnt vmcnt(0)
272; GFX90A-NEXT:    s_setpc_b64 s[30:31]
273;
274; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_0:
275; GFX940:       ; %bb.0:
276; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277; GFX940-NEXT:    ;;#ASMSTART
278; GFX940-NEXT:    ; def v[0:1]
279; GFX940-NEXT:    ;;#ASMEND
280; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
281; GFX940-NEXT:    v_mov_b32_e32 v4, 0
282; GFX940-NEXT:    ;;#ASMSTART
283; GFX940-NEXT:    ; def v[2:3]
284; GFX940-NEXT:    ;;#ASMEND
285; GFX940-NEXT:    s_nop 0
286; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
287; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
288; GFX940-NEXT:    s_waitcnt vmcnt(0)
289; GFX940-NEXT:    s_setpc_b64 s[30:31]
290  %vec0 = call <4 x half> asm "; def $0", "=v"()
291  %vec1 = call <4 x half> asm "; def $0", "=v"()
292  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
293  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
294  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 0>
295  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
296  ret void
297}
298
; Two-source form. Mask <5, 1>: lane 0 takes element 2 of the second operand
; (%extract31), lane 1 takes element 1 of the first operand (%extract3).
; All three targets' checks expect two inline-asm defs combined by a
; v_bfi_b32 whose 0xffff mask is first moved into an SGPR.
; The GFX940 checks additionally expect an s_nop 0 before the v_bfi_b32.
299define void @v_shuffle_v2f16_v3f16__5_1(ptr addrspace(1) inreg %ptr) {
300; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_1:
301; GFX900:       ; %bb.0:
302; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303; GFX900-NEXT:    ;;#ASMSTART
304; GFX900-NEXT:    ; def v[0:1]
305; GFX900-NEXT:    ;;#ASMEND
306; GFX900-NEXT:    s_mov_b32 s4, 0xffff
307; GFX900-NEXT:    v_mov_b32_e32 v3, 0
308; GFX900-NEXT:    ;;#ASMSTART
309; GFX900-NEXT:    ; def v[1:2]
310; GFX900-NEXT:    ;;#ASMEND
311; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
312; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
313; GFX900-NEXT:    s_waitcnt vmcnt(0)
314; GFX900-NEXT:    s_setpc_b64 s[30:31]
315;
316; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_1:
317; GFX90A:       ; %bb.0:
318; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319; GFX90A-NEXT:    ;;#ASMSTART
320; GFX90A-NEXT:    ; def v[0:1]
321; GFX90A-NEXT:    ;;#ASMEND
322; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
323; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
324; GFX90A-NEXT:    ;;#ASMSTART
325; GFX90A-NEXT:    ; def v[2:3]
326; GFX90A-NEXT:    ;;#ASMEND
327; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
328; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
329; GFX90A-NEXT:    s_waitcnt vmcnt(0)
330; GFX90A-NEXT:    s_setpc_b64 s[30:31]
331;
332; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_1:
333; GFX940:       ; %bb.0:
334; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
335; GFX940-NEXT:    ;;#ASMSTART
336; GFX940-NEXT:    ; def v[0:1]
337; GFX940-NEXT:    ;;#ASMEND
338; GFX940-NEXT:    s_mov_b32 s2, 0xffff
339; GFX940-NEXT:    v_mov_b32_e32 v4, 0
340; GFX940-NEXT:    ;;#ASMSTART
341; GFX940-NEXT:    ; def v[2:3]
342; GFX940-NEXT:    ;;#ASMEND
343; GFX940-NEXT:    s_nop 0
344; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
345; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
346; GFX940-NEXT:    s_waitcnt vmcnt(0)
347; GFX940-NEXT:    s_setpc_b64 s[30:31]
348  %vec0 = call <4 x half> asm "; def $0", "=v"()
349  %vec1 = call <4 x half> asm "; def $0", "=v"()
350  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
351  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
352  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 1>
353  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
354  ret void
355}
356
; Two-source form. Mask <5, 2>: lane 0 takes element 2 of the second operand
; (%extract31), lane 1 takes element 2 of the first operand (%extract3).
; All three targets' checks expect two inline-asm defs (v[0:1] and v[2:3])
; whose second registers, v1 and v3, are combined by a v_perm_b32 with
; selector 0x5040100. The GFX940 checks additionally expect an s_nop 0.
357define void @v_shuffle_v2f16_v3f16__5_2(ptr addrspace(1) inreg %ptr) {
358; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_2:
359; GFX900:       ; %bb.0:
360; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361; GFX900-NEXT:    ;;#ASMSTART
362; GFX900-NEXT:    ; def v[0:1]
363; GFX900-NEXT:    ;;#ASMEND
364; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
365; GFX900-NEXT:    v_mov_b32_e32 v4, 0
366; GFX900-NEXT:    ;;#ASMSTART
367; GFX900-NEXT:    ; def v[2:3]
368; GFX900-NEXT:    ;;#ASMEND
369; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
370; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
371; GFX900-NEXT:    s_waitcnt vmcnt(0)
372; GFX900-NEXT:    s_setpc_b64 s[30:31]
373;
374; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_2:
375; GFX90A:       ; %bb.0:
376; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
377; GFX90A-NEXT:    ;;#ASMSTART
378; GFX90A-NEXT:    ; def v[0:1]
379; GFX90A-NEXT:    ;;#ASMEND
380; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
381; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
382; GFX90A-NEXT:    ;;#ASMSTART
383; GFX90A-NEXT:    ; def v[2:3]
384; GFX90A-NEXT:    ;;#ASMEND
385; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
386; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
387; GFX90A-NEXT:    s_waitcnt vmcnt(0)
388; GFX90A-NEXT:    s_setpc_b64 s[30:31]
389;
390; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_2:
391; GFX940:       ; %bb.0:
392; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
393; GFX940-NEXT:    ;;#ASMSTART
394; GFX940-NEXT:    ; def v[0:1]
395; GFX940-NEXT:    ;;#ASMEND
396; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
397; GFX940-NEXT:    v_mov_b32_e32 v4, 0
398; GFX940-NEXT:    ;;#ASMSTART
399; GFX940-NEXT:    ; def v[2:3]
400; GFX940-NEXT:    ;;#ASMEND
401; GFX940-NEXT:    s_nop 0
402; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
403; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
404; GFX940-NEXT:    s_waitcnt vmcnt(0)
405; GFX940-NEXT:    s_setpc_b64 s[30:31]
406  %vec0 = call <4 x half> asm "; def $0", "=v"()
407  %vec1 = call <4 x half> asm "; def $0", "=v"()
408  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
409  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
410  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 2>
411  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
412  ret void
413}
414
; Two-source form. Mask <5, 3>: both lanes come from the second operand
; (%extract31): lane 0 takes its element 2, lane 1 its element 0.
; No mask index selects from %extract3, and all three targets' checks expect
; a single inline-asm def whose two registers are combined by a v_perm_b32
; with selector 0x5040100.
415define void @v_shuffle_v2f16_v3f16__5_3(ptr addrspace(1) inreg %ptr) {
416; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_3:
417; GFX900:       ; %bb.0:
418; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419; GFX900-NEXT:    ;;#ASMSTART
420; GFX900-NEXT:    ; def v[0:1]
421; GFX900-NEXT:    ;;#ASMEND
422; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
423; GFX900-NEXT:    v_mov_b32_e32 v2, 0
424; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
425; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
426; GFX900-NEXT:    s_waitcnt vmcnt(0)
427; GFX900-NEXT:    s_setpc_b64 s[30:31]
428;
429; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_3:
430; GFX90A:       ; %bb.0:
431; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432; GFX90A-NEXT:    ;;#ASMSTART
433; GFX90A-NEXT:    ; def v[0:1]
434; GFX90A-NEXT:    ;;#ASMEND
435; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
436; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
437; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
438; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
439; GFX90A-NEXT:    s_waitcnt vmcnt(0)
440; GFX90A-NEXT:    s_setpc_b64 s[30:31]
441;
442; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_3:
443; GFX940:       ; %bb.0:
444; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445; GFX940-NEXT:    ;;#ASMSTART
446; GFX940-NEXT:    ; def v[0:1]
447; GFX940-NEXT:    ;;#ASMEND
448; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
449; GFX940-NEXT:    v_mov_b32_e32 v2, 0
450; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
451; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
452; GFX940-NEXT:    s_waitcnt vmcnt(0)
453; GFX940-NEXT:    s_setpc_b64 s[30:31]
454  %vec0 = call <4 x half> asm "; def $0", "=v"()
455  %vec1 = call <4 x half> asm "; def $0", "=v"()
456  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
457  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
458  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 3>
459  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
460  ret void
461}
462
; Two-source form. Mask <5, 4>: both lanes come from the second operand
; (%extract31): lane 0 takes its element 2, lane 1 its element 1.
; No mask index selects from %extract3, and all three targets' checks expect
; a single inline-asm def whose two registers are combined by a v_bfi_b32
; with a 0xffff mask held in an SGPR.
463define void @v_shuffle_v2f16_v3f16__5_4(ptr addrspace(1) inreg %ptr) {
464; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_4:
465; GFX900:       ; %bb.0:
466; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
467; GFX900-NEXT:    ;;#ASMSTART
468; GFX900-NEXT:    ; def v[0:1]
469; GFX900-NEXT:    ;;#ASMEND
470; GFX900-NEXT:    s_mov_b32 s4, 0xffff
471; GFX900-NEXT:    v_mov_b32_e32 v2, 0
472; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v0
473; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
474; GFX900-NEXT:    s_waitcnt vmcnt(0)
475; GFX900-NEXT:    s_setpc_b64 s[30:31]
476;
477; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_4:
478; GFX90A:       ; %bb.0:
479; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480; GFX90A-NEXT:    ;;#ASMSTART
481; GFX90A-NEXT:    ; def v[0:1]
482; GFX90A-NEXT:    ;;#ASMEND
483; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
484; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
485; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v0
486; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
487; GFX90A-NEXT:    s_waitcnt vmcnt(0)
488; GFX90A-NEXT:    s_setpc_b64 s[30:31]
489;
490; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_4:
491; GFX940:       ; %bb.0:
492; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493; GFX940-NEXT:    ;;#ASMSTART
494; GFX940-NEXT:    ; def v[0:1]
495; GFX940-NEXT:    ;;#ASMEND
496; GFX940-NEXT:    s_mov_b32 s2, 0xffff
497; GFX940-NEXT:    v_mov_b32_e32 v2, 0
498; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v0
499; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
500; GFX940-NEXT:    s_waitcnt vmcnt(0)
501; GFX940-NEXT:    s_setpc_b64 s[30:31]
502  %vec0 = call <4 x half> asm "; def $0", "=v"()
503  %vec1 = call <4 x half> asm "; def $0", "=v"()
504  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
505  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
506  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 4>
507  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
508  ret void
509}
510
; Two-source form. Mask <5, 5>: both lanes take element 2 of the second
; operand (%extract31), i.e. that element is duplicated.
; No mask index selects from %extract3, and all three targets' checks expect
; a single inline-asm def and a v_perm_b32 (selector 0x5040100) that uses v1
; for both vector inputs.
511define void @v_shuffle_v2f16_v3f16__5_5(ptr addrspace(1) inreg %ptr) {
512; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_5:
513; GFX900:       ; %bb.0:
514; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515; GFX900-NEXT:    ;;#ASMSTART
516; GFX900-NEXT:    ; def v[0:1]
517; GFX900-NEXT:    ;;#ASMEND
518; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
519; GFX900-NEXT:    v_mov_b32_e32 v2, 0
520; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
521; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
522; GFX900-NEXT:    s_waitcnt vmcnt(0)
523; GFX900-NEXT:    s_setpc_b64 s[30:31]
524;
525; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_5:
526; GFX90A:       ; %bb.0:
527; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528; GFX90A-NEXT:    ;;#ASMSTART
529; GFX90A-NEXT:    ; def v[0:1]
530; GFX90A-NEXT:    ;;#ASMEND
531; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
532; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
533; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
534; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
535; GFX90A-NEXT:    s_waitcnt vmcnt(0)
536; GFX90A-NEXT:    s_setpc_b64 s[30:31]
537;
538; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_5:
539; GFX940:       ; %bb.0:
540; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
541; GFX940-NEXT:    ;;#ASMSTART
542; GFX940-NEXT:    ; def v[0:1]
543; GFX940-NEXT:    ;;#ASMEND
544; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
545; GFX940-NEXT:    v_mov_b32_e32 v2, 0
546; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
547; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
548; GFX940-NEXT:    s_waitcnt vmcnt(0)
549; GFX940-NEXT:    s_setpc_b64 s[30:31]
550  %vec0 = call <4 x half> asm "; def $0", "=v"()
551  %vec1 = call <4 x half> asm "; def $0", "=v"()
552  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
553  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
554  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 5>
555  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
556  ret void
557}
558
; Mask <poison, 0>: lane 0 is poison; lane 1 takes element 0 of %extract3.
; All three targets' checks expect a v_lshlrev_b32 by 16 on v0 ahead of the
; store.
559define void @v_shuffle_v2f16_v3f16__u_0(ptr addrspace(1) inreg %ptr) {
560; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_0:
561; GFX900:       ; %bb.0:
562; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
563; GFX900-NEXT:    ;;#ASMSTART
564; GFX900-NEXT:    ; def v[0:1]
565; GFX900-NEXT:    ;;#ASMEND
566; GFX900-NEXT:    v_mov_b32_e32 v2, 0
567; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
568; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
569; GFX900-NEXT:    s_waitcnt vmcnt(0)
570; GFX900-NEXT:    s_setpc_b64 s[30:31]
571;
572; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_0:
573; GFX90A:       ; %bb.0:
574; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575; GFX90A-NEXT:    ;;#ASMSTART
576; GFX90A-NEXT:    ; def v[0:1]
577; GFX90A-NEXT:    ;;#ASMEND
578; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
579; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
580; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
581; GFX90A-NEXT:    s_waitcnt vmcnt(0)
582; GFX90A-NEXT:    s_setpc_b64 s[30:31]
583;
584; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_0:
585; GFX940:       ; %bb.0:
586; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
587; GFX940-NEXT:    ;;#ASMSTART
588; GFX940-NEXT:    ; def v[0:1]
589; GFX940-NEXT:    ;;#ASMEND
590; GFX940-NEXT:    v_mov_b32_e32 v2, 0
591; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
592; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
593; GFX940-NEXT:    s_waitcnt vmcnt(0)
594; GFX940-NEXT:    s_setpc_b64 s[30:31]
595  %vec0 = call <4 x half> asm "; def $0", "=v"()
596  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
597  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 0>
598  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
599  ret void
600}
601
; Mask zeroinitializer, i.e. <0, 0>: both lanes take element 0 of %extract3,
; so that element is duplicated.
; All three targets' checks expect a v_perm_b32 (selector 0x5040100 held in
; an SGPR) that uses v0 for both vector inputs.
602define void @v_shuffle_v2f16_v3f16__0_0(ptr addrspace(1) inreg %ptr) {
603; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_0:
604; GFX900:       ; %bb.0:
605; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
606; GFX900-NEXT:    ;;#ASMSTART
607; GFX900-NEXT:    ; def v[0:1]
608; GFX900-NEXT:    ;;#ASMEND
609; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
610; GFX900-NEXT:    v_mov_b32_e32 v2, 0
611; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
612; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
613; GFX900-NEXT:    s_waitcnt vmcnt(0)
614; GFX900-NEXT:    s_setpc_b64 s[30:31]
615;
616; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_0:
617; GFX90A:       ; %bb.0:
618; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619; GFX90A-NEXT:    ;;#ASMSTART
620; GFX90A-NEXT:    ; def v[0:1]
621; GFX90A-NEXT:    ;;#ASMEND
622; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
623; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
624; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
625; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
626; GFX90A-NEXT:    s_waitcnt vmcnt(0)
627; GFX90A-NEXT:    s_setpc_b64 s[30:31]
628;
629; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_0:
630; GFX940:       ; %bb.0:
631; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
632; GFX940-NEXT:    ;;#ASMSTART
633; GFX940-NEXT:    ; def v[0:1]
634; GFX940-NEXT:    ;;#ASMEND
635; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
636; GFX940-NEXT:    v_mov_b32_e32 v2, 0
637; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
638; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
639; GFX940-NEXT:    s_waitcnt vmcnt(0)
640; GFX940-NEXT:    s_setpc_b64 s[30:31]
641  %vec0 = call <4 x half> asm "; def $0", "=v"()
642  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
643  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> zeroinitializer
644  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
645  ret void
646}
647
; Mask <1, 0>: lane 0 takes element 1 and lane 1 takes element 0 of
; %extract3, i.e. the first two source elements are swapped.
; All three targets' checks expect a v_alignbit_b32 by 16 that uses v0 for
; both inputs.
648define void @v_shuffle_v2f16_v3f16__1_0(ptr addrspace(1) inreg %ptr) {
649; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_0:
650; GFX900:       ; %bb.0:
651; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
652; GFX900-NEXT:    ;;#ASMSTART
653; GFX900-NEXT:    ; def v[0:1]
654; GFX900-NEXT:    ;;#ASMEND
655; GFX900-NEXT:    v_mov_b32_e32 v2, 0
656; GFX900-NEXT:    v_alignbit_b32 v0, v0, v0, 16
657; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
658; GFX900-NEXT:    s_waitcnt vmcnt(0)
659; GFX900-NEXT:    s_setpc_b64 s[30:31]
660;
661; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_0:
662; GFX90A:       ; %bb.0:
663; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664; GFX90A-NEXT:    ;;#ASMSTART
665; GFX90A-NEXT:    ; def v[0:1]
666; GFX90A-NEXT:    ;;#ASMEND
667; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
668; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v0, 16
669; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
670; GFX90A-NEXT:    s_waitcnt vmcnt(0)
671; GFX90A-NEXT:    s_setpc_b64 s[30:31]
672;
673; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_0:
674; GFX940:       ; %bb.0:
675; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
676; GFX940-NEXT:    ;;#ASMSTART
677; GFX940-NEXT:    ; def v[0:1]
678; GFX940-NEXT:    ;;#ASMEND
679; GFX940-NEXT:    v_mov_b32_e32 v2, 0
680; GFX940-NEXT:    v_alignbit_b32 v0, v0, v0, 16
681; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
682; GFX940-NEXT:    s_waitcnt vmcnt(0)
683; GFX940-NEXT:    s_setpc_b64 s[30:31]
684  %vec0 = call <4 x half> asm "; def $0", "=v"()
685  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
686  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 0>
687  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
688  ret void
689}
690
; Mask <2, 0>: lane 0 takes element 2 and lane 1 takes element 0 of
; %extract3.
; All three targets' checks expect the two registers of the asm-defined pair
; (v0 and v1) to be combined by a v_perm_b32 with selector 0x5040100 held in
; an SGPR.
691define void @v_shuffle_v2f16_v3f16__2_0(ptr addrspace(1) inreg %ptr) {
692; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_0:
693; GFX900:       ; %bb.0:
694; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
695; GFX900-NEXT:    ;;#ASMSTART
696; GFX900-NEXT:    ; def v[0:1]
697; GFX900-NEXT:    ;;#ASMEND
698; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
699; GFX900-NEXT:    v_mov_b32_e32 v2, 0
700; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
701; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
702; GFX900-NEXT:    s_waitcnt vmcnt(0)
703; GFX900-NEXT:    s_setpc_b64 s[30:31]
704;
705; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_0:
706; GFX90A:       ; %bb.0:
707; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
708; GFX90A-NEXT:    ;;#ASMSTART
709; GFX90A-NEXT:    ; def v[0:1]
710; GFX90A-NEXT:    ;;#ASMEND
711; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
712; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
713; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
714; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
715; GFX90A-NEXT:    s_waitcnt vmcnt(0)
716; GFX90A-NEXT:    s_setpc_b64 s[30:31]
717;
718; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_0:
719; GFX940:       ; %bb.0:
720; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
721; GFX940-NEXT:    ;;#ASMSTART
722; GFX940-NEXT:    ; def v[0:1]
723; GFX940-NEXT:    ;;#ASMEND
724; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
725; GFX940-NEXT:    v_mov_b32_e32 v2, 0
726; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
727; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
728; GFX940-NEXT:    s_waitcnt vmcnt(0)
729; GFX940-NEXT:    s_setpc_b64 s[30:31]
730  %vec0 = call <4 x half> asm "; def $0", "=v"()
731  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
732  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 0>
733  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
734  ret void
735}
736
; Mask <3, 0>: with three-element operands, index 3 names element 0 of the
; second shufflevector operand, which is poison here, so lane 0 is poison;
; lane 1 takes element 0 of %extract3.
; All three targets' checks expect a v_lshlrev_b32 by 16 on v0 ahead of the
; store.
737define void @v_shuffle_v2f16_v3f16__3_0(ptr addrspace(1) inreg %ptr) {
738; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_0:
739; GFX900:       ; %bb.0:
740; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
741; GFX900-NEXT:    ;;#ASMSTART
742; GFX900-NEXT:    ; def v[0:1]
743; GFX900-NEXT:    ;;#ASMEND
744; GFX900-NEXT:    v_mov_b32_e32 v2, 0
745; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
746; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
747; GFX900-NEXT:    s_waitcnt vmcnt(0)
748; GFX900-NEXT:    s_setpc_b64 s[30:31]
749;
750; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_0:
751; GFX90A:       ; %bb.0:
752; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
753; GFX90A-NEXT:    ;;#ASMSTART
754; GFX90A-NEXT:    ; def v[0:1]
755; GFX90A-NEXT:    ;;#ASMEND
756; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
757; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
758; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
759; GFX90A-NEXT:    s_waitcnt vmcnt(0)
760; GFX90A-NEXT:    s_setpc_b64 s[30:31]
761;
762; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_0:
763; GFX940:       ; %bb.0:
764; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
765; GFX940-NEXT:    ;;#ASMSTART
766; GFX940-NEXT:    ; def v[0:1]
767; GFX940-NEXT:    ;;#ASMEND
768; GFX940-NEXT:    v_mov_b32_e32 v2, 0
769; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
770; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
771; GFX940-NEXT:    s_waitcnt vmcnt(0)
772; GFX940-NEXT:    s_setpc_b64 s[30:31]
773  %vec0 = call <4 x half> asm "; def $0", "=v"()
774  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
775  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 0>
776  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
777  ret void
778}
779
; Two-source shuffle with mask <4, 0>: result lane 0 is element 1 of
; %extract31 (index 4 = 3 + 1) and result lane 1 is element 0 of %extract3.
; The checks expect one v_alignbit_b32 by 16 bits that combines the first
; dword of each inline-asm result; the gfx940 checks additionally expect an
; s_nop 0 between the second asm block and the v_alignbit_b32.
780define void @v_shuffle_v2f16_v3f16__4_0(ptr addrspace(1) inreg %ptr) {
781; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_0:
782; GFX900:       ; %bb.0:
783; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
784; GFX900-NEXT:    ;;#ASMSTART
785; GFX900-NEXT:    ; def v[0:1]
786; GFX900-NEXT:    ;;#ASMEND
787; GFX900-NEXT:    v_mov_b32_e32 v3, 0
788; GFX900-NEXT:    ;;#ASMSTART
789; GFX900-NEXT:    ; def v[1:2]
790; GFX900-NEXT:    ;;#ASMEND
791; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
792; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
793; GFX900-NEXT:    s_waitcnt vmcnt(0)
794; GFX900-NEXT:    s_setpc_b64 s[30:31]
795;
796; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_0:
797; GFX90A:       ; %bb.0:
798; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
799; GFX90A-NEXT:    ;;#ASMSTART
800; GFX90A-NEXT:    ; def v[0:1]
801; GFX90A-NEXT:    ;;#ASMEND
802; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
803; GFX90A-NEXT:    ;;#ASMSTART
804; GFX90A-NEXT:    ; def v[2:3]
805; GFX90A-NEXT:    ;;#ASMEND
806; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v2, 16
807; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
808; GFX90A-NEXT:    s_waitcnt vmcnt(0)
809; GFX90A-NEXT:    s_setpc_b64 s[30:31]
810;
811; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_0:
812; GFX940:       ; %bb.0:
813; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814; GFX940-NEXT:    ;;#ASMSTART
815; GFX940-NEXT:    ; def v[0:1]
816; GFX940-NEXT:    ;;#ASMEND
817; GFX940-NEXT:    v_mov_b32_e32 v4, 0
818; GFX940-NEXT:    ;;#ASMSTART
819; GFX940-NEXT:    ; def v[2:3]
820; GFX940-NEXT:    ;;#ASMEND
821; GFX940-NEXT:    s_nop 0
822; GFX940-NEXT:    v_alignbit_b32 v0, v0, v2, 16
823; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
824; GFX940-NEXT:    s_waitcnt vmcnt(0)
825; GFX940-NEXT:    s_setpc_b64 s[30:31]
826  %vec0 = call <4 x half> asm "; def $0", "=v"()
827  %vec1 = call <4 x half> asm "; def $0", "=v"()
828  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
829  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
830  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 0>
831  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
832  ret void
833}
834
; Shuffle mask <poison, 1>: result lane 0 is unconstrained and result lane 1
; is element 1 of %extract3. The checks expect no permute instruction at all:
; the first dword of the inline-asm result (v0) is stored unmodified.
835define void @v_shuffle_v2f16_v3f16__u_1(ptr addrspace(1) inreg %ptr) {
836; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_1:
837; GFX900:       ; %bb.0:
838; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
839; GFX900-NEXT:    v_mov_b32_e32 v2, 0
840; GFX900-NEXT:    ;;#ASMSTART
841; GFX900-NEXT:    ; def v[0:1]
842; GFX900-NEXT:    ;;#ASMEND
843; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
844; GFX900-NEXT:    s_waitcnt vmcnt(0)
845; GFX900-NEXT:    s_setpc_b64 s[30:31]
846;
847; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_1:
848; GFX90A:       ; %bb.0:
849; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
850; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
851; GFX90A-NEXT:    ;;#ASMSTART
852; GFX90A-NEXT:    ; def v[0:1]
853; GFX90A-NEXT:    ;;#ASMEND
854; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
855; GFX90A-NEXT:    s_waitcnt vmcnt(0)
856; GFX90A-NEXT:    s_setpc_b64 s[30:31]
857;
858; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_1:
859; GFX940:       ; %bb.0:
860; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
861; GFX940-NEXT:    v_mov_b32_e32 v2, 0
862; GFX940-NEXT:    ;;#ASMSTART
863; GFX940-NEXT:    ; def v[0:1]
864; GFX940-NEXT:    ;;#ASMEND
865; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
866; GFX940-NEXT:    s_waitcnt vmcnt(0)
867; GFX940-NEXT:    s_setpc_b64 s[30:31]
868  %vec0 = call <4 x half> asm "; def $0", "=v"()
869  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
870  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 1>
871  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
872  ret void
873}
874
; Shuffle mask <0, 1>: the result is the first two elements of %extract3 in
; their original order. The checks expect no permute instruction: the first
; dword of the inline-asm result (v0) is stored unmodified.
875define void @v_shuffle_v2f16_v3f16__0_1(ptr addrspace(1) inreg %ptr) {
876; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_1:
877; GFX900:       ; %bb.0:
878; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
879; GFX900-NEXT:    v_mov_b32_e32 v2, 0
880; GFX900-NEXT:    ;;#ASMSTART
881; GFX900-NEXT:    ; def v[0:1]
882; GFX900-NEXT:    ;;#ASMEND
883; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
884; GFX900-NEXT:    s_waitcnt vmcnt(0)
885; GFX900-NEXT:    s_setpc_b64 s[30:31]
886;
887; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_1:
888; GFX90A:       ; %bb.0:
889; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
891; GFX90A-NEXT:    ;;#ASMSTART
892; GFX90A-NEXT:    ; def v[0:1]
893; GFX90A-NEXT:    ;;#ASMEND
894; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
895; GFX90A-NEXT:    s_waitcnt vmcnt(0)
896; GFX90A-NEXT:    s_setpc_b64 s[30:31]
897;
898; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_1:
899; GFX940:       ; %bb.0:
900; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
901; GFX940-NEXT:    v_mov_b32_e32 v2, 0
902; GFX940-NEXT:    ;;#ASMSTART
903; GFX940-NEXT:    ; def v[0:1]
904; GFX940-NEXT:    ;;#ASMEND
905; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
906; GFX940-NEXT:    s_waitcnt vmcnt(0)
907; GFX940-NEXT:    s_setpc_b64 s[30:31]
908  %vec0 = call <4 x half> asm "; def $0", "=v"()
909  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
910  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 1>
911  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
912  ret void
913}
914
; Shuffle mask <1, 1>: element 1 of %extract3 is placed in both result lanes.
; The checks expect a v_perm_b32 with v0 as both data operands and the byte
; selector 0x7060302 held in an SGPR, followed by a single dword store.
915define void @v_shuffle_v2f16_v3f16__1_1(ptr addrspace(1) inreg %ptr) {
916; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_1:
917; GFX900:       ; %bb.0:
918; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
919; GFX900-NEXT:    ;;#ASMSTART
920; GFX900-NEXT:    ; def v[0:1]
921; GFX900-NEXT:    ;;#ASMEND
922; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
923; GFX900-NEXT:    v_mov_b32_e32 v2, 0
924; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
925; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
926; GFX900-NEXT:    s_waitcnt vmcnt(0)
927; GFX900-NEXT:    s_setpc_b64 s[30:31]
928;
929; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_1:
930; GFX90A:       ; %bb.0:
931; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
932; GFX90A-NEXT:    ;;#ASMSTART
933; GFX90A-NEXT:    ; def v[0:1]
934; GFX90A-NEXT:    ;;#ASMEND
935; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
936; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
937; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
938; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
939; GFX90A-NEXT:    s_waitcnt vmcnt(0)
940; GFX90A-NEXT:    s_setpc_b64 s[30:31]
941;
942; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_1:
943; GFX940:       ; %bb.0:
944; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
945; GFX940-NEXT:    ;;#ASMSTART
946; GFX940-NEXT:    ; def v[0:1]
947; GFX940-NEXT:    ;;#ASMEND
948; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
949; GFX940-NEXT:    v_mov_b32_e32 v2, 0
950; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
951; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
952; GFX940-NEXT:    s_waitcnt vmcnt(0)
953; GFX940-NEXT:    s_setpc_b64 s[30:31]
954  %vec0 = call <4 x half> asm "; def $0", "=v"()
955  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
956  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 1>
957  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
958  ret void
959}
960
; Shuffle mask <2, 1>: result lane 0 is element 2 and result lane 1 is
; element 1 of %extract3. The checks expect a v_bfi_b32 whose bit mask 0xffff
; is held in an SGPR and whose data operands are v1 and v0 (the two dwords of
; the inline-asm result), followed by a single dword store.
961define void @v_shuffle_v2f16_v3f16__2_1(ptr addrspace(1) inreg %ptr) {
962; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_1:
963; GFX900:       ; %bb.0:
964; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
965; GFX900-NEXT:    ;;#ASMSTART
966; GFX900-NEXT:    ; def v[0:1]
967; GFX900-NEXT:    ;;#ASMEND
968; GFX900-NEXT:    s_mov_b32 s4, 0xffff
969; GFX900-NEXT:    v_mov_b32_e32 v2, 0
970; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v0
971; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
972; GFX900-NEXT:    s_waitcnt vmcnt(0)
973; GFX900-NEXT:    s_setpc_b64 s[30:31]
974;
975; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_1:
976; GFX90A:       ; %bb.0:
977; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
978; GFX90A-NEXT:    ;;#ASMSTART
979; GFX90A-NEXT:    ; def v[0:1]
980; GFX90A-NEXT:    ;;#ASMEND
981; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
982; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
983; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v0
984; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
985; GFX90A-NEXT:    s_waitcnt vmcnt(0)
986; GFX90A-NEXT:    s_setpc_b64 s[30:31]
987;
988; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_1:
989; GFX940:       ; %bb.0:
990; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
991; GFX940-NEXT:    ;;#ASMSTART
992; GFX940-NEXT:    ; def v[0:1]
993; GFX940-NEXT:    ;;#ASMEND
994; GFX940-NEXT:    s_mov_b32 s2, 0xffff
995; GFX940-NEXT:    v_mov_b32_e32 v2, 0
996; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v0
997; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
998; GFX940-NEXT:    s_waitcnt vmcnt(0)
999; GFX940-NEXT:    s_setpc_b64 s[30:31]
1000  %vec0 = call <4 x half> asm "; def $0", "=v"()
1001  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1002  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 1>
1003  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1004  ret void
1005}
1006
; Shuffle mask <3, 1> with a poison second operand. Index 3 selects from the
; poison operand, so only result lane 1 (element 1 of %extract3) is defined.
; The checks expect no permute instruction: the first dword of the inline-asm
; result (v0) is stored unmodified.
1007define void @v_shuffle_v2f16_v3f16__3_1(ptr addrspace(1) inreg %ptr) {
1008; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_1:
1009; GFX900:       ; %bb.0:
1010; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1012; GFX900-NEXT:    ;;#ASMSTART
1013; GFX900-NEXT:    ; def v[0:1]
1014; GFX900-NEXT:    ;;#ASMEND
1015; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1016; GFX900-NEXT:    s_waitcnt vmcnt(0)
1017; GFX900-NEXT:    s_setpc_b64 s[30:31]
1018;
1019; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_1:
1020; GFX90A:       ; %bb.0:
1021; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1022; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1023; GFX90A-NEXT:    ;;#ASMSTART
1024; GFX90A-NEXT:    ; def v[0:1]
1025; GFX90A-NEXT:    ;;#ASMEND
1026; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1027; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1028; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1029;
1030; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_1:
1031; GFX940:       ; %bb.0:
1032; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1033; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1034; GFX940-NEXT:    ;;#ASMSTART
1035; GFX940-NEXT:    ; def v[0:1]
1036; GFX940-NEXT:    ;;#ASMEND
1037; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1038; GFX940-NEXT:    s_waitcnt vmcnt(0)
1039; GFX940-NEXT:    s_setpc_b64 s[30:31]
1040  %vec0 = call <4 x half> asm "; def $0", "=v"()
1041  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1042  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 1>
1043  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1044  ret void
1045}
1046
; Two-source shuffle with mask <4, 1>: result lane 0 is element 1 of
; %extract31 (index 4 = 3 + 1) and result lane 1 is element 1 of %extract3.
; The checks expect a v_perm_b32 over the first dword of each inline-asm
; result with the byte selector 0x7060302 held in an SGPR; the gfx940 checks
; additionally expect an s_nop 0 between the second asm block and the
; v_perm_b32.
1047define void @v_shuffle_v2f16_v3f16__4_1(ptr addrspace(1) inreg %ptr) {
1048; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_1:
1049; GFX900:       ; %bb.0:
1050; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1051; GFX900-NEXT:    ;;#ASMSTART
1052; GFX900-NEXT:    ; def v[0:1]
1053; GFX900-NEXT:    ;;#ASMEND
1054; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1055; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1056; GFX900-NEXT:    ;;#ASMSTART
1057; GFX900-NEXT:    ; def v[1:2]
1058; GFX900-NEXT:    ;;#ASMEND
1059; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
1060; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
1061; GFX900-NEXT:    s_waitcnt vmcnt(0)
1062; GFX900-NEXT:    s_setpc_b64 s[30:31]
1063;
1064; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_1:
1065; GFX90A:       ; %bb.0:
1066; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1067; GFX90A-NEXT:    ;;#ASMSTART
1068; GFX90A-NEXT:    ; def v[0:1]
1069; GFX90A-NEXT:    ;;#ASMEND
1070; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1071; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1072; GFX90A-NEXT:    ;;#ASMSTART
1073; GFX90A-NEXT:    ; def v[2:3]
1074; GFX90A-NEXT:    ;;#ASMEND
1075; GFX90A-NEXT:    v_perm_b32 v0, v0, v2, s4
1076; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
1077; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1078; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1079;
1080; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_1:
1081; GFX940:       ; %bb.0:
1082; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1083; GFX940-NEXT:    ;;#ASMSTART
1084; GFX940-NEXT:    ; def v[0:1]
1085; GFX940-NEXT:    ;;#ASMEND
1086; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1087; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1088; GFX940-NEXT:    ;;#ASMSTART
1089; GFX940-NEXT:    ; def v[2:3]
1090; GFX940-NEXT:    ;;#ASMEND
1091; GFX940-NEXT:    s_nop 0
1092; GFX940-NEXT:    v_perm_b32 v0, v0, v2, s2
1093; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
1094; GFX940-NEXT:    s_waitcnt vmcnt(0)
1095; GFX940-NEXT:    s_setpc_b64 s[30:31]
1096  %vec0 = call <4 x half> asm "; def $0", "=v"()
1097  %vec1 = call <4 x half> asm "; def $0", "=v"()
1098  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1099  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1100  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 1>
1101  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1102  ret void
1103}
1104
; Shuffle mask <poison, 2>: result lane 0 is unconstrained and result lane 1
; is element 2 of %extract3. The checks expect v1 (the second dword of the
; inline-asm result) shifted left by 16 bits into v0, then a single dword
; store.
1105define void @v_shuffle_v2f16_v3f16__u_2(ptr addrspace(1) inreg %ptr) {
1106; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_2:
1107; GFX900:       ; %bb.0:
1108; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1109; GFX900-NEXT:    ;;#ASMSTART
1110; GFX900-NEXT:    ; def v[0:1]
1111; GFX900-NEXT:    ;;#ASMEND
1112; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1113; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
1114; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1115; GFX900-NEXT:    s_waitcnt vmcnt(0)
1116; GFX900-NEXT:    s_setpc_b64 s[30:31]
1117;
1118; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_2:
1119; GFX90A:       ; %bb.0:
1120; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121; GFX90A-NEXT:    ;;#ASMSTART
1122; GFX90A-NEXT:    ; def v[0:1]
1123; GFX90A-NEXT:    ;;#ASMEND
1124; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1125; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
1126; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1127; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1128; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1129;
1130; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_2:
1131; GFX940:       ; %bb.0:
1132; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1133; GFX940-NEXT:    ;;#ASMSTART
1134; GFX940-NEXT:    ; def v[0:1]
1135; GFX940-NEXT:    ;;#ASMEND
1136; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1137; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
1138; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1139; GFX940-NEXT:    s_waitcnt vmcnt(0)
1140; GFX940-NEXT:    s_setpc_b64 s[30:31]
1141  %vec0 = call <4 x half> asm "; def $0", "=v"()
1142  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1143  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 2>
1144  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1145  ret void
1146}
1147
; Shuffle mask <0, 2>: result lane 0 is element 0 and result lane 1 is
; element 2 of %extract3. The checks expect a v_perm_b32 with data operands
; v1, v0 and the byte selector 0x5040100 held in an SGPR, followed by a
; single dword store.
1148define void @v_shuffle_v2f16_v3f16__0_2(ptr addrspace(1) inreg %ptr) {
1149; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_2:
1150; GFX900:       ; %bb.0:
1151; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1152; GFX900-NEXT:    ;;#ASMSTART
1153; GFX900-NEXT:    ; def v[0:1]
1154; GFX900-NEXT:    ;;#ASMEND
1155; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1156; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1157; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
1158; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1159; GFX900-NEXT:    s_waitcnt vmcnt(0)
1160; GFX900-NEXT:    s_setpc_b64 s[30:31]
1161;
1162; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_2:
1163; GFX90A:       ; %bb.0:
1164; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1165; GFX90A-NEXT:    ;;#ASMSTART
1166; GFX90A-NEXT:    ; def v[0:1]
1167; GFX90A-NEXT:    ;;#ASMEND
1168; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1169; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1170; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
1171; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1172; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1173; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1174;
1175; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_2:
1176; GFX940:       ; %bb.0:
1177; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1178; GFX940-NEXT:    ;;#ASMSTART
1179; GFX940-NEXT:    ; def v[0:1]
1180; GFX940-NEXT:    ;;#ASMEND
1181; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1182; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1183; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
1184; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1185; GFX940-NEXT:    s_waitcnt vmcnt(0)
1186; GFX940-NEXT:    s_setpc_b64 s[30:31]
1187  %vec0 = call <4 x half> asm "; def $0", "=v"()
1188  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1189  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 2>
1190  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1191  ret void
1192}
1193
; Shuffle mask <1, 2>: result lane 0 is element 1 and result lane 1 is
; element 2 of %extract3. The checks expect one v_alignbit_b32 of v1 and v0
; by 16 bits (no selector constant is materialized), followed by a single
; dword store.
1194define void @v_shuffle_v2f16_v3f16__1_2(ptr addrspace(1) inreg %ptr) {
1195; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_2:
1196; GFX900:       ; %bb.0:
1197; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1198; GFX900-NEXT:    ;;#ASMSTART
1199; GFX900-NEXT:    ; def v[0:1]
1200; GFX900-NEXT:    ;;#ASMEND
1201; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1202; GFX900-NEXT:    v_alignbit_b32 v0, v1, v0, 16
1203; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1204; GFX900-NEXT:    s_waitcnt vmcnt(0)
1205; GFX900-NEXT:    s_setpc_b64 s[30:31]
1206;
1207; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_2:
1208; GFX90A:       ; %bb.0:
1209; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1210; GFX90A-NEXT:    ;;#ASMSTART
1211; GFX90A-NEXT:    ; def v[0:1]
1212; GFX90A-NEXT:    ;;#ASMEND
1213; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1214; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v0, 16
1215; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1216; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1217; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1218;
1219; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_2:
1220; GFX940:       ; %bb.0:
1221; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222; GFX940-NEXT:    ;;#ASMSTART
1223; GFX940-NEXT:    ; def v[0:1]
1224; GFX940-NEXT:    ;;#ASMEND
1225; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1226; GFX940-NEXT:    v_alignbit_b32 v0, v1, v0, 16
1227; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1228; GFX940-NEXT:    s_waitcnt vmcnt(0)
1229; GFX940-NEXT:    s_setpc_b64 s[30:31]
1230  %vec0 = call <4 x half> asm "; def $0", "=v"()
1231  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1232  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 2>
1233  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1234  ret void
1235}
1236
; Shuffle mask <2, 2>: element 2 of %extract3 is placed in both result lanes.
; The checks expect a v_perm_b32 with v1 as both data operands and the byte
; selector 0x5040100 held in an SGPR, followed by a single dword store.
1237define void @v_shuffle_v2f16_v3f16__2_2(ptr addrspace(1) inreg %ptr) {
1238; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_2:
1239; GFX900:       ; %bb.0:
1240; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241; GFX900-NEXT:    ;;#ASMSTART
1242; GFX900-NEXT:    ; def v[0:1]
1243; GFX900-NEXT:    ;;#ASMEND
1244; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1245; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1246; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
1247; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1248; GFX900-NEXT:    s_waitcnt vmcnt(0)
1249; GFX900-NEXT:    s_setpc_b64 s[30:31]
1250;
1251; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_2:
1252; GFX90A:       ; %bb.0:
1253; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1254; GFX90A-NEXT:    ;;#ASMSTART
1255; GFX90A-NEXT:    ; def v[0:1]
1256; GFX90A-NEXT:    ;;#ASMEND
1257; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1258; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1259; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
1260; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1261; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1262; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1263;
1264; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_2:
1265; GFX940:       ; %bb.0:
1266; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1267; GFX940-NEXT:    ;;#ASMSTART
1268; GFX940-NEXT:    ; def v[0:1]
1269; GFX940-NEXT:    ;;#ASMEND
1270; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1271; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1272; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
1273; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1274; GFX940-NEXT:    s_waitcnt vmcnt(0)
1275; GFX940-NEXT:    s_setpc_b64 s[30:31]
1276  %vec0 = call <4 x half> asm "; def $0", "=v"()
1277  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1278  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 2>
1279  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1280  ret void
1281}
1282
; Shuffle mask <3, 2> with a poison second operand. Index 3 selects from the
; poison operand, so only result lane 1 (element 2 of %extract3) is defined.
; The checks expect v1 (the second dword of the inline-asm result) shifted
; left by 16 bits into v0, then a single dword store.
1283define void @v_shuffle_v2f16_v3f16__3_2(ptr addrspace(1) inreg %ptr) {
1284; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_2:
1285; GFX900:       ; %bb.0:
1286; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1287; GFX900-NEXT:    ;;#ASMSTART
1288; GFX900-NEXT:    ; def v[0:1]
1289; GFX900-NEXT:    ;;#ASMEND
1290; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1291; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
1292; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1293; GFX900-NEXT:    s_waitcnt vmcnt(0)
1294; GFX900-NEXT:    s_setpc_b64 s[30:31]
1295;
1296; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_2:
1297; GFX90A:       ; %bb.0:
1298; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1299; GFX90A-NEXT:    ;;#ASMSTART
1300; GFX90A-NEXT:    ; def v[0:1]
1301; GFX90A-NEXT:    ;;#ASMEND
1302; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1303; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
1304; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1305; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1306; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1307;
1308; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_2:
1309; GFX940:       ; %bb.0:
1310; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1311; GFX940-NEXT:    ;;#ASMSTART
1312; GFX940-NEXT:    ; def v[0:1]
1313; GFX940-NEXT:    ;;#ASMEND
1314; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1315; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
1316; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1317; GFX940-NEXT:    s_waitcnt vmcnt(0)
1318; GFX940-NEXT:    s_setpc_b64 s[30:31]
1319  %vec0 = call <4 x half> asm "; def $0", "=v"()
1320  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1321  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 2>
1322  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1323  ret void
1324}
1325
; Two-source shuffle with mask <4, 2>: result lane 0 is element 1 of
; %extract31 (index 4 = 3 + 1) and result lane 1 is element 2 of %extract3.
; The checks expect one v_alignbit_b32 by 16 bits combining v1 (second dword
; of the first asm result) with v2 (first dword of the second asm result);
; the gfx940 checks additionally expect an s_nop 0 before it.
1326define void @v_shuffle_v2f16_v3f16__4_2(ptr addrspace(1) inreg %ptr) {
1327; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_2:
1328; GFX900:       ; %bb.0:
1329; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330; GFX900-NEXT:    ;;#ASMSTART
1331; GFX900-NEXT:    ; def v[0:1]
1332; GFX900-NEXT:    ;;#ASMEND
1333; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1334; GFX900-NEXT:    ;;#ASMSTART
1335; GFX900-NEXT:    ; def v[2:3]
1336; GFX900-NEXT:    ;;#ASMEND
1337; GFX900-NEXT:    v_alignbit_b32 v0, v1, v2, 16
1338; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
1339; GFX900-NEXT:    s_waitcnt vmcnt(0)
1340; GFX900-NEXT:    s_setpc_b64 s[30:31]
1341;
1342; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_2:
1343; GFX90A:       ; %bb.0:
1344; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1345; GFX90A-NEXT:    ;;#ASMSTART
1346; GFX90A-NEXT:    ; def v[0:1]
1347; GFX90A-NEXT:    ;;#ASMEND
1348; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1349; GFX90A-NEXT:    ;;#ASMSTART
1350; GFX90A-NEXT:    ; def v[2:3]
1351; GFX90A-NEXT:    ;;#ASMEND
1352; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v2, 16
1353; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
1354; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1355; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1356;
1357; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_2:
1358; GFX940:       ; %bb.0:
1359; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1360; GFX940-NEXT:    ;;#ASMSTART
1361; GFX940-NEXT:    ; def v[0:1]
1362; GFX940-NEXT:    ;;#ASMEND
1363; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1364; GFX940-NEXT:    ;;#ASMSTART
1365; GFX940-NEXT:    ; def v[2:3]
1366; GFX940-NEXT:    ;;#ASMEND
1367; GFX940-NEXT:    s_nop 0
1368; GFX940-NEXT:    v_alignbit_b32 v0, v1, v2, 16
1369; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
1370; GFX940-NEXT:    s_waitcnt vmcnt(0)
1371; GFX940-NEXT:    s_setpc_b64 s[30:31]
1372  %vec0 = call <4 x half> asm "; def $0", "=v"()
1373  %vec1 = call <4 x half> asm "; def $0", "=v"()
1374  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1375  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1376  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 2>
1377  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1378  ret void
1379}
1380
; Shuffle mask <poison, 3> with a poison second operand: lane 0 is
; unconstrained and index 3 selects from the poison operand, so no result
; lane carries a defined value. The shared checks expect only the entry
; s_waitcnt and the return; neither the inline-asm block nor a store appears.
1381define void @v_shuffle_v2f16_v3f16__u_3(ptr addrspace(1) inreg %ptr) {
1382; GFX9-LABEL: v_shuffle_v2f16_v3f16__u_3:
1383; GFX9:       ; %bb.0:
1384; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1385; GFX9-NEXT:    s_setpc_b64 s[30:31]
1386  %vec0 = call <4 x half> asm "; def $0", "=v"()
1387  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1388  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 3>
1389  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1390  ret void
1391}
1392
; Shuffle mask <0, 3> with a poison second operand. Index 3 selects from the
; poison operand, so only result lane 0 (element 0 of %extract3) is defined.
; The checks expect no permute instruction: the first dword of the inline-asm
; result (v0) is stored unmodified.
1393define void @v_shuffle_v2f16_v3f16__0_3(ptr addrspace(1) inreg %ptr) {
1394; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_3:
1395; GFX900:       ; %bb.0:
1396; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1397; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1398; GFX900-NEXT:    ;;#ASMSTART
1399; GFX900-NEXT:    ; def v[0:1]
1400; GFX900-NEXT:    ;;#ASMEND
1401; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1402; GFX900-NEXT:    s_waitcnt vmcnt(0)
1403; GFX900-NEXT:    s_setpc_b64 s[30:31]
1404;
1405; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_3:
1406; GFX90A:       ; %bb.0:
1407; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1408; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1409; GFX90A-NEXT:    ;;#ASMSTART
1410; GFX90A-NEXT:    ; def v[0:1]
1411; GFX90A-NEXT:    ;;#ASMEND
1412; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1413; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1414; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1415;
1416; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_3:
1417; GFX940:       ; %bb.0:
1418; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1419; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1420; GFX940-NEXT:    ;;#ASMSTART
1421; GFX940-NEXT:    ; def v[0:1]
1422; GFX940-NEXT:    ;;#ASMEND
1423; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1424; GFX940-NEXT:    s_waitcnt vmcnt(0)
1425; GFX940-NEXT:    s_setpc_b64 s[30:31]
1426  %vec0 = call <4 x half> asm "; def $0", "=v"()
1427  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1428  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 3>
1429  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1430  ret void
1431}
1432
; Shuffle mask <1, 3> with a poison second operand. Index 3 selects from the
; poison operand, so only result lane 0 (element 1 of %extract3) is defined.
; The checks expect one v_alignbit_b32 by 16 bits whose first source is an
; SGPR (s4, or s0 on gfx940) that the checked sequence never writes.
; NOTE(review): that SGPR presumably stands in for the poison lane, so its
; contents would not matter -- confirm before relying on this.
1433define void @v_shuffle_v2f16_v3f16__1_3(ptr addrspace(1) inreg %ptr) {
1434; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_3:
1435; GFX900:       ; %bb.0:
1436; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1437; GFX900-NEXT:    ;;#ASMSTART
1438; GFX900-NEXT:    ; def v[0:1]
1439; GFX900-NEXT:    ;;#ASMEND
1440; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1441; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
1442; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1443; GFX900-NEXT:    s_waitcnt vmcnt(0)
1444; GFX900-NEXT:    s_setpc_b64 s[30:31]
1445;
1446; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_3:
1447; GFX90A:       ; %bb.0:
1448; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1449; GFX90A-NEXT:    ;;#ASMSTART
1450; GFX90A-NEXT:    ; def v[0:1]
1451; GFX90A-NEXT:    ;;#ASMEND
1452; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1453; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
1454; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1455; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1456; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1457;
1458; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_3:
1459; GFX940:       ; %bb.0:
1460; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1461; GFX940-NEXT:    ;;#ASMSTART
1462; GFX940-NEXT:    ; def v[0:1]
1463; GFX940-NEXT:    ;;#ASMEND
1464; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1465; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
1466; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1467; GFX940-NEXT:    s_waitcnt vmcnt(0)
1468; GFX940-NEXT:    s_setpc_b64 s[30:31]
1469  %vec0 = call <4 x half> asm "; def $0", "=v"()
1470  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1471  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 3>
1472  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1473  ret void
1474}
1475
; Shuffle mask <2, 3> with a poison second operand. Index 3 selects from the
; poison operand, so only result lane 0 (element 2 of %extract3) is defined.
; The checks expect no permute instruction: the second dword of the
; inline-asm result (v1) is stored unmodified.
1476define void @v_shuffle_v2f16_v3f16__2_3(ptr addrspace(1) inreg %ptr) {
1477; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_3:
1478; GFX900:       ; %bb.0:
1479; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1480; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1481; GFX900-NEXT:    ;;#ASMSTART
1482; GFX900-NEXT:    ; def v[0:1]
1483; GFX900-NEXT:    ;;#ASMEND
1484; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
1485; GFX900-NEXT:    s_waitcnt vmcnt(0)
1486; GFX900-NEXT:    s_setpc_b64 s[30:31]
1487;
1488; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_3:
1489; GFX90A:       ; %bb.0:
1490; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1492; GFX90A-NEXT:    ;;#ASMSTART
1493; GFX90A-NEXT:    ; def v[0:1]
1494; GFX90A-NEXT:    ;;#ASMEND
1495; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
1496; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1497; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1498;
1499; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_3:
1500; GFX940:       ; %bb.0:
1501; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1502; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1503; GFX940-NEXT:    ;;#ASMSTART
1504; GFX940-NEXT:    ; def v[0:1]
1505; GFX940-NEXT:    ;;#ASMEND
1506; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
1507; GFX940-NEXT:    s_waitcnt vmcnt(0)
1508; GFX940-NEXT:    s_setpc_b64 s[30:31]
1509  %vec0 = call <4 x half> asm "; def $0", "=v"()
1510  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1511  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 3>
1512  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1513  ret void
1514}
1515
; Single-source shuffle with mask <3, 3>: both indices select from the poison
; second operand. The checks expect only the entry wait and the return, with
; no asm def and no store emitted.
1516define void @v_shuffle_v2f16_v3f16__3_3(ptr addrspace(1) inreg %ptr) {
1517; GFX9-LABEL: v_shuffle_v2f16_v3f16__3_3:
1518; GFX9:       ; %bb.0:
1519; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1520; GFX9-NEXT:    s_setpc_b64 s[30:31]
1521  %vec0 = call <4 x half> asm "; def $0", "=v"()
1522  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1523  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 3>
1524  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1525  ret void
1526}
1527
; Two-source shuffle with mask <4, 3>: both indices select from the second
; operand %extract31 (its elements 1 and 0). The checks expect a single asm
; def (presumably the one feeding %extract31, since %extract3 is not selected)
; and v_alignbit_b32 v0, v0, v0, 16 producing the stored dword.
1528define void @v_shuffle_v2f16_v3f16__4_3(ptr addrspace(1) inreg %ptr) {
1529; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_3:
1530; GFX900:       ; %bb.0:
1531; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1532; GFX900-NEXT:    ;;#ASMSTART
1533; GFX900-NEXT:    ; def v[0:1]
1534; GFX900-NEXT:    ;;#ASMEND
1535; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1536; GFX900-NEXT:    v_alignbit_b32 v0, v0, v0, 16
1537; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1538; GFX900-NEXT:    s_waitcnt vmcnt(0)
1539; GFX900-NEXT:    s_setpc_b64 s[30:31]
1540;
1541; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_3:
1542; GFX90A:       ; %bb.0:
1543; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1544; GFX90A-NEXT:    ;;#ASMSTART
1545; GFX90A-NEXT:    ; def v[0:1]
1546; GFX90A-NEXT:    ;;#ASMEND
1547; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1548; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v0, 16
1549; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1550; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1551; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1552;
1553; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_3:
1554; GFX940:       ; %bb.0:
1555; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556; GFX940-NEXT:    ;;#ASMSTART
1557; GFX940-NEXT:    ; def v[0:1]
1558; GFX940-NEXT:    ;;#ASMEND
1559; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1560; GFX940-NEXT:    v_alignbit_b32 v0, v0, v0, 16
1561; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1562; GFX940-NEXT:    s_waitcnt vmcnt(0)
1563; GFX940-NEXT:    s_setpc_b64 s[30:31]
1564  %vec0 = call <4 x half> asm "; def $0", "=v"()
1565  %vec1 = call <4 x half> asm "; def $0", "=v"()
1566  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1567  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1568  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 3>
1569  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1570  ret void
1571}
1572
; Two-source shuffle with mask <poison, 4>: only element 1 of the second
; operand %extract31 is selected. The checks expect a single asm def and v0 of
; that pair stored without any extra shuffle instruction.
1573define void @v_shuffle_v2f16_v3f16__u_4(ptr addrspace(1) inreg %ptr) {
1574; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_4:
1575; GFX900:       ; %bb.0:
1576; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1578; GFX900-NEXT:    ;;#ASMSTART
1579; GFX900-NEXT:    ; def v[0:1]
1580; GFX900-NEXT:    ;;#ASMEND
1581; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1582; GFX900-NEXT:    s_waitcnt vmcnt(0)
1583; GFX900-NEXT:    s_setpc_b64 s[30:31]
1584;
1585; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_4:
1586; GFX90A:       ; %bb.0:
1587; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1588; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1589; GFX90A-NEXT:    ;;#ASMSTART
1590; GFX90A-NEXT:    ; def v[0:1]
1591; GFX90A-NEXT:    ;;#ASMEND
1592; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1593; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1594; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1595;
1596; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_4:
1597; GFX940:       ; %bb.0:
1598; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1600; GFX940-NEXT:    ;;#ASMSTART
1601; GFX940-NEXT:    ; def v[0:1]
1602; GFX940-NEXT:    ;;#ASMEND
1603; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1604; GFX940-NEXT:    s_waitcnt vmcnt(0)
1605; GFX940-NEXT:    s_setpc_b64 s[30:31]
1606  %vec0 = call <4 x half> asm "; def $0", "=v"()
1607  %vec1 = call <4 x half> asm "; def $0", "=v"()
1608  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1609  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1610  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 poison, i32 4>
1611  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1612  ret void
1613}
1614
; Two-source shuffle with mask <0, 4>: element 0 of %extract3 and element 1 of
; %extract31. The checks expect both asm defs and a v_bfi_b32 with the 0xffff
; mask combining the first register of each asm-defined pair.
1615define void @v_shuffle_v2f16_v3f16__0_4(ptr addrspace(1) inreg %ptr) {
1616; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_4:
1617; GFX900:       ; %bb.0:
1618; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1619; GFX900-NEXT:    ;;#ASMSTART
1620; GFX900-NEXT:    ; def v[0:1]
1621; GFX900-NEXT:    ;;#ASMEND
1622; GFX900-NEXT:    s_mov_b32 s4, 0xffff
1623; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1624; GFX900-NEXT:    ;;#ASMSTART
1625; GFX900-NEXT:    ; def v[1:2]
1626; GFX900-NEXT:    ;;#ASMEND
1627; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
1628; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
1629; GFX900-NEXT:    s_waitcnt vmcnt(0)
1630; GFX900-NEXT:    s_setpc_b64 s[30:31]
1631;
1632; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_4:
1633; GFX90A:       ; %bb.0:
1634; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1635; GFX90A-NEXT:    ;;#ASMSTART
1636; GFX90A-NEXT:    ; def v[0:1]
1637; GFX90A-NEXT:    ;;#ASMEND
1638; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
1639; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1640; GFX90A-NEXT:    ;;#ASMSTART
1641; GFX90A-NEXT:    ; def v[2:3]
1642; GFX90A-NEXT:    ;;#ASMEND
1643; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v2
1644; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
1645; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1646; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1647;
1648; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_4:
1649; GFX940:       ; %bb.0:
1650; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1651; GFX940-NEXT:    ;;#ASMSTART
1652; GFX940-NEXT:    ; def v[0:1]
1653; GFX940-NEXT:    ;;#ASMEND
1654; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1655; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1656; GFX940-NEXT:    ;;#ASMSTART
1657; GFX940-NEXT:    ; def v[2:3]
1658; GFX940-NEXT:    ;;#ASMEND
1659; GFX940-NEXT:    s_nop 0
1660; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v2
1661; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
1662; GFX940-NEXT:    s_waitcnt vmcnt(0)
1663; GFX940-NEXT:    s_setpc_b64 s[30:31]
1664  %vec0 = call <4 x half> asm "; def $0", "=v"()
1665  %vec1 = call <4 x half> asm "; def $0", "=v"()
1666  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1667  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1668  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 0, i32 4>
1669  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1670  ret void
1671}
1672
; Two-source shuffle with mask <1, 4>: element 1 of %extract3 and element 1 of
; %extract31. The checks expect both asm defs and a v_perm_b32 with selector
; 0x7060302 over the first register of each asm-defined pair.
1673define void @v_shuffle_v2f16_v3f16__1_4(ptr addrspace(1) inreg %ptr) {
1674; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_4:
1675; GFX900:       ; %bb.0:
1676; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1677; GFX900-NEXT:    ;;#ASMSTART
1678; GFX900-NEXT:    ; def v[0:1]
1679; GFX900-NEXT:    ;;#ASMEND
1680; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1681; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1682; GFX900-NEXT:    ;;#ASMSTART
1683; GFX900-NEXT:    ; def v[1:2]
1684; GFX900-NEXT:    ;;#ASMEND
1685; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
1686; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
1687; GFX900-NEXT:    s_waitcnt vmcnt(0)
1688; GFX900-NEXT:    s_setpc_b64 s[30:31]
1689;
1690; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_4:
1691; GFX90A:       ; %bb.0:
1692; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693; GFX90A-NEXT:    ;;#ASMSTART
1694; GFX90A-NEXT:    ; def v[0:1]
1695; GFX90A-NEXT:    ;;#ASMEND
1696; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1697; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1698; GFX90A-NEXT:    ;;#ASMSTART
1699; GFX90A-NEXT:    ; def v[2:3]
1700; GFX90A-NEXT:    ;;#ASMEND
1701; GFX90A-NEXT:    v_perm_b32 v0, v2, v0, s4
1702; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
1703; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1704; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1705;
1706; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_4:
1707; GFX940:       ; %bb.0:
1708; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1709; GFX940-NEXT:    ;;#ASMSTART
1710; GFX940-NEXT:    ; def v[0:1]
1711; GFX940-NEXT:    ;;#ASMEND
1712; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1713; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1714; GFX940-NEXT:    ;;#ASMSTART
1715; GFX940-NEXT:    ; def v[2:3]
1716; GFX940-NEXT:    ;;#ASMEND
1717; GFX940-NEXT:    s_nop 0
1718; GFX940-NEXT:    v_perm_b32 v0, v2, v0, s2
1719; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
1720; GFX940-NEXT:    s_waitcnt vmcnt(0)
1721; GFX940-NEXT:    s_setpc_b64 s[30:31]
1722  %vec0 = call <4 x half> asm "; def $0", "=v"()
1723  %vec1 = call <4 x half> asm "; def $0", "=v"()
1724  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1725  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1726  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 1, i32 4>
1727  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1728  ret void
1729}
1730
; Two-source shuffle with mask <2, 4>: element 2 of %extract3 and element 1 of
; %extract31. The checks expect both asm defs and a v_bfi_b32 with the 0xffff
; mask combining v1 (second register of the first pair) with v2 (first
; register of the second pair).
1731define void @v_shuffle_v2f16_v3f16__2_4(ptr addrspace(1) inreg %ptr) {
1732; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_4:
1733; GFX900:       ; %bb.0:
1734; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1735; GFX900-NEXT:    ;;#ASMSTART
1736; GFX900-NEXT:    ; def v[0:1]
1737; GFX900-NEXT:    ;;#ASMEND
1738; GFX900-NEXT:    s_mov_b32 s4, 0xffff
1739; GFX900-NEXT:    v_mov_b32_e32 v4, 0
1740; GFX900-NEXT:    ;;#ASMSTART
1741; GFX900-NEXT:    ; def v[2:3]
1742; GFX900-NEXT:    ;;#ASMEND
1743; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v2
1744; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
1745; GFX900-NEXT:    s_waitcnt vmcnt(0)
1746; GFX900-NEXT:    s_setpc_b64 s[30:31]
1747;
1748; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_4:
1749; GFX90A:       ; %bb.0:
1750; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1751; GFX90A-NEXT:    ;;#ASMSTART
1752; GFX90A-NEXT:    ; def v[0:1]
1753; GFX90A-NEXT:    ;;#ASMEND
1754; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
1755; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1756; GFX90A-NEXT:    ;;#ASMSTART
1757; GFX90A-NEXT:    ; def v[2:3]
1758; GFX90A-NEXT:    ;;#ASMEND
1759; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v2
1760; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
1761; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1762; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1763;
1764; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_4:
1765; GFX940:       ; %bb.0:
1766; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1767; GFX940-NEXT:    ;;#ASMSTART
1768; GFX940-NEXT:    ; def v[0:1]
1769; GFX940-NEXT:    ;;#ASMEND
1770; GFX940-NEXT:    s_mov_b32 s2, 0xffff
1771; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1772; GFX940-NEXT:    ;;#ASMSTART
1773; GFX940-NEXT:    ; def v[2:3]
1774; GFX940-NEXT:    ;;#ASMEND
1775; GFX940-NEXT:    s_nop 0
1776; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v2
1777; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
1778; GFX940-NEXT:    s_waitcnt vmcnt(0)
1779; GFX940-NEXT:    s_setpc_b64 s[30:31]
1780  %vec0 = call <4 x half> asm "; def $0", "=v"()
1781  %vec1 = call <4 x half> asm "; def $0", "=v"()
1782  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1783  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1784  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 2, i32 4>
1785  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1786  ret void
1787}
1788
; Two-source shuffle with mask <3, 4>: elements 0 and 1 of the second operand
; %extract31, in order. The checks expect a single asm def and v0 of that pair
; stored without any extra shuffle instruction.
1789define void @v_shuffle_v2f16_v3f16__3_4(ptr addrspace(1) inreg %ptr) {
1790; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_4:
1791; GFX900:       ; %bb.0:
1792; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1793; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1794; GFX900-NEXT:    ;;#ASMSTART
1795; GFX900-NEXT:    ; def v[0:1]
1796; GFX900-NEXT:    ;;#ASMEND
1797; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1798; GFX900-NEXT:    s_waitcnt vmcnt(0)
1799; GFX900-NEXT:    s_setpc_b64 s[30:31]
1800;
1801; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_4:
1802; GFX90A:       ; %bb.0:
1803; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1805; GFX90A-NEXT:    ;;#ASMSTART
1806; GFX90A-NEXT:    ; def v[0:1]
1807; GFX90A-NEXT:    ;;#ASMEND
1808; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1809; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1810; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1811;
1812; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_4:
1813; GFX940:       ; %bb.0:
1814; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1815; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1816; GFX940-NEXT:    ;;#ASMSTART
1817; GFX940-NEXT:    ; def v[0:1]
1818; GFX940-NEXT:    ;;#ASMEND
1819; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1820; GFX940-NEXT:    s_waitcnt vmcnt(0)
1821; GFX940-NEXT:    s_setpc_b64 s[30:31]
1822  %vec0 = call <4 x half> asm "; def $0", "=v"()
1823  %vec1 = call <4 x half> asm "; def $0", "=v"()
1824  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1825  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1826  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 3, i32 4>
1827  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1828  ret void
1829}
1830
; Two-source shuffle with mask <4, 4>: element 1 of the second operand
; %extract31 replicated into both result lanes. The checks expect a single asm
; def and v_perm_b32 v0, v0, v0 with selector 0x7060302.
1831define void @v_shuffle_v2f16_v3f16__4_4(ptr addrspace(1) inreg %ptr) {
1832; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_4:
1833; GFX900:       ; %bb.0:
1834; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1835; GFX900-NEXT:    ;;#ASMSTART
1836; GFX900-NEXT:    ; def v[0:1]
1837; GFX900-NEXT:    ;;#ASMEND
1838; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
1839; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1840; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
1841; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1842; GFX900-NEXT:    s_waitcnt vmcnt(0)
1843; GFX900-NEXT:    s_setpc_b64 s[30:31]
1844;
1845; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_4:
1846; GFX90A:       ; %bb.0:
1847; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1848; GFX90A-NEXT:    ;;#ASMSTART
1849; GFX90A-NEXT:    ; def v[0:1]
1850; GFX90A-NEXT:    ;;#ASMEND
1851; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
1852; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1853; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
1854; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1855; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1856; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1857;
1858; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_4:
1859; GFX940:       ; %bb.0:
1860; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1861; GFX940-NEXT:    ;;#ASMSTART
1862; GFX940-NEXT:    ; def v[0:1]
1863; GFX940-NEXT:    ;;#ASMEND
1864; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
1865; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1866; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
1867; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1868; GFX940-NEXT:    s_waitcnt vmcnt(0)
1869; GFX940-NEXT:    s_setpc_b64 s[30:31]
1870  %vec0 = call <4 x half> asm "; def $0", "=v"()
1871  %vec1 = call <4 x half> asm "; def $0", "=v"()
1872  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1873  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1874  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 4>
1875  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1876  ret void
1877}
1878
; Two-source shuffle with mask <poison, 5>: only element 2 of the second
; operand %extract31 is selected, into result lane 1. The checks expect a
; single asm def and v_lshlrev_b32 v0, 16, v1 producing the stored dword.
1879define void @v_shuffle_v2f16_v3f16__u_5(ptr addrspace(1) inreg %ptr) {
1880; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_5:
1881; GFX900:       ; %bb.0:
1882; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1883; GFX900-NEXT:    ;;#ASMSTART
1884; GFX900-NEXT:    ; def v[0:1]
1885; GFX900-NEXT:    ;;#ASMEND
1886; GFX900-NEXT:    v_mov_b32_e32 v2, 0
1887; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
1888; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
1889; GFX900-NEXT:    s_waitcnt vmcnt(0)
1890; GFX900-NEXT:    s_setpc_b64 s[30:31]
1891;
1892; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_5:
1893; GFX90A:       ; %bb.0:
1894; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1895; GFX90A-NEXT:    ;;#ASMSTART
1896; GFX90A-NEXT:    ; def v[0:1]
1897; GFX90A-NEXT:    ;;#ASMEND
1898; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
1899; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
1900; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
1901; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1902; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1903;
1904; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_5:
1905; GFX940:       ; %bb.0:
1906; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1907; GFX940-NEXT:    ;;#ASMSTART
1908; GFX940-NEXT:    ; def v[0:1]
1909; GFX940-NEXT:    ;;#ASMEND
1910; GFX940-NEXT:    v_mov_b32_e32 v2, 0
1911; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
1912; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
1913; GFX940-NEXT:    s_waitcnt vmcnt(0)
1914; GFX940-NEXT:    s_setpc_b64 s[30:31]
1915  %vec0 = call <4 x half> asm "; def $0", "=v"()
1916  %vec1 = call <4 x half> asm "; def $0", "=v"()
1917  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1918  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1919  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 poison, i32 5>
1920  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1921  ret void
1922}
1923
; Two-source shuffle with mask <0, 5>: element 0 of %extract3 and element 2 of
; %extract31. The checks expect both asm defs and a v_perm_b32 with selector
; 0x5040100 over the second register of the second pair and the first register
; of the first pair.
1924define void @v_shuffle_v2f16_v3f16__0_5(ptr addrspace(1) inreg %ptr) {
1925; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_5:
1926; GFX900:       ; %bb.0:
1927; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1928; GFX900-NEXT:    ;;#ASMSTART
1929; GFX900-NEXT:    ; def v[0:1]
1930; GFX900-NEXT:    ;;#ASMEND
1931; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
1932; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1933; GFX900-NEXT:    ;;#ASMSTART
1934; GFX900-NEXT:    ; def v[1:2]
1935; GFX900-NEXT:    ;;#ASMEND
1936; GFX900-NEXT:    v_perm_b32 v0, v2, v0, s4
1937; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
1938; GFX900-NEXT:    s_waitcnt vmcnt(0)
1939; GFX900-NEXT:    s_setpc_b64 s[30:31]
1940;
1941; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_5:
1942; GFX90A:       ; %bb.0:
1943; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1944; GFX90A-NEXT:    ;;#ASMSTART
1945; GFX90A-NEXT:    ; def v[0:1]
1946; GFX90A-NEXT:    ;;#ASMEND
1947; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
1948; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
1949; GFX90A-NEXT:    ;;#ASMSTART
1950; GFX90A-NEXT:    ; def v[2:3]
1951; GFX90A-NEXT:    ;;#ASMEND
1952; GFX90A-NEXT:    v_perm_b32 v0, v3, v0, s4
1953; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
1954; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1955; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1956;
1957; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_5:
1958; GFX940:       ; %bb.0:
1959; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1960; GFX940-NEXT:    ;;#ASMSTART
1961; GFX940-NEXT:    ; def v[0:1]
1962; GFX940-NEXT:    ;;#ASMEND
1963; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
1964; GFX940-NEXT:    v_mov_b32_e32 v4, 0
1965; GFX940-NEXT:    ;;#ASMSTART
1966; GFX940-NEXT:    ; def v[2:3]
1967; GFX940-NEXT:    ;;#ASMEND
1968; GFX940-NEXT:    s_nop 0
1969; GFX940-NEXT:    v_perm_b32 v0, v3, v0, s2
1970; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
1971; GFX940-NEXT:    s_waitcnt vmcnt(0)
1972; GFX940-NEXT:    s_setpc_b64 s[30:31]
1973  %vec0 = call <4 x half> asm "; def $0", "=v"()
1974  %vec1 = call <4 x half> asm "; def $0", "=v"()
1975  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1976  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
1977  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 0, i32 5>
1978  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
1979  ret void
1980}
1981
; Two-source shuffle with mask <1, 5>: element 1 of %extract3 and element 2 of
; %extract31. The checks expect both asm defs and a v_alignbit_b32 by 16 over
; the second register of the second pair and the first register of the first
; pair.
1982define void @v_shuffle_v2f16_v3f16__1_5(ptr addrspace(1) inreg %ptr) {
1983; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_5:
1984; GFX900:       ; %bb.0:
1985; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1986; GFX900-NEXT:    ;;#ASMSTART
1987; GFX900-NEXT:    ; def v[0:1]
1988; GFX900-NEXT:    ;;#ASMEND
1989; GFX900-NEXT:    v_mov_b32_e32 v3, 0
1990; GFX900-NEXT:    ;;#ASMSTART
1991; GFX900-NEXT:    ; def v[1:2]
1992; GFX900-NEXT:    ;;#ASMEND
1993; GFX900-NEXT:    v_alignbit_b32 v0, v2, v0, 16
1994; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
1995; GFX900-NEXT:    s_waitcnt vmcnt(0)
1996; GFX900-NEXT:    s_setpc_b64 s[30:31]
1997;
1998; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_5:
1999; GFX90A:       ; %bb.0:
2000; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2001; GFX90A-NEXT:    ;;#ASMSTART
2002; GFX90A-NEXT:    ; def v[0:1]
2003; GFX90A-NEXT:    ;;#ASMEND
2004; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2005; GFX90A-NEXT:    ;;#ASMSTART
2006; GFX90A-NEXT:    ; def v[2:3]
2007; GFX90A-NEXT:    ;;#ASMEND
2008; GFX90A-NEXT:    v_alignbit_b32 v0, v3, v0, 16
2009; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
2010; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2011; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2012;
2013; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_5:
2014; GFX940:       ; %bb.0:
2015; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2016; GFX940-NEXT:    ;;#ASMSTART
2017; GFX940-NEXT:    ; def v[0:1]
2018; GFX940-NEXT:    ;;#ASMEND
2019; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2020; GFX940-NEXT:    ;;#ASMSTART
2021; GFX940-NEXT:    ; def v[2:3]
2022; GFX940-NEXT:    ;;#ASMEND
2023; GFX940-NEXT:    s_nop 0
2024; GFX940-NEXT:    v_alignbit_b32 v0, v3, v0, 16
2025; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
2026; GFX940-NEXT:    s_waitcnt vmcnt(0)
2027; GFX940-NEXT:    s_setpc_b64 s[30:31]
2028  %vec0 = call <4 x half> asm "; def $0", "=v"()
2029  %vec1 = call <4 x half> asm "; def $0", "=v"()
2030  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2031  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2032  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 1, i32 5>
2033  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
2034  ret void
2035}
2036
; Two-source shuffle with mask <2, 5>: element 2 of %extract3 and element 2 of
; %extract31. The checks expect both asm defs and a v_perm_b32 with selector
; 0x5040100 over v3 and v1, the second register of each asm-defined pair.
2037define void @v_shuffle_v2f16_v3f16__2_5(ptr addrspace(1) inreg %ptr) {
2038; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_5:
2039; GFX900:       ; %bb.0:
2040; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2041; GFX900-NEXT:    ;;#ASMSTART
2042; GFX900-NEXT:    ; def v[0:1]
2043; GFX900-NEXT:    ;;#ASMEND
2044; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2045; GFX900-NEXT:    v_mov_b32_e32 v4, 0
2046; GFX900-NEXT:    ;;#ASMSTART
2047; GFX900-NEXT:    ; def v[2:3]
2048; GFX900-NEXT:    ;;#ASMEND
2049; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
2050; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
2051; GFX900-NEXT:    s_waitcnt vmcnt(0)
2052; GFX900-NEXT:    s_setpc_b64 s[30:31]
2053;
2054; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_5:
2055; GFX90A:       ; %bb.0:
2056; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2057; GFX90A-NEXT:    ;;#ASMSTART
2058; GFX90A-NEXT:    ; def v[0:1]
2059; GFX90A-NEXT:    ;;#ASMEND
2060; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2061; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
2062; GFX90A-NEXT:    ;;#ASMSTART
2063; GFX90A-NEXT:    ; def v[2:3]
2064; GFX90A-NEXT:    ;;#ASMEND
2065; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
2066; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
2067; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2068; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2069;
2070; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_5:
2071; GFX940:       ; %bb.0:
2072; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2073; GFX940-NEXT:    ;;#ASMSTART
2074; GFX940-NEXT:    ; def v[0:1]
2075; GFX940-NEXT:    ;;#ASMEND
2076; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2077; GFX940-NEXT:    v_mov_b32_e32 v4, 0
2078; GFX940-NEXT:    ;;#ASMSTART
2079; GFX940-NEXT:    ; def v[2:3]
2080; GFX940-NEXT:    ;;#ASMEND
2081; GFX940-NEXT:    s_nop 0
2082; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
2083; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
2084; GFX940-NEXT:    s_waitcnt vmcnt(0)
2085; GFX940-NEXT:    s_setpc_b64 s[30:31]
2086  %vec0 = call <4 x half> asm "; def $0", "=v"()
2087  %vec1 = call <4 x half> asm "; def $0", "=v"()
2088  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2089  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2090  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 2, i32 5>
2091  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
2092  ret void
2093}
2094
; Two-source shuffle with mask <3, 5>: elements 0 and 2 of the second operand
; %extract31. The checks expect a single asm def and v_perm_b32 v0, v1, v0
; with selector 0x5040100 over the two registers of that pair.
2095define void @v_shuffle_v2f16_v3f16__3_5(ptr addrspace(1) inreg %ptr) {
2096; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_5:
2097; GFX900:       ; %bb.0:
2098; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2099; GFX900-NEXT:    ;;#ASMSTART
2100; GFX900-NEXT:    ; def v[0:1]
2101; GFX900-NEXT:    ;;#ASMEND
2102; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
2103; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2104; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
2105; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2106; GFX900-NEXT:    s_waitcnt vmcnt(0)
2107; GFX900-NEXT:    s_setpc_b64 s[30:31]
2108;
2109; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_5:
2110; GFX90A:       ; %bb.0:
2111; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2112; GFX90A-NEXT:    ;;#ASMSTART
2113; GFX90A-NEXT:    ; def v[0:1]
2114; GFX90A-NEXT:    ;;#ASMEND
2115; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
2116; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2117; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
2118; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2119; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2120; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2121;
2122; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_5:
2123; GFX940:       ; %bb.0:
2124; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2125; GFX940-NEXT:    ;;#ASMSTART
2126; GFX940-NEXT:    ; def v[0:1]
2127; GFX940-NEXT:    ;;#ASMEND
2128; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
2129; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2130; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
2131; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2132; GFX940-NEXT:    s_waitcnt vmcnt(0)
2133; GFX940-NEXT:    s_setpc_b64 s[30:31]
2134  %vec0 = call <4 x half> asm "; def $0", "=v"()
2135  %vec1 = call <4 x half> asm "; def $0", "=v"()
2136  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2137  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2138  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 3, i32 5>
2139  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
2140  ret void
2141}
2142
; Two-source shuffle with mask <4, 5>: elements 1 and 2 of the second operand
; %extract31. The checks expect a single asm def and v_alignbit_b32 v0, v1,
; v0, 16 over the two registers of that pair.
2143define void @v_shuffle_v2f16_v3f16__4_5(ptr addrspace(1) inreg %ptr) {
2144; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_5:
2145; GFX900:       ; %bb.0:
2146; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2147; GFX900-NEXT:    ;;#ASMSTART
2148; GFX900-NEXT:    ; def v[0:1]
2149; GFX900-NEXT:    ;;#ASMEND
2150; GFX900-NEXT:    v_mov_b32_e32 v2, 0
2151; GFX900-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2152; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
2153; GFX900-NEXT:    s_waitcnt vmcnt(0)
2154; GFX900-NEXT:    s_setpc_b64 s[30:31]
2155;
2156; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_5:
2157; GFX90A:       ; %bb.0:
2158; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2159; GFX90A-NEXT:    ;;#ASMSTART
2160; GFX90A-NEXT:    ; def v[0:1]
2161; GFX90A-NEXT:    ;;#ASMEND
2162; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
2163; GFX90A-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2164; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
2165; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2166; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2167;
2168; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_5:
2169; GFX940:       ; %bb.0:
2170; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2171; GFX940-NEXT:    ;;#ASMSTART
2172; GFX940-NEXT:    ; def v[0:1]
2173; GFX940-NEXT:    ;;#ASMEND
2174; GFX940-NEXT:    v_mov_b32_e32 v2, 0
2175; GFX940-NEXT:    v_alignbit_b32 v0, v1, v0, 16
2176; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
2177; GFX940-NEXT:    s_waitcnt vmcnt(0)
2178; GFX940-NEXT:    s_setpc_b64 s[30:31]
2179  %vec0 = call <4 x half> asm "; def $0", "=v"()
2180  %vec1 = call <4 x half> asm "; def $0", "=v"()
2181  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2182  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2183  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 5>
2184  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
2185  ret void
2186}
2187
; Scalar-register ("=s") variant with a fully poison shuffle mask. The result
; is passed to a side-effecting asm "use" constrained to s8 instead of being
; stored. The checks expect only the asm use of s8, with no asm def emitted.
2188define void @s_shuffle_v2f16_v3f16__u_u() {
2189; GFX9-LABEL: s_shuffle_v2f16_v3f16__u_u:
2190; GFX9:       ; %bb.0:
2191; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2192; GFX9-NEXT:    ;;#ASMSTART
2193; GFX9-NEXT:    ; use s8
2194; GFX9-NEXT:    ;;#ASMEND
2195; GFX9-NEXT:    s_setpc_b64 s[30:31]
2196  %vec0 = call <4 x half> asm "; def $0", "=s"()
2197  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2198  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> poison
2199  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2200  ret void
2201}
2202
; Scalar-register ("=s") variant with mask <0, poison>. The result is passed
; to a side-effecting asm "use" constrained to s8. The checks expect the asm
; def to land in s[8:9] so that s8 is used directly with no extra instruction;
; the gfx940 checks additionally expect an s_nop 0 between the def and the use.
2203define void @s_shuffle_v2f16_v3f16__0_u() {
2204; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_u:
2205; GFX900:       ; %bb.0:
2206; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2207; GFX900-NEXT:    ;;#ASMSTART
2208; GFX900-NEXT:    ; def s[8:9]
2209; GFX900-NEXT:    ;;#ASMEND
2210; GFX900-NEXT:    ;;#ASMSTART
2211; GFX900-NEXT:    ; use s8
2212; GFX900-NEXT:    ;;#ASMEND
2213; GFX900-NEXT:    s_setpc_b64 s[30:31]
2214;
2215; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_u:
2216; GFX90A:       ; %bb.0:
2217; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2218; GFX90A-NEXT:    ;;#ASMSTART
2219; GFX90A-NEXT:    ; def s[8:9]
2220; GFX90A-NEXT:    ;;#ASMEND
2221; GFX90A-NEXT:    ;;#ASMSTART
2222; GFX90A-NEXT:    ; use s8
2223; GFX90A-NEXT:    ;;#ASMEND
2224; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2225;
2226; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_u:
2227; GFX940:       ; %bb.0:
2228; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2229; GFX940-NEXT:    ;;#ASMSTART
2230; GFX940-NEXT:    ; def s[8:9]
2231; GFX940-NEXT:    ;;#ASMEND
2232; GFX940-NEXT:    s_nop 0
2233; GFX940-NEXT:    ;;#ASMSTART
2234; GFX940-NEXT:    ; use s8
2235; GFX940-NEXT:    ;;#ASMEND
2236; GFX940-NEXT:    s_setpc_b64 s[30:31]
2237  %vec0 = call <4 x half> asm "; def $0", "=s"()
2238  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2239  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 poison>
2240  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2241  ret void
2242}
2243
; Scalar-register variant, mask <1, poison>: lane 0 takes element 1 of %vec0,
; lane 1 is undefined. The checks expect a single s_lshr_b32 by 16 of the first
; register of the def pair, written straight into s8.
2244define void @s_shuffle_v2f16_v3f16__1_u() {
2245; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_u:
2246; GFX900:       ; %bb.0:
2247; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2248; GFX900-NEXT:    ;;#ASMSTART
2249; GFX900-NEXT:    ; def s[4:5]
2250; GFX900-NEXT:    ;;#ASMEND
2251; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
2252; GFX900-NEXT:    ;;#ASMSTART
2253; GFX900-NEXT:    ; use s8
2254; GFX900-NEXT:    ;;#ASMEND
2255; GFX900-NEXT:    s_setpc_b64 s[30:31]
2256;
2257; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_u:
2258; GFX90A:       ; %bb.0:
2259; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2260; GFX90A-NEXT:    ;;#ASMSTART
2261; GFX90A-NEXT:    ; def s[4:5]
2262; GFX90A-NEXT:    ;;#ASMEND
2263; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
2264; GFX90A-NEXT:    ;;#ASMSTART
2265; GFX90A-NEXT:    ; use s8
2266; GFX90A-NEXT:    ;;#ASMEND
2267; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2268;
2269; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_u:
2270; GFX940:       ; %bb.0:
2271; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2272; GFX940-NEXT:    ;;#ASMSTART
2273; GFX940-NEXT:    ; def s[0:1]
2274; GFX940-NEXT:    ;;#ASMEND
2275; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
2276; GFX940-NEXT:    ;;#ASMSTART
2277; GFX940-NEXT:    ; use s8
2278; GFX940-NEXT:    ;;#ASMEND
2279; GFX940-NEXT:    s_setpc_b64 s[30:31]
2280  %vec0 = call <4 x half> asm "; def $0", "=s"()
2281  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2282  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 poison>
2283  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2284  ret void
2285}
2286
; Scalar-register variant, mask <2, poison>: lane 0 takes element 2 of %vec0,
; lane 1 is undefined. The checks expect a plain s_mov_b32 from the second
; register of the def pair into s8, with no shift.
2287define void @s_shuffle_v2f16_v3f16__2_u() {
2288; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_u:
2289; GFX900:       ; %bb.0:
2290; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2291; GFX900-NEXT:    ;;#ASMSTART
2292; GFX900-NEXT:    ; def s[4:5]
2293; GFX900-NEXT:    ;;#ASMEND
2294; GFX900-NEXT:    s_mov_b32 s8, s5
2295; GFX900-NEXT:    ;;#ASMSTART
2296; GFX900-NEXT:    ; use s8
2297; GFX900-NEXT:    ;;#ASMEND
2298; GFX900-NEXT:    s_setpc_b64 s[30:31]
2299;
2300; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_u:
2301; GFX90A:       ; %bb.0:
2302; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2303; GFX90A-NEXT:    ;;#ASMSTART
2304; GFX90A-NEXT:    ; def s[4:5]
2305; GFX90A-NEXT:    ;;#ASMEND
2306; GFX90A-NEXT:    s_mov_b32 s8, s5
2307; GFX90A-NEXT:    ;;#ASMSTART
2308; GFX90A-NEXT:    ; use s8
2309; GFX90A-NEXT:    ;;#ASMEND
2310; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2311;
2312; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_u:
2313; GFX940:       ; %bb.0:
2314; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2315; GFX940-NEXT:    ;;#ASMSTART
2316; GFX940-NEXT:    ; def s[0:1]
2317; GFX940-NEXT:    ;;#ASMEND
2318; GFX940-NEXT:    s_mov_b32 s8, s1
2319; GFX940-NEXT:    ;;#ASMSTART
2320; GFX940-NEXT:    ; use s8
2321; GFX940-NEXT:    ;;#ASMEND
2322; GFX940-NEXT:    s_setpc_b64 s[30:31]
2323  %vec0 = call <4 x half> asm "; def $0", "=s"()
2324  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2325  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 poison>
2326  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2327  ret void
2328}
2329
; Scalar-register variant, mask <3, poison> with a poison second operand:
; index 3 names element 0 of the second operand, so neither result lane carries
; a defined value. The checks expect no def and no data movement, only the
; "; use s8" asm block and the return.
2330define void @s_shuffle_v2f16_v3f16__3_u() {
2331; GFX9-LABEL: s_shuffle_v2f16_v3f16__3_u:
2332; GFX9:       ; %bb.0:
2333; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334; GFX9-NEXT:    ;;#ASMSTART
2335; GFX9-NEXT:    ; use s8
2336; GFX9-NEXT:    ;;#ASMEND
2337; GFX9-NEXT:    s_setpc_b64 s[30:31]
2338  %vec0 = call <4 x half> asm "; def $0", "=s"()
2339  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2340  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 poison>
2341  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2342  ret void
2343}
2344
; Scalar-register variant with two sources, mask <4, poison>: index 4 names
; element 1 of the second operand (%extract31, taken from %vec1); lane 1 is
; undefined. Only one source contributes, and the checks expect a single def
; followed by s_lshr_b32 by 16 into s8.
2345define void @s_shuffle_v2f16_v3f16__4_u() {
2346; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_u:
2347; GFX900:       ; %bb.0:
2348; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2349; GFX900-NEXT:    ;;#ASMSTART
2350; GFX900-NEXT:    ; def s[4:5]
2351; GFX900-NEXT:    ;;#ASMEND
2352; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
2353; GFX900-NEXT:    ;;#ASMSTART
2354; GFX900-NEXT:    ; use s8
2355; GFX900-NEXT:    ;;#ASMEND
2356; GFX900-NEXT:    s_setpc_b64 s[30:31]
2357;
2358; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_u:
2359; GFX90A:       ; %bb.0:
2360; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2361; GFX90A-NEXT:    ;;#ASMSTART
2362; GFX90A-NEXT:    ; def s[4:5]
2363; GFX90A-NEXT:    ;;#ASMEND
2364; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
2365; GFX90A-NEXT:    ;;#ASMSTART
2366; GFX90A-NEXT:    ; use s8
2367; GFX90A-NEXT:    ;;#ASMEND
2368; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2369;
2370; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_u:
2371; GFX940:       ; %bb.0:
2372; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2373; GFX940-NEXT:    ;;#ASMSTART
2374; GFX940-NEXT:    ; def s[0:1]
2375; GFX940-NEXT:    ;;#ASMEND
2376; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
2377; GFX940-NEXT:    ;;#ASMSTART
2378; GFX940-NEXT:    ; use s8
2379; GFX940-NEXT:    ;;#ASMEND
2380; GFX940-NEXT:    s_setpc_b64 s[30:31]
2381  %vec0 = call <4 x half> asm "; def $0", "=s"()
2382  %vec1 = call <4 x half> asm "; def $0", "=s"()
2383  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2384  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2385  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 poison>
2386  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2387  ret void
2388}
2389
; Scalar-register variant with two sources, mask <5, poison>: index 5 names
; element 2 of the second operand (%extract31, taken from %vec1); lane 1 is
; undefined. The checks expect a single def and a plain s_mov_b32 from the
; second register of that pair into s8.
2390define void @s_shuffle_v2f16_v3f16__5_u() {
2391; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_u:
2392; GFX900:       ; %bb.0:
2393; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2394; GFX900-NEXT:    ;;#ASMSTART
2395; GFX900-NEXT:    ; def s[4:5]
2396; GFX900-NEXT:    ;;#ASMEND
2397; GFX900-NEXT:    s_mov_b32 s8, s5
2398; GFX900-NEXT:    ;;#ASMSTART
2399; GFX900-NEXT:    ; use s8
2400; GFX900-NEXT:    ;;#ASMEND
2401; GFX900-NEXT:    s_setpc_b64 s[30:31]
2402;
2403; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_u:
2404; GFX90A:       ; %bb.0:
2405; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2406; GFX90A-NEXT:    ;;#ASMSTART
2407; GFX90A-NEXT:    ; def s[4:5]
2408; GFX90A-NEXT:    ;;#ASMEND
2409; GFX90A-NEXT:    s_mov_b32 s8, s5
2410; GFX90A-NEXT:    ;;#ASMSTART
2411; GFX90A-NEXT:    ; use s8
2412; GFX90A-NEXT:    ;;#ASMEND
2413; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2414;
2415; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_u:
2416; GFX940:       ; %bb.0:
2417; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2418; GFX940-NEXT:    ;;#ASMSTART
2419; GFX940-NEXT:    ; def s[0:1]
2420; GFX940-NEXT:    ;;#ASMEND
2421; GFX940-NEXT:    s_mov_b32 s8, s1
2422; GFX940-NEXT:    ;;#ASMSTART
2423; GFX940-NEXT:    ; use s8
2424; GFX940-NEXT:    ;;#ASMEND
2425; GFX940-NEXT:    s_setpc_b64 s[30:31]
2426  %vec0 = call <4 x half> asm "; def $0", "=s"()
2427  %vec1 = call <4 x half> asm "; def $0", "=s"()
2428  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2429  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2430  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 poison>
2431  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2432  ret void
2433}
2434
; Scalar-register variant with two sources, mask <5, 0>: lane 0 takes element 2
; of the second operand (%extract31), lane 1 takes element 0 of the first
; operand (%extract3). Both defs are expected, and a single s_pack_ll_b32_b16
; combines one register from each pair into s8.
2435define void @s_shuffle_v2f16_v3f16__5_0() {
2436; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_0:
2437; GFX900:       ; %bb.0:
2438; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2439; GFX900-NEXT:    ;;#ASMSTART
2440; GFX900-NEXT:    ; def s[4:5]
2441; GFX900-NEXT:    ;;#ASMEND
2442; GFX900-NEXT:    ;;#ASMSTART
2443; GFX900-NEXT:    ; def s[6:7]
2444; GFX900-NEXT:    ;;#ASMEND
2445; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
2446; GFX900-NEXT:    ;;#ASMSTART
2447; GFX900-NEXT:    ; use s8
2448; GFX900-NEXT:    ;;#ASMEND
2449; GFX900-NEXT:    s_setpc_b64 s[30:31]
2450;
2451; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_0:
2452; GFX90A:       ; %bb.0:
2453; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2454; GFX90A-NEXT:    ;;#ASMSTART
2455; GFX90A-NEXT:    ; def s[4:5]
2456; GFX90A-NEXT:    ;;#ASMEND
2457; GFX90A-NEXT:    ;;#ASMSTART
2458; GFX90A-NEXT:    ; def s[6:7]
2459; GFX90A-NEXT:    ;;#ASMEND
2460; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
2461; GFX90A-NEXT:    ;;#ASMSTART
2462; GFX90A-NEXT:    ; use s8
2463; GFX90A-NEXT:    ;;#ASMEND
2464; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2465;
2466; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_0:
2467; GFX940:       ; %bb.0:
2468; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2469; GFX940-NEXT:    ;;#ASMSTART
2470; GFX940-NEXT:    ; def s[0:1]
2471; GFX940-NEXT:    ;;#ASMEND
2472; GFX940-NEXT:    ;;#ASMSTART
2473; GFX940-NEXT:    ; def s[2:3]
2474; GFX940-NEXT:    ;;#ASMEND
2475; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
2476; GFX940-NEXT:    ;;#ASMSTART
2477; GFX940-NEXT:    ; use s8
2478; GFX940-NEXT:    ;;#ASMEND
2479; GFX940-NEXT:    s_setpc_b64 s[30:31]
2480  %vec0 = call <4 x half> asm "; def $0", "=s"()
2481  %vec1 = call <4 x half> asm "; def $0", "=s"()
2482  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2483  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2484  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 0>
2485  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2486  ret void
2487}
2488
; Scalar-register variant with two sources, mask <5, 1>: lane 0 takes element 2
; of the second operand (%extract31), lane 1 takes element 1 of the first
; operand (%extract3). The checks expect an s_lshr_b32 by 16 on the first def
; (emitted before the second def) and then s_pack_ll_b32_b16 into s8.
2489define void @s_shuffle_v2f16_v3f16__5_1() {
2490; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_1:
2491; GFX900:       ; %bb.0:
2492; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2493; GFX900-NEXT:    ;;#ASMSTART
2494; GFX900-NEXT:    ; def s[4:5]
2495; GFX900-NEXT:    ;;#ASMEND
2496; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
2497; GFX900-NEXT:    ;;#ASMSTART
2498; GFX900-NEXT:    ; def s[6:7]
2499; GFX900-NEXT:    ;;#ASMEND
2500; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
2501; GFX900-NEXT:    ;;#ASMSTART
2502; GFX900-NEXT:    ; use s8
2503; GFX900-NEXT:    ;;#ASMEND
2504; GFX900-NEXT:    s_setpc_b64 s[30:31]
2505;
2506; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_1:
2507; GFX90A:       ; %bb.0:
2508; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2509; GFX90A-NEXT:    ;;#ASMSTART
2510; GFX90A-NEXT:    ; def s[4:5]
2511; GFX90A-NEXT:    ;;#ASMEND
2512; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
2513; GFX90A-NEXT:    ;;#ASMSTART
2514; GFX90A-NEXT:    ; def s[6:7]
2515; GFX90A-NEXT:    ;;#ASMEND
2516; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s4
2517; GFX90A-NEXT:    ;;#ASMSTART
2518; GFX90A-NEXT:    ; use s8
2519; GFX90A-NEXT:    ;;#ASMEND
2520; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2521;
2522; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_1:
2523; GFX940:       ; %bb.0:
2524; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2525; GFX940-NEXT:    ;;#ASMSTART
2526; GFX940-NEXT:    ; def s[0:1]
2527; GFX940-NEXT:    ;;#ASMEND
2528; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
2529; GFX940-NEXT:    ;;#ASMSTART
2530; GFX940-NEXT:    ; def s[2:3]
2531; GFX940-NEXT:    ;;#ASMEND
2532; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s0
2533; GFX940-NEXT:    ;;#ASMSTART
2534; GFX940-NEXT:    ; use s8
2535; GFX940-NEXT:    ;;#ASMEND
2536; GFX940-NEXT:    s_setpc_b64 s[30:31]
2537  %vec0 = call <4 x half> asm "; def $0", "=s"()
2538  %vec1 = call <4 x half> asm "; def $0", "=s"()
2539  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2540  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2541  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 1>
2542  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2543  ret void
2544}
2545
; Scalar-register variant with two sources, mask <5, 2>: lane 0 takes element 2
; of the second operand (%extract31), lane 1 takes element 2 of the first
; operand (%extract3). Both defs are expected, and s_pack_ll_b32_b16 combines
; the second register of each pair into s8 with no shifts.
2546define void @s_shuffle_v2f16_v3f16__5_2() {
2547; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_2:
2548; GFX900:       ; %bb.0:
2549; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2550; GFX900-NEXT:    ;;#ASMSTART
2551; GFX900-NEXT:    ; def s[4:5]
2552; GFX900-NEXT:    ;;#ASMEND
2553; GFX900-NEXT:    ;;#ASMSTART
2554; GFX900-NEXT:    ; def s[6:7]
2555; GFX900-NEXT:    ;;#ASMEND
2556; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
2557; GFX900-NEXT:    ;;#ASMSTART
2558; GFX900-NEXT:    ; use s8
2559; GFX900-NEXT:    ;;#ASMEND
2560; GFX900-NEXT:    s_setpc_b64 s[30:31]
2561;
2562; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_2:
2563; GFX90A:       ; %bb.0:
2564; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2565; GFX90A-NEXT:    ;;#ASMSTART
2566; GFX90A-NEXT:    ; def s[4:5]
2567; GFX90A-NEXT:    ;;#ASMEND
2568; GFX90A-NEXT:    ;;#ASMSTART
2569; GFX90A-NEXT:    ; def s[6:7]
2570; GFX90A-NEXT:    ;;#ASMEND
2571; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s7, s5
2572; GFX90A-NEXT:    ;;#ASMSTART
2573; GFX90A-NEXT:    ; use s8
2574; GFX90A-NEXT:    ;;#ASMEND
2575; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2576;
2577; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_2:
2578; GFX940:       ; %bb.0:
2579; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2580; GFX940-NEXT:    ;;#ASMSTART
2581; GFX940-NEXT:    ; def s[0:1]
2582; GFX940-NEXT:    ;;#ASMEND
2583; GFX940-NEXT:    ;;#ASMSTART
2584; GFX940-NEXT:    ; def s[2:3]
2585; GFX940-NEXT:    ;;#ASMEND
2586; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s3, s1
2587; GFX940-NEXT:    ;;#ASMSTART
2588; GFX940-NEXT:    ; use s8
2589; GFX940-NEXT:    ;;#ASMEND
2590; GFX940-NEXT:    s_setpc_b64 s[30:31]
2591  %vec0 = call <4 x half> asm "; def $0", "=s"()
2592  %vec1 = call <4 x half> asm "; def $0", "=s"()
2593  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2594  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2595  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 2>
2596  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2597  ret void
2598}
2599
; Scalar-register variant with two sources, mask <5, 3>: both lanes come from
; the second operand (%extract31) - element 2 into lane 0, element 0 into
; lane 1. The first operand is not referenced by the mask, and the checks
; expect only one def followed by s_pack_ll_b32_b16 of its two registers.
2600define void @s_shuffle_v2f16_v3f16__5_3() {
2601; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_3:
2602; GFX900:       ; %bb.0:
2603; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2604; GFX900-NEXT:    ;;#ASMSTART
2605; GFX900-NEXT:    ; def s[4:5]
2606; GFX900-NEXT:    ;;#ASMEND
2607; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2608; GFX900-NEXT:    ;;#ASMSTART
2609; GFX900-NEXT:    ; use s8
2610; GFX900-NEXT:    ;;#ASMEND
2611; GFX900-NEXT:    s_setpc_b64 s[30:31]
2612;
2613; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_3:
2614; GFX90A:       ; %bb.0:
2615; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2616; GFX90A-NEXT:    ;;#ASMSTART
2617; GFX90A-NEXT:    ; def s[4:5]
2618; GFX90A-NEXT:    ;;#ASMEND
2619; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2620; GFX90A-NEXT:    ;;#ASMSTART
2621; GFX90A-NEXT:    ; use s8
2622; GFX90A-NEXT:    ;;#ASMEND
2623; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2624;
2625; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_3:
2626; GFX940:       ; %bb.0:
2627; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2628; GFX940-NEXT:    ;;#ASMSTART
2629; GFX940-NEXT:    ; def s[0:1]
2630; GFX940-NEXT:    ;;#ASMEND
2631; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
2632; GFX940-NEXT:    ;;#ASMSTART
2633; GFX940-NEXT:    ; use s8
2634; GFX940-NEXT:    ;;#ASMEND
2635; GFX940-NEXT:    s_setpc_b64 s[30:31]
2636  %vec0 = call <4 x half> asm "; def $0", "=s"()
2637  %vec1 = call <4 x half> asm "; def $0", "=s"()
2638  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2639  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2640  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 3>
2641  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2642  ret void
2643}
2644
; Scalar-register variant with two sources, mask <5, 4>: both lanes come from
; the second operand (%extract31) - element 2 into lane 0, element 1 into
; lane 1. The checks expect only one def, an s_lshr_b32 by 16 of its first
; register, then s_pack_ll_b32_b16 into s8.
2645define void @s_shuffle_v2f16_v3f16__5_4() {
2646; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_4:
2647; GFX900:       ; %bb.0:
2648; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2649; GFX900-NEXT:    ;;#ASMSTART
2650; GFX900-NEXT:    ; def s[4:5]
2651; GFX900-NEXT:    ;;#ASMEND
2652; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
2653; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2654; GFX900-NEXT:    ;;#ASMSTART
2655; GFX900-NEXT:    ; use s8
2656; GFX900-NEXT:    ;;#ASMEND
2657; GFX900-NEXT:    s_setpc_b64 s[30:31]
2658;
2659; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_4:
2660; GFX90A:       ; %bb.0:
2661; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2662; GFX90A-NEXT:    ;;#ASMSTART
2663; GFX90A-NEXT:    ; def s[4:5]
2664; GFX90A-NEXT:    ;;#ASMEND
2665; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
2666; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2667; GFX90A-NEXT:    ;;#ASMSTART
2668; GFX90A-NEXT:    ; use s8
2669; GFX90A-NEXT:    ;;#ASMEND
2670; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2671;
2672; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_4:
2673; GFX940:       ; %bb.0:
2674; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2675; GFX940-NEXT:    ;;#ASMSTART
2676; GFX940-NEXT:    ; def s[0:1]
2677; GFX940-NEXT:    ;;#ASMEND
2678; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
2679; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
2680; GFX940-NEXT:    ;;#ASMSTART
2681; GFX940-NEXT:    ; use s8
2682; GFX940-NEXT:    ;;#ASMEND
2683; GFX940-NEXT:    s_setpc_b64 s[30:31]
2684  %vec0 = call <4 x half> asm "; def $0", "=s"()
2685  %vec1 = call <4 x half> asm "; def $0", "=s"()
2686  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2687  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2688  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 4>
2689  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2690  ret void
2691}
2692
; Scalar-register variant with two sources, mask <5, 5>: element 2 of the
; second operand (%extract31) is placed in both lanes. The checks expect only
; one def and an s_pack_ll_b32_b16 that names the same register for both
; source operands.
2693define void @s_shuffle_v2f16_v3f16__5_5() {
2694; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_5:
2695; GFX900:       ; %bb.0:
2696; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2697; GFX900-NEXT:    ;;#ASMSTART
2698; GFX900-NEXT:    ; def s[4:5]
2699; GFX900-NEXT:    ;;#ASMEND
2700; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
2701; GFX900-NEXT:    ;;#ASMSTART
2702; GFX900-NEXT:    ; use s8
2703; GFX900-NEXT:    ;;#ASMEND
2704; GFX900-NEXT:    s_setpc_b64 s[30:31]
2705;
2706; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_5:
2707; GFX90A:       ; %bb.0:
2708; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2709; GFX90A-NEXT:    ;;#ASMSTART
2710; GFX90A-NEXT:    ; def s[4:5]
2711; GFX90A-NEXT:    ;;#ASMEND
2712; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
2713; GFX90A-NEXT:    ;;#ASMSTART
2714; GFX90A-NEXT:    ; use s8
2715; GFX90A-NEXT:    ;;#ASMEND
2716; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2717;
2718; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_5:
2719; GFX940:       ; %bb.0:
2720; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2721; GFX940-NEXT:    ;;#ASMSTART
2722; GFX940-NEXT:    ; def s[0:1]
2723; GFX940-NEXT:    ;;#ASMEND
2724; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
2725; GFX940-NEXT:    ;;#ASMSTART
2726; GFX940-NEXT:    ; use s8
2727; GFX940-NEXT:    ;;#ASMEND
2728; GFX940-NEXT:    s_setpc_b64 s[30:31]
2729  %vec0 = call <4 x half> asm "; def $0", "=s"()
2730  %vec1 = call <4 x half> asm "; def $0", "=s"()
2731  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2732  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2733  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 5>
2734  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2735  ret void
2736}
2737
; Scalar-register variant, mask <poison, 0>: lane 0 is undefined, lane 1 takes
; element 0 of %vec0. The checks expect a single s_lshl_b32 by 16 of the first
; register of the def pair, written straight into s8.
2738define void @s_shuffle_v2f16_v3f16__u_0() {
2739; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_0:
2740; GFX900:       ; %bb.0:
2741; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2742; GFX900-NEXT:    ;;#ASMSTART
2743; GFX900-NEXT:    ; def s[4:5]
2744; GFX900-NEXT:    ;;#ASMEND
2745; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
2746; GFX900-NEXT:    ;;#ASMSTART
2747; GFX900-NEXT:    ; use s8
2748; GFX900-NEXT:    ;;#ASMEND
2749; GFX900-NEXT:    s_setpc_b64 s[30:31]
2750;
2751; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_0:
2752; GFX90A:       ; %bb.0:
2753; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2754; GFX90A-NEXT:    ;;#ASMSTART
2755; GFX90A-NEXT:    ; def s[4:5]
2756; GFX90A-NEXT:    ;;#ASMEND
2757; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
2758; GFX90A-NEXT:    ;;#ASMSTART
2759; GFX90A-NEXT:    ; use s8
2760; GFX90A-NEXT:    ;;#ASMEND
2761; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2762;
2763; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_0:
2764; GFX940:       ; %bb.0:
2765; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2766; GFX940-NEXT:    ;;#ASMSTART
2767; GFX940-NEXT:    ; def s[0:1]
2768; GFX940-NEXT:    ;;#ASMEND
2769; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
2770; GFX940-NEXT:    ;;#ASMSTART
2771; GFX940-NEXT:    ; use s8
2772; GFX940-NEXT:    ;;#ASMEND
2773; GFX940-NEXT:    s_setpc_b64 s[30:31]
2774  %vec0 = call <4 x half> asm "; def $0", "=s"()
2775  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2776  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 0>
2777  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2778  ret void
2779}
2780
; Scalar-register variant, mask zeroinitializer (i.e. <0, 0>): element 0 of
; %vec0 is placed in both lanes. The checks expect an s_pack_ll_b32_b16 that
; names the first register of the def pair for both source operands.
2781define void @s_shuffle_v2f16_v3f16__0_0() {
2782; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_0:
2783; GFX900:       ; %bb.0:
2784; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2785; GFX900-NEXT:    ;;#ASMSTART
2786; GFX900-NEXT:    ; def s[4:5]
2787; GFX900-NEXT:    ;;#ASMEND
2788; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
2789; GFX900-NEXT:    ;;#ASMSTART
2790; GFX900-NEXT:    ; use s8
2791; GFX900-NEXT:    ;;#ASMEND
2792; GFX900-NEXT:    s_setpc_b64 s[30:31]
2793;
2794; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_0:
2795; GFX90A:       ; %bb.0:
2796; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2797; GFX90A-NEXT:    ;;#ASMSTART
2798; GFX90A-NEXT:    ; def s[4:5]
2799; GFX90A-NEXT:    ;;#ASMEND
2800; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
2801; GFX90A-NEXT:    ;;#ASMSTART
2802; GFX90A-NEXT:    ; use s8
2803; GFX90A-NEXT:    ;;#ASMEND
2804; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2805;
2806; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_0:
2807; GFX940:       ; %bb.0:
2808; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2809; GFX940-NEXT:    ;;#ASMSTART
2810; GFX940-NEXT:    ; def s[0:1]
2811; GFX940-NEXT:    ;;#ASMEND
2812; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
2813; GFX940-NEXT:    ;;#ASMSTART
2814; GFX940-NEXT:    ; use s8
2815; GFX940-NEXT:    ;;#ASMEND
2816; GFX940-NEXT:    s_setpc_b64 s[30:31]
2817  %vec0 = call <4 x half> asm "; def $0", "=s"()
2818  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2819  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> zeroinitializer
2820  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2821  ret void
2822}
2823
; Scalar-register variant, mask <1, 0>: the first two elements of %vec0 are
; swapped (element 1 into lane 0, element 0 into lane 1). The checks expect an
; s_lshr_b32 by 16 into a scratch register, then s_pack_ll_b32_b16 into s8.
2824define void @s_shuffle_v2f16_v3f16__1_0() {
2825; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_0:
2826; GFX900:       ; %bb.0:
2827; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2828; GFX900-NEXT:    ;;#ASMSTART
2829; GFX900-NEXT:    ; def s[4:5]
2830; GFX900-NEXT:    ;;#ASMEND
2831; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
2832; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2833; GFX900-NEXT:    ;;#ASMSTART
2834; GFX900-NEXT:    ; use s8
2835; GFX900-NEXT:    ;;#ASMEND
2836; GFX900-NEXT:    s_setpc_b64 s[30:31]
2837;
2838; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_0:
2839; GFX90A:       ; %bb.0:
2840; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2841; GFX90A-NEXT:    ;;#ASMSTART
2842; GFX90A-NEXT:    ; def s[4:5]
2843; GFX90A-NEXT:    ;;#ASMEND
2844; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
2845; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2846; GFX90A-NEXT:    ;;#ASMSTART
2847; GFX90A-NEXT:    ; use s8
2848; GFX90A-NEXT:    ;;#ASMEND
2849; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2850;
2851; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_0:
2852; GFX940:       ; %bb.0:
2853; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2854; GFX940-NEXT:    ;;#ASMSTART
2855; GFX940-NEXT:    ; def s[0:1]
2856; GFX940-NEXT:    ;;#ASMEND
2857; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
2858; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
2859; GFX940-NEXT:    ;;#ASMSTART
2860; GFX940-NEXT:    ; use s8
2861; GFX940-NEXT:    ;;#ASMEND
2862; GFX940-NEXT:    s_setpc_b64 s[30:31]
2863  %vec0 = call <4 x half> asm "; def $0", "=s"()
2864  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2865  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 0>
2866  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2867  ret void
2868}
2869
; Scalar-register variant, mask <2, 0>: lane 0 takes element 2 of %vec0,
; lane 1 takes element 0. The checks expect a single s_pack_ll_b32_b16 of the
; second and first registers of the def pair into s8, with no shifts.
2870define void @s_shuffle_v2f16_v3f16__2_0() {
2871; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_0:
2872; GFX900:       ; %bb.0:
2873; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2874; GFX900-NEXT:    ;;#ASMSTART
2875; GFX900-NEXT:    ; def s[4:5]
2876; GFX900-NEXT:    ;;#ASMEND
2877; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2878; GFX900-NEXT:    ;;#ASMSTART
2879; GFX900-NEXT:    ; use s8
2880; GFX900-NEXT:    ;;#ASMEND
2881; GFX900-NEXT:    s_setpc_b64 s[30:31]
2882;
2883; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_0:
2884; GFX90A:       ; %bb.0:
2885; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2886; GFX90A-NEXT:    ;;#ASMSTART
2887; GFX90A-NEXT:    ; def s[4:5]
2888; GFX90A-NEXT:    ;;#ASMEND
2889; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2890; GFX90A-NEXT:    ;;#ASMSTART
2891; GFX90A-NEXT:    ; use s8
2892; GFX90A-NEXT:    ;;#ASMEND
2893; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2894;
2895; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_0:
2896; GFX940:       ; %bb.0:
2897; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2898; GFX940-NEXT:    ;;#ASMSTART
2899; GFX940-NEXT:    ; def s[0:1]
2900; GFX940-NEXT:    ;;#ASMEND
2901; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
2902; GFX940-NEXT:    ;;#ASMSTART
2903; GFX940-NEXT:    ; use s8
2904; GFX940-NEXT:    ;;#ASMEND
2905; GFX940-NEXT:    s_setpc_b64 s[30:31]
2906  %vec0 = call <4 x half> asm "; def $0", "=s"()
2907  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2908  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 0>
2909  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2910  ret void
2911}
2912
; Scalar-register variant, mask <3, 0> with a poison second operand: index 3
; names element 0 of the second operand, so lane 0 is undefined; lane 1 takes
; element 0 of %vec0. The expected code matches the <poison, 0> case: a single
; s_lshl_b32 by 16 into s8.
2913define void @s_shuffle_v2f16_v3f16__3_0() {
2914; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_0:
2915; GFX900:       ; %bb.0:
2916; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2917; GFX900-NEXT:    ;;#ASMSTART
2918; GFX900-NEXT:    ; def s[4:5]
2919; GFX900-NEXT:    ;;#ASMEND
2920; GFX900-NEXT:    s_lshl_b32 s8, s4, 16
2921; GFX900-NEXT:    ;;#ASMSTART
2922; GFX900-NEXT:    ; use s8
2923; GFX900-NEXT:    ;;#ASMEND
2924; GFX900-NEXT:    s_setpc_b64 s[30:31]
2925;
2926; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_0:
2927; GFX90A:       ; %bb.0:
2928; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2929; GFX90A-NEXT:    ;;#ASMSTART
2930; GFX90A-NEXT:    ; def s[4:5]
2931; GFX90A-NEXT:    ;;#ASMEND
2932; GFX90A-NEXT:    s_lshl_b32 s8, s4, 16
2933; GFX90A-NEXT:    ;;#ASMSTART
2934; GFX90A-NEXT:    ; use s8
2935; GFX90A-NEXT:    ;;#ASMEND
2936; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2937;
2938; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_0:
2939; GFX940:       ; %bb.0:
2940; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2941; GFX940-NEXT:    ;;#ASMSTART
2942; GFX940-NEXT:    ; def s[0:1]
2943; GFX940-NEXT:    ;;#ASMEND
2944; GFX940-NEXT:    s_lshl_b32 s8, s0, 16
2945; GFX940-NEXT:    ;;#ASMSTART
2946; GFX940-NEXT:    ; use s8
2947; GFX940-NEXT:    ;;#ASMEND
2948; GFX940-NEXT:    s_setpc_b64 s[30:31]
2949  %vec0 = call <4 x half> asm "; def $0", "=s"()
2950  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
2951  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 0>
2952  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
2953  ret void
2954}
2955
; Scalar-register variant with two sources, mask <4, 0>: lane 0 takes element 1
; of the second operand (%extract31), lane 1 takes element 0 of the first
; operand (%extract3). Both defs are expected, followed by an s_lshr_b32 by 16
; on the second def and s_pack_ll_b32_b16 into s8.
2956define void @s_shuffle_v2f16_v3f16__4_0() {
2957; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_0:
2958; GFX900:       ; %bb.0:
2959; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2960; GFX900-NEXT:    ;;#ASMSTART
2961; GFX900-NEXT:    ; def s[4:5]
2962; GFX900-NEXT:    ;;#ASMEND
2963; GFX900-NEXT:    ;;#ASMSTART
2964; GFX900-NEXT:    ; def s[6:7]
2965; GFX900-NEXT:    ;;#ASMEND
2966; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
2967; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2968; GFX900-NEXT:    ;;#ASMSTART
2969; GFX900-NEXT:    ; use s8
2970; GFX900-NEXT:    ;;#ASMEND
2971; GFX900-NEXT:    s_setpc_b64 s[30:31]
2972;
2973; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_0:
2974; GFX90A:       ; %bb.0:
2975; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2976; GFX90A-NEXT:    ;;#ASMSTART
2977; GFX90A-NEXT:    ; def s[4:5]
2978; GFX90A-NEXT:    ;;#ASMEND
2979; GFX90A-NEXT:    ;;#ASMSTART
2980; GFX90A-NEXT:    ; def s[6:7]
2981; GFX90A-NEXT:    ;;#ASMEND
2982; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
2983; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
2984; GFX90A-NEXT:    ;;#ASMSTART
2985; GFX90A-NEXT:    ; use s8
2986; GFX90A-NEXT:    ;;#ASMEND
2987; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2988;
2989; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_0:
2990; GFX940:       ; %bb.0:
2991; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2992; GFX940-NEXT:    ;;#ASMSTART
2993; GFX940-NEXT:    ; def s[0:1]
2994; GFX940-NEXT:    ;;#ASMEND
2995; GFX940-NEXT:    ;;#ASMSTART
2996; GFX940-NEXT:    ; def s[2:3]
2997; GFX940-NEXT:    ;;#ASMEND
2998; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
2999; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
3000; GFX940-NEXT:    ;;#ASMSTART
3001; GFX940-NEXT:    ; use s8
3002; GFX940-NEXT:    ;;#ASMEND
3003; GFX940-NEXT:    s_setpc_b64 s[30:31]
3004  %vec0 = call <4 x half> asm "; def $0", "=s"()
3005  %vec1 = call <4 x half> asm "; def $0", "=s"()
3006  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3007  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3008  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 0>
3009  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3010  ret void
3011}
3012
; Scalar-register variant, mask <poison, 1>: lane 0 is undefined, lane 1 takes
; element 1 of %vec0. The checks expect the def to be placed directly in
; s[8:9] and s8 to be consumed with no shift or pack. The gfx940 output
; additionally expects an s_nop 0 between the two asm blocks.
3013define void @s_shuffle_v2f16_v3f16__u_1() {
3014; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_1:
3015; GFX900:       ; %bb.0:
3016; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3017; GFX900-NEXT:    ;;#ASMSTART
3018; GFX900-NEXT:    ; def s[8:9]
3019; GFX900-NEXT:    ;;#ASMEND
3020; GFX900-NEXT:    ;;#ASMSTART
3021; GFX900-NEXT:    ; use s8
3022; GFX900-NEXT:    ;;#ASMEND
3023; GFX900-NEXT:    s_setpc_b64 s[30:31]
3024;
3025; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_1:
3026; GFX90A:       ; %bb.0:
3027; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3028; GFX90A-NEXT:    ;;#ASMSTART
3029; GFX90A-NEXT:    ; def s[8:9]
3030; GFX90A-NEXT:    ;;#ASMEND
3031; GFX90A-NEXT:    ;;#ASMSTART
3032; GFX90A-NEXT:    ; use s8
3033; GFX90A-NEXT:    ;;#ASMEND
3034; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3035;
3036; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_1:
3037; GFX940:       ; %bb.0:
3038; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3039; GFX940-NEXT:    ;;#ASMSTART
3040; GFX940-NEXT:    ; def s[8:9]
3041; GFX940-NEXT:    ;;#ASMEND
3042; GFX940-NEXT:    s_nop 0
3043; GFX940-NEXT:    ;;#ASMSTART
3044; GFX940-NEXT:    ; use s8
3045; GFX940-NEXT:    ;;#ASMEND
3046; GFX940-NEXT:    s_setpc_b64 s[30:31]
3047  %vec0 = call <4 x half> asm "; def $0", "=s"()
3048  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3049  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 1>
3050  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3051  ret void
3052}
3053
; Scalar-register ("=s") case, shuffle mask <0, 1>: the result is simply the
; two lowest elements of %vec0 in their original order. The <2 x half> result
; is handed to an inline asm use pinned to s8 by the "{s8}" constraint.
; The checks expect the asm def to land directly in s[8:9] with no shift or
; pack instruction; the gfx940 block additionally expects an s_nop 0 between
; the def and the use.
3054define void @s_shuffle_v2f16_v3f16__0_1() {
3055; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_1:
3056; GFX900:       ; %bb.0:
3057; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3058; GFX900-NEXT:    ;;#ASMSTART
3059; GFX900-NEXT:    ; def s[8:9]
3060; GFX900-NEXT:    ;;#ASMEND
3061; GFX900-NEXT:    ;;#ASMSTART
3062; GFX900-NEXT:    ; use s8
3063; GFX900-NEXT:    ;;#ASMEND
3064; GFX900-NEXT:    s_setpc_b64 s[30:31]
3065;
3066; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_1:
3067; GFX90A:       ; %bb.0:
3068; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3069; GFX90A-NEXT:    ;;#ASMSTART
3070; GFX90A-NEXT:    ; def s[8:9]
3071; GFX90A-NEXT:    ;;#ASMEND
3072; GFX90A-NEXT:    ;;#ASMSTART
3073; GFX90A-NEXT:    ; use s8
3074; GFX90A-NEXT:    ;;#ASMEND
3075; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3076;
3077; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_1:
3078; GFX940:       ; %bb.0:
3079; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3080; GFX940-NEXT:    ;;#ASMSTART
3081; GFX940-NEXT:    ; def s[8:9]
3082; GFX940-NEXT:    ;;#ASMEND
3083; GFX940-NEXT:    s_nop 0
3084; GFX940-NEXT:    ;;#ASMSTART
3085; GFX940-NEXT:    ; use s8
3086; GFX940-NEXT:    ;;#ASMEND
3087; GFX940-NEXT:    s_setpc_b64 s[30:31]
3088  %vec0 = call <4 x half> asm "; def $0", "=s"()
3089  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3090  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 1>
3091  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3092  ret void
3093}
3094
; Scalar-register ("=s") case, shuffle mask <1, 1>: both result lanes take
; element 1 of %vec0 (a splat of the high half of the first dword). The
; <2 x half> result is handed to an inline asm use pinned to s8.
; The checks expect an s_lshr_b32 by 16 to bring element 1 into the low half,
; followed by an s_pack_ll_b32_b16 of that value with itself into s8.
3095define void @s_shuffle_v2f16_v3f16__1_1() {
3096; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_1:
3097; GFX900:       ; %bb.0:
3098; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3099; GFX900-NEXT:    ;;#ASMSTART
3100; GFX900-NEXT:    ; def s[4:5]
3101; GFX900-NEXT:    ;;#ASMEND
3102; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
3103; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
3104; GFX900-NEXT:    ;;#ASMSTART
3105; GFX900-NEXT:    ; use s8
3106; GFX900-NEXT:    ;;#ASMEND
3107; GFX900-NEXT:    s_setpc_b64 s[30:31]
3108;
3109; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_1:
3110; GFX90A:       ; %bb.0:
3111; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3112; GFX90A-NEXT:    ;;#ASMSTART
3113; GFX90A-NEXT:    ; def s[4:5]
3114; GFX90A-NEXT:    ;;#ASMEND
3115; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
3116; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
3117; GFX90A-NEXT:    ;;#ASMSTART
3118; GFX90A-NEXT:    ; use s8
3119; GFX90A-NEXT:    ;;#ASMEND
3120; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3121;
3122; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_1:
3123; GFX940:       ; %bb.0:
3124; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3125; GFX940-NEXT:    ;;#ASMSTART
3126; GFX940-NEXT:    ; def s[0:1]
3127; GFX940-NEXT:    ;;#ASMEND
3128; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
3129; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
3130; GFX940-NEXT:    ;;#ASMSTART
3131; GFX940-NEXT:    ; use s8
3132; GFX940-NEXT:    ;;#ASMEND
3133; GFX940-NEXT:    s_setpc_b64 s[30:31]
3134  %vec0 = call <4 x half> asm "; def $0", "=s"()
3135  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3136  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 1>
3137  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3138  ret void
3139}
3140
; Scalar-register ("=s") case, shuffle mask <2, 1>: result lane 0 takes
; element 2 of %vec0 and lane 1 takes element 1. The <2 x half> result is
; handed to an inline asm use pinned to s8.
; The checks expect an s_lshr_b32 by 16 on the first dword (element 1), then
; an s_pack_ll_b32_b16 with the second dword as the low half and the shifted
; value as the high half.
3141define void @s_shuffle_v2f16_v3f16__2_1() {
3142; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_1:
3143; GFX900:       ; %bb.0:
3144; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3145; GFX900-NEXT:    ;;#ASMSTART
3146; GFX900-NEXT:    ; def s[4:5]
3147; GFX900-NEXT:    ;;#ASMEND
3148; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
3149; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
3150; GFX900-NEXT:    ;;#ASMSTART
3151; GFX900-NEXT:    ; use s8
3152; GFX900-NEXT:    ;;#ASMEND
3153; GFX900-NEXT:    s_setpc_b64 s[30:31]
3154;
3155; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_1:
3156; GFX90A:       ; %bb.0:
3157; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3158; GFX90A-NEXT:    ;;#ASMSTART
3159; GFX90A-NEXT:    ; def s[4:5]
3160; GFX90A-NEXT:    ;;#ASMEND
3161; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
3162; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
3163; GFX90A-NEXT:    ;;#ASMSTART
3164; GFX90A-NEXT:    ; use s8
3165; GFX90A-NEXT:    ;;#ASMEND
3166; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3167;
3168; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_1:
3169; GFX940:       ; %bb.0:
3170; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3171; GFX940-NEXT:    ;;#ASMSTART
3172; GFX940-NEXT:    ; def s[0:1]
3173; GFX940-NEXT:    ;;#ASMEND
3174; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
3175; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
3176; GFX940-NEXT:    ;;#ASMSTART
3177; GFX940-NEXT:    ; use s8
3178; GFX940-NEXT:    ;;#ASMEND
3179; GFX940-NEXT:    s_setpc_b64 s[30:31]
3180  %vec0 = call <4 x half> asm "; def $0", "=s"()
3181  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3182  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 1>
3183  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3184  ret void
3185}
3186
; Scalar-register ("=s") case, shuffle mask <3, 1>: index 3 names element 0 of
; the second shuffle operand, which is poison here, so only result lane 1
; (element 1 of %vec0) carries a defined value. The <2 x half> result is
; handed to an inline asm use pinned to s8.
; The checks expect the asm def to land directly in s[8:9] with no shift or
; pack instruction; the gfx940 block additionally expects an s_nop 0 between
; the def and the use.
3187define void @s_shuffle_v2f16_v3f16__3_1() {
3188; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_1:
3189; GFX900:       ; %bb.0:
3190; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3191; GFX900-NEXT:    ;;#ASMSTART
3192; GFX900-NEXT:    ; def s[8:9]
3193; GFX900-NEXT:    ;;#ASMEND
3194; GFX900-NEXT:    ;;#ASMSTART
3195; GFX900-NEXT:    ; use s8
3196; GFX900-NEXT:    ;;#ASMEND
3197; GFX900-NEXT:    s_setpc_b64 s[30:31]
3198;
3199; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_1:
3200; GFX90A:       ; %bb.0:
3201; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3202; GFX90A-NEXT:    ;;#ASMSTART
3203; GFX90A-NEXT:    ; def s[8:9]
3204; GFX90A-NEXT:    ;;#ASMEND
3205; GFX90A-NEXT:    ;;#ASMSTART
3206; GFX90A-NEXT:    ; use s8
3207; GFX90A-NEXT:    ;;#ASMEND
3208; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3209;
3210; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_1:
3211; GFX940:       ; %bb.0:
3212; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3213; GFX940-NEXT:    ;;#ASMSTART
3214; GFX940-NEXT:    ; def s[8:9]
3215; GFX940-NEXT:    ;;#ASMEND
3216; GFX940-NEXT:    s_nop 0
3217; GFX940-NEXT:    ;;#ASMSTART
3218; GFX940-NEXT:    ; use s8
3219; GFX940-NEXT:    ;;#ASMEND
3220; GFX940-NEXT:    s_setpc_b64 s[30:31]
3221  %vec0 = call <4 x half> asm "; def $0", "=s"()
3222  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3223  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 1>
3224  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3225  ret void
3226}
3227
; Scalar-register ("=s") case with two inputs, shuffle mask <4, 1>: index 4
; names element 1 of the second operand (%extract31, taken from %vec1), and
; index 1 names element 1 of the first operand (from %vec0). The <2 x half>
; result is handed to an inline asm use pinned to s8.
; The checks expect one s_lshr_b32 by 16 per input to extract each element 1,
; then an s_pack_ll_b32_b16 with the %vec1 value as the low half.
3228define void @s_shuffle_v2f16_v3f16__4_1() {
3229; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_1:
3230; GFX900:       ; %bb.0:
3231; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3232; GFX900-NEXT:    ;;#ASMSTART
3233; GFX900-NEXT:    ; def s[4:5]
3234; GFX900-NEXT:    ;;#ASMEND
3235; GFX900-NEXT:    ;;#ASMSTART
3236; GFX900-NEXT:    ; def s[6:7]
3237; GFX900-NEXT:    ;;#ASMEND
3238; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
3239; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
3240; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
3241; GFX900-NEXT:    ;;#ASMSTART
3242; GFX900-NEXT:    ; use s8
3243; GFX900-NEXT:    ;;#ASMEND
3244; GFX900-NEXT:    s_setpc_b64 s[30:31]
3245;
3246; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_1:
3247; GFX90A:       ; %bb.0:
3248; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3249; GFX90A-NEXT:    ;;#ASMSTART
3250; GFX90A-NEXT:    ; def s[4:5]
3251; GFX90A-NEXT:    ;;#ASMEND
3252; GFX90A-NEXT:    ;;#ASMSTART
3253; GFX90A-NEXT:    ; def s[6:7]
3254; GFX90A-NEXT:    ;;#ASMEND
3255; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
3256; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
3257; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
3258; GFX90A-NEXT:    ;;#ASMSTART
3259; GFX90A-NEXT:    ; use s8
3260; GFX90A-NEXT:    ;;#ASMEND
3261; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3262;
3263; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_1:
3264; GFX940:       ; %bb.0:
3265; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3266; GFX940-NEXT:    ;;#ASMSTART
3267; GFX940-NEXT:    ; def s[0:1]
3268; GFX940-NEXT:    ;;#ASMEND
3269; GFX940-NEXT:    ;;#ASMSTART
3270; GFX940-NEXT:    ; def s[2:3]
3271; GFX940-NEXT:    ;;#ASMEND
3272; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
3273; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
3274; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
3275; GFX940-NEXT:    ;;#ASMSTART
3276; GFX940-NEXT:    ; use s8
3277; GFX940-NEXT:    ;;#ASMEND
3278; GFX940-NEXT:    s_setpc_b64 s[30:31]
3279  %vec0 = call <4 x half> asm "; def $0", "=s"()
3280  %vec1 = call <4 x half> asm "; def $0", "=s"()
3281  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3282  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3283  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 1>
3284  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3285  ret void
3286}
3287
; Scalar-register ("=s") case, shuffle mask <poison, 2>: result lane 0 is
; poison and lane 1 takes element 2 of %vec0 (the low half of the second
; dword). The <2 x half> result is handed to an inline asm use pinned to s8.
; The checks expect a single s_lshl_b32 by 16 of the second dword into s8,
; which moves element 2 into the high half.
3288define void @s_shuffle_v2f16_v3f16__u_2() {
3289; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_2:
3290; GFX900:       ; %bb.0:
3291; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3292; GFX900-NEXT:    ;;#ASMSTART
3293; GFX900-NEXT:    ; def s[4:5]
3294; GFX900-NEXT:    ;;#ASMEND
3295; GFX900-NEXT:    s_lshl_b32 s8, s5, 16
3296; GFX900-NEXT:    ;;#ASMSTART
3297; GFX900-NEXT:    ; use s8
3298; GFX900-NEXT:    ;;#ASMEND
3299; GFX900-NEXT:    s_setpc_b64 s[30:31]
3300;
3301; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_2:
3302; GFX90A:       ; %bb.0:
3303; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3304; GFX90A-NEXT:    ;;#ASMSTART
3305; GFX90A-NEXT:    ; def s[4:5]
3306; GFX90A-NEXT:    ;;#ASMEND
3307; GFX90A-NEXT:    s_lshl_b32 s8, s5, 16
3308; GFX90A-NEXT:    ;;#ASMSTART
3309; GFX90A-NEXT:    ; use s8
3310; GFX90A-NEXT:    ;;#ASMEND
3311; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3312;
3313; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_2:
3314; GFX940:       ; %bb.0:
3315; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3316; GFX940-NEXT:    ;;#ASMSTART
3317; GFX940-NEXT:    ; def s[0:1]
3318; GFX940-NEXT:    ;;#ASMEND
3319; GFX940-NEXT:    s_lshl_b32 s8, s1, 16
3320; GFX940-NEXT:    ;;#ASMSTART
3321; GFX940-NEXT:    ; use s8
3322; GFX940-NEXT:    ;;#ASMEND
3323; GFX940-NEXT:    s_setpc_b64 s[30:31]
3324  %vec0 = call <4 x half> asm "; def $0", "=s"()
3325  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3326  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 2>
3327  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3328  ret void
3329}
3330
; Scalar-register ("=s") case, shuffle mask <0, 2>: result lane 0 takes
; element 0 of %vec0 and lane 1 takes element 2, i.e. the low halves of the
; two dwords. The <2 x half> result is handed to an inline asm use pinned to
; s8.
; The checks expect a single s_pack_ll_b32_b16 of the first and second dword,
; with no preceding shift.
3331define void @s_shuffle_v2f16_v3f16__0_2() {
3332; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_2:
3333; GFX900:       ; %bb.0:
3334; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3335; GFX900-NEXT:    ;;#ASMSTART
3336; GFX900-NEXT:    ; def s[4:5]
3337; GFX900-NEXT:    ;;#ASMEND
3338; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3339; GFX900-NEXT:    ;;#ASMSTART
3340; GFX900-NEXT:    ; use s8
3341; GFX900-NEXT:    ;;#ASMEND
3342; GFX900-NEXT:    s_setpc_b64 s[30:31]
3343;
3344; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_2:
3345; GFX90A:       ; %bb.0:
3346; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3347; GFX90A-NEXT:    ;;#ASMSTART
3348; GFX90A-NEXT:    ; def s[4:5]
3349; GFX90A-NEXT:    ;;#ASMEND
3350; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3351; GFX90A-NEXT:    ;;#ASMSTART
3352; GFX90A-NEXT:    ; use s8
3353; GFX90A-NEXT:    ;;#ASMEND
3354; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3355;
3356; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_2:
3357; GFX940:       ; %bb.0:
3358; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3359; GFX940-NEXT:    ;;#ASMSTART
3360; GFX940-NEXT:    ; def s[0:1]
3361; GFX940-NEXT:    ;;#ASMEND
3362; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
3363; GFX940-NEXT:    ;;#ASMSTART
3364; GFX940-NEXT:    ; use s8
3365; GFX940-NEXT:    ;;#ASMEND
3366; GFX940-NEXT:    s_setpc_b64 s[30:31]
3367  %vec0 = call <4 x half> asm "; def $0", "=s"()
3368  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3369  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 2>
3370  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3371  ret void
3372}
3373
; Scalar-register ("=s") case, shuffle mask <1, 2>: result lane 0 takes
; element 1 of %vec0 and lane 1 takes element 2. The <2 x half> result is
; handed to an inline asm use pinned to s8.
; The checks expect an s_lshr_b32 by 16 on the first dword (element 1), then
; an s_pack_ll_b32_b16 of the shifted value (low half) with the second dword
; (high half).
3374define void @s_shuffle_v2f16_v3f16__1_2() {
3375; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_2:
3376; GFX900:       ; %bb.0:
3377; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3378; GFX900-NEXT:    ;;#ASMSTART
3379; GFX900-NEXT:    ; def s[4:5]
3380; GFX900-NEXT:    ;;#ASMEND
3381; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
3382; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3383; GFX900-NEXT:    ;;#ASMSTART
3384; GFX900-NEXT:    ; use s8
3385; GFX900-NEXT:    ;;#ASMEND
3386; GFX900-NEXT:    s_setpc_b64 s[30:31]
3387;
3388; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_2:
3389; GFX90A:       ; %bb.0:
3390; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3391; GFX90A-NEXT:    ;;#ASMSTART
3392; GFX90A-NEXT:    ; def s[4:5]
3393; GFX90A-NEXT:    ;;#ASMEND
3394; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
3395; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3396; GFX90A-NEXT:    ;;#ASMSTART
3397; GFX90A-NEXT:    ; use s8
3398; GFX90A-NEXT:    ;;#ASMEND
3399; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3400;
3401; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_2:
3402; GFX940:       ; %bb.0:
3403; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3404; GFX940-NEXT:    ;;#ASMSTART
3405; GFX940-NEXT:    ; def s[0:1]
3406; GFX940-NEXT:    ;;#ASMEND
3407; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
3408; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
3409; GFX940-NEXT:    ;;#ASMSTART
3410; GFX940-NEXT:    ; use s8
3411; GFX940-NEXT:    ;;#ASMEND
3412; GFX940-NEXT:    s_setpc_b64 s[30:31]
3413  %vec0 = call <4 x half> asm "; def $0", "=s"()
3414  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3415  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 2>
3416  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3417  ret void
3418}
3419
; Scalar-register ("=s") case, shuffle mask <2, 2>: both result lanes take
; element 2 of %vec0 (a splat of the low half of the second dword). The
; <2 x half> result is handed to an inline asm use pinned to s8.
; The checks expect a single s_pack_ll_b32_b16 of the second dword with
; itself, with no preceding shift.
3420define void @s_shuffle_v2f16_v3f16__2_2() {
3421; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_2:
3422; GFX900:       ; %bb.0:
3423; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3424; GFX900-NEXT:    ;;#ASMSTART
3425; GFX900-NEXT:    ; def s[4:5]
3426; GFX900-NEXT:    ;;#ASMEND
3427; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
3428; GFX900-NEXT:    ;;#ASMSTART
3429; GFX900-NEXT:    ; use s8
3430; GFX900-NEXT:    ;;#ASMEND
3431; GFX900-NEXT:    s_setpc_b64 s[30:31]
3432;
3433; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_2:
3434; GFX90A:       ; %bb.0:
3435; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3436; GFX90A-NEXT:    ;;#ASMSTART
3437; GFX90A-NEXT:    ; def s[4:5]
3438; GFX90A-NEXT:    ;;#ASMEND
3439; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s5
3440; GFX90A-NEXT:    ;;#ASMSTART
3441; GFX90A-NEXT:    ; use s8
3442; GFX90A-NEXT:    ;;#ASMEND
3443; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3444;
3445; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_2:
3446; GFX940:       ; %bb.0:
3447; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3448; GFX940-NEXT:    ;;#ASMSTART
3449; GFX940-NEXT:    ; def s[0:1]
3450; GFX940-NEXT:    ;;#ASMEND
3451; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s1
3452; GFX940-NEXT:    ;;#ASMSTART
3453; GFX940-NEXT:    ; use s8
3454; GFX940-NEXT:    ;;#ASMEND
3455; GFX940-NEXT:    s_setpc_b64 s[30:31]
3456  %vec0 = call <4 x half> asm "; def $0", "=s"()
3457  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3458  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 2>
3459  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3460  ret void
3461}
3462
; Scalar-register ("=s") case, shuffle mask <3, 2>: index 3 names element 0 of
; the second shuffle operand, which is poison here, so only result lane 1
; (element 2 of %vec0) carries a defined value. The <2 x half> result is
; handed to an inline asm use pinned to s8.
; The checks expect a single s_lshl_b32 by 16 of the second dword into s8,
; which moves element 2 into the high half.
3463define void @s_shuffle_v2f16_v3f16__3_2() {
3464; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_2:
3465; GFX900:       ; %bb.0:
3466; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3467; GFX900-NEXT:    ;;#ASMSTART
3468; GFX900-NEXT:    ; def s[4:5]
3469; GFX900-NEXT:    ;;#ASMEND
3470; GFX900-NEXT:    s_lshl_b32 s8, s5, 16
3471; GFX900-NEXT:    ;;#ASMSTART
3472; GFX900-NEXT:    ; use s8
3473; GFX900-NEXT:    ;;#ASMEND
3474; GFX900-NEXT:    s_setpc_b64 s[30:31]
3475;
3476; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_2:
3477; GFX90A:       ; %bb.0:
3478; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3479; GFX90A-NEXT:    ;;#ASMSTART
3480; GFX90A-NEXT:    ; def s[4:5]
3481; GFX90A-NEXT:    ;;#ASMEND
3482; GFX90A-NEXT:    s_lshl_b32 s8, s5, 16
3483; GFX90A-NEXT:    ;;#ASMSTART
3484; GFX90A-NEXT:    ; use s8
3485; GFX90A-NEXT:    ;;#ASMEND
3486; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3487;
3488; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_2:
3489; GFX940:       ; %bb.0:
3490; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3491; GFX940-NEXT:    ;;#ASMSTART
3492; GFX940-NEXT:    ; def s[0:1]
3493; GFX940-NEXT:    ;;#ASMEND
3494; GFX940-NEXT:    s_lshl_b32 s8, s1, 16
3495; GFX940-NEXT:    ;;#ASMSTART
3496; GFX940-NEXT:    ; use s8
3497; GFX940-NEXT:    ;;#ASMEND
3498; GFX940-NEXT:    s_setpc_b64 s[30:31]
3499  %vec0 = call <4 x half> asm "; def $0", "=s"()
3500  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3501  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 2>
3502  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3503  ret void
3504}
3505
; Scalar-register ("=s") case with two inputs, shuffle mask <4, 2>: index 4
; names element 1 of the second operand (%extract31, taken from %vec1), and
; index 2 names element 2 of the first operand (from %vec0). The <2 x half>
; result is handed to an inline asm use pinned to s8.
; The checks expect an s_lshr_b32 by 16 on the first dword of %vec1, then an
; s_pack_ll_b32_b16 of that value (low half) with the second dword of %vec0
; (high half).
3506define void @s_shuffle_v2f16_v3f16__4_2() {
3507; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_2:
3508; GFX900:       ; %bb.0:
3509; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3510; GFX900-NEXT:    ;;#ASMSTART
3511; GFX900-NEXT:    ; def s[4:5]
3512; GFX900-NEXT:    ;;#ASMEND
3513; GFX900-NEXT:    ;;#ASMSTART
3514; GFX900-NEXT:    ; def s[6:7]
3515; GFX900-NEXT:    ;;#ASMEND
3516; GFX900-NEXT:    s_lshr_b32 s4, s6, 16
3517; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3518; GFX900-NEXT:    ;;#ASMSTART
3519; GFX900-NEXT:    ; use s8
3520; GFX900-NEXT:    ;;#ASMEND
3521; GFX900-NEXT:    s_setpc_b64 s[30:31]
3522;
3523; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_2:
3524; GFX90A:       ; %bb.0:
3525; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3526; GFX90A-NEXT:    ;;#ASMSTART
3527; GFX90A-NEXT:    ; def s[4:5]
3528; GFX90A-NEXT:    ;;#ASMEND
3529; GFX90A-NEXT:    ;;#ASMSTART
3530; GFX90A-NEXT:    ; def s[6:7]
3531; GFX90A-NEXT:    ;;#ASMEND
3532; GFX90A-NEXT:    s_lshr_b32 s4, s6, 16
3533; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3534; GFX90A-NEXT:    ;;#ASMSTART
3535; GFX90A-NEXT:    ; use s8
3536; GFX90A-NEXT:    ;;#ASMEND
3537; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3538;
3539; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_2:
3540; GFX940:       ; %bb.0:
3541; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3542; GFX940-NEXT:    ;;#ASMSTART
3543; GFX940-NEXT:    ; def s[0:1]
3544; GFX940-NEXT:    ;;#ASMEND
3545; GFX940-NEXT:    ;;#ASMSTART
3546; GFX940-NEXT:    ; def s[2:3]
3547; GFX940-NEXT:    ;;#ASMEND
3548; GFX940-NEXT:    s_lshr_b32 s0, s2, 16
3549; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
3550; GFX940-NEXT:    ;;#ASMSTART
3551; GFX940-NEXT:    ; use s8
3552; GFX940-NEXT:    ;;#ASMEND
3553; GFX940-NEXT:    s_setpc_b64 s[30:31]
3554  %vec0 = call <4 x half> asm "; def $0", "=s"()
3555  %vec1 = call <4 x half> asm "; def $0", "=s"()
3556  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3557  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3558  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 2>
3559  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3560  ret void
3561}
3562
; Scalar-register ("=s") case, shuffle mask <poison, 3>: lane 0 is poison and
; index 3 names element 0 of the second shuffle operand, which is also poison,
; so no lane of the result depends on %vec0.
; All three targets share a single check block here. It expects no asm def
; and no shift/pack instruction at all, only the "use s8" inline asm.
; NOTE(review): the def is presumably dropped because its result is unused
; and the defining asm is not marked sideeffect - confirm.
3563define void @s_shuffle_v2f16_v3f16__u_3() {
3564; GFX9-LABEL: s_shuffle_v2f16_v3f16__u_3:
3565; GFX9:       ; %bb.0:
3566; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3567; GFX9-NEXT:    ;;#ASMSTART
3568; GFX9-NEXT:    ; use s8
3569; GFX9-NEXT:    ;;#ASMEND
3570; GFX9-NEXT:    s_setpc_b64 s[30:31]
3571  %vec0 = call <4 x half> asm "; def $0", "=s"()
3572  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3573  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 3>
3574  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3575  ret void
3576}
3577
; Scalar-register ("=s") case, shuffle mask <0, 3>: result lane 0 takes
; element 0 of %vec0; index 3 names element 0 of the second shuffle operand,
; which is poison here. The <2 x half> result is handed to an inline asm use
; pinned to s8.
; The checks expect the asm def to land directly in s[8:9] with no shift or
; pack instruction; the gfx940 block additionally expects an s_nop 0 between
; the def and the use.
3578define void @s_shuffle_v2f16_v3f16__0_3() {
3579; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_3:
3580; GFX900:       ; %bb.0:
3581; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3582; GFX900-NEXT:    ;;#ASMSTART
3583; GFX900-NEXT:    ; def s[8:9]
3584; GFX900-NEXT:    ;;#ASMEND
3585; GFX900-NEXT:    ;;#ASMSTART
3586; GFX900-NEXT:    ; use s8
3587; GFX900-NEXT:    ;;#ASMEND
3588; GFX900-NEXT:    s_setpc_b64 s[30:31]
3589;
3590; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_3:
3591; GFX90A:       ; %bb.0:
3592; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3593; GFX90A-NEXT:    ;;#ASMSTART
3594; GFX90A-NEXT:    ; def s[8:9]
3595; GFX90A-NEXT:    ;;#ASMEND
3596; GFX90A-NEXT:    ;;#ASMSTART
3597; GFX90A-NEXT:    ; use s8
3598; GFX90A-NEXT:    ;;#ASMEND
3599; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3600;
3601; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_3:
3602; GFX940:       ; %bb.0:
3603; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3604; GFX940-NEXT:    ;;#ASMSTART
3605; GFX940-NEXT:    ; def s[8:9]
3606; GFX940-NEXT:    ;;#ASMEND
3607; GFX940-NEXT:    s_nop 0
3608; GFX940-NEXT:    ;;#ASMSTART
3609; GFX940-NEXT:    ; use s8
3610; GFX940-NEXT:    ;;#ASMEND
3611; GFX940-NEXT:    s_setpc_b64 s[30:31]
3612  %vec0 = call <4 x half> asm "; def $0", "=s"()
3613  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3614  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 3>
3615  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3616  ret void
3617}
3618
; Scalar-register ("=s") case, shuffle mask <1, 3>: result lane 0 takes
; element 1 of %vec0; index 3 names element 0 of the second shuffle operand,
; which is poison here. The <2 x half> result is handed to an inline asm use
; pinned to s8.
; The checks expect a single s_lshr_b32 by 16 of the first dword into s8,
; which moves element 1 into the low half; no pack instruction is expected.
3619define void @s_shuffle_v2f16_v3f16__1_3() {
3620; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_3:
3621; GFX900:       ; %bb.0:
3622; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3623; GFX900-NEXT:    ;;#ASMSTART
3624; GFX900-NEXT:    ; def s[4:5]
3625; GFX900-NEXT:    ;;#ASMEND
3626; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
3627; GFX900-NEXT:    ;;#ASMSTART
3628; GFX900-NEXT:    ; use s8
3629; GFX900-NEXT:    ;;#ASMEND
3630; GFX900-NEXT:    s_setpc_b64 s[30:31]
3631;
3632; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_3:
3633; GFX90A:       ; %bb.0:
3634; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3635; GFX90A-NEXT:    ;;#ASMSTART
3636; GFX90A-NEXT:    ; def s[4:5]
3637; GFX90A-NEXT:    ;;#ASMEND
3638; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
3639; GFX90A-NEXT:    ;;#ASMSTART
3640; GFX90A-NEXT:    ; use s8
3641; GFX90A-NEXT:    ;;#ASMEND
3642; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3643;
3644; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_3:
3645; GFX940:       ; %bb.0:
3646; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3647; GFX940-NEXT:    ;;#ASMSTART
3648; GFX940-NEXT:    ; def s[0:1]
3649; GFX940-NEXT:    ;;#ASMEND
3650; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
3651; GFX940-NEXT:    ;;#ASMSTART
3652; GFX940-NEXT:    ; use s8
3653; GFX940-NEXT:    ;;#ASMEND
3654; GFX940-NEXT:    s_setpc_b64 s[30:31]
3655  %vec0 = call <4 x half> asm "; def $0", "=s"()
3656  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3657  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 3>
3658  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3659  ret void
3660}
3661
; Scalar-register ("=s") case, shuffle mask <2, 3>: result lane 0 takes
; element 2 of %vec0; index 3 names element 0 of the second shuffle operand,
; which is poison here. The <2 x half> result is handed to an inline asm use
; pinned to s8.
; The checks expect a plain s_mov_b32 of the second dword into s8, with no
; shift or pack instruction.
3662define void @s_shuffle_v2f16_v3f16__2_3() {
3663; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_3:
3664; GFX900:       ; %bb.0:
3665; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3666; GFX900-NEXT:    ;;#ASMSTART
3667; GFX900-NEXT:    ; def s[4:5]
3668; GFX900-NEXT:    ;;#ASMEND
3669; GFX900-NEXT:    s_mov_b32 s8, s5
3670; GFX900-NEXT:    ;;#ASMSTART
3671; GFX900-NEXT:    ; use s8
3672; GFX900-NEXT:    ;;#ASMEND
3673; GFX900-NEXT:    s_setpc_b64 s[30:31]
3674;
3675; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_3:
3676; GFX90A:       ; %bb.0:
3677; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3678; GFX90A-NEXT:    ;;#ASMSTART
3679; GFX90A-NEXT:    ; def s[4:5]
3680; GFX90A-NEXT:    ;;#ASMEND
3681; GFX90A-NEXT:    s_mov_b32 s8, s5
3682; GFX90A-NEXT:    ;;#ASMSTART
3683; GFX90A-NEXT:    ; use s8
3684; GFX90A-NEXT:    ;;#ASMEND
3685; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3686;
3687; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_3:
3688; GFX940:       ; %bb.0:
3689; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3690; GFX940-NEXT:    ;;#ASMSTART
3691; GFX940-NEXT:    ; def s[0:1]
3692; GFX940-NEXT:    ;;#ASMEND
3693; GFX940-NEXT:    s_mov_b32 s8, s1
3694; GFX940-NEXT:    ;;#ASMSTART
3695; GFX940-NEXT:    ; use s8
3696; GFX940-NEXT:    ;;#ASMEND
3697; GFX940-NEXT:    s_setpc_b64 s[30:31]
3698  %vec0 = call <4 x half> asm "; def $0", "=s"()
3699  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3700  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 3>
3701  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3702  ret void
3703}
3704
; Scalar-register ("=s") case, shuffle mask <3, 3>: both lanes name element 0
; of the second shuffle operand, which is poison here, so no lane of the
; result depends on %vec0.
; All three targets share a single check block here. It expects no asm def
; and no shift/pack instruction at all, only the "use s8" inline asm.
; NOTE(review): the def is presumably dropped because its result is unused
; and the defining asm is not marked sideeffect - confirm.
3705define void @s_shuffle_v2f16_v3f16__3_3() {
3706; GFX9-LABEL: s_shuffle_v2f16_v3f16__3_3:
3707; GFX9:       ; %bb.0:
3708; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3709; GFX9-NEXT:    ;;#ASMSTART
3710; GFX9-NEXT:    ; use s8
3711; GFX9-NEXT:    ;;#ASMEND
3712; GFX9-NEXT:    s_setpc_b64 s[30:31]
3713  %vec0 = call <4 x half> asm "; def $0", "=s"()
3714  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3715  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 3>
3716  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3717  ret void
3718}
3719
; Scalar-register ("=s") case with two inputs, shuffle mask <4, 3>: both
; indices name the second operand (%extract31, taken from %vec1) - index 4 is
; its element 1 and index 3 is its element 0 - so the result is the low two
; elements of %vec1 swapped. %vec0 is not referenced by the mask.
; The checks expect only one asm def, then an s_lshr_b32 by 16 to extract
; element 1 and an s_pack_ll_b32_b16 placing it in the low half with the
; original dword supplying the high half.
; NOTE(review): the single def is presumably the one for %vec1, with the
; unused non-sideeffect def for %vec0 eliminated - confirm.
3720define void @s_shuffle_v2f16_v3f16__4_3() {
3721; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_3:
3722; GFX900:       ; %bb.0:
3723; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3724; GFX900-NEXT:    ;;#ASMSTART
3725; GFX900-NEXT:    ; def s[4:5]
3726; GFX900-NEXT:    ;;#ASMEND
3727; GFX900-NEXT:    s_lshr_b32 s5, s4, 16
3728; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
3729; GFX900-NEXT:    ;;#ASMSTART
3730; GFX900-NEXT:    ; use s8
3731; GFX900-NEXT:    ;;#ASMEND
3732; GFX900-NEXT:    s_setpc_b64 s[30:31]
3733;
3734; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_3:
3735; GFX90A:       ; %bb.0:
3736; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3737; GFX90A-NEXT:    ;;#ASMSTART
3738; GFX90A-NEXT:    ; def s[4:5]
3739; GFX90A-NEXT:    ;;#ASMEND
3740; GFX90A-NEXT:    s_lshr_b32 s5, s4, 16
3741; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
3742; GFX90A-NEXT:    ;;#ASMSTART
3743; GFX90A-NEXT:    ; use s8
3744; GFX90A-NEXT:    ;;#ASMEND
3745; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3746;
3747; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_3:
3748; GFX940:       ; %bb.0:
3749; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3750; GFX940-NEXT:    ;;#ASMSTART
3751; GFX940-NEXT:    ; def s[0:1]
3752; GFX940-NEXT:    ;;#ASMEND
3753; GFX940-NEXT:    s_lshr_b32 s1, s0, 16
3754; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
3755; GFX940-NEXT:    ;;#ASMSTART
3756; GFX940-NEXT:    ; use s8
3757; GFX940-NEXT:    ;;#ASMEND
3758; GFX940-NEXT:    s_setpc_b64 s[30:31]
3759  %vec0 = call <4 x half> asm "; def $0", "=s"()
3760  %vec1 = call <4 x half> asm "; def $0", "=s"()
3761  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3762  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3763  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 3>
3764  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3765  ret void
3766}
3767
; Scalar-register ("=s") case with two inputs, shuffle mask <poison, 4>:
; lane 0 is poison and index 4 names element 1 of the second operand
; (%extract31, taken from %vec1). %vec0 is not referenced by the mask.
; The checks expect only one asm def, landing directly in s[8:9], with no
; shift or pack instruction; the gfx940 block additionally expects an
; s_nop 0 between the def and the use.
; NOTE(review): the single def is presumably the one for %vec1, with the
; unused non-sideeffect def for %vec0 eliminated - confirm.
3768define void @s_shuffle_v2f16_v3f16__u_4() {
3769; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_4:
3770; GFX900:       ; %bb.0:
3771; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3772; GFX900-NEXT:    ;;#ASMSTART
3773; GFX900-NEXT:    ; def s[8:9]
3774; GFX900-NEXT:    ;;#ASMEND
3775; GFX900-NEXT:    ;;#ASMSTART
3776; GFX900-NEXT:    ; use s8
3777; GFX900-NEXT:    ;;#ASMEND
3778; GFX900-NEXT:    s_setpc_b64 s[30:31]
3779;
3780; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_4:
3781; GFX90A:       ; %bb.0:
3782; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3783; GFX90A-NEXT:    ;;#ASMSTART
3784; GFX90A-NEXT:    ; def s[8:9]
3785; GFX90A-NEXT:    ;;#ASMEND
3786; GFX90A-NEXT:    ;;#ASMSTART
3787; GFX90A-NEXT:    ; use s8
3788; GFX90A-NEXT:    ;;#ASMEND
3789; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3790;
3791; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_4:
3792; GFX940:       ; %bb.0:
3793; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3794; GFX940-NEXT:    ;;#ASMSTART
3795; GFX940-NEXT:    ; def s[8:9]
3796; GFX940-NEXT:    ;;#ASMEND
3797; GFX940-NEXT:    s_nop 0
3798; GFX940-NEXT:    ;;#ASMSTART
3799; GFX940-NEXT:    ; use s8
3800; GFX940-NEXT:    ;;#ASMEND
3801; GFX940-NEXT:    s_setpc_b64 s[30:31]
3802  %vec0 = call <4 x half> asm "; def $0", "=s"()
3803  %vec1 = call <4 x half> asm "; def $0", "=s"()
3804  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3805  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3806  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 poison, i32 4>
3807  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3808  ret void
3809}
3810
; Scalar-register ("=s") case with two inputs, shuffle mask <0, 4>: index 0
; names element 0 of the first operand (from %vec0), and index 4 names
; element 1 of the second operand (%extract31, taken from %vec1). The
; <2 x half> result is handed to an inline asm use pinned to s8.
; The checks expect an s_lshr_b32 by 16 on the first dword of %vec1, then an
; s_pack_ll_b32_b16 of the first dword of %vec0 (low half) with that shifted
; value (high half).
3811define void @s_shuffle_v2f16_v3f16__0_4() {
3812; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_4:
3813; GFX900:       ; %bb.0:
3814; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3815; GFX900-NEXT:    ;;#ASMSTART
3816; GFX900-NEXT:    ; def s[4:5]
3817; GFX900-NEXT:    ;;#ASMEND
3818; GFX900-NEXT:    ;;#ASMSTART
3819; GFX900-NEXT:    ; def s[6:7]
3820; GFX900-NEXT:    ;;#ASMEND
3821; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
3822; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3823; GFX900-NEXT:    ;;#ASMSTART
3824; GFX900-NEXT:    ; use s8
3825; GFX900-NEXT:    ;;#ASMEND
3826; GFX900-NEXT:    s_setpc_b64 s[30:31]
3827;
3828; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_4:
3829; GFX90A:       ; %bb.0:
3830; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3831; GFX90A-NEXT:    ;;#ASMSTART
3832; GFX90A-NEXT:    ; def s[4:5]
3833; GFX90A-NEXT:    ;;#ASMEND
3834; GFX90A-NEXT:    ;;#ASMSTART
3835; GFX90A-NEXT:    ; def s[6:7]
3836; GFX90A-NEXT:    ;;#ASMEND
3837; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
3838; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3839; GFX90A-NEXT:    ;;#ASMSTART
3840; GFX90A-NEXT:    ; use s8
3841; GFX90A-NEXT:    ;;#ASMEND
3842; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3843;
3844; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_4:
3845; GFX940:       ; %bb.0:
3846; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3847; GFX940-NEXT:    ;;#ASMSTART
3848; GFX940-NEXT:    ; def s[0:1]
3849; GFX940-NEXT:    ;;#ASMEND
3850; GFX940-NEXT:    ;;#ASMSTART
3851; GFX940-NEXT:    ; def s[2:3]
3852; GFX940-NEXT:    ;;#ASMEND
3853; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
3854; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
3855; GFX940-NEXT:    ;;#ASMSTART
3856; GFX940-NEXT:    ; use s8
3857; GFX940-NEXT:    ;;#ASMEND
3858; GFX940-NEXT:    s_setpc_b64 s[30:31]
3859  %vec0 = call <4 x half> asm "; def $0", "=s"()
3860  %vec1 = call <4 x half> asm "; def $0", "=s"()
3861  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3862  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3863  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 0, i32 4>
3864  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3865  ret void
3866}
3867
; s-register variant, mask <1, 4>: lane 1 of each <3 x half> operand is selected.
; The expectations shift both source dwords right by 16 before packing them
; into s8.
3868define void @s_shuffle_v2f16_v3f16__1_4() {
3869; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_4:
3870; GFX900:       ; %bb.0:
3871; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3872; GFX900-NEXT:    ;;#ASMSTART
3873; GFX900-NEXT:    ; def s[4:5]
3874; GFX900-NEXT:    ;;#ASMEND
3875; GFX900-NEXT:    ;;#ASMSTART
3876; GFX900-NEXT:    ; def s[6:7]
3877; GFX900-NEXT:    ;;#ASMEND
3878; GFX900-NEXT:    s_lshr_b32 s5, s6, 16
3879; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
3880; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3881; GFX900-NEXT:    ;;#ASMSTART
3882; GFX900-NEXT:    ; use s8
3883; GFX900-NEXT:    ;;#ASMEND
3884; GFX900-NEXT:    s_setpc_b64 s[30:31]
3885;
3886; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_4:
3887; GFX90A:       ; %bb.0:
3888; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3889; GFX90A-NEXT:    ;;#ASMSTART
3890; GFX90A-NEXT:    ; def s[4:5]
3891; GFX90A-NEXT:    ;;#ASMEND
3892; GFX90A-NEXT:    ;;#ASMSTART
3893; GFX90A-NEXT:    ; def s[6:7]
3894; GFX90A-NEXT:    ;;#ASMEND
3895; GFX90A-NEXT:    s_lshr_b32 s5, s6, 16
3896; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
3897; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
3898; GFX90A-NEXT:    ;;#ASMSTART
3899; GFX90A-NEXT:    ; use s8
3900; GFX90A-NEXT:    ;;#ASMEND
3901; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3902;
3903; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_4:
3904; GFX940:       ; %bb.0:
3905; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3906; GFX940-NEXT:    ;;#ASMSTART
3907; GFX940-NEXT:    ; def s[0:1]
3908; GFX940-NEXT:    ;;#ASMEND
3909; GFX940-NEXT:    ;;#ASMSTART
3910; GFX940-NEXT:    ; def s[2:3]
3911; GFX940-NEXT:    ;;#ASMEND
3912; GFX940-NEXT:    s_lshr_b32 s1, s2, 16
3913; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
3914; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
3915; GFX940-NEXT:    ;;#ASMSTART
3916; GFX940-NEXT:    ; use s8
3917; GFX940-NEXT:    ;;#ASMEND
3918; GFX940-NEXT:    s_setpc_b64 s[30:31]
3919  %vec0 = call <4 x half> asm "; def $0", "=s"()
3920  %vec1 = call <4 x half> asm "; def $0", "=s"()
3921  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3922  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so <1, 4>
  ; yields (%extract3[1], %extract31[1]).
3923  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 1, i32 4>
3924  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3925  ret void
3926}
3927
; s-register variant, mask <2, 4>: lane 2 of the first operand and lane 1 of the
; second. The expectations take the second dword of the first def unshifted and
; the first dword of the second def shifted right by 16, then pack into s8.
3928define void @s_shuffle_v2f16_v3f16__2_4() {
3929; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_4:
3930; GFX900:       ; %bb.0:
3931; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3932; GFX900-NEXT:    ;;#ASMSTART
3933; GFX900-NEXT:    ; def s[4:5]
3934; GFX900-NEXT:    ;;#ASMEND
3935; GFX900-NEXT:    ;;#ASMSTART
3936; GFX900-NEXT:    ; def s[6:7]
3937; GFX900-NEXT:    ;;#ASMEND
3938; GFX900-NEXT:    s_lshr_b32 s4, s6, 16
3939; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
3940; GFX900-NEXT:    ;;#ASMSTART
3941; GFX900-NEXT:    ; use s8
3942; GFX900-NEXT:    ;;#ASMEND
3943; GFX900-NEXT:    s_setpc_b64 s[30:31]
3944;
3945; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_4:
3946; GFX90A:       ; %bb.0:
3947; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3948; GFX90A-NEXT:    ;;#ASMSTART
3949; GFX90A-NEXT:    ; def s[4:5]
3950; GFX90A-NEXT:    ;;#ASMEND
3951; GFX90A-NEXT:    ;;#ASMSTART
3952; GFX90A-NEXT:    ; def s[6:7]
3953; GFX90A-NEXT:    ;;#ASMEND
3954; GFX90A-NEXT:    s_lshr_b32 s4, s6, 16
3955; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s4
3956; GFX90A-NEXT:    ;;#ASMSTART
3957; GFX90A-NEXT:    ; use s8
3958; GFX90A-NEXT:    ;;#ASMEND
3959; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3960;
3961; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_4:
3962; GFX940:       ; %bb.0:
3963; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3964; GFX940-NEXT:    ;;#ASMSTART
3965; GFX940-NEXT:    ; def s[0:1]
3966; GFX940-NEXT:    ;;#ASMEND
3967; GFX940-NEXT:    ;;#ASMSTART
3968; GFX940-NEXT:    ; def s[2:3]
3969; GFX940-NEXT:    ;;#ASMEND
3970; GFX940-NEXT:    s_lshr_b32 s0, s2, 16
3971; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s0
3972; GFX940-NEXT:    ;;#ASMSTART
3973; GFX940-NEXT:    ; use s8
3974; GFX940-NEXT:    ;;#ASMEND
3975; GFX940-NEXT:    s_setpc_b64 s[30:31]
3976  %vec0 = call <4 x half> asm "; def $0", "=s"()
3977  %vec1 = call <4 x half> asm "; def $0", "=s"()
3978  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
3979  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so <2, 4>
  ; yields (%extract3[2], %extract31[1]).
3980  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 2, i32 4>
3981  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
3982  ret void
3983}
3984
; s-register variant, mask <3, 4>: both result lanes come from the second
; operand (its lanes 0 and 1). The expectations contain a single def, placed
; directly in s[8:9], and no shift or pack; the gfx940 expectations additionally
; have an s_nop 0 between the def and the use.
; NOTE(review): the def for %vec0 is absent from the expected output, presumably
; because that asm call has no sideeffect marker and its result is never
; selected - confirm.
3985define void @s_shuffle_v2f16_v3f16__3_4() {
3986; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_4:
3987; GFX900:       ; %bb.0:
3988; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3989; GFX900-NEXT:    ;;#ASMSTART
3990; GFX900-NEXT:    ; def s[8:9]
3991; GFX900-NEXT:    ;;#ASMEND
3992; GFX900-NEXT:    ;;#ASMSTART
3993; GFX900-NEXT:    ; use s8
3994; GFX900-NEXT:    ;;#ASMEND
3995; GFX900-NEXT:    s_setpc_b64 s[30:31]
3996;
3997; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_4:
3998; GFX90A:       ; %bb.0:
3999; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4000; GFX90A-NEXT:    ;;#ASMSTART
4001; GFX90A-NEXT:    ; def s[8:9]
4002; GFX90A-NEXT:    ;;#ASMEND
4003; GFX90A-NEXT:    ;;#ASMSTART
4004; GFX90A-NEXT:    ; use s8
4005; GFX90A-NEXT:    ;;#ASMEND
4006; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4007;
4008; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_4:
4009; GFX940:       ; %bb.0:
4010; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4011; GFX940-NEXT:    ;;#ASMSTART
4012; GFX940-NEXT:    ; def s[8:9]
4013; GFX940-NEXT:    ;;#ASMEND
4014; GFX940-NEXT:    s_nop 0
4015; GFX940-NEXT:    ;;#ASMSTART
4016; GFX940-NEXT:    ; use s8
4017; GFX940-NEXT:    ;;#ASMEND
4018; GFX940-NEXT:    s_setpc_b64 s[30:31]
4019  %vec0 = call <4 x half> asm "; def $0", "=s"()
4020  %vec1 = call <4 x half> asm "; def $0", "=s"()
4021  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
4022  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so <3, 4>
  ; yields (%extract31[0], %extract31[1]).
4023  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 3, i32 4>
4024  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
4025  ret void
4026}
4027
; s-register variant, mask <4, 4>: lane 1 of the second operand is duplicated
; into both result lanes. The expectations contain a single def, shift its first
; dword right by 16, and pack that value with itself into s8.
; NOTE(review): the def for %vec0 is absent from the expected output, presumably
; because that asm call has no sideeffect marker and its result is never
; selected - confirm.
4028define void @s_shuffle_v2f16_v3f16__4_4() {
4029; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_4:
4030; GFX900:       ; %bb.0:
4031; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4032; GFX900-NEXT:    ;;#ASMSTART
4033; GFX900-NEXT:    ; def s[4:5]
4034; GFX900-NEXT:    ;;#ASMEND
4035; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
4036; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
4037; GFX900-NEXT:    ;;#ASMSTART
4038; GFX900-NEXT:    ; use s8
4039; GFX900-NEXT:    ;;#ASMEND
4040; GFX900-NEXT:    s_setpc_b64 s[30:31]
4041;
4042; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_4:
4043; GFX90A:       ; %bb.0:
4044; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4045; GFX90A-NEXT:    ;;#ASMSTART
4046; GFX90A-NEXT:    ; def s[4:5]
4047; GFX90A-NEXT:    ;;#ASMEND
4048; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
4049; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s4
4050; GFX90A-NEXT:    ;;#ASMSTART
4051; GFX90A-NEXT:    ; use s8
4052; GFX90A-NEXT:    ;;#ASMEND
4053; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4054;
4055; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_4:
4056; GFX940:       ; %bb.0:
4057; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4058; GFX940-NEXT:    ;;#ASMSTART
4059; GFX940-NEXT:    ; def s[0:1]
4060; GFX940-NEXT:    ;;#ASMEND
4061; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
4062; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s0
4063; GFX940-NEXT:    ;;#ASMSTART
4064; GFX940-NEXT:    ; use s8
4065; GFX940-NEXT:    ;;#ASMEND
4066; GFX940-NEXT:    s_setpc_b64 s[30:31]
4067  %vec0 = call <4 x half> asm "; def $0", "=s"()
4068  %vec1 = call <4 x half> asm "; def $0", "=s"()
4069  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
4070  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so <4, 4>
  ; yields (%extract31[1], %extract31[1]).
4071  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 4>
4072  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
4073  ret void
4074}
4075
; s-register variant, mask <poison, 5>: the first result lane is poison and the
; second is lane 2 of the second operand. The expectations contain a single def
; and one s_lshl_b32 by 16 of its second dword into s8.
; NOTE(review): the def for %vec0 is absent from the expected output, presumably
; because that asm call has no sideeffect marker and its result is never
; selected - confirm.
4076define void @s_shuffle_v2f16_v3f16__u_5() {
4077; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_5:
4078; GFX900:       ; %bb.0:
4079; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4080; GFX900-NEXT:    ;;#ASMSTART
4081; GFX900-NEXT:    ; def s[4:5]
4082; GFX900-NEXT:    ;;#ASMEND
4083; GFX900-NEXT:    s_lshl_b32 s8, s5, 16
4084; GFX900-NEXT:    ;;#ASMSTART
4085; GFX900-NEXT:    ; use s8
4086; GFX900-NEXT:    ;;#ASMEND
4087; GFX900-NEXT:    s_setpc_b64 s[30:31]
4088;
4089; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_5:
4090; GFX90A:       ; %bb.0:
4091; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4092; GFX90A-NEXT:    ;;#ASMSTART
4093; GFX90A-NEXT:    ; def s[4:5]
4094; GFX90A-NEXT:    ;;#ASMEND
4095; GFX90A-NEXT:    s_lshl_b32 s8, s5, 16
4096; GFX90A-NEXT:    ;;#ASMSTART
4097; GFX90A-NEXT:    ; use s8
4098; GFX90A-NEXT:    ;;#ASMEND
4099; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4100;
4101; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_5:
4102; GFX940:       ; %bb.0:
4103; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4104; GFX940-NEXT:    ;;#ASMSTART
4105; GFX940-NEXT:    ; def s[0:1]
4106; GFX940-NEXT:    ;;#ASMEND
4107; GFX940-NEXT:    s_lshl_b32 s8, s1, 16
4108; GFX940-NEXT:    ;;#ASMSTART
4109; GFX940-NEXT:    ; use s8
4110; GFX940-NEXT:    ;;#ASMEND
4111; GFX940-NEXT:    s_setpc_b64 s[30:31]
4112  %vec0 = call <4 x half> asm "; def $0", "=s"()
4113  %vec1 = call <4 x half> asm "; def $0", "=s"()
4114  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
4115  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so
  ; <poison, 5> yields (poison, %extract31[2]).
4116  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 poison, i32 5>
4117  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
4118  ret void
4119}
4120
; s-register variant, mask <0, 5>: lane 0 of the first operand and lane 2 of the
; second. The expectations need no shift: the first dword of the first def and
; the second dword of the second def are packed straight into s8.
4121define void @s_shuffle_v2f16_v3f16__0_5() {
4122; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_5:
4123; GFX900:       ; %bb.0:
4124; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4125; GFX900-NEXT:    ;;#ASMSTART
4126; GFX900-NEXT:    ; def s[4:5]
4127; GFX900-NEXT:    ;;#ASMEND
4128; GFX900-NEXT:    ;;#ASMSTART
4129; GFX900-NEXT:    ; def s[6:7]
4130; GFX900-NEXT:    ;;#ASMEND
4131; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
4132; GFX900-NEXT:    ;;#ASMSTART
4133; GFX900-NEXT:    ; use s8
4134; GFX900-NEXT:    ;;#ASMEND
4135; GFX900-NEXT:    s_setpc_b64 s[30:31]
4136;
4137; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_5:
4138; GFX90A:       ; %bb.0:
4139; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4140; GFX90A-NEXT:    ;;#ASMSTART
4141; GFX90A-NEXT:    ; def s[4:5]
4142; GFX90A-NEXT:    ;;#ASMEND
4143; GFX90A-NEXT:    ;;#ASMSTART
4144; GFX90A-NEXT:    ; def s[6:7]
4145; GFX90A-NEXT:    ;;#ASMEND
4146; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
4147; GFX90A-NEXT:    ;;#ASMSTART
4148; GFX90A-NEXT:    ; use s8
4149; GFX90A-NEXT:    ;;#ASMEND
4150; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4151;
4152; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_5:
4153; GFX940:       ; %bb.0:
4154; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4155; GFX940-NEXT:    ;;#ASMSTART
4156; GFX940-NEXT:    ; def s[0:1]
4157; GFX940-NEXT:    ;;#ASMEND
4158; GFX940-NEXT:    ;;#ASMSTART
4159; GFX940-NEXT:    ; def s[2:3]
4160; GFX940-NEXT:    ;;#ASMEND
4161; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
4162; GFX940-NEXT:    ;;#ASMSTART
4163; GFX940-NEXT:    ; use s8
4164; GFX940-NEXT:    ;;#ASMEND
4165; GFX940-NEXT:    s_setpc_b64 s[30:31]
4166  %vec0 = call <4 x half> asm "; def $0", "=s"()
4167  %vec1 = call <4 x half> asm "; def $0", "=s"()
4168  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
4169  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so <0, 5>
  ; yields (%extract3[0], %extract31[2]).
4170  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 0, i32 5>
4171  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
4172  ret void
4173}
4174
; s-register variant, mask <1, 5>: lane 1 of the first operand and lane 2 of the
; second. In the expectations the right shift of the first def's first dword is
; placed between the two defs, and the pack into s8 follows the second def.
4175define void @s_shuffle_v2f16_v3f16__1_5() {
4176; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_5:
4177; GFX900:       ; %bb.0:
4178; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4179; GFX900-NEXT:    ;;#ASMSTART
4180; GFX900-NEXT:    ; def s[4:5]
4181; GFX900-NEXT:    ;;#ASMEND
4182; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
4183; GFX900-NEXT:    ;;#ASMSTART
4184; GFX900-NEXT:    ; def s[6:7]
4185; GFX900-NEXT:    ;;#ASMEND
4186; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
4187; GFX900-NEXT:    ;;#ASMSTART
4188; GFX900-NEXT:    ; use s8
4189; GFX900-NEXT:    ;;#ASMEND
4190; GFX900-NEXT:    s_setpc_b64 s[30:31]
4191;
4192; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_5:
4193; GFX90A:       ; %bb.0:
4194; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4195; GFX90A-NEXT:    ;;#ASMSTART
4196; GFX90A-NEXT:    ; def s[4:5]
4197; GFX90A-NEXT:    ;;#ASMEND
4198; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
4199; GFX90A-NEXT:    ;;#ASMSTART
4200; GFX90A-NEXT:    ; def s[6:7]
4201; GFX90A-NEXT:    ;;#ASMEND
4202; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s7
4203; GFX90A-NEXT:    ;;#ASMSTART
4204; GFX90A-NEXT:    ; use s8
4205; GFX90A-NEXT:    ;;#ASMEND
4206; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4207;
4208; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_5:
4209; GFX940:       ; %bb.0:
4210; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4211; GFX940-NEXT:    ;;#ASMSTART
4212; GFX940-NEXT:    ; def s[0:1]
4213; GFX940-NEXT:    ;;#ASMEND
4214; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
4215; GFX940-NEXT:    ;;#ASMSTART
4216; GFX940-NEXT:    ; def s[2:3]
4217; GFX940-NEXT:    ;;#ASMEND
4218; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s3
4219; GFX940-NEXT:    ;;#ASMSTART
4220; GFX940-NEXT:    ; use s8
4221; GFX940-NEXT:    ;;#ASMEND
4222; GFX940-NEXT:    s_setpc_b64 s[30:31]
4223  %vec0 = call <4 x half> asm "; def $0", "=s"()
4224  %vec1 = call <4 x half> asm "; def $0", "=s"()
4225  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
4226  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so <1, 5>
  ; yields (%extract3[1], %extract31[2]).
4227  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 1, i32 5>
4228  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
4229  ret void
4230}
4231
; s-register variant, mask <2, 5>: lane 2 of each <3 x half> operand is selected.
; The expectations pack the second dword of each def into s8 with no shift.
4232define void @s_shuffle_v2f16_v3f16__2_5() {
4233; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_5:
4234; GFX900:       ; %bb.0:
4235; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4236; GFX900-NEXT:    ;;#ASMSTART
4237; GFX900-NEXT:    ; def s[4:5]
4238; GFX900-NEXT:    ;;#ASMEND
4239; GFX900-NEXT:    ;;#ASMSTART
4240; GFX900-NEXT:    ; def s[6:7]
4241; GFX900-NEXT:    ;;#ASMEND
4242; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
4243; GFX900-NEXT:    ;;#ASMSTART
4244; GFX900-NEXT:    ; use s8
4245; GFX900-NEXT:    ;;#ASMEND
4246; GFX900-NEXT:    s_setpc_b64 s[30:31]
4247;
4248; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_5:
4249; GFX90A:       ; %bb.0:
4250; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4251; GFX90A-NEXT:    ;;#ASMSTART
4252; GFX90A-NEXT:    ; def s[4:5]
4253; GFX90A-NEXT:    ;;#ASMEND
4254; GFX90A-NEXT:    ;;#ASMSTART
4255; GFX90A-NEXT:    ; def s[6:7]
4256; GFX90A-NEXT:    ;;#ASMEND
4257; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s5, s7
4258; GFX90A-NEXT:    ;;#ASMSTART
4259; GFX90A-NEXT:    ; use s8
4260; GFX90A-NEXT:    ;;#ASMEND
4261; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4262;
4263; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_5:
4264; GFX940:       ; %bb.0:
4265; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4266; GFX940-NEXT:    ;;#ASMSTART
4267; GFX940-NEXT:    ; def s[0:1]
4268; GFX940-NEXT:    ;;#ASMEND
4269; GFX940-NEXT:    ;;#ASMSTART
4270; GFX940-NEXT:    ; def s[2:3]
4271; GFX940-NEXT:    ;;#ASMEND
4272; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s1, s3
4273; GFX940-NEXT:    ;;#ASMSTART
4274; GFX940-NEXT:    ; use s8
4275; GFX940-NEXT:    ;;#ASMEND
4276; GFX940-NEXT:    s_setpc_b64 s[30:31]
4277  %vec0 = call <4 x half> asm "; def $0", "=s"()
4278  %vec1 = call <4 x half> asm "; def $0", "=s"()
4279  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
4280  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so <2, 5>
  ; yields (%extract3[2], %extract31[2]).
4281  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 2, i32 5>
4282  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
4283  ret void
4284}
4285
; s-register variant, mask <3, 5>: both result lanes come from the second
; operand (its lanes 0 and 2). The expectations contain a single def and pack
; its two dwords into s8 with no shift.
; NOTE(review): the def for %vec0 is absent from the expected output, presumably
; because that asm call has no sideeffect marker and its result is never
; selected - confirm.
4286define void @s_shuffle_v2f16_v3f16__3_5() {
4287; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_5:
4288; GFX900:       ; %bb.0:
4289; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4290; GFX900-NEXT:    ;;#ASMSTART
4291; GFX900-NEXT:    ; def s[4:5]
4292; GFX900-NEXT:    ;;#ASMEND
4293; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
4294; GFX900-NEXT:    ;;#ASMSTART
4295; GFX900-NEXT:    ; use s8
4296; GFX900-NEXT:    ;;#ASMEND
4297; GFX900-NEXT:    s_setpc_b64 s[30:31]
4298;
4299; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_5:
4300; GFX90A:       ; %bb.0:
4301; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4302; GFX90A-NEXT:    ;;#ASMSTART
4303; GFX90A-NEXT:    ; def s[4:5]
4304; GFX90A-NEXT:    ;;#ASMEND
4305; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
4306; GFX90A-NEXT:    ;;#ASMSTART
4307; GFX90A-NEXT:    ; use s8
4308; GFX90A-NEXT:    ;;#ASMEND
4309; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4310;
4311; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_5:
4312; GFX940:       ; %bb.0:
4313; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4314; GFX940-NEXT:    ;;#ASMSTART
4315; GFX940-NEXT:    ; def s[0:1]
4316; GFX940-NEXT:    ;;#ASMEND
4317; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
4318; GFX940-NEXT:    ;;#ASMSTART
4319; GFX940-NEXT:    ; use s8
4320; GFX940-NEXT:    ;;#ASMEND
4321; GFX940-NEXT:    s_setpc_b64 s[30:31]
4322  %vec0 = call <4 x half> asm "; def $0", "=s"()
4323  %vec1 = call <4 x half> asm "; def $0", "=s"()
4324  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
4325  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so <3, 5>
  ; yields (%extract31[0], %extract31[2]).
4326  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 3, i32 5>
4327  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
4328  ret void
4329}
4330
; s-register variant, mask <4, 5>: both result lanes come from the second
; operand (its lanes 1 and 2). The expectations contain a single def, shift its
; first dword right by 16, and pack that with its second dword into s8.
; NOTE(review): the def for %vec0 is absent from the expected output, presumably
; because that asm call has no sideeffect marker and its result is never
; selected - confirm.
4331define void @s_shuffle_v2f16_v3f16__4_5() {
4332; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_5:
4333; GFX900:       ; %bb.0:
4334; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4335; GFX900-NEXT:    ;;#ASMSTART
4336; GFX900-NEXT:    ; def s[4:5]
4337; GFX900-NEXT:    ;;#ASMEND
4338; GFX900-NEXT:    s_lshr_b32 s4, s4, 16
4339; GFX900-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
4340; GFX900-NEXT:    ;;#ASMSTART
4341; GFX900-NEXT:    ; use s8
4342; GFX900-NEXT:    ;;#ASMEND
4343; GFX900-NEXT:    s_setpc_b64 s[30:31]
4344;
4345; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_5:
4346; GFX90A:       ; %bb.0:
4347; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4348; GFX90A-NEXT:    ;;#ASMSTART
4349; GFX90A-NEXT:    ; def s[4:5]
4350; GFX90A-NEXT:    ;;#ASMEND
4351; GFX90A-NEXT:    s_lshr_b32 s4, s4, 16
4352; GFX90A-NEXT:    s_pack_ll_b32_b16 s8, s4, s5
4353; GFX90A-NEXT:    ;;#ASMSTART
4354; GFX90A-NEXT:    ; use s8
4355; GFX90A-NEXT:    ;;#ASMEND
4356; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4357;
4358; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_5:
4359; GFX940:       ; %bb.0:
4360; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4361; GFX940-NEXT:    ;;#ASMSTART
4362; GFX940-NEXT:    ; def s[0:1]
4363; GFX940-NEXT:    ;;#ASMEND
4364; GFX940-NEXT:    s_lshr_b32 s0, s0, 16
4365; GFX940-NEXT:    s_pack_ll_b32_b16 s8, s0, s1
4366; GFX940-NEXT:    ;;#ASMSTART
4367; GFX940-NEXT:    ; use s8
4368; GFX940-NEXT:    ;;#ASMEND
4369; GFX940-NEXT:    s_setpc_b64 s[30:31]
4370  %vec0 = call <4 x half> asm "; def $0", "=s"()
4371  %vec1 = call <4 x half> asm "; def $0", "=s"()
4372  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
4373  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ; Mask indices 0-2 address %extract3 and 3-5 address %extract31, so <4, 5>
  ; yields (%extract31[1], %extract31[2]).
4374  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 5>
4375  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
4376  ret void
4377}
4378;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
4379; GFX90APLUS: {{.*}}
4380