xref: /llvm-project/llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll (revision 5e79ae60a67726805fcc27081f67c41cbd8a1e4e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s
5
6
; Widening shuffle <3 x i64> -> <4 x i64> whose mask is entirely poison, so no
; lane of %vec0 is selected. The assertions expect only the entry s_waitcnt and
; the return: neither the inline-asm def nor a store appears in the output.
7define void @v_shuffle_v4i64_v3i64__u_u_u_u(ptr addrspace(1) inreg %ptr) {
8; GFX9-LABEL: v_shuffle_v4i64_v3i64__u_u_u_u:
9; GFX9:       ; %bb.0:
10; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11; GFX9-NEXT:    s_setpc_b64 s[30:31]
12  %vec0 = call <3 x i64> asm "; def $0", "=v"()
13  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> poison
14  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
15  ret void
16}
17
; Mask <0, poison, poison, poison>: result lane 0 is %vec0[0]; the remaining
; lanes are poison. The assertions expect one inline-asm def (v[0:5]) and two
; global_store_dwordx4 instructions, at offset 16 and at offset 0. On gfx940 the
; pointer is expected in s[0:1] and the stores carry sc0 sc1; gfx900 and gfx90a
; use s[16:17].
18define void @v_shuffle_v4i64_v3i64__0_u_u_u(ptr addrspace(1) inreg %ptr) {
19; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
20; GFX900:       ; %bb.0:
21; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX900-NEXT:    v_mov_b32_e32 v6, 0
23; GFX900-NEXT:    ;;#ASMSTART
24; GFX900-NEXT:    ; def v[0:5]
25; GFX900-NEXT:    ;;#ASMEND
26; GFX900-NEXT:    global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
27; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
28; GFX900-NEXT:    s_waitcnt vmcnt(0)
29; GFX900-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
32; GFX90A:       ; %bb.0:
33; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
35; GFX90A-NEXT:    ;;#ASMSTART
36; GFX90A-NEXT:    ; def v[0:5]
37; GFX90A-NEXT:    ;;#ASMEND
38; GFX90A-NEXT:    global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
39; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
40; GFX90A-NEXT:    s_waitcnt vmcnt(0)
41; GFX90A-NEXT:    s_setpc_b64 s[30:31]
42;
43; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_u_u_u:
44; GFX940:       ; %bb.0:
45; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GFX940-NEXT:    v_mov_b32_e32 v6, 0
47; GFX940-NEXT:    ;;#ASMSTART
48; GFX940-NEXT:    ; def v[0:5]
49; GFX940-NEXT:    ;;#ASMEND
50; GFX940-NEXT:    global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1
51; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
52; GFX940-NEXT:    s_waitcnt vmcnt(0)
53; GFX940-NEXT:    s_setpc_b64 s[30:31]
54  %vec0 = call <3 x i64> asm "; def $0", "=v"()
55  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
56  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
57  ret void
58}
59
; Mask <1, poison, poison, poison>: result lane 0 is %vec0[1]; the remaining
; lanes are poison. The assertions expect one inline-asm def (v[0:5]) and a
; single global_store_dwordx4 of v[2:5] at offset 0.
60define void @v_shuffle_v4i64_v3i64__1_u_u_u(ptr addrspace(1) inreg %ptr) {
61; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
62; GFX900:       ; %bb.0:
63; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
64; GFX900-NEXT:    v_mov_b32_e32 v6, 0
65; GFX900-NEXT:    ;;#ASMSTART
66; GFX900-NEXT:    ; def v[0:5]
67; GFX900-NEXT:    ;;#ASMEND
68; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
69; GFX900-NEXT:    s_waitcnt vmcnt(0)
70; GFX900-NEXT:    s_setpc_b64 s[30:31]
71;
72; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
73; GFX90A:       ; %bb.0:
74; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
76; GFX90A-NEXT:    ;;#ASMSTART
77; GFX90A-NEXT:    ; def v[0:5]
78; GFX90A-NEXT:    ;;#ASMEND
79; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
80; GFX90A-NEXT:    s_waitcnt vmcnt(0)
81; GFX90A-NEXT:    s_setpc_b64 s[30:31]
82;
83; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_u_u_u:
84; GFX940:       ; %bb.0:
85; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86; GFX940-NEXT:    v_mov_b32_e32 v6, 0
87; GFX940-NEXT:    ;;#ASMSTART
88; GFX940-NEXT:    ; def v[0:5]
89; GFX940-NEXT:    ;;#ASMEND
90; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
91; GFX940-NEXT:    s_waitcnt vmcnt(0)
92; GFX940-NEXT:    s_setpc_b64 s[30:31]
93  %vec0 = call <3 x i64> asm "; def $0", "=v"()
94  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
95  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
96  ret void
97}
98
; Mask <2, poison, poison, poison>: result lane 0 is %vec0[2]; the remaining
; lanes are poison. The assertions expect one inline-asm def (v[0:5]), copies of
; v4/v5 into v0/v1, and a single global_store_dwordx4 of v[0:3] at offset 0.
99define void @v_shuffle_v4i64_v3i64__2_u_u_u(ptr addrspace(1) inreg %ptr) {
100; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
101; GFX900:       ; %bb.0:
102; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX900-NEXT:    ;;#ASMSTART
104; GFX900-NEXT:    ; def v[0:5]
105; GFX900-NEXT:    ;;#ASMEND
106; GFX900-NEXT:    v_mov_b32_e32 v6, 0
107; GFX900-NEXT:    v_mov_b32_e32 v0, v4
108; GFX900-NEXT:    v_mov_b32_e32 v1, v5
109; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
110; GFX900-NEXT:    s_waitcnt vmcnt(0)
111; GFX900-NEXT:    s_setpc_b64 s[30:31]
112;
113; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
114; GFX90A:       ; %bb.0:
115; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116; GFX90A-NEXT:    ;;#ASMSTART
117; GFX90A-NEXT:    ; def v[0:5]
118; GFX90A-NEXT:    ;;#ASMEND
119; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
120; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
121; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
122; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
123; GFX90A-NEXT:    s_waitcnt vmcnt(0)
124; GFX90A-NEXT:    s_setpc_b64 s[30:31]
125;
126; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_u_u_u:
127; GFX940:       ; %bb.0:
128; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129; GFX940-NEXT:    ;;#ASMSTART
130; GFX940-NEXT:    ; def v[0:5]
131; GFX940-NEXT:    ;;#ASMEND
132; GFX940-NEXT:    v_mov_b32_e32 v6, 0
133; GFX940-NEXT:    v_mov_b32_e32 v0, v4
134; GFX940-NEXT:    v_mov_b32_e32 v1, v5
135; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
136; GFX940-NEXT:    s_waitcnt vmcnt(0)
137; GFX940-NEXT:    s_setpc_b64 s[30:31]
138  %vec0 = call <3 x i64> asm "; def $0", "=v"()
139  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
140  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
141  ret void
142}
143
; Mask <3, poison, poison, poison>: with <3 x i64> inputs, index 3 names element
; 0 of the second shuffle operand, which is poison in this test, so no lane of
; %vec0 is selected. The assertions expect only the entry s_waitcnt and the
; return: neither the inline-asm def nor a store appears in the output.
144define void @v_shuffle_v4i64_v3i64__3_u_u_u(ptr addrspace(1) inreg %ptr) {
145; GFX9-LABEL: v_shuffle_v4i64_v3i64__3_u_u_u:
146; GFX9:       ; %bb.0:
147; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148; GFX9-NEXT:    s_setpc_b64 s[30:31]
149  %vec0 = call <3 x i64> asm "; def $0", "=v"()
150  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
151  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
152  ret void
153}
154
; Mask <4, poison, poison, poison>: result lane 0 is %vec1[1]; %vec0 is not
; referenced by the mask. The assertions expect a single inline-asm def
; (v[0:5]) and one global_store_dwordx4 of v[2:5] at offset 0.
155define void @v_shuffle_v4i64_v3i64__4_u_u_u(ptr addrspace(1) inreg %ptr) {
156; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
157; GFX900:       ; %bb.0:
158; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159; GFX900-NEXT:    v_mov_b32_e32 v6, 0
160; GFX900-NEXT:    ;;#ASMSTART
161; GFX900-NEXT:    ; def v[0:5]
162; GFX900-NEXT:    ;;#ASMEND
163; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
164; GFX900-NEXT:    s_waitcnt vmcnt(0)
165; GFX900-NEXT:    s_setpc_b64 s[30:31]
166;
167; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
168; GFX90A:       ; %bb.0:
169; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
171; GFX90A-NEXT:    ;;#ASMSTART
172; GFX90A-NEXT:    ; def v[0:5]
173; GFX90A-NEXT:    ;;#ASMEND
174; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
175; GFX90A-NEXT:    s_waitcnt vmcnt(0)
176; GFX90A-NEXT:    s_setpc_b64 s[30:31]
177;
178; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_u_u_u:
179; GFX940:       ; %bb.0:
180; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
181; GFX940-NEXT:    v_mov_b32_e32 v6, 0
182; GFX940-NEXT:    ;;#ASMSTART
183; GFX940-NEXT:    ; def v[0:5]
184; GFX940-NEXT:    ;;#ASMEND
185; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
186; GFX940-NEXT:    s_waitcnt vmcnt(0)
187; GFX940-NEXT:    s_setpc_b64 s[30:31]
188  %vec0 = call <3 x i64> asm "; def $0", "=v"()
189  %vec1 = call <3 x i64> asm "; def $0", "=v"()
190  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
191  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
192  ret void
193}
194
; Mask <5, poison, poison, poison>: result lane 0 is %vec1[2]; %vec0 is not
; referenced by the mask. The assertions expect a single inline-asm def
; (v[0:5]), copies of v4/v5 into v0/v1, and one global_store_dwordx4 of v[0:3].
195define void @v_shuffle_v4i64_v3i64__5_u_u_u(ptr addrspace(1) inreg %ptr) {
196; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
197; GFX900:       ; %bb.0:
198; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199; GFX900-NEXT:    ;;#ASMSTART
200; GFX900-NEXT:    ; def v[0:5]
201; GFX900-NEXT:    ;;#ASMEND
202; GFX900-NEXT:    v_mov_b32_e32 v6, 0
203; GFX900-NEXT:    v_mov_b32_e32 v0, v4
204; GFX900-NEXT:    v_mov_b32_e32 v1, v5
205; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
206; GFX900-NEXT:    s_waitcnt vmcnt(0)
207; GFX900-NEXT:    s_setpc_b64 s[30:31]
208;
209; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
210; GFX90A:       ; %bb.0:
211; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212; GFX90A-NEXT:    ;;#ASMSTART
213; GFX90A-NEXT:    ; def v[0:5]
214; GFX90A-NEXT:    ;;#ASMEND
215; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
216; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
217; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
218; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
219; GFX90A-NEXT:    s_waitcnt vmcnt(0)
220; GFX90A-NEXT:    s_setpc_b64 s[30:31]
221;
222; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_u_u:
223; GFX940:       ; %bb.0:
224; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225; GFX940-NEXT:    ;;#ASMSTART
226; GFX940-NEXT:    ; def v[0:5]
227; GFX940-NEXT:    ;;#ASMEND
228; GFX940-NEXT:    v_mov_b32_e32 v6, 0
229; GFX940-NEXT:    v_mov_b32_e32 v0, v4
230; GFX940-NEXT:    v_mov_b32_e32 v1, v5
231; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
232; GFX940-NEXT:    s_waitcnt vmcnt(0)
233; GFX940-NEXT:    s_setpc_b64 s[30:31]
234  %vec0 = call <3 x i64> asm "; def $0", "=v"()
235  %vec1 = call <3 x i64> asm "; def $0", "=v"()
236  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
237  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
238  ret void
239}
240
; Mask <5, 0, poison, poison>: result lanes 0 and 1 are %vec1[2] and %vec0[0],
; so both inputs are read. The assertions expect two inline-asm defs (v[0:5]
; and v[2:7]), register copies that gather the two lanes into v[2:5], and one
; global_store_dwordx4 at offset 0. The gfx940 sequence additionally expects an
; s_nop 0 after the second def.
241define void @v_shuffle_v4i64_v3i64__5_0_u_u(ptr addrspace(1) inreg %ptr) {
242; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
243; GFX900:       ; %bb.0:
244; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; GFX900-NEXT:    ;;#ASMSTART
246; GFX900-NEXT:    ; def v[0:5]
247; GFX900-NEXT:    ;;#ASMEND
248; GFX900-NEXT:    ;;#ASMSTART
249; GFX900-NEXT:    ; def v[2:7]
250; GFX900-NEXT:    ;;#ASMEND
251; GFX900-NEXT:    v_mov_b32_e32 v8, 0
252; GFX900-NEXT:    v_mov_b32_e32 v2, v6
253; GFX900-NEXT:    v_mov_b32_e32 v3, v7
254; GFX900-NEXT:    v_mov_b32_e32 v4, v0
255; GFX900-NEXT:    v_mov_b32_e32 v5, v1
256; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
257; GFX900-NEXT:    s_waitcnt vmcnt(0)
258; GFX900-NEXT:    s_setpc_b64 s[30:31]
259;
260; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
261; GFX90A:       ; %bb.0:
262; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
263; GFX90A-NEXT:    ;;#ASMSTART
264; GFX90A-NEXT:    ; def v[0:5]
265; GFX90A-NEXT:    ;;#ASMEND
266; GFX90A-NEXT:    ;;#ASMSTART
267; GFX90A-NEXT:    ; def v[2:7]
268; GFX90A-NEXT:    ;;#ASMEND
269; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
270; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
271; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
272; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
273; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
274; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
275; GFX90A-NEXT:    s_waitcnt vmcnt(0)
276; GFX90A-NEXT:    s_setpc_b64 s[30:31]
277;
278; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_u_u:
279; GFX940:       ; %bb.0:
280; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281; GFX940-NEXT:    ;;#ASMSTART
282; GFX940-NEXT:    ; def v[0:5]
283; GFX940-NEXT:    ;;#ASMEND
284; GFX940-NEXT:    v_mov_b32_e32 v8, 0
285; GFX940-NEXT:    ;;#ASMSTART
286; GFX940-NEXT:    ; def v[2:7]
287; GFX940-NEXT:    ;;#ASMEND
288; GFX940-NEXT:    s_nop 0
289; GFX940-NEXT:    v_mov_b32_e32 v2, v6
290; GFX940-NEXT:    v_mov_b32_e32 v3, v7
291; GFX940-NEXT:    v_mov_b32_e32 v4, v0
292; GFX940-NEXT:    v_mov_b32_e32 v5, v1
293; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
294; GFX940-NEXT:    s_waitcnt vmcnt(0)
295; GFX940-NEXT:    s_setpc_b64 s[30:31]
296  %vec0 = call <3 x i64> asm "; def $0", "=v"()
297  %vec1 = call <3 x i64> asm "; def $0", "=v"()
298  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
299  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
300  ret void
301}
302
; Mask <5, 1, poison, poison>: result lanes 0 and 1 are %vec1[2] and %vec0[1],
; so both inputs are read. The assertions expect two inline-asm defs (v[0:5]
; and v[4:9]), copies of v8/v9 into v0/v1, and one global_store_dwordx4 of
; v[0:3] at offset 0. The gfx940 sequence additionally expects an s_nop 0 after
; the second def.
303define void @v_shuffle_v4i64_v3i64__5_1_u_u(ptr addrspace(1) inreg %ptr) {
304; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
305; GFX900:       ; %bb.0:
306; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
307; GFX900-NEXT:    ;;#ASMSTART
308; GFX900-NEXT:    ; def v[0:5]
309; GFX900-NEXT:    ;;#ASMEND
310; GFX900-NEXT:    v_mov_b32_e32 v10, 0
311; GFX900-NEXT:    ;;#ASMSTART
312; GFX900-NEXT:    ; def v[4:9]
313; GFX900-NEXT:    ;;#ASMEND
314; GFX900-NEXT:    v_mov_b32_e32 v0, v8
315; GFX900-NEXT:    v_mov_b32_e32 v1, v9
316; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
317; GFX900-NEXT:    s_waitcnt vmcnt(0)
318; GFX900-NEXT:    s_setpc_b64 s[30:31]
319;
320; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
321; GFX90A:       ; %bb.0:
322; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323; GFX90A-NEXT:    ;;#ASMSTART
324; GFX90A-NEXT:    ; def v[0:5]
325; GFX90A-NEXT:    ;;#ASMEND
326; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
327; GFX90A-NEXT:    ;;#ASMSTART
328; GFX90A-NEXT:    ; def v[4:9]
329; GFX90A-NEXT:    ;;#ASMEND
330; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
331; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
332; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
333; GFX90A-NEXT:    s_waitcnt vmcnt(0)
334; GFX90A-NEXT:    s_setpc_b64 s[30:31]
335;
336; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_u_u:
337; GFX940:       ; %bb.0:
338; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339; GFX940-NEXT:    ;;#ASMSTART
340; GFX940-NEXT:    ; def v[0:5]
341; GFX940-NEXT:    ;;#ASMEND
342; GFX940-NEXT:    v_mov_b32_e32 v10, 0
343; GFX940-NEXT:    ;;#ASMSTART
344; GFX940-NEXT:    ; def v[4:9]
345; GFX940-NEXT:    ;;#ASMEND
346; GFX940-NEXT:    s_nop 0
347; GFX940-NEXT:    v_mov_b32_e32 v0, v8
348; GFX940-NEXT:    v_mov_b32_e32 v1, v9
349; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
350; GFX940-NEXT:    s_waitcnt vmcnt(0)
351; GFX940-NEXT:    s_setpc_b64 s[30:31]
352  %vec0 = call <3 x i64> asm "; def $0", "=v"()
353  %vec1 = call <3 x i64> asm "; def $0", "=v"()
354  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
355  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
356  ret void
357}
358
; Mask <5, 2, poison, poison>: result lanes 0 and 1 are %vec1[2] and %vec0[2],
; so both inputs are read. The assertions expect two inline-asm defs (v[0:5]
; and v[6:11]), copies of v10/v11 into v2/v3, and one global_store_dwordx4 of
; v[2:5] at offset 0. The gfx940 sequence additionally expects an s_nop 0 after
; the second def.
359define void @v_shuffle_v4i64_v3i64__5_2_u_u(ptr addrspace(1) inreg %ptr) {
360; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
361; GFX900:       ; %bb.0:
362; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363; GFX900-NEXT:    ;;#ASMSTART
364; GFX900-NEXT:    ; def v[0:5]
365; GFX900-NEXT:    ;;#ASMEND
366; GFX900-NEXT:    v_mov_b32_e32 v12, 0
367; GFX900-NEXT:    ;;#ASMSTART
368; GFX900-NEXT:    ; def v[6:11]
369; GFX900-NEXT:    ;;#ASMEND
370; GFX900-NEXT:    v_mov_b32_e32 v2, v10
371; GFX900-NEXT:    v_mov_b32_e32 v3, v11
372; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
373; GFX900-NEXT:    s_waitcnt vmcnt(0)
374; GFX900-NEXT:    s_setpc_b64 s[30:31]
375;
376; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
377; GFX90A:       ; %bb.0:
378; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379; GFX90A-NEXT:    ;;#ASMSTART
380; GFX90A-NEXT:    ; def v[0:5]
381; GFX90A-NEXT:    ;;#ASMEND
382; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
383; GFX90A-NEXT:    ;;#ASMSTART
384; GFX90A-NEXT:    ; def v[6:11]
385; GFX90A-NEXT:    ;;#ASMEND
386; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
387; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
388; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
389; GFX90A-NEXT:    s_waitcnt vmcnt(0)
390; GFX90A-NEXT:    s_setpc_b64 s[30:31]
391;
392; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_u_u:
393; GFX940:       ; %bb.0:
394; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395; GFX940-NEXT:    ;;#ASMSTART
396; GFX940-NEXT:    ; def v[0:5]
397; GFX940-NEXT:    ;;#ASMEND
398; GFX940-NEXT:    v_mov_b32_e32 v12, 0
399; GFX940-NEXT:    ;;#ASMSTART
400; GFX940-NEXT:    ; def v[6:11]
401; GFX940-NEXT:    ;;#ASMEND
402; GFX940-NEXT:    s_nop 0
403; GFX940-NEXT:    v_mov_b32_e32 v2, v10
404; GFX940-NEXT:    v_mov_b32_e32 v3, v11
405; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
406; GFX940-NEXT:    s_waitcnt vmcnt(0)
407; GFX940-NEXT:    s_setpc_b64 s[30:31]
408  %vec0 = call <3 x i64> asm "; def $0", "=v"()
409  %vec1 = call <3 x i64> asm "; def $0", "=v"()
410  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
411  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
412  ret void
413}
414
; Mask <5, 3, poison, poison>: result lanes 0 and 1 are %vec1[2] and %vec1[0];
; %vec0 is not referenced by the mask. The assertions expect a single
; inline-asm def (v[0:5]), register copies that place the two lanes in v[2:5],
; and one global_store_dwordx4 at offset 0.
415define void @v_shuffle_v4i64_v3i64__5_3_u_u(ptr addrspace(1) inreg %ptr) {
416; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
417; GFX900:       ; %bb.0:
418; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
419; GFX900-NEXT:    ;;#ASMSTART
420; GFX900-NEXT:    ; def v[0:5]
421; GFX900-NEXT:    ;;#ASMEND
422; GFX900-NEXT:    v_mov_b32_e32 v6, 0
423; GFX900-NEXT:    v_mov_b32_e32 v2, v4
424; GFX900-NEXT:    v_mov_b32_e32 v3, v5
425; GFX900-NEXT:    v_mov_b32_e32 v4, v0
426; GFX900-NEXT:    v_mov_b32_e32 v5, v1
427; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
428; GFX900-NEXT:    s_waitcnt vmcnt(0)
429; GFX900-NEXT:    s_setpc_b64 s[30:31]
430;
431; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
432; GFX90A:       ; %bb.0:
433; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GFX90A-NEXT:    ;;#ASMSTART
435; GFX90A-NEXT:    ; def v[0:5]
436; GFX90A-NEXT:    ;;#ASMEND
437; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
438; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
439; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
440; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
441; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
442; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
443; GFX90A-NEXT:    s_waitcnt vmcnt(0)
444; GFX90A-NEXT:    s_setpc_b64 s[30:31]
445;
446; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_u_u:
447; GFX940:       ; %bb.0:
448; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
449; GFX940-NEXT:    ;;#ASMSTART
450; GFX940-NEXT:    ; def v[0:5]
451; GFX940-NEXT:    ;;#ASMEND
452; GFX940-NEXT:    v_mov_b32_e32 v6, 0
453; GFX940-NEXT:    v_mov_b32_e32 v2, v4
454; GFX940-NEXT:    v_mov_b32_e32 v3, v5
455; GFX940-NEXT:    v_mov_b32_e32 v4, v0
456; GFX940-NEXT:    v_mov_b32_e32 v5, v1
457; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
458; GFX940-NEXT:    s_waitcnt vmcnt(0)
459; GFX940-NEXT:    s_setpc_b64 s[30:31]
460  %vec0 = call <3 x i64> asm "; def $0", "=v"()
461  %vec1 = call <3 x i64> asm "; def $0", "=v"()
462  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
463  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
464  ret void
465}
466
; Mask <5, 4, poison, poison>: result lanes 0 and 1 are %vec1[2] and %vec1[1];
; %vec0 is not referenced by the mask. The assertions expect a single
; inline-asm def (v[0:5]), copies of v4/v5 into v0/v1, and one
; global_store_dwordx4 of v[0:3] at offset 0.
467define void @v_shuffle_v4i64_v3i64__5_4_u_u(ptr addrspace(1) inreg %ptr) {
468; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
469; GFX900:       ; %bb.0:
470; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471; GFX900-NEXT:    ;;#ASMSTART
472; GFX900-NEXT:    ; def v[0:5]
473; GFX900-NEXT:    ;;#ASMEND
474; GFX900-NEXT:    v_mov_b32_e32 v6, 0
475; GFX900-NEXT:    v_mov_b32_e32 v0, v4
476; GFX900-NEXT:    v_mov_b32_e32 v1, v5
477; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
478; GFX900-NEXT:    s_waitcnt vmcnt(0)
479; GFX900-NEXT:    s_setpc_b64 s[30:31]
480;
481; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
482; GFX90A:       ; %bb.0:
483; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484; GFX90A-NEXT:    ;;#ASMSTART
485; GFX90A-NEXT:    ; def v[0:5]
486; GFX90A-NEXT:    ;;#ASMEND
487; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
488; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
489; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
490; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
491; GFX90A-NEXT:    s_waitcnt vmcnt(0)
492; GFX90A-NEXT:    s_setpc_b64 s[30:31]
493;
494; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_u_u:
495; GFX940:       ; %bb.0:
496; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
497; GFX940-NEXT:    ;;#ASMSTART
498; GFX940-NEXT:    ; def v[0:5]
499; GFX940-NEXT:    ;;#ASMEND
500; GFX940-NEXT:    v_mov_b32_e32 v6, 0
501; GFX940-NEXT:    v_mov_b32_e32 v0, v4
502; GFX940-NEXT:    v_mov_b32_e32 v1, v5
503; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
504; GFX940-NEXT:    s_waitcnt vmcnt(0)
505; GFX940-NEXT:    s_setpc_b64 s[30:31]
506  %vec0 = call <3 x i64> asm "; def $0", "=v"()
507  %vec1 = call <3 x i64> asm "; def $0", "=v"()
508  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
509  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
510  ret void
511}
512
; Mask <5, 5, poison, poison>: result lanes 0 and 1 are both %vec1[2]; %vec0 is
; not referenced by the mask. The assertions expect a single inline-asm def
; (v[0:5]), copies of v4/v5 into v2/v3 so the element appears twice in v[2:5],
; and one global_store_dwordx4 at offset 0.
513define void @v_shuffle_v4i64_v3i64__5_5_u_u(ptr addrspace(1) inreg %ptr) {
514; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
515; GFX900:       ; %bb.0:
516; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
517; GFX900-NEXT:    ;;#ASMSTART
518; GFX900-NEXT:    ; def v[0:5]
519; GFX900-NEXT:    ;;#ASMEND
520; GFX900-NEXT:    v_mov_b32_e32 v6, 0
521; GFX900-NEXT:    v_mov_b32_e32 v2, v4
522; GFX900-NEXT:    v_mov_b32_e32 v3, v5
523; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
524; GFX900-NEXT:    s_waitcnt vmcnt(0)
525; GFX900-NEXT:    s_setpc_b64 s[30:31]
526;
527; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
528; GFX90A:       ; %bb.0:
529; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530; GFX90A-NEXT:    ;;#ASMSTART
531; GFX90A-NEXT:    ; def v[0:5]
532; GFX90A-NEXT:    ;;#ASMEND
533; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
534; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
535; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
536; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
537; GFX90A-NEXT:    s_waitcnt vmcnt(0)
538; GFX90A-NEXT:    s_setpc_b64 s[30:31]
539;
540; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_u:
541; GFX940:       ; %bb.0:
542; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543; GFX940-NEXT:    ;;#ASMSTART
544; GFX940-NEXT:    ; def v[0:5]
545; GFX940-NEXT:    ;;#ASMEND
546; GFX940-NEXT:    v_mov_b32_e32 v6, 0
547; GFX940-NEXT:    v_mov_b32_e32 v2, v4
548; GFX940-NEXT:    v_mov_b32_e32 v3, v5
549; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
550; GFX940-NEXT:    s_waitcnt vmcnt(0)
551; GFX940-NEXT:    s_setpc_b64 s[30:31]
552  %vec0 = call <3 x i64> asm "; def $0", "=v"()
553  %vec1 = call <3 x i64> asm "; def $0", "=v"()
554  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
555  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
556  ret void
557}
558
; Mask <5, 5, 0, poison>: result lanes 0 and 1 are %vec1[2], lane 2 is %vec0[0]
; and lane 3 is poison, so both inputs are read. The assertions expect two
; inline-asm defs (v[0:5] and v[2:7]) and two global_store_dwordx4
; instructions: v[0:3] at offset 16 and v[4:7] at offset 0, the latter after
; v6/v7 are copied into v4/v5. On gfx940 the offset-16 store is expected before
; those copies.
559define void @v_shuffle_v4i64_v3i64__5_5_0_u(ptr addrspace(1) inreg %ptr) {
560; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
561; GFX900:       ; %bb.0:
562; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
563; GFX900-NEXT:    ;;#ASMSTART
564; GFX900-NEXT:    ; def v[0:5]
565; GFX900-NEXT:    ;;#ASMEND
566; GFX900-NEXT:    ;;#ASMSTART
567; GFX900-NEXT:    ; def v[2:7]
568; GFX900-NEXT:    ;;#ASMEND
569; GFX900-NEXT:    v_mov_b32_e32 v8, 0
570; GFX900-NEXT:    v_mov_b32_e32 v4, v6
571; GFX900-NEXT:    v_mov_b32_e32 v5, v7
572; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
573; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
574; GFX900-NEXT:    s_waitcnt vmcnt(0)
575; GFX900-NEXT:    s_setpc_b64 s[30:31]
576;
577; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
578; GFX90A:       ; %bb.0:
579; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580; GFX90A-NEXT:    ;;#ASMSTART
581; GFX90A-NEXT:    ; def v[0:5]
582; GFX90A-NEXT:    ;;#ASMEND
583; GFX90A-NEXT:    ;;#ASMSTART
584; GFX90A-NEXT:    ; def v[2:7]
585; GFX90A-NEXT:    ;;#ASMEND
586; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
587; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
588; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
589; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
590; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
591; GFX90A-NEXT:    s_waitcnt vmcnt(0)
592; GFX90A-NEXT:    s_setpc_b64 s[30:31]
593;
594; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_u:
595; GFX940:       ; %bb.0:
596; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597; GFX940-NEXT:    ;;#ASMSTART
598; GFX940-NEXT:    ; def v[0:5]
599; GFX940-NEXT:    ;;#ASMEND
600; GFX940-NEXT:    v_mov_b32_e32 v8, 0
601; GFX940-NEXT:    ;;#ASMSTART
602; GFX940-NEXT:    ; def v[2:7]
603; GFX940-NEXT:    ;;#ASMEND
604; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
605; GFX940-NEXT:    v_mov_b32_e32 v4, v6
606; GFX940-NEXT:    v_mov_b32_e32 v5, v7
607; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
608; GFX940-NEXT:    s_waitcnt vmcnt(0)
609; GFX940-NEXT:    s_setpc_b64 s[30:31]
610  %vec0 = call <3 x i64> asm "; def $0", "=v"()
611  %vec1 = call <3 x i64> asm "; def $0", "=v"()
612  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
613  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
614  ret void
615}
616
; Mask <5, 5, 1, poison>: result lanes 0 and 1 are %vec1[2], lane 2 is %vec0[1]
; and lane 3 is poison, so both inputs are read. The assertions expect two
; inline-asm defs (v[0:5] and v[4:9]) and two global_store_dwordx4
; instructions: v[2:5] at offset 16 and v[6:9] at offset 0, the latter after
; v8/v9 are copied into v6/v7. On gfx940 the offset-16 store is expected before
; those copies.
617define void @v_shuffle_v4i64_v3i64__5_5_1_u(ptr addrspace(1) inreg %ptr) {
618; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
619; GFX900:       ; %bb.0:
620; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621; GFX900-NEXT:    ;;#ASMSTART
622; GFX900-NEXT:    ; def v[0:5]
623; GFX900-NEXT:    ;;#ASMEND
624; GFX900-NEXT:    ;;#ASMSTART
625; GFX900-NEXT:    ; def v[4:9]
626; GFX900-NEXT:    ;;#ASMEND
627; GFX900-NEXT:    v_mov_b32_e32 v10, 0
628; GFX900-NEXT:    v_mov_b32_e32 v6, v8
629; GFX900-NEXT:    v_mov_b32_e32 v7, v9
630; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
631; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
632; GFX900-NEXT:    s_waitcnt vmcnt(0)
633; GFX900-NEXT:    s_setpc_b64 s[30:31]
634;
635; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
636; GFX90A:       ; %bb.0:
637; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
638; GFX90A-NEXT:    ;;#ASMSTART
639; GFX90A-NEXT:    ; def v[0:5]
640; GFX90A-NEXT:    ;;#ASMEND
641; GFX90A-NEXT:    ;;#ASMSTART
642; GFX90A-NEXT:    ; def v[4:9]
643; GFX90A-NEXT:    ;;#ASMEND
644; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
645; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
646; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
647; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
648; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
649; GFX90A-NEXT:    s_waitcnt vmcnt(0)
650; GFX90A-NEXT:    s_setpc_b64 s[30:31]
651;
652; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_u:
653; GFX940:       ; %bb.0:
654; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655; GFX940-NEXT:    ;;#ASMSTART
656; GFX940-NEXT:    ; def v[0:5]
657; GFX940-NEXT:    ;;#ASMEND
658; GFX940-NEXT:    v_mov_b32_e32 v10, 0
659; GFX940-NEXT:    ;;#ASMSTART
660; GFX940-NEXT:    ; def v[4:9]
661; GFX940-NEXT:    ;;#ASMEND
662; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
663; GFX940-NEXT:    v_mov_b32_e32 v6, v8
664; GFX940-NEXT:    v_mov_b32_e32 v7, v9
665; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
666; GFX940-NEXT:    s_waitcnt vmcnt(0)
667; GFX940-NEXT:    s_setpc_b64 s[30:31]
668  %vec0 = call <3 x i64> asm "; def $0", "=v"()
669  %vec1 = call <3 x i64> asm "; def $0", "=v"()
670  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
671  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
672  ret void
673}
674
; Mask <5, 5, 2, poison>: result lanes 0 and 1 are %vec1[2], lane 2 is %vec0[2]
; and lane 3 is poison, so both inputs are read. The assertions expect two
; inline-asm defs (v[0:5] and v[6:11]), copies of v4/v5 into v0/v1 and of
; v10/v11 into v8/v9, then two global_store_dwordx4 instructions: v[0:3] at
; offset 16 and v[8:11] at offset 0.
675define void @v_shuffle_v4i64_v3i64__5_5_2_u(ptr addrspace(1) inreg %ptr) {
676; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
677; GFX900:       ; %bb.0:
678; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
679; GFX900-NEXT:    ;;#ASMSTART
680; GFX900-NEXT:    ; def v[0:5]
681; GFX900-NEXT:    ;;#ASMEND
682; GFX900-NEXT:    ;;#ASMSTART
683; GFX900-NEXT:    ; def v[6:11]
684; GFX900-NEXT:    ;;#ASMEND
685; GFX900-NEXT:    v_mov_b32_e32 v12, 0
686; GFX900-NEXT:    v_mov_b32_e32 v0, v4
687; GFX900-NEXT:    v_mov_b32_e32 v1, v5
688; GFX900-NEXT:    v_mov_b32_e32 v8, v10
689; GFX900-NEXT:    v_mov_b32_e32 v9, v11
690; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
691; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
692; GFX900-NEXT:    s_waitcnt vmcnt(0)
693; GFX900-NEXT:    s_setpc_b64 s[30:31]
694;
695; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
696; GFX90A:       ; %bb.0:
697; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
698; GFX90A-NEXT:    ;;#ASMSTART
699; GFX90A-NEXT:    ; def v[0:5]
700; GFX90A-NEXT:    ;;#ASMEND
701; GFX90A-NEXT:    ;;#ASMSTART
702; GFX90A-NEXT:    ; def v[6:11]
703; GFX90A-NEXT:    ;;#ASMEND
704; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
705; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
706; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
707; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
708; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
709; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
710; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
711; GFX90A-NEXT:    s_waitcnt vmcnt(0)
712; GFX90A-NEXT:    s_setpc_b64 s[30:31]
713;
714; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_u:
715; GFX940:       ; %bb.0:
716; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717; GFX940-NEXT:    ;;#ASMSTART
718; GFX940-NEXT:    ; def v[0:5]
719; GFX940-NEXT:    ;;#ASMEND
720; GFX940-NEXT:    ;;#ASMSTART
721; GFX940-NEXT:    ; def v[6:11]
722; GFX940-NEXT:    ;;#ASMEND
723; GFX940-NEXT:    v_mov_b32_e32 v12, 0
724; GFX940-NEXT:    v_mov_b32_e32 v0, v4
725; GFX940-NEXT:    v_mov_b32_e32 v1, v5
726; GFX940-NEXT:    v_mov_b32_e32 v8, v10
727; GFX940-NEXT:    v_mov_b32_e32 v9, v11
728; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
729; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
730; GFX940-NEXT:    s_waitcnt vmcnt(0)
731; GFX940-NEXT:    s_setpc_b64 s[30:31]
732  %vec0 = call <3 x i64> asm "; def $0", "=v"()
733  %vec1 = call <3 x i64> asm "; def $0", "=v"()
734  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
735  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
736  ret void
737}
738
; Mask <5, 5, 3, poison>: result lanes 0 and 1 are %vec1[2], lane 2 is %vec1[0]
; and lane 3 is poison; %vec0 is not referenced by the mask. The assertions
; expect a single inline-asm def (v[0:5]), a global_store_dwordx4 of v[0:3] at
; offset 16, an s_nop (0 on gfx900 and gfx90a, 1 on gfx940), copies of v4/v5
; into v2/v3, and a second global_store_dwordx4 of v[2:5] at offset 0.
739define void @v_shuffle_v4i64_v3i64__5_5_3_u(ptr addrspace(1) inreg %ptr) {
740; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
741; GFX900:       ; %bb.0:
742; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743; GFX900-NEXT:    v_mov_b32_e32 v6, 0
744; GFX900-NEXT:    ;;#ASMSTART
745; GFX900-NEXT:    ; def v[0:5]
746; GFX900-NEXT:    ;;#ASMEND
747; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
748; GFX900-NEXT:    s_nop 0
749; GFX900-NEXT:    v_mov_b32_e32 v2, v4
750; GFX900-NEXT:    v_mov_b32_e32 v3, v5
751; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
752; GFX900-NEXT:    s_waitcnt vmcnt(0)
753; GFX900-NEXT:    s_setpc_b64 s[30:31]
754;
755; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
756; GFX90A:       ; %bb.0:
757; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
758; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
759; GFX90A-NEXT:    ;;#ASMSTART
760; GFX90A-NEXT:    ; def v[0:5]
761; GFX90A-NEXT:    ;;#ASMEND
762; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
763; GFX90A-NEXT:    s_nop 0
764; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
765; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
766; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
767; GFX90A-NEXT:    s_waitcnt vmcnt(0)
768; GFX90A-NEXT:    s_setpc_b64 s[30:31]
769;
770; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_u:
771; GFX940:       ; %bb.0:
772; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
773; GFX940-NEXT:    v_mov_b32_e32 v6, 0
774; GFX940-NEXT:    ;;#ASMSTART
775; GFX940-NEXT:    ; def v[0:5]
776; GFX940-NEXT:    ;;#ASMEND
777; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
778; GFX940-NEXT:    s_nop 1
779; GFX940-NEXT:    v_mov_b32_e32 v2, v4
780; GFX940-NEXT:    v_mov_b32_e32 v3, v5
781; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
782; GFX940-NEXT:    s_waitcnt vmcnt(0)
783; GFX940-NEXT:    s_setpc_b64 s[30:31]
784  %vec0 = call <3 x i64> asm "; def $0", "=v"()
785  %vec1 = call <3 x i64> asm "; def $0", "=v"()
786  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
787  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
788  ret void
789}
790
; Shuffle mask <5, 5, 4, poison>. Mask indices 0-2 select from %vec0 and 3-5
; select from %vec1, so lanes 0 and 1 are %vec1[2], lane 2 is %vec1[1], and
; lane 3 is poison. The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
791define void @v_shuffle_v4i64_v3i64__5_5_4_u(ptr addrspace(1) inreg %ptr) {
792; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
793; GFX900:       ; %bb.0:
794; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
795; GFX900-NEXT:    v_mov_b32_e32 v6, 0
796; GFX900-NEXT:    ;;#ASMSTART
797; GFX900-NEXT:    ; def v[0:5]
798; GFX900-NEXT:    ;;#ASMEND
799; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
800; GFX900-NEXT:    s_nop 0
801; GFX900-NEXT:    v_mov_b32_e32 v2, v4
802; GFX900-NEXT:    v_mov_b32_e32 v3, v5
803; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
804; GFX900-NEXT:    s_waitcnt vmcnt(0)
805; GFX900-NEXT:    s_setpc_b64 s[30:31]
806;
807; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
808; GFX90A:       ; %bb.0:
809; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
810; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
811; GFX90A-NEXT:    ;;#ASMSTART
812; GFX90A-NEXT:    ; def v[0:5]
813; GFX90A-NEXT:    ;;#ASMEND
814; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
815; GFX90A-NEXT:    s_nop 0
816; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
817; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
818; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
819; GFX90A-NEXT:    s_waitcnt vmcnt(0)
820; GFX90A-NEXT:    s_setpc_b64 s[30:31]
821;
822; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_u:
823; GFX940:       ; %bb.0:
824; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
825; GFX940-NEXT:    v_mov_b32_e32 v6, 0
826; GFX940-NEXT:    ;;#ASMSTART
827; GFX940-NEXT:    ; def v[0:5]
828; GFX940-NEXT:    ;;#ASMEND
829; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
830; GFX940-NEXT:    s_nop 1
831; GFX940-NEXT:    v_mov_b32_e32 v2, v4
832; GFX940-NEXT:    v_mov_b32_e32 v3, v5
833; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
834; GFX940-NEXT:    s_waitcnt vmcnt(0)
835; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The mask references only %vec1; the expected output above contains a single
  ; asm def (v[0:5]) even though two asm calls appear in the IR.
836  %vec0 = call <3 x i64> asm "; def $0", "=v"()
837  %vec1 = call <3 x i64> asm "; def $0", "=v"()
838  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
839  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
840  ret void
841}
842
; Shuffle mask <5, 5, 5, poison>. Mask indices 0-2 select from %vec0 and 3-5
; select from %vec1, so lanes 0, 1 and 2 are all %vec1[2] and lane 3 is poison.
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
843define void @v_shuffle_v4i64_v3i64__5_5_5_u(ptr addrspace(1) inreg %ptr) {
844; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
845; GFX900:       ; %bb.0:
846; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
847; GFX900-NEXT:    ;;#ASMSTART
848; GFX900-NEXT:    ; def v[0:5]
849; GFX900-NEXT:    ;;#ASMEND
850; GFX900-NEXT:    v_mov_b32_e32 v6, 0
851; GFX900-NEXT:    v_mov_b32_e32 v0, v4
852; GFX900-NEXT:    v_mov_b32_e32 v1, v5
853; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
854; GFX900-NEXT:    s_nop 0
855; GFX900-NEXT:    v_mov_b32_e32 v2, v4
856; GFX900-NEXT:    v_mov_b32_e32 v3, v5
857; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
858; GFX900-NEXT:    s_waitcnt vmcnt(0)
859; GFX900-NEXT:    s_setpc_b64 s[30:31]
860;
861; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
862; GFX90A:       ; %bb.0:
863; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
864; GFX90A-NEXT:    ;;#ASMSTART
865; GFX90A-NEXT:    ; def v[0:5]
866; GFX90A-NEXT:    ;;#ASMEND
867; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
868; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
869; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
870; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
871; GFX90A-NEXT:    s_nop 0
872; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
873; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
874; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
875; GFX90A-NEXT:    s_waitcnt vmcnt(0)
876; GFX90A-NEXT:    s_setpc_b64 s[30:31]
877;
878; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_u:
879; GFX940:       ; %bb.0:
880; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
881; GFX940-NEXT:    ;;#ASMSTART
882; GFX940-NEXT:    ; def v[0:5]
883; GFX940-NEXT:    ;;#ASMEND
884; GFX940-NEXT:    v_mov_b32_e32 v6, 0
885; GFX940-NEXT:    v_mov_b32_e32 v0, v4
886; GFX940-NEXT:    v_mov_b32_e32 v1, v5
887; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
888; GFX940-NEXT:    s_nop 1
889; GFX940-NEXT:    v_mov_b32_e32 v2, v4
890; GFX940-NEXT:    v_mov_b32_e32 v3, v5
891; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
892; GFX940-NEXT:    s_waitcnt vmcnt(0)
893; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The mask references only %vec1; the expected output above contains a single
  ; asm def (v[0:5]) even though two asm calls appear in the IR.
894  %vec0 = call <3 x i64> asm "; def $0", "=v"()
895  %vec1 = call <3 x i64> asm "; def $0", "=v"()
896  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
897  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
898  ret void
899}
900
; Shuffle mask <5, 5, 5, 0>. Mask indices 0-2 select from %vec0 and 3-5 select
; from %vec1, so lanes 0, 1 and 2 are all %vec1[2] and lane 3 is %vec0[0].
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
901define void @v_shuffle_v4i64_v3i64__5_5_5_0(ptr addrspace(1) inreg %ptr) {
902; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0:
903; GFX900:       ; %bb.0:
904; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
905; GFX900-NEXT:    ;;#ASMSTART
906; GFX900-NEXT:    ; def v[0:5]
907; GFX900-NEXT:    ;;#ASMEND
908; GFX900-NEXT:    ;;#ASMSTART
909; GFX900-NEXT:    ; def v[2:7]
910; GFX900-NEXT:    ;;#ASMEND
911; GFX900-NEXT:    v_mov_b32_e32 v8, 0
912; GFX900-NEXT:    v_mov_b32_e32 v2, v6
913; GFX900-NEXT:    v_mov_b32_e32 v3, v7
914; GFX900-NEXT:    v_mov_b32_e32 v4, v0
915; GFX900-NEXT:    v_mov_b32_e32 v5, v1
916; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
917; GFX900-NEXT:    s_nop 0
918; GFX900-NEXT:    v_mov_b32_e32 v4, v6
919; GFX900-NEXT:    v_mov_b32_e32 v5, v7
920; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
921; GFX900-NEXT:    s_waitcnt vmcnt(0)
922; GFX900-NEXT:    s_setpc_b64 s[30:31]
923;
924; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0:
925; GFX90A:       ; %bb.0:
926; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
927; GFX90A-NEXT:    ;;#ASMSTART
928; GFX90A-NEXT:    ; def v[0:5]
929; GFX90A-NEXT:    ;;#ASMEND
930; GFX90A-NEXT:    ;;#ASMSTART
931; GFX90A-NEXT:    ; def v[2:7]
932; GFX90A-NEXT:    ;;#ASMEND
933; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
934; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
935; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
936; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
937; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
938; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
939; GFX90A-NEXT:    s_nop 0
940; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
941; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
942; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
943; GFX90A-NEXT:    s_waitcnt vmcnt(0)
944; GFX90A-NEXT:    s_setpc_b64 s[30:31]
945;
946; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_0:
947; GFX940:       ; %bb.0:
948; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
949; GFX940-NEXT:    ;;#ASMSTART
950; GFX940-NEXT:    ; def v[0:5]
951; GFX940-NEXT:    ;;#ASMEND
952; GFX940-NEXT:    v_mov_b32_e32 v8, 0
953; GFX940-NEXT:    ;;#ASMSTART
954; GFX940-NEXT:    ; def v[2:7]
955; GFX940-NEXT:    ;;#ASMEND
956; GFX940-NEXT:    s_nop 0
957; GFX940-NEXT:    v_mov_b32_e32 v2, v6
958; GFX940-NEXT:    v_mov_b32_e32 v3, v7
959; GFX940-NEXT:    v_mov_b32_e32 v4, v0
960; GFX940-NEXT:    v_mov_b32_e32 v5, v1
961; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
962; GFX940-NEXT:    s_nop 1
963; GFX940-NEXT:    v_mov_b32_e32 v4, v6
964; GFX940-NEXT:    v_mov_b32_e32 v5, v7
965; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
966; GFX940-NEXT:    s_waitcnt vmcnt(0)
967; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are referenced by the mask; the expected output above contains
  ; two asm defs (v[0:5] and v[2:7]).
968  %vec0 = call <3 x i64> asm "; def $0", "=v"()
969  %vec1 = call <3 x i64> asm "; def $0", "=v"()
970  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0>
971  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
972  ret void
973}
974
; Shuffle mask <5, 5, 5, 1>. Mask indices 0-2 select from %vec0 and 3-5 select
; from %vec1, so lanes 0, 1 and 2 are all %vec1[2] and lane 3 is %vec0[1].
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
975define void @v_shuffle_v4i64_v3i64__5_5_5_1(ptr addrspace(1) inreg %ptr) {
976; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1:
977; GFX900:       ; %bb.0:
978; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
979; GFX900-NEXT:    ;;#ASMSTART
980; GFX900-NEXT:    ; def v[0:5]
981; GFX900-NEXT:    ;;#ASMEND
982; GFX900-NEXT:    ;;#ASMSTART
983; GFX900-NEXT:    ; def v[4:9]
984; GFX900-NEXT:    ;;#ASMEND
985; GFX900-NEXT:    v_mov_b32_e32 v10, 0
986; GFX900-NEXT:    v_mov_b32_e32 v0, v8
987; GFX900-NEXT:    v_mov_b32_e32 v1, v9
988; GFX900-NEXT:    v_mov_b32_e32 v6, v8
989; GFX900-NEXT:    v_mov_b32_e32 v7, v9
990; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
991; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
992; GFX900-NEXT:    s_waitcnt vmcnt(0)
993; GFX900-NEXT:    s_setpc_b64 s[30:31]
994;
995; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1:
996; GFX90A:       ; %bb.0:
997; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
998; GFX90A-NEXT:    ;;#ASMSTART
999; GFX90A-NEXT:    ; def v[0:5]
1000; GFX90A-NEXT:    ;;#ASMEND
1001; GFX90A-NEXT:    ;;#ASMSTART
1002; GFX90A-NEXT:    ; def v[4:9]
1003; GFX90A-NEXT:    ;;#ASMEND
1004; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
1005; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
1006; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
1007; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
1008; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
1009; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
1010; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
1011; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1012; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1013;
1014; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_1:
1015; GFX940:       ; %bb.0:
1016; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1017; GFX940-NEXT:    ;;#ASMSTART
1018; GFX940-NEXT:    ; def v[0:5]
1019; GFX940-NEXT:    ;;#ASMEND
1020; GFX940-NEXT:    v_mov_b32_e32 v10, 0
1021; GFX940-NEXT:    ;;#ASMSTART
1022; GFX940-NEXT:    ; def v[4:9]
1023; GFX940-NEXT:    ;;#ASMEND
1024; GFX940-NEXT:    s_nop 0
1025; GFX940-NEXT:    v_mov_b32_e32 v0, v8
1026; GFX940-NEXT:    v_mov_b32_e32 v1, v9
1027; GFX940-NEXT:    v_mov_b32_e32 v6, v8
1028; GFX940-NEXT:    v_mov_b32_e32 v7, v9
1029; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
1030; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
1031; GFX940-NEXT:    s_waitcnt vmcnt(0)
1032; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are referenced by the mask; the expected output above contains
  ; two asm defs (v[0:5] and v[4:9]).
1033  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1034  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1035  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
1036  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1037  ret void
1038}
1039
; Shuffle mask <5, 5, 5, 2>. Mask indices 0-2 select from %vec0 and 3-5 select
; from %vec1, so lanes 0, 1 and 2 are all %vec1[2] and lane 3 is %vec0[2].
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1040define void @v_shuffle_v4i64_v3i64__5_5_5_2(ptr addrspace(1) inreg %ptr) {
1041; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2:
1042; GFX900:       ; %bb.0:
1043; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1044; GFX900-NEXT:    ;;#ASMSTART
1045; GFX900-NEXT:    ; def v[0:5]
1046; GFX900-NEXT:    ;;#ASMEND
1047; GFX900-NEXT:    ;;#ASMSTART
1048; GFX900-NEXT:    ; def v[6:11]
1049; GFX900-NEXT:    ;;#ASMEND
1050; GFX900-NEXT:    v_mov_b32_e32 v12, 0
1051; GFX900-NEXT:    v_mov_b32_e32 v2, v10
1052; GFX900-NEXT:    v_mov_b32_e32 v3, v11
1053; GFX900-NEXT:    v_mov_b32_e32 v8, v10
1054; GFX900-NEXT:    v_mov_b32_e32 v9, v11
1055; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
1056; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
1057; GFX900-NEXT:    s_waitcnt vmcnt(0)
1058; GFX900-NEXT:    s_setpc_b64 s[30:31]
1059;
1060; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2:
1061; GFX90A:       ; %bb.0:
1062; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063; GFX90A-NEXT:    ;;#ASMSTART
1064; GFX90A-NEXT:    ; def v[0:5]
1065; GFX90A-NEXT:    ;;#ASMEND
1066; GFX90A-NEXT:    ;;#ASMSTART
1067; GFX90A-NEXT:    ; def v[6:11]
1068; GFX90A-NEXT:    ;;#ASMEND
1069; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
1070; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
1071; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
1072; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
1073; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
1074; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
1075; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
1076; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1077; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1078;
1079; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_2:
1080; GFX940:       ; %bb.0:
1081; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1082; GFX940-NEXT:    ;;#ASMSTART
1083; GFX940-NEXT:    ; def v[0:5]
1084; GFX940-NEXT:    ;;#ASMEND
1085; GFX940-NEXT:    ;;#ASMSTART
1086; GFX940-NEXT:    ; def v[6:11]
1087; GFX940-NEXT:    ;;#ASMEND
1088; GFX940-NEXT:    v_mov_b32_e32 v12, 0
1089; GFX940-NEXT:    v_mov_b32_e32 v2, v10
1090; GFX940-NEXT:    v_mov_b32_e32 v3, v11
1091; GFX940-NEXT:    v_mov_b32_e32 v8, v10
1092; GFX940-NEXT:    v_mov_b32_e32 v9, v11
1093; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
1094; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
1095; GFX940-NEXT:    s_waitcnt vmcnt(0)
1096; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are referenced by the mask; the expected output above contains
  ; two asm defs (v[0:5] and v[6:11]).
1097  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1098  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1099  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
1100  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1101  ret void
1102}
1103
; Shuffle mask <5, 5, 5, 3>. Mask indices 0-2 select from %vec0 and 3-5 select
; from %vec1, so lanes 0, 1 and 2 are all %vec1[2] and lane 3 is %vec1[0].
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1104define void @v_shuffle_v4i64_v3i64__5_5_5_3(ptr addrspace(1) inreg %ptr) {
1105; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3:
1106; GFX900:       ; %bb.0:
1107; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1108; GFX900-NEXT:    ;;#ASMSTART
1109; GFX900-NEXT:    ; def v[0:5]
1110; GFX900-NEXT:    ;;#ASMEND
1111; GFX900-NEXT:    v_mov_b32_e32 v10, 0
1112; GFX900-NEXT:    v_mov_b32_e32 v6, v4
1113; GFX900-NEXT:    v_mov_b32_e32 v7, v5
1114; GFX900-NEXT:    v_mov_b32_e32 v8, v0
1115; GFX900-NEXT:    v_mov_b32_e32 v9, v1
1116; GFX900-NEXT:    v_mov_b32_e32 v2, v4
1117; GFX900-NEXT:    v_mov_b32_e32 v3, v5
1118; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
1119; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
1120; GFX900-NEXT:    s_waitcnt vmcnt(0)
1121; GFX900-NEXT:    s_setpc_b64 s[30:31]
1122;
1123; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3:
1124; GFX90A:       ; %bb.0:
1125; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1126; GFX90A-NEXT:    ;;#ASMSTART
1127; GFX90A-NEXT:    ; def v[0:5]
1128; GFX90A-NEXT:    ;;#ASMEND
1129; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
1130; GFX90A-NEXT:    v_mov_b32_e32 v6, v4
1131; GFX90A-NEXT:    v_mov_b32_e32 v7, v5
1132; GFX90A-NEXT:    v_mov_b32_e32 v8, v0
1133; GFX90A-NEXT:    v_mov_b32_e32 v9, v1
1134; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1135; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
1136; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
1137; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
1138; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1139; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1140;
1141; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_3:
1142; GFX940:       ; %bb.0:
1143; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1144; GFX940-NEXT:    ;;#ASMSTART
1145; GFX940-NEXT:    ; def v[0:5]
1146; GFX940-NEXT:    ;;#ASMEND
1147; GFX940-NEXT:    v_mov_b32_e32 v10, 0
1148; GFX940-NEXT:    v_mov_b32_e32 v6, v4
1149; GFX940-NEXT:    v_mov_b32_e32 v7, v5
1150; GFX940-NEXT:    v_mov_b32_e32 v8, v0
1151; GFX940-NEXT:    v_mov_b32_e32 v9, v1
1152; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1153; GFX940-NEXT:    v_mov_b32_e32 v3, v5
1154; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
1155; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
1156; GFX940-NEXT:    s_waitcnt vmcnt(0)
1157; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The mask references only %vec1; the expected output above contains a single
  ; asm def (v[0:5]) even though two asm calls appear in the IR.
1158  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1159  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1160  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
1161  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1162  ret void
1163}
1164
; Shuffle mask <5, 5, 5, 4>. Mask indices 0-2 select from %vec0 and 3-5 select
; from %vec1, so lanes 0, 1 and 2 are all %vec1[2] and lane 3 is %vec1[1].
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1165define void @v_shuffle_v4i64_v3i64__5_5_5_4(ptr addrspace(1) inreg %ptr) {
1166; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4:
1167; GFX900:       ; %bb.0:
1168; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1169; GFX900-NEXT:    ;;#ASMSTART
1170; GFX900-NEXT:    ; def v[0:5]
1171; GFX900-NEXT:    ;;#ASMEND
1172; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1173; GFX900-NEXT:    v_mov_b32_e32 v0, v4
1174; GFX900-NEXT:    v_mov_b32_e32 v1, v5
1175; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1176; GFX900-NEXT:    s_nop 0
1177; GFX900-NEXT:    v_mov_b32_e32 v2, v4
1178; GFX900-NEXT:    v_mov_b32_e32 v3, v5
1179; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
1180; GFX900-NEXT:    s_waitcnt vmcnt(0)
1181; GFX900-NEXT:    s_setpc_b64 s[30:31]
1182;
1183; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4:
1184; GFX90A:       ; %bb.0:
1185; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1186; GFX90A-NEXT:    ;;#ASMSTART
1187; GFX90A-NEXT:    ; def v[0:5]
1188; GFX90A-NEXT:    ;;#ASMEND
1189; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
1190; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
1191; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
1192; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1193; GFX90A-NEXT:    s_nop 0
1194; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1195; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
1196; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
1197; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1198; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1199;
1200; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_4:
1201; GFX940:       ; %bb.0:
1202; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203; GFX940-NEXT:    ;;#ASMSTART
1204; GFX940-NEXT:    ; def v[0:5]
1205; GFX940-NEXT:    ;;#ASMEND
1206; GFX940-NEXT:    v_mov_b32_e32 v6, 0
1207; GFX940-NEXT:    v_mov_b32_e32 v0, v4
1208; GFX940-NEXT:    v_mov_b32_e32 v1, v5
1209; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1210; GFX940-NEXT:    s_nop 1
1211; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1212; GFX940-NEXT:    v_mov_b32_e32 v3, v5
1213; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
1214; GFX940-NEXT:    s_waitcnt vmcnt(0)
1215; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The mask references only %vec1; the expected output above contains a single
  ; asm def (v[0:5]) even though two asm calls appear in the IR.
1216  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1217  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1218  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
1219  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1220  ret void
1221}
1222
; Shuffle mask <5, 5, 5, 5>. Mask indices 0-2 select from %vec0 and 3-5 select
; from %vec1, so all four lanes are %vec1[2] (a splat of the last element of
; the second input). The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1223define void @v_shuffle_v4i64_v3i64__5_5_5_5(ptr addrspace(1) inreg %ptr) {
1224; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5:
1225; GFX900:       ; %bb.0:
1226; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1227; GFX900-NEXT:    ;;#ASMSTART
1228; GFX900-NEXT:    ; def v[0:5]
1229; GFX900-NEXT:    ;;#ASMEND
1230; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1231; GFX900-NEXT:    v_mov_b32_e32 v2, v4
1232; GFX900-NEXT:    v_mov_b32_e32 v3, v5
1233; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
1234; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
1235; GFX900-NEXT:    s_waitcnt vmcnt(0)
1236; GFX900-NEXT:    s_setpc_b64 s[30:31]
1237;
1238; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5:
1239; GFX90A:       ; %bb.0:
1240; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241; GFX90A-NEXT:    ;;#ASMSTART
1242; GFX90A-NEXT:    ; def v[0:5]
1243; GFX90A-NEXT:    ;;#ASMEND
1244; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
1245; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1246; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
1247; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
1248; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
1249; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1250; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1251;
1252; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_5_5:
1253; GFX940:       ; %bb.0:
1254; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255; GFX940-NEXT:    ;;#ASMSTART
1256; GFX940-NEXT:    ; def v[0:5]
1257; GFX940-NEXT:    ;;#ASMEND
1258; GFX940-NEXT:    v_mov_b32_e32 v6, 0
1259; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1260; GFX940-NEXT:    v_mov_b32_e32 v3, v5
1261; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
1262; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
1263; GFX940-NEXT:    s_waitcnt vmcnt(0)
1264; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The mask references only %vec1; the expected output above contains a single
  ; asm def (v[0:5]), and both stores use the same v[2:5] source registers.
1265  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1266  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1267  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
1268  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1269  ret void
1270}
1271
; Shuffle mask <poison, 0, 0, 0> with a poison second operand: lane 0 is
; poison and lanes 1, 2 and 3 are all %vec0[0].
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1272define void @v_shuffle_v4i64_v3i64__u_0_0_0(ptr addrspace(1) inreg %ptr) {
1273; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0:
1274; GFX900:       ; %bb.0:
1275; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1276; GFX900-NEXT:    ;;#ASMSTART
1277; GFX900-NEXT:    ; def v[0:5]
1278; GFX900-NEXT:    ;;#ASMEND
1279; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1280; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1281; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1282; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1283; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1284; GFX900-NEXT:    s_waitcnt vmcnt(0)
1285; GFX900-NEXT:    s_setpc_b64 s[30:31]
1286;
1287; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0:
1288; GFX90A:       ; %bb.0:
1289; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1290; GFX90A-NEXT:    ;;#ASMSTART
1291; GFX90A-NEXT:    ; def v[0:5]
1292; GFX90A-NEXT:    ;;#ASMEND
1293; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
1294; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1295; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1296; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1297; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1298; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1299; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1300;
1301; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_0_0_0:
1302; GFX940:       ; %bb.0:
1303; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1304; GFX940-NEXT:    ;;#ASMSTART
1305; GFX940-NEXT:    ; def v[0:5]
1306; GFX940-NEXT:    ;;#ASMEND
1307; GFX940-NEXT:    v_mov_b32_e32 v6, 0
1308; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1309; GFX940-NEXT:    v_mov_b32_e32 v3, v1
1310; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1311; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
1312; GFX940-NEXT:    s_waitcnt vmcnt(0)
1313; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single input vector; in the expected output above both stores use the same
  ; v[0:3] source registers.
1314  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1315  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
1316  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1317  ret void
1318}
1319
; Shuffle mask zeroinitializer (<0, 0, 0, 0>) with a poison second operand:
; all four lanes are %vec0[0] (a splat of the first element).
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1320define void @v_shuffle_v4i64_v3i64__0_0_0_0(ptr addrspace(1) inreg %ptr) {
1321; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0:
1322; GFX900:       ; %bb.0:
1323; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1324; GFX900-NEXT:    ;;#ASMSTART
1325; GFX900-NEXT:    ; def v[0:5]
1326; GFX900-NEXT:    ;;#ASMEND
1327; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1328; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1329; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1330; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1331; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1332; GFX900-NEXT:    s_waitcnt vmcnt(0)
1333; GFX900-NEXT:    s_setpc_b64 s[30:31]
1334;
1335; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0:
1336; GFX90A:       ; %bb.0:
1337; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1338; GFX90A-NEXT:    ;;#ASMSTART
1339; GFX90A-NEXT:    ; def v[0:5]
1340; GFX90A-NEXT:    ;;#ASMEND
1341; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
1342; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1343; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1344; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1345; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1346; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1347; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1348;
1349; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_0_0_0:
1350; GFX940:       ; %bb.0:
1351; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1352; GFX940-NEXT:    ;;#ASMSTART
1353; GFX940-NEXT:    ; def v[0:5]
1354; GFX940-NEXT:    ;;#ASMEND
1355; GFX940-NEXT:    v_mov_b32_e32 v6, 0
1356; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1357; GFX940-NEXT:    v_mov_b32_e32 v3, v1
1358; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1359; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
1360; GFX940-NEXT:    s_waitcnt vmcnt(0)
1361; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single input vector; in the expected output above both stores use the same
  ; v[0:3] source registers.
1362  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1363  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer
1364  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1365  ret void
1366}
1367
; Shuffle mask <1, 0, 0, 0> with a poison second operand: lane 0 is %vec0[1]
; and lanes 1, 2 and 3 are all %vec0[0].
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1368define void @v_shuffle_v4i64_v3i64__1_0_0_0(ptr addrspace(1) inreg %ptr) {
1369; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0:
1370; GFX900:       ; %bb.0:
1371; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1372; GFX900-NEXT:    ;;#ASMSTART
1373; GFX900-NEXT:    ; def v[0:5]
1374; GFX900-NEXT:    ;;#ASMEND
1375; GFX900-NEXT:    v_mov_b32_e32 v8, 0
1376; GFX900-NEXT:    v_mov_b32_e32 v4, v0
1377; GFX900-NEXT:    v_mov_b32_e32 v5, v1
1378; GFX900-NEXT:    v_mov_b32_e32 v6, v0
1379; GFX900-NEXT:    v_mov_b32_e32 v7, v1
1380; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
1381; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
1382; GFX900-NEXT:    s_waitcnt vmcnt(0)
1383; GFX900-NEXT:    s_setpc_b64 s[30:31]
1384;
1385; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0:
1386; GFX90A:       ; %bb.0:
1387; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1388; GFX90A-NEXT:    ;;#ASMSTART
1389; GFX90A-NEXT:    ; def v[0:5]
1390; GFX90A-NEXT:    ;;#ASMEND
1391; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
1392; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
1393; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
1394; GFX90A-NEXT:    v_mov_b32_e32 v6, v0
1395; GFX90A-NEXT:    v_mov_b32_e32 v7, v1
1396; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
1397; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
1398; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1399; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1400;
1401; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_0_0_0:
1402; GFX940:       ; %bb.0:
1403; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404; GFX940-NEXT:    ;;#ASMSTART
1405; GFX940-NEXT:    ; def v[0:5]
1406; GFX940-NEXT:    ;;#ASMEND
1407; GFX940-NEXT:    v_mov_b32_e32 v8, 0
1408; GFX940-NEXT:    v_mov_b32_e32 v4, v0
1409; GFX940-NEXT:    v_mov_b32_e32 v5, v1
1410; GFX940-NEXT:    v_mov_b32_e32 v6, v0
1411; GFX940-NEXT:    v_mov_b32_e32 v7, v1
1412; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
1413; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
1414; GFX940-NEXT:    s_waitcnt vmcnt(0)
1415; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single input vector; the second shuffle operand is poison and is not
  ; referenced by any mask index.
1416  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1417  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
1418  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1419  ret void
1420}
1421
; Shuffle mask <2, 0, 0, 0> with a poison second operand: lane 0 is %vec0[2]
; and lanes 1, 2 and 3 are all %vec0[0].
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1422define void @v_shuffle_v4i64_v3i64__2_0_0_0(ptr addrspace(1) inreg %ptr) {
1423; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0:
1424; GFX900:       ; %bb.0:
1425; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1426; GFX900-NEXT:    ;;#ASMSTART
1427; GFX900-NEXT:    ; def v[0:5]
1428; GFX900-NEXT:    ;;#ASMEND
1429; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1430; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1431; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1432; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1433; GFX900-NEXT:    s_nop 0
1434; GFX900-NEXT:    v_mov_b32_e32 v2, v4
1435; GFX900-NEXT:    v_mov_b32_e32 v3, v5
1436; GFX900-NEXT:    v_mov_b32_e32 v4, v0
1437; GFX900-NEXT:    v_mov_b32_e32 v5, v1
1438; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
1439; GFX900-NEXT:    s_waitcnt vmcnt(0)
1440; GFX900-NEXT:    s_setpc_b64 s[30:31]
1441;
1442; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0:
1443; GFX90A:       ; %bb.0:
1444; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1445; GFX90A-NEXT:    ;;#ASMSTART
1446; GFX90A-NEXT:    ; def v[0:5]
1447; GFX90A-NEXT:    ;;#ASMEND
1448; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
1449; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1450; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1451; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1452; GFX90A-NEXT:    s_nop 0
1453; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1454; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
1455; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
1456; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
1457; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
1458; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1459; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1460;
1461; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_0_0_0:
1462; GFX940:       ; %bb.0:
1463; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1464; GFX940-NEXT:    ;;#ASMSTART
1465; GFX940-NEXT:    ; def v[0:5]
1466; GFX940-NEXT:    ;;#ASMEND
1467; GFX940-NEXT:    v_mov_b32_e32 v6, 0
1468; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1469; GFX940-NEXT:    v_mov_b32_e32 v3, v1
1470; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1471; GFX940-NEXT:    s_nop 1
1472; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1473; GFX940-NEXT:    v_mov_b32_e32 v3, v5
1474; GFX940-NEXT:    v_mov_b32_e32 v4, v0
1475; GFX940-NEXT:    v_mov_b32_e32 v5, v1
1476; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
1477; GFX940-NEXT:    s_waitcnt vmcnt(0)
1478; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Single input vector; the second shuffle operand is poison and is not
  ; referenced by any mask index.
1479  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1480  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
1481  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1482  ret void
1483}
1484
; Shuffle mask <3, 0, 0, 0> with a poison second operand. Mask index 3
; addresses element 0 of the second operand, which is poison here, so lane 0
; is poison; lanes 1, 2 and 3 are all %vec0[0].
; The <4 x i64> result is stored to %ptr (align 32).
; The assertions below are autogenerated (see the note on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
1485define void @v_shuffle_v4i64_v3i64__3_0_0_0(ptr addrspace(1) inreg %ptr) {
1486; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0:
1487; GFX900:       ; %bb.0:
1488; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1489; GFX900-NEXT:    ;;#ASMSTART
1490; GFX900-NEXT:    ; def v[0:5]
1491; GFX900-NEXT:    ;;#ASMEND
1492; GFX900-NEXT:    v_mov_b32_e32 v6, 0
1493; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1494; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1495; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1496; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1497; GFX900-NEXT:    s_waitcnt vmcnt(0)
1498; GFX900-NEXT:    s_setpc_b64 s[30:31]
1499;
1500; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0:
1501; GFX90A:       ; %bb.0:
1502; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503; GFX90A-NEXT:    ;;#ASMSTART
1504; GFX90A-NEXT:    ; def v[0:5]
1505; GFX90A-NEXT:    ;;#ASMEND
1506; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
1507; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1508; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1509; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
1510; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
1511; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1512; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1513;
1514; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_0_0_0:
1515; GFX940:       ; %bb.0:
1516; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1517; GFX940-NEXT:    ;;#ASMSTART
1518; GFX940-NEXT:    ; def v[0:5]
1519; GFX940-NEXT:    ;;#ASMEND
1520; GFX940-NEXT:    v_mov_b32_e32 v6, 0
1521; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1522; GFX940-NEXT:    v_mov_b32_e32 v3, v1
1523; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
1524; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
1525; GFX940-NEXT:    s_waitcnt vmcnt(0)
1526; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; The expected instruction sequence above matches the one checked for
  ; @v_shuffle_v4i64_v3i64__u_0_0_0, whose lane 0 is explicitly poison.
1527  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1528  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
1529  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1530  ret void
1531}
1532
; Mask <4, 0, 0, 0>: mask indices 3-5 address %vec1, so lane 0 takes element 1
; of %vec1 while lanes 1-3 all replicate element 0 of %vec0. Both inputs come
; from inline asm; the <4 x i64> result is stored to %ptr.
1533define void @v_shuffle_v4i64_v3i64__4_0_0_0(ptr addrspace(1) inreg %ptr) {
1534; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0:
1535; GFX900:       ; %bb.0:
1536; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1537; GFX900-NEXT:    ;;#ASMSTART
1538; GFX900-NEXT:    ; def v[0:5]
1539; GFX900-NEXT:    ;;#ASMEND
1540; GFX900-NEXT:    ;;#ASMSTART
1541; GFX900-NEXT:    ; def v[2:7]
1542; GFX900-NEXT:    ;;#ASMEND
1543; GFX900-NEXT:    v_mov_b32_e32 v8, 0
1544; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1545; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1546; GFX900-NEXT:    v_mov_b32_e32 v6, v0
1547; GFX900-NEXT:    v_mov_b32_e32 v7, v1
1548; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1549; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
1550; GFX900-NEXT:    s_waitcnt vmcnt(0)
1551; GFX900-NEXT:    s_setpc_b64 s[30:31]
1552;
1553; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0:
1554; GFX90A:       ; %bb.0:
1555; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1556; GFX90A-NEXT:    ;;#ASMSTART
1557; GFX90A-NEXT:    ; def v[0:5]
1558; GFX90A-NEXT:    ;;#ASMEND
1559; GFX90A-NEXT:    ;;#ASMSTART
1560; GFX90A-NEXT:    ; def v[2:7]
1561; GFX90A-NEXT:    ;;#ASMEND
1562; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
1563; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1564; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1565; GFX90A-NEXT:    v_mov_b32_e32 v6, v0
1566; GFX90A-NEXT:    v_mov_b32_e32 v7, v1
1567; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1568; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
1569; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1570; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1571;
1572; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_0_0_0:
1573; GFX940:       ; %bb.0:
1574; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1575; GFX940-NEXT:    ;;#ASMSTART
1576; GFX940-NEXT:    ; def v[0:5]
1577; GFX940-NEXT:    ;;#ASMEND
1578; GFX940-NEXT:    v_mov_b32_e32 v8, 0
1579; GFX940-NEXT:    ;;#ASMSTART
1580; GFX940-NEXT:    ; def v[2:7]
1581; GFX940-NEXT:    ;;#ASMEND
1582; GFX940-NEXT:    s_nop 0
1583; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1584; GFX940-NEXT:    v_mov_b32_e32 v3, v1
1585; GFX940-NEXT:    v_mov_b32_e32 v6, v0
1586; GFX940-NEXT:    v_mov_b32_e32 v7, v1
1587; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
1588; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
1589; GFX940-NEXT:    s_waitcnt vmcnt(0)
1590; GFX940-NEXT:    s_setpc_b64 s[30:31]
1591  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1592  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1593  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
1594  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1595  ret void
1596}
1597
; Mask <5, 0, 0, 0>: mask indices 3-5 address %vec1, so lane 0 takes element 2
; of %vec1 while lanes 1-3 all replicate element 0 of %vec0. Both inputs come
; from inline asm; the <4 x i64> result is stored to %ptr.
1598define void @v_shuffle_v4i64_v3i64__5_0_0_0(ptr addrspace(1) inreg %ptr) {
1599; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0:
1600; GFX900:       ; %bb.0:
1601; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1602; GFX900-NEXT:    ;;#ASMSTART
1603; GFX900-NEXT:    ; def v[0:5]
1604; GFX900-NEXT:    ;;#ASMEND
1605; GFX900-NEXT:    ;;#ASMSTART
1606; GFX900-NEXT:    ; def v[2:7]
1607; GFX900-NEXT:    ;;#ASMEND
1608; GFX900-NEXT:    v_mov_b32_e32 v8, 0
1609; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1610; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1611; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1612; GFX900-NEXT:    v_mov_b32_e32 v4, v0
1613; GFX900-NEXT:    v_mov_b32_e32 v2, v6
1614; GFX900-NEXT:    v_mov_b32_e32 v3, v7
1615; GFX900-NEXT:    v_mov_b32_e32 v5, v1
1616; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
1617; GFX900-NEXT:    s_waitcnt vmcnt(0)
1618; GFX900-NEXT:    s_setpc_b64 s[30:31]
1619;
1620; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0:
1621; GFX90A:       ; %bb.0:
1622; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1623; GFX90A-NEXT:    ;;#ASMSTART
1624; GFX90A-NEXT:    ; def v[0:5]
1625; GFX90A-NEXT:    ;;#ASMEND
1626; GFX90A-NEXT:    ;;#ASMSTART
1627; GFX90A-NEXT:    ; def v[2:7]
1628; GFX90A-NEXT:    ;;#ASMEND
1629; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
1630; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1631; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1632; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1633; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
1634; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
1635; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
1636; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
1637; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
1638; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1639; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1640;
1641; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_0_0:
1642; GFX940:       ; %bb.0:
1643; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644; GFX940-NEXT:    ;;#ASMSTART
1645; GFX940-NEXT:    ; def v[0:5]
1646; GFX940-NEXT:    ;;#ASMEND
1647; GFX940-NEXT:    v_mov_b32_e32 v8, 0
1648; GFX940-NEXT:    ;;#ASMSTART
1649; GFX940-NEXT:    ; def v[2:7]
1650; GFX940-NEXT:    ;;#ASMEND
1651; GFX940-NEXT:    s_nop 0
1652; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1653; GFX940-NEXT:    v_mov_b32_e32 v3, v1
1654; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
1655; GFX940-NEXT:    v_mov_b32_e32 v4, v0
1656; GFX940-NEXT:    v_mov_b32_e32 v5, v1
1657; GFX940-NEXT:    v_mov_b32_e32 v2, v6
1658; GFX940-NEXT:    v_mov_b32_e32 v3, v7
1659; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
1660; GFX940-NEXT:    s_waitcnt vmcnt(0)
1661; GFX940-NEXT:    s_setpc_b64 s[30:31]
1662  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1663  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1664  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
1665  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1666  ret void
1667}
1668
; Mask <5, poison, 0, 0>: mask indices 3-5 address %vec1, so lane 0 takes
; element 2 of %vec1; lane 1 is a poison mask entry (the "u" in the name), and
; lanes 2-3 replicate element 0 of %vec0. The <4 x i64> result is stored to
; %ptr.
1669define void @v_shuffle_v4i64_v3i64__5_u_0_0(ptr addrspace(1) inreg %ptr) {
1670; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0:
1671; GFX900:       ; %bb.0:
1672; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1673; GFX900-NEXT:    ;;#ASMSTART
1674; GFX900-NEXT:    ; def v[0:5]
1675; GFX900-NEXT:    ;;#ASMEND
1676; GFX900-NEXT:    ;;#ASMSTART
1677; GFX900-NEXT:    ; def v[2:7]
1678; GFX900-NEXT:    ;;#ASMEND
1679; GFX900-NEXT:    v_mov_b32_e32 v8, 0
1680; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1681; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1682; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1683; GFX900-NEXT:    s_nop 0
1684; GFX900-NEXT:    v_mov_b32_e32 v0, v6
1685; GFX900-NEXT:    v_mov_b32_e32 v1, v7
1686; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17]
1687; GFX900-NEXT:    s_waitcnt vmcnt(0)
1688; GFX900-NEXT:    s_setpc_b64 s[30:31]
1689;
1690; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0:
1691; GFX90A:       ; %bb.0:
1692; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693; GFX90A-NEXT:    ;;#ASMSTART
1694; GFX90A-NEXT:    ; def v[0:5]
1695; GFX90A-NEXT:    ;;#ASMEND
1696; GFX90A-NEXT:    ;;#ASMSTART
1697; GFX90A-NEXT:    ; def v[2:7]
1698; GFX90A-NEXT:    ;;#ASMEND
1699; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
1700; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1701; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1702; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1703; GFX90A-NEXT:    s_nop 0
1704; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
1705; GFX90A-NEXT:    v_mov_b32_e32 v1, v7
1706; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17]
1707; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1708; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1709;
1710; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_0_0:
1711; GFX940:       ; %bb.0:
1712; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713; GFX940-NEXT:    ;;#ASMSTART
1714; GFX940-NEXT:    ; def v[0:5]
1715; GFX940-NEXT:    ;;#ASMEND
1716; GFX940-NEXT:    v_mov_b32_e32 v8, 0
1717; GFX940-NEXT:    ;;#ASMSTART
1718; GFX940-NEXT:    ; def v[2:7]
1719; GFX940-NEXT:    ;;#ASMEND
1720; GFX940-NEXT:    s_nop 0
1721; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1722; GFX940-NEXT:    v_mov_b32_e32 v3, v1
1723; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
1724; GFX940-NEXT:    s_nop 1
1725; GFX940-NEXT:    v_mov_b32_e32 v0, v6
1726; GFX940-NEXT:    v_mov_b32_e32 v1, v7
1727; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
1728; GFX940-NEXT:    s_waitcnt vmcnt(0)
1729; GFX940-NEXT:    s_setpc_b64 s[30:31]
1730  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1731  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1732  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
1733  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1734  ret void
1735}
1736
; Mask <5, 1, 0, 0>: mask indices 3-5 address %vec1, so lane 0 takes element 2
; of %vec1; lane 1 takes element 1 of %vec0, and lanes 2-3 replicate element 0
; of %vec0. The <4 x i64> result is stored to %ptr.
1737define void @v_shuffle_v4i64_v3i64__5_1_0_0(ptr addrspace(1) inreg %ptr) {
1738; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0:
1739; GFX900:       ; %bb.0:
1740; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1741; GFX900-NEXT:    ;;#ASMSTART
1742; GFX900-NEXT:    ; def v[0:5]
1743; GFX900-NEXT:    ;;#ASMEND
1744; GFX900-NEXT:    ;;#ASMSTART
1745; GFX900-NEXT:    ; def v[4:9]
1746; GFX900-NEXT:    ;;#ASMEND
1747; GFX900-NEXT:    v_mov_b32_e32 v10, 0
1748; GFX900-NEXT:    v_mov_b32_e32 v4, v0
1749; GFX900-NEXT:    v_mov_b32_e32 v5, v1
1750; GFX900-NEXT:    v_mov_b32_e32 v6, v0
1751; GFX900-NEXT:    v_mov_b32_e32 v7, v1
1752; GFX900-NEXT:    v_mov_b32_e32 v0, v8
1753; GFX900-NEXT:    v_mov_b32_e32 v1, v9
1754; GFX900-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
1755; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
1756; GFX900-NEXT:    s_waitcnt vmcnt(0)
1757; GFX900-NEXT:    s_setpc_b64 s[30:31]
1758;
1759; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0:
1760; GFX90A:       ; %bb.0:
1761; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1762; GFX90A-NEXT:    ;;#ASMSTART
1763; GFX90A-NEXT:    ; def v[0:5]
1764; GFX90A-NEXT:    ;;#ASMEND
1765; GFX90A-NEXT:    ;;#ASMSTART
1766; GFX90A-NEXT:    ; def v[4:9]
1767; GFX90A-NEXT:    ;;#ASMEND
1768; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
1769; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
1770; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
1771; GFX90A-NEXT:    v_mov_b32_e32 v6, v0
1772; GFX90A-NEXT:    v_mov_b32_e32 v7, v1
1773; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
1774; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
1775; GFX90A-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
1776; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
1777; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1778; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1779;
1780; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_0_0:
1781; GFX940:       ; %bb.0:
1782; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1783; GFX940-NEXT:    ;;#ASMSTART
1784; GFX940-NEXT:    ; def v[0:5]
1785; GFX940-NEXT:    ;;#ASMEND
1786; GFX940-NEXT:    v_mov_b32_e32 v10, 0
1787; GFX940-NEXT:    ;;#ASMSTART
1788; GFX940-NEXT:    ; def v[4:9]
1789; GFX940-NEXT:    ;;#ASMEND
1790; GFX940-NEXT:    s_nop 0
1791; GFX940-NEXT:    v_mov_b32_e32 v4, v0
1792; GFX940-NEXT:    v_mov_b32_e32 v5, v1
1793; GFX940-NEXT:    v_mov_b32_e32 v6, v0
1794; GFX940-NEXT:    v_mov_b32_e32 v7, v1
1795; GFX940-NEXT:    v_mov_b32_e32 v0, v8
1796; GFX940-NEXT:    v_mov_b32_e32 v1, v9
1797; GFX940-NEXT:    global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1
1798; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
1799; GFX940-NEXT:    s_waitcnt vmcnt(0)
1800; GFX940-NEXT:    s_setpc_b64 s[30:31]
1801  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1802  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1803  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
1804  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1805  ret void
1806}
1807
; Mask <5, 2, 0, 0>: mask indices 3-5 address %vec1, so lane 0 takes element 2
; of %vec1; lane 1 takes element 2 of %vec0, and lanes 2-3 replicate element 0
; of %vec0. The <4 x i64> result is stored to %ptr.
1808define void @v_shuffle_v4i64_v3i64__5_2_0_0(ptr addrspace(1) inreg %ptr) {
1809; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0:
1810; GFX900:       ; %bb.0:
1811; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1812; GFX900-NEXT:    ;;#ASMSTART
1813; GFX900-NEXT:    ; def v[0:5]
1814; GFX900-NEXT:    ;;#ASMEND
1815; GFX900-NEXT:    v_mov_b32_e32 v12, 0
1816; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1817; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1818; GFX900-NEXT:    ;;#ASMSTART
1819; GFX900-NEXT:    ; def v[6:11]
1820; GFX900-NEXT:    ;;#ASMEND
1821; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
1822; GFX900-NEXT:    s_nop 0
1823; GFX900-NEXT:    v_mov_b32_e32 v2, v10
1824; GFX900-NEXT:    v_mov_b32_e32 v3, v11
1825; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
1826; GFX900-NEXT:    s_waitcnt vmcnt(0)
1827; GFX900-NEXT:    s_setpc_b64 s[30:31]
1828;
1829; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0:
1830; GFX90A:       ; %bb.0:
1831; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1832; GFX90A-NEXT:    ;;#ASMSTART
1833; GFX90A-NEXT:    ; def v[0:5]
1834; GFX90A-NEXT:    ;;#ASMEND
1835; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
1836; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1837; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1838; GFX90A-NEXT:    ;;#ASMSTART
1839; GFX90A-NEXT:    ; def v[6:11]
1840; GFX90A-NEXT:    ;;#ASMEND
1841; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
1842; GFX90A-NEXT:    s_nop 0
1843; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
1844; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
1845; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
1846; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1847; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1848;
1849; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_0_0:
1850; GFX940:       ; %bb.0:
1851; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1852; GFX940-NEXT:    ;;#ASMSTART
1853; GFX940-NEXT:    ; def v[0:5]
1854; GFX940-NEXT:    ;;#ASMEND
1855; GFX940-NEXT:    v_mov_b32_e32 v12, 0
1856; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1857; GFX940-NEXT:    v_mov_b32_e32 v3, v1
1858; GFX940-NEXT:    ;;#ASMSTART
1859; GFX940-NEXT:    ; def v[6:11]
1860; GFX940-NEXT:    ;;#ASMEND
1861; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
1862; GFX940-NEXT:    s_nop 1
1863; GFX940-NEXT:    v_mov_b32_e32 v2, v10
1864; GFX940-NEXT:    v_mov_b32_e32 v3, v11
1865; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
1866; GFX940-NEXT:    s_waitcnt vmcnt(0)
1867; GFX940-NEXT:    s_setpc_b64 s[30:31]
1868  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1869  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1870  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
1871  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1872  ret void
1873}
1874
; Mask <5, 3, 0, 0>: mask indices 3-5 address %vec1, so lane 0 takes element 2
; of %vec1 and lane 1 takes element 0 of %vec1; lanes 2-3 replicate element 0
; of %vec0. The <4 x i64> result is stored to %ptr.
1875define void @v_shuffle_v4i64_v3i64__5_3_0_0(ptr addrspace(1) inreg %ptr) {
1876; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0:
1877; GFX900:       ; %bb.0:
1878; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1879; GFX900-NEXT:    ;;#ASMSTART
1880; GFX900-NEXT:    ; def v[0:5]
1881; GFX900-NEXT:    ;;#ASMEND
1882; GFX900-NEXT:    v_mov_b32_e32 v10, 0
1883; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1884; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1885; GFX900-NEXT:    ;;#ASMSTART
1886; GFX900-NEXT:    ; def v[4:9]
1887; GFX900-NEXT:    ;;#ASMEND
1888; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
1889; GFX900-NEXT:    s_nop 0
1890; GFX900-NEXT:    v_mov_b32_e32 v0, v8
1891; GFX900-NEXT:    v_mov_b32_e32 v1, v9
1892; GFX900-NEXT:    v_mov_b32_e32 v2, v4
1893; GFX900-NEXT:    v_mov_b32_e32 v3, v5
1894; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
1895; GFX900-NEXT:    s_waitcnt vmcnt(0)
1896; GFX900-NEXT:    s_setpc_b64 s[30:31]
1897;
1898; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0:
1899; GFX90A:       ; %bb.0:
1900; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1901; GFX90A-NEXT:    ;;#ASMSTART
1902; GFX90A-NEXT:    ; def v[0:5]
1903; GFX90A-NEXT:    ;;#ASMEND
1904; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
1905; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1906; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1907; GFX90A-NEXT:    ;;#ASMSTART
1908; GFX90A-NEXT:    ; def v[4:9]
1909; GFX90A-NEXT:    ;;#ASMEND
1910; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
1911; GFX90A-NEXT:    s_nop 0
1912; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
1913; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
1914; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
1915; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
1916; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
1917; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1918; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1919;
1920; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_0_0:
1921; GFX940:       ; %bb.0:
1922; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1923; GFX940-NEXT:    ;;#ASMSTART
1924; GFX940-NEXT:    ; def v[0:5]
1925; GFX940-NEXT:    ;;#ASMEND
1926; GFX940-NEXT:    v_mov_b32_e32 v10, 0
1927; GFX940-NEXT:    v_mov_b32_e32 v2, v0
1928; GFX940-NEXT:    v_mov_b32_e32 v3, v1
1929; GFX940-NEXT:    ;;#ASMSTART
1930; GFX940-NEXT:    ; def v[4:9]
1931; GFX940-NEXT:    ;;#ASMEND
1932; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
1933; GFX940-NEXT:    s_nop 1
1934; GFX940-NEXT:    v_mov_b32_e32 v0, v8
1935; GFX940-NEXT:    v_mov_b32_e32 v1, v9
1936; GFX940-NEXT:    v_mov_b32_e32 v2, v4
1937; GFX940-NEXT:    v_mov_b32_e32 v3, v5
1938; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
1939; GFX940-NEXT:    s_waitcnt vmcnt(0)
1940; GFX940-NEXT:    s_setpc_b64 s[30:31]
1941  %vec0 = call <3 x i64> asm "; def $0", "=v"()
1942  %vec1 = call <3 x i64> asm "; def $0", "=v"()
1943  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
1944  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
1945  ret void
1946}
1947
; Mask <5, 4, 0, 0>: mask indices 3-5 address %vec1, so lane 0 takes element 2
; of %vec1 and lane 1 takes element 1 of %vec1; lanes 2-3 replicate element 0
; of %vec0. The <4 x i64> result is stored to %ptr.
1948define void @v_shuffle_v4i64_v3i64__5_4_0_0(ptr addrspace(1) inreg %ptr) {
1949; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0:
1950; GFX900:       ; %bb.0:
1951; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1952; GFX900-NEXT:    ;;#ASMSTART
1953; GFX900-NEXT:    ; def v[0:5]
1954; GFX900-NEXT:    ;;#ASMEND
1955; GFX900-NEXT:    ;;#ASMSTART
1956; GFX900-NEXT:    ; def v[2:7]
1957; GFX900-NEXT:    ;;#ASMEND
1958; GFX900-NEXT:    v_mov_b32_e32 v8, 0
1959; GFX900-NEXT:    v_mov_b32_e32 v2, v0
1960; GFX900-NEXT:    v_mov_b32_e32 v3, v1
1961; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1962; GFX900-NEXT:    s_nop 0
1963; GFX900-NEXT:    v_mov_b32_e32 v2, v6
1964; GFX900-NEXT:    v_mov_b32_e32 v3, v7
1965; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
1966; GFX900-NEXT:    s_waitcnt vmcnt(0)
1967; GFX900-NEXT:    s_setpc_b64 s[30:31]
1968;
1969; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0:
1970; GFX90A:       ; %bb.0:
1971; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1972; GFX90A-NEXT:    ;;#ASMSTART
1973; GFX90A-NEXT:    ; def v[0:5]
1974; GFX90A-NEXT:    ;;#ASMEND
1975; GFX90A-NEXT:    ;;#ASMSTART
1976; GFX90A-NEXT:    ; def v[2:7]
1977; GFX90A-NEXT:    ;;#ASMEND
1978; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
1979; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
1980; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
1981; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
1982; GFX90A-NEXT:    s_nop 0
1983; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
1984; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
1985; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
1986; GFX90A-NEXT:    s_waitcnt vmcnt(0)
1987; GFX90A-NEXT:    s_setpc_b64 s[30:31]
1988;
1989; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_0_0:
1990; GFX940:       ; %bb.0:
1991; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1992; GFX940-NEXT:    ;;#ASMSTART
1993; GFX940-NEXT:    ; def v[0:5]
1994; GFX940-NEXT:    ;;#ASMEND
1995; GFX940-NEXT:    v_mov_b32_e32 v8, 0
1996; GFX940-NEXT:    ;;#ASMSTART
1997; GFX940-NEXT:    ; def v[2:7]
1998; GFX940-NEXT:    ;;#ASMEND
1999; GFX940-NEXT:    s_nop 0
2000; GFX940-NEXT:    v_mov_b32_e32 v2, v0
2001; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2002; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
2003; GFX940-NEXT:    s_nop 1
2004; GFX940-NEXT:    v_mov_b32_e32 v2, v6
2005; GFX940-NEXT:    v_mov_b32_e32 v3, v7
2006; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
2007; GFX940-NEXT:    s_waitcnt vmcnt(0)
2008; GFX940-NEXT:    s_setpc_b64 s[30:31]
2009  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2010  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2011  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
2012  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2013  ret void
2014}
2015
; Mask <5, 5, 0, 0>: mask indices 3-5 address %vec1, so lanes 0-1 both take
; element 2 of %vec1, while lanes 2-3 both replicate element 0 of %vec0. The
; <4 x i64> result is stored to %ptr.
2016define void @v_shuffle_v4i64_v3i64__5_5_0_0(ptr addrspace(1) inreg %ptr) {
2017; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0:
2018; GFX900:       ; %bb.0:
2019; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2020; GFX900-NEXT:    ;;#ASMSTART
2021; GFX900-NEXT:    ; def v[0:5]
2022; GFX900-NEXT:    ;;#ASMEND
2023; GFX900-NEXT:    ;;#ASMSTART
2024; GFX900-NEXT:    ; def v[2:7]
2025; GFX900-NEXT:    ;;#ASMEND
2026; GFX900-NEXT:    v_mov_b32_e32 v8, 0
2027; GFX900-NEXT:    v_mov_b32_e32 v2, v0
2028; GFX900-NEXT:    v_mov_b32_e32 v3, v1
2029; GFX900-NEXT:    v_mov_b32_e32 v4, v6
2030; GFX900-NEXT:    v_mov_b32_e32 v5, v7
2031; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
2032; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
2033; GFX900-NEXT:    s_waitcnt vmcnt(0)
2034; GFX900-NEXT:    s_setpc_b64 s[30:31]
2035;
2036; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0:
2037; GFX90A:       ; %bb.0:
2038; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039; GFX90A-NEXT:    ;;#ASMSTART
2040; GFX90A-NEXT:    ; def v[0:5]
2041; GFX90A-NEXT:    ;;#ASMEND
2042; GFX90A-NEXT:    ;;#ASMSTART
2043; GFX90A-NEXT:    ; def v[2:7]
2044; GFX90A-NEXT:    ;;#ASMEND
2045; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
2046; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
2047; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2048; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
2049; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
2050; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
2051; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
2052; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2053; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2054;
2055; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_0:
2056; GFX940:       ; %bb.0:
2057; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2058; GFX940-NEXT:    ;;#ASMSTART
2059; GFX940-NEXT:    ; def v[0:5]
2060; GFX940-NEXT:    ;;#ASMEND
2061; GFX940-NEXT:    v_mov_b32_e32 v8, 0
2062; GFX940-NEXT:    ;;#ASMSTART
2063; GFX940-NEXT:    ; def v[2:7]
2064; GFX940-NEXT:    ;;#ASMEND
2065; GFX940-NEXT:    s_nop 0
2066; GFX940-NEXT:    v_mov_b32_e32 v2, v0
2067; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2068; GFX940-NEXT:    v_mov_b32_e32 v4, v6
2069; GFX940-NEXT:    v_mov_b32_e32 v5, v7
2070; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
2071; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
2072; GFX940-NEXT:    s_waitcnt vmcnt(0)
2073; GFX940-NEXT:    s_setpc_b64 s[30:31]
2074  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2075  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2076  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
2077  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2078  ret void
2079}
2080
; Mask <5, 5, poison, 0>: mask indices 3-5 address %vec1, so lanes 0-1 both
; take element 2 of %vec1; lane 2 is a poison mask entry (the "u" in the
; name), and lane 3 takes element 0 of %vec0. The expected code below matches
; the <5, 5, 0, 0> variant of this test. The <4 x i64> result is stored to
; %ptr.
2081define void @v_shuffle_v4i64_v3i64__5_5_u_0(ptr addrspace(1) inreg %ptr) {
2082; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0:
2083; GFX900:       ; %bb.0:
2084; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2085; GFX900-NEXT:    ;;#ASMSTART
2086; GFX900-NEXT:    ; def v[0:5]
2087; GFX900-NEXT:    ;;#ASMEND
2088; GFX900-NEXT:    ;;#ASMSTART
2089; GFX900-NEXT:    ; def v[2:7]
2090; GFX900-NEXT:    ;;#ASMEND
2091; GFX900-NEXT:    v_mov_b32_e32 v8, 0
2092; GFX900-NEXT:    v_mov_b32_e32 v2, v0
2093; GFX900-NEXT:    v_mov_b32_e32 v3, v1
2094; GFX900-NEXT:    v_mov_b32_e32 v4, v6
2095; GFX900-NEXT:    v_mov_b32_e32 v5, v7
2096; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
2097; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
2098; GFX900-NEXT:    s_waitcnt vmcnt(0)
2099; GFX900-NEXT:    s_setpc_b64 s[30:31]
2100;
2101; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0:
2102; GFX90A:       ; %bb.0:
2103; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2104; GFX90A-NEXT:    ;;#ASMSTART
2105; GFX90A-NEXT:    ; def v[0:5]
2106; GFX90A-NEXT:    ;;#ASMEND
2107; GFX90A-NEXT:    ;;#ASMSTART
2108; GFX90A-NEXT:    ; def v[2:7]
2109; GFX90A-NEXT:    ;;#ASMEND
2110; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
2111; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
2112; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
2113; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
2114; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
2115; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
2116; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
2117; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2118; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2119;
2120; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_0:
2121; GFX940:       ; %bb.0:
2122; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2123; GFX940-NEXT:    ;;#ASMSTART
2124; GFX940-NEXT:    ; def v[0:5]
2125; GFX940-NEXT:    ;;#ASMEND
2126; GFX940-NEXT:    v_mov_b32_e32 v8, 0
2127; GFX940-NEXT:    ;;#ASMSTART
2128; GFX940-NEXT:    ; def v[2:7]
2129; GFX940-NEXT:    ;;#ASMEND
2130; GFX940-NEXT:    s_nop 0
2131; GFX940-NEXT:    v_mov_b32_e32 v2, v0
2132; GFX940-NEXT:    v_mov_b32_e32 v3, v1
2133; GFX940-NEXT:    v_mov_b32_e32 v4, v6
2134; GFX940-NEXT:    v_mov_b32_e32 v5, v7
2135; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
2136; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
2137; GFX940-NEXT:    s_waitcnt vmcnt(0)
2138; GFX940-NEXT:    s_setpc_b64 s[30:31]
2139  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2140  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2141  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
2142  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2143  ret void
2144}
2145
; Mask <5, 5, 1, 0>: mask indices 3-5 address %vec1, so lanes 0-1 both take
; element 2 of %vec1; lane 2 takes element 1 of %vec0 and lane 3 takes
; element 0 of %vec0. The <4 x i64> result is stored to %ptr.
2146define void @v_shuffle_v4i64_v3i64__5_5_1_0(ptr addrspace(1) inreg %ptr) {
2147; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0:
2148; GFX900:       ; %bb.0:
2149; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150; GFX900-NEXT:    ;;#ASMSTART
2151; GFX900-NEXT:    ; def v[0:5]
2152; GFX900-NEXT:    ;;#ASMEND
2153; GFX900-NEXT:    ;;#ASMSTART
2154; GFX900-NEXT:    ; def v[4:9]
2155; GFX900-NEXT:    ;;#ASMEND
2156; GFX900-NEXT:    v_mov_b32_e32 v10, 0
2157; GFX900-NEXT:    v_mov_b32_e32 v4, v0
2158; GFX900-NEXT:    v_mov_b32_e32 v5, v1
2159; GFX900-NEXT:    v_mov_b32_e32 v6, v8
2160; GFX900-NEXT:    v_mov_b32_e32 v7, v9
2161; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2162; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
2163; GFX900-NEXT:    s_waitcnt vmcnt(0)
2164; GFX900-NEXT:    s_setpc_b64 s[30:31]
2165;
2166; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0:
2167; GFX90A:       ; %bb.0:
2168; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169; GFX90A-NEXT:    ;;#ASMSTART
2170; GFX90A-NEXT:    ; def v[0:5]
2171; GFX90A-NEXT:    ;;#ASMEND
2172; GFX90A-NEXT:    ;;#ASMSTART
2173; GFX90A-NEXT:    ; def v[4:9]
2174; GFX90A-NEXT:    ;;#ASMEND
2175; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
2176; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
2177; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
2178; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
2179; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
2180; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2181; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
2182; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2183; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2184;
2185; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_0:
2186; GFX940:       ; %bb.0:
2187; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2188; GFX940-NEXT:    ;;#ASMSTART
2189; GFX940-NEXT:    ; def v[0:5]
2190; GFX940-NEXT:    ;;#ASMEND
2191; GFX940-NEXT:    v_mov_b32_e32 v10, 0
2192; GFX940-NEXT:    ;;#ASMSTART
2193; GFX940-NEXT:    ; def v[4:9]
2194; GFX940-NEXT:    ;;#ASMEND
2195; GFX940-NEXT:    s_nop 0
2196; GFX940-NEXT:    v_mov_b32_e32 v4, v0
2197; GFX940-NEXT:    v_mov_b32_e32 v5, v1
2198; GFX940-NEXT:    v_mov_b32_e32 v6, v8
2199; GFX940-NEXT:    v_mov_b32_e32 v7, v9
2200; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2201; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
2202; GFX940-NEXT:    s_waitcnt vmcnt(0)
2203; GFX940-NEXT:    s_setpc_b64 s[30:31]
2204  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2205  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2206  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
2207  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2208  ret void
2209}
2210
; Mask <5, 5, 2, 0>: mask indices 3-5 address %vec1, so lanes 0-1 both take
; element 2 of %vec1; lane 2 takes element 2 of %vec0 and lane 3 takes
; element 0 of %vec0. The <4 x i64> result is stored to %ptr.
2211define void @v_shuffle_v4i64_v3i64__5_5_2_0(ptr addrspace(1) inreg %ptr) {
2212; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0:
2213; GFX900:       ; %bb.0:
2214; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2215; GFX900-NEXT:    ;;#ASMSTART
2216; GFX900-NEXT:    ; def v[0:5]
2217; GFX900-NEXT:    ;;#ASMEND
2218; GFX900-NEXT:    ;;#ASMSTART
2219; GFX900-NEXT:    ; def v[6:11]
2220; GFX900-NEXT:    ;;#ASMEND
2221; GFX900-NEXT:    v_mov_b32_e32 v12, 0
2222; GFX900-NEXT:    v_mov_b32_e32 v2, v4
2223; GFX900-NEXT:    v_mov_b32_e32 v3, v5
2224; GFX900-NEXT:    v_mov_b32_e32 v4, v0
2225; GFX900-NEXT:    v_mov_b32_e32 v5, v1
2226; GFX900-NEXT:    v_mov_b32_e32 v8, v10
2227; GFX900-NEXT:    v_mov_b32_e32 v9, v11
2228; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
2229; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
2230; GFX900-NEXT:    s_waitcnt vmcnt(0)
2231; GFX900-NEXT:    s_setpc_b64 s[30:31]
2232;
2233; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0:
2234; GFX90A:       ; %bb.0:
2235; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2236; GFX90A-NEXT:    ;;#ASMSTART
2237; GFX90A-NEXT:    ; def v[0:5]
2238; GFX90A-NEXT:    ;;#ASMEND
2239; GFX90A-NEXT:    ;;#ASMSTART
2240; GFX90A-NEXT:    ; def v[6:11]
2241; GFX90A-NEXT:    ;;#ASMEND
2242; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
2243; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
2244; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
2245; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
2246; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
2247; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
2248; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
2249; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
2250; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
2251; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2252; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2253;
2254; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_0:
2255; GFX940:       ; %bb.0:
2256; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2257; GFX940-NEXT:    ;;#ASMSTART
2258; GFX940-NEXT:    ; def v[0:5]
2259; GFX940-NEXT:    ;;#ASMEND
2260; GFX940-NEXT:    ;;#ASMSTART
2261; GFX940-NEXT:    ; def v[6:11]
2262; GFX940-NEXT:    ;;#ASMEND
2263; GFX940-NEXT:    v_mov_b32_e32 v12, 0
2264; GFX940-NEXT:    v_mov_b32_e32 v2, v4
2265; GFX940-NEXT:    v_mov_b32_e32 v3, v5
2266; GFX940-NEXT:    v_mov_b32_e32 v4, v0
2267; GFX940-NEXT:    v_mov_b32_e32 v5, v1
2268; GFX940-NEXT:    v_mov_b32_e32 v8, v10
2269; GFX940-NEXT:    v_mov_b32_e32 v9, v11
2270; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
2271; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
2272; GFX940-NEXT:    s_waitcnt vmcnt(0)
2273; GFX940-NEXT:    s_setpc_b64 s[30:31]
2274  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2275  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2276  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
2277  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2278  ret void
2279}
2280
; Shuffle two <3 x i64> inline-asm values into a <4 x i64> and store it to %ptr.
; Mask <5, 5, 3, 0>: lanes 0 and 1 both read %vec1[2], lane 2 reads %vec1[0],
; lane 3 reads %vec0[0] (mask indices 0-2 select %vec0, 3-5 select %vec1).
2281define void @v_shuffle_v4i64_v3i64__5_5_3_0(ptr addrspace(1) inreg %ptr) {
2282; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0:
2283; GFX900:       ; %bb.0:
2284; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2285; GFX900-NEXT:    ;;#ASMSTART
2286; GFX900-NEXT:    ; def v[0:5]
2287; GFX900-NEXT:    ;;#ASMEND
2288; GFX900-NEXT:    ;;#ASMSTART
2289; GFX900-NEXT:    ; def v[2:7]
2290; GFX900-NEXT:    ;;#ASMEND
2291; GFX900-NEXT:    v_mov_b32_e32 v8, 0
2292; GFX900-NEXT:    v_mov_b32_e32 v4, v0
2293; GFX900-NEXT:    v_mov_b32_e32 v5, v1
2294; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
2295; GFX900-NEXT:    s_nop 0
2296; GFX900-NEXT:    v_mov_b32_e32 v4, v6
2297; GFX900-NEXT:    v_mov_b32_e32 v5, v7
2298; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
2299; GFX900-NEXT:    s_waitcnt vmcnt(0)
2300; GFX900-NEXT:    s_setpc_b64 s[30:31]
2301;
2302; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0:
2303; GFX90A:       ; %bb.0:
2304; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2305; GFX90A-NEXT:    ;;#ASMSTART
2306; GFX90A-NEXT:    ; def v[0:5]
2307; GFX90A-NEXT:    ;;#ASMEND
2308; GFX90A-NEXT:    ;;#ASMSTART
2309; GFX90A-NEXT:    ; def v[2:7]
2310; GFX90A-NEXT:    ;;#ASMEND
2311; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
2312; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
2313; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
2314; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
2315; GFX90A-NEXT:    s_nop 0
2316; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
2317; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
2318; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
2319; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2320; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2321;
2322; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_0:
2323; GFX940:       ; %bb.0:
2324; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2325; GFX940-NEXT:    ;;#ASMSTART
2326; GFX940-NEXT:    ; def v[0:5]
2327; GFX940-NEXT:    ;;#ASMEND
2328; GFX940-NEXT:    v_mov_b32_e32 v8, 0
2329; GFX940-NEXT:    ;;#ASMSTART
2330; GFX940-NEXT:    ; def v[2:7]
2331; GFX940-NEXT:    ;;#ASMEND
2332; GFX940-NEXT:    s_nop 0
2333; GFX940-NEXT:    v_mov_b32_e32 v4, v0
2334; GFX940-NEXT:    v_mov_b32_e32 v5, v1
2335; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
2336; GFX940-NEXT:    s_nop 1
2337; GFX940-NEXT:    v_mov_b32_e32 v4, v6
2338; GFX940-NEXT:    v_mov_b32_e32 v5, v7
2339; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
2340; GFX940-NEXT:    s_waitcnt vmcnt(0)
2341; GFX940-NEXT:    s_setpc_b64 s[30:31]
2342  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2343  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2344  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
2345  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2346  ret void
2347}
2348
; Shuffle two <3 x i64> inline-asm values into a <4 x i64> and store it to %ptr.
; Mask <5, 5, 4, 0>: lanes 0 and 1 both read %vec1[2], lane 2 reads %vec1[1],
; lane 3 reads %vec0[0] (mask indices 0-2 select %vec0, 3-5 select %vec1).
2349define void @v_shuffle_v4i64_v3i64__5_5_4_0(ptr addrspace(1) inreg %ptr) {
2350; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0:
2351; GFX900:       ; %bb.0:
2352; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2353; GFX900-NEXT:    ;;#ASMSTART
2354; GFX900-NEXT:    ; def v[0:5]
2355; GFX900-NEXT:    ;;#ASMEND
2356; GFX900-NEXT:    ;;#ASMSTART
2357; GFX900-NEXT:    ; def v[2:7]
2358; GFX900-NEXT:    ;;#ASMEND
2359; GFX900-NEXT:    v_mov_b32_e32 v8, 0
2360; GFX900-NEXT:    v_mov_b32_e32 v2, v4
2361; GFX900-NEXT:    v_mov_b32_e32 v3, v5
2362; GFX900-NEXT:    v_mov_b32_e32 v4, v0
2363; GFX900-NEXT:    v_mov_b32_e32 v5, v1
2364; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
2365; GFX900-NEXT:    s_nop 0
2366; GFX900-NEXT:    v_mov_b32_e32 v4, v6
2367; GFX900-NEXT:    v_mov_b32_e32 v5, v7
2368; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
2369; GFX900-NEXT:    s_waitcnt vmcnt(0)
2370; GFX900-NEXT:    s_setpc_b64 s[30:31]
2371;
2372; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0:
2373; GFX90A:       ; %bb.0:
2374; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2375; GFX90A-NEXT:    ;;#ASMSTART
2376; GFX90A-NEXT:    ; def v[0:5]
2377; GFX90A-NEXT:    ;;#ASMEND
2378; GFX90A-NEXT:    ;;#ASMSTART
2379; GFX90A-NEXT:    ; def v[2:7]
2380; GFX90A-NEXT:    ;;#ASMEND
2381; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
2382; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
2383; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
2384; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
2385; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
2386; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
2387; GFX90A-NEXT:    s_nop 0
2388; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
2389; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
2390; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
2391; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2392; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2393;
2394; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_0:
2395; GFX940:       ; %bb.0:
2396; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2397; GFX940-NEXT:    ;;#ASMSTART
2398; GFX940-NEXT:    ; def v[0:5]
2399; GFX940-NEXT:    ;;#ASMEND
2400; GFX940-NEXT:    v_mov_b32_e32 v8, 0
2401; GFX940-NEXT:    ;;#ASMSTART
2402; GFX940-NEXT:    ; def v[2:7]
2403; GFX940-NEXT:    ;;#ASMEND
2404; GFX940-NEXT:    s_nop 0
2405; GFX940-NEXT:    v_mov_b32_e32 v2, v4
2406; GFX940-NEXT:    v_mov_b32_e32 v3, v5
2407; GFX940-NEXT:    v_mov_b32_e32 v4, v0
2408; GFX940-NEXT:    v_mov_b32_e32 v5, v1
2409; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
2410; GFX940-NEXT:    s_nop 1
2411; GFX940-NEXT:    v_mov_b32_e32 v4, v6
2412; GFX940-NEXT:    v_mov_b32_e32 v5, v7
2413; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
2414; GFX940-NEXT:    s_waitcnt vmcnt(0)
2415; GFX940-NEXT:    s_setpc_b64 s[30:31]
2416  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2417  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2418  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
2419  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2420  ret void
2421}
2422
; Shuffle one <3 x i64> inline-asm value (second operand is poison) into a
; <4 x i64> and store it to %ptr.
; Mask <poison, 1, 1, 1>: lane 0 is unconstrained, lanes 1-3 all read %vec0[1].
2423define void @v_shuffle_v4i64_v3i64__u_1_1_1(ptr addrspace(1) inreg %ptr) {
2424; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1:
2425; GFX900:       ; %bb.0:
2426; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2427; GFX900-NEXT:    ;;#ASMSTART
2428; GFX900-NEXT:    ; def v[0:5]
2429; GFX900-NEXT:    ;;#ASMEND
2430; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2431; GFX900-NEXT:    v_mov_b32_e32 v4, v2
2432; GFX900-NEXT:    v_mov_b32_e32 v5, v3
2433; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2434; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2435; GFX900-NEXT:    s_waitcnt vmcnt(0)
2436; GFX900-NEXT:    s_setpc_b64 s[30:31]
2437;
2438; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1:
2439; GFX90A:       ; %bb.0:
2440; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2441; GFX90A-NEXT:    ;;#ASMSTART
2442; GFX90A-NEXT:    ; def v[0:5]
2443; GFX90A-NEXT:    ;;#ASMEND
2444; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
2445; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
2446; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
2447; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2448; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2449; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2450; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2451;
2452; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_1_1_1:
2453; GFX940:       ; %bb.0:
2454; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2455; GFX940-NEXT:    ;;#ASMSTART
2456; GFX940-NEXT:    ; def v[0:5]
2457; GFX940-NEXT:    ;;#ASMEND
2458; GFX940-NEXT:    v_mov_b32_e32 v6, 0
2459; GFX940-NEXT:    v_mov_b32_e32 v4, v2
2460; GFX940-NEXT:    v_mov_b32_e32 v5, v3
2461; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
2462; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
2463; GFX940-NEXT:    s_waitcnt vmcnt(0)
2464; GFX940-NEXT:    s_setpc_b64 s[30:31]
2465  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2466  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
2467  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2468  ret void
2469}
2470
; Shuffle one <3 x i64> inline-asm value (second operand is poison) into a
; <4 x i64> and store it to %ptr.
; Mask <0, 1, 1, 1>: lane 0 reads %vec0[0], lanes 1-3 all read %vec0[1].
2471define void @v_shuffle_v4i64_v3i64__0_1_1_1(ptr addrspace(1) inreg %ptr) {
2472; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1:
2473; GFX900:       ; %bb.0:
2474; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2475; GFX900-NEXT:    ;;#ASMSTART
2476; GFX900-NEXT:    ; def v[0:5]
2477; GFX900-NEXT:    ;;#ASMEND
2478; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2479; GFX900-NEXT:    v_mov_b32_e32 v4, v2
2480; GFX900-NEXT:    v_mov_b32_e32 v5, v3
2481; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2482; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2483; GFX900-NEXT:    s_waitcnt vmcnt(0)
2484; GFX900-NEXT:    s_setpc_b64 s[30:31]
2485;
2486; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1:
2487; GFX90A:       ; %bb.0:
2488; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2489; GFX90A-NEXT:    ;;#ASMSTART
2490; GFX90A-NEXT:    ; def v[0:5]
2491; GFX90A-NEXT:    ;;#ASMEND
2492; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
2493; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
2494; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
2495; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2496; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2497; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2498; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2499;
2500; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_1_1_1:
2501; GFX940:       ; %bb.0:
2502; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2503; GFX940-NEXT:    ;;#ASMSTART
2504; GFX940-NEXT:    ; def v[0:5]
2505; GFX940-NEXT:    ;;#ASMEND
2506; GFX940-NEXT:    v_mov_b32_e32 v6, 0
2507; GFX940-NEXT:    v_mov_b32_e32 v4, v2
2508; GFX940-NEXT:    v_mov_b32_e32 v5, v3
2509; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
2510; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
2511; GFX940-NEXT:    s_waitcnt vmcnt(0)
2512; GFX940-NEXT:    s_setpc_b64 s[30:31]
2513  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2514  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
2515  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2516  ret void
2517}
2518
; Shuffle one <3 x i64> inline-asm value (second operand is poison) into a
; <4 x i64> and store it to %ptr.
; Mask <1, 1, 1, 1>: all four lanes read %vec0[1] (a splat of element 1).
2519define void @v_shuffle_v4i64_v3i64__1_1_1_1(ptr addrspace(1) inreg %ptr) {
2520; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1:
2521; GFX900:       ; %bb.0:
2522; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2523; GFX900-NEXT:    ;;#ASMSTART
2524; GFX900-NEXT:    ; def v[0:5]
2525; GFX900-NEXT:    ;;#ASMEND
2526; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2527; GFX900-NEXT:    v_mov_b32_e32 v4, v2
2528; GFX900-NEXT:    v_mov_b32_e32 v5, v3
2529; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2530; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
2531; GFX900-NEXT:    s_waitcnt vmcnt(0)
2532; GFX900-NEXT:    s_setpc_b64 s[30:31]
2533;
2534; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1:
2535; GFX90A:       ; %bb.0:
2536; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2537; GFX90A-NEXT:    ;;#ASMSTART
2538; GFX90A-NEXT:    ; def v[0:5]
2539; GFX90A-NEXT:    ;;#ASMEND
2540; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
2541; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
2542; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
2543; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2544; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
2545; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2546; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2547;
2548; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_1_1_1:
2549; GFX940:       ; %bb.0:
2550; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2551; GFX940-NEXT:    ;;#ASMSTART
2552; GFX940-NEXT:    ; def v[0:5]
2553; GFX940-NEXT:    ;;#ASMEND
2554; GFX940-NEXT:    v_mov_b32_e32 v6, 0
2555; GFX940-NEXT:    v_mov_b32_e32 v4, v2
2556; GFX940-NEXT:    v_mov_b32_e32 v5, v3
2557; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
2558; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
2559; GFX940-NEXT:    s_waitcnt vmcnt(0)
2560; GFX940-NEXT:    s_setpc_b64 s[30:31]
2561  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2562  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2563  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2564  ret void
2565}
2566
; Shuffle one <3 x i64> inline-asm value (second operand is poison) into a
; <4 x i64> and store it to %ptr.
; Mask <2, 1, 1, 1>: lane 0 reads %vec0[2], lanes 1-3 all read %vec0[1].
2567define void @v_shuffle_v4i64_v3i64__2_1_1_1(ptr addrspace(1) inreg %ptr) {
2568; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1:
2569; GFX900:       ; %bb.0:
2570; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2571; GFX900-NEXT:    ;;#ASMSTART
2572; GFX900-NEXT:    ; def v[0:5]
2573; GFX900-NEXT:    ;;#ASMEND
2574; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2575; GFX900-NEXT:    v_mov_b32_e32 v0, v2
2576; GFX900-NEXT:    v_mov_b32_e32 v1, v3
2577; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
2578; GFX900-NEXT:    s_nop 0
2579; GFX900-NEXT:    v_mov_b32_e32 v0, v4
2580; GFX900-NEXT:    v_mov_b32_e32 v1, v5
2581; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2582; GFX900-NEXT:    s_waitcnt vmcnt(0)
2583; GFX900-NEXT:    s_setpc_b64 s[30:31]
2584;
2585; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1:
2586; GFX90A:       ; %bb.0:
2587; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2588; GFX90A-NEXT:    ;;#ASMSTART
2589; GFX90A-NEXT:    ; def v[0:5]
2590; GFX90A-NEXT:    ;;#ASMEND
2591; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
2592; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
2593; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
2594; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
2595; GFX90A-NEXT:    s_nop 0
2596; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
2597; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
2598; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2599; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2600; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2601;
2602; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_1_1_1:
2603; GFX940:       ; %bb.0:
2604; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2605; GFX940-NEXT:    ;;#ASMSTART
2606; GFX940-NEXT:    ; def v[0:5]
2607; GFX940-NEXT:    ;;#ASMEND
2608; GFX940-NEXT:    v_mov_b32_e32 v6, 0
2609; GFX940-NEXT:    v_mov_b32_e32 v0, v2
2610; GFX940-NEXT:    v_mov_b32_e32 v1, v3
2611; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
2612; GFX940-NEXT:    s_nop 1
2613; GFX940-NEXT:    v_mov_b32_e32 v0, v4
2614; GFX940-NEXT:    v_mov_b32_e32 v1, v5
2615; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
2616; GFX940-NEXT:    s_waitcnt vmcnt(0)
2617; GFX940-NEXT:    s_setpc_b64 s[30:31]
2618  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2619  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
2620  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2621  ret void
2622}
2623
; Shuffle one <3 x i64> inline-asm value (second operand is poison) into a
; <4 x i64> and store it to %ptr.
; Mask <3, 1, 1, 1>: lane 0 uses index 3, i.e. element 0 of the poison second
; operand, so only lanes 1-3 (all %vec0[1]) carry a defined value.
2624define void @v_shuffle_v4i64_v3i64__3_1_1_1(ptr addrspace(1) inreg %ptr) {
2625; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1:
2626; GFX900:       ; %bb.0:
2627; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2628; GFX900-NEXT:    ;;#ASMSTART
2629; GFX900-NEXT:    ; def v[0:5]
2630; GFX900-NEXT:    ;;#ASMEND
2631; GFX900-NEXT:    v_mov_b32_e32 v6, 0
2632; GFX900-NEXT:    v_mov_b32_e32 v4, v2
2633; GFX900-NEXT:    v_mov_b32_e32 v5, v3
2634; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2635; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2636; GFX900-NEXT:    s_waitcnt vmcnt(0)
2637; GFX900-NEXT:    s_setpc_b64 s[30:31]
2638;
2639; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1:
2640; GFX90A:       ; %bb.0:
2641; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2642; GFX90A-NEXT:    ;;#ASMSTART
2643; GFX90A-NEXT:    ; def v[0:5]
2644; GFX90A-NEXT:    ;;#ASMEND
2645; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
2646; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
2647; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
2648; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
2649; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
2650; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2651; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2652;
2653; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_1_1_1:
2654; GFX940:       ; %bb.0:
2655; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2656; GFX940-NEXT:    ;;#ASMSTART
2657; GFX940-NEXT:    ; def v[0:5]
2658; GFX940-NEXT:    ;;#ASMEND
2659; GFX940-NEXT:    v_mov_b32_e32 v6, 0
2660; GFX940-NEXT:    v_mov_b32_e32 v4, v2
2661; GFX940-NEXT:    v_mov_b32_e32 v5, v3
2662; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
2663; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
2664; GFX940-NEXT:    s_waitcnt vmcnt(0)
2665; GFX940-NEXT:    s_setpc_b64 s[30:31]
2666  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2667  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
2668  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2669  ret void
2670}
2671
; Shuffle two <3 x i64> inline-asm values into a <4 x i64> and store it to %ptr.
; Mask <4, 1, 1, 1>: lane 0 reads %vec1[1], lanes 1-3 all read %vec0[1]
; (mask indices 0-2 select %vec0, 3-5 select %vec1).
2672define void @v_shuffle_v4i64_v3i64__4_1_1_1(ptr addrspace(1) inreg %ptr) {
2673; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1:
2674; GFX900:       ; %bb.0:
2675; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2676; GFX900-NEXT:    ;;#ASMSTART
2677; GFX900-NEXT:    ; def v[0:5]
2678; GFX900-NEXT:    ;;#ASMEND
2679; GFX900-NEXT:    ;;#ASMSTART
2680; GFX900-NEXT:    ; def v[4:9]
2681; GFX900-NEXT:    ;;#ASMEND
2682; GFX900-NEXT:    v_mov_b32_e32 v10, 0
2683; GFX900-NEXT:    v_mov_b32_e32 v4, v2
2684; GFX900-NEXT:    v_mov_b32_e32 v5, v3
2685; GFX900-NEXT:    v_mov_b32_e32 v8, v2
2686; GFX900-NEXT:    v_mov_b32_e32 v9, v3
2687; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2688; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
2689; GFX900-NEXT:    s_waitcnt vmcnt(0)
2690; GFX900-NEXT:    s_setpc_b64 s[30:31]
2691;
2692; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1:
2693; GFX90A:       ; %bb.0:
2694; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2695; GFX90A-NEXT:    ;;#ASMSTART
2696; GFX90A-NEXT:    ; def v[0:5]
2697; GFX90A-NEXT:    ;;#ASMEND
2698; GFX90A-NEXT:    ;;#ASMSTART
2699; GFX90A-NEXT:    ; def v[4:9]
2700; GFX90A-NEXT:    ;;#ASMEND
2701; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
2702; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
2703; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
2704; GFX90A-NEXT:    v_mov_b32_e32 v8, v2
2705; GFX90A-NEXT:    v_mov_b32_e32 v9, v3
2706; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2707; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
2708; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2709; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2710;
2711; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_1_1_1:
2712; GFX940:       ; %bb.0:
2713; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2714; GFX940-NEXT:    ;;#ASMSTART
2715; GFX940-NEXT:    ; def v[0:5]
2716; GFX940-NEXT:    ;;#ASMEND
2717; GFX940-NEXT:    v_mov_b32_e32 v10, 0
2718; GFX940-NEXT:    ;;#ASMSTART
2719; GFX940-NEXT:    ; def v[4:9]
2720; GFX940-NEXT:    ;;#ASMEND
2721; GFX940-NEXT:    s_nop 0
2722; GFX940-NEXT:    v_mov_b32_e32 v4, v2
2723; GFX940-NEXT:    v_mov_b32_e32 v5, v3
2724; GFX940-NEXT:    v_mov_b32_e32 v8, v2
2725; GFX940-NEXT:    v_mov_b32_e32 v9, v3
2726; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2727; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
2728; GFX940-NEXT:    s_waitcnt vmcnt(0)
2729; GFX940-NEXT:    s_setpc_b64 s[30:31]
2730  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2731  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2732  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
2733  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2734  ret void
2735}
2736
; Shuffle two <3 x i64> inline-asm values into a <4 x i64> and store it to %ptr.
; Mask <5, 1, 1, 1>: lane 0 reads %vec1[2], lanes 1-3 all read %vec0[1]
; (mask indices 0-2 select %vec0, 3-5 select %vec1).
2737define void @v_shuffle_v4i64_v3i64__5_1_1_1(ptr addrspace(1) inreg %ptr) {
2738; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1:
2739; GFX900:       ; %bb.0:
2740; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2741; GFX900-NEXT:    ;;#ASMSTART
2742; GFX900-NEXT:    ; def v[0:5]
2743; GFX900-NEXT:    ;;#ASMEND
2744; GFX900-NEXT:    ;;#ASMSTART
2745; GFX900-NEXT:    ; def v[4:9]
2746; GFX900-NEXT:    ;;#ASMEND
2747; GFX900-NEXT:    v_mov_b32_e32 v10, 0
2748; GFX900-NEXT:    v_mov_b32_e32 v4, v2
2749; GFX900-NEXT:    v_mov_b32_e32 v5, v3
2750; GFX900-NEXT:    v_mov_b32_e32 v0, v8
2751; GFX900-NEXT:    v_mov_b32_e32 v1, v9
2752; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2753; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
2754; GFX900-NEXT:    s_waitcnt vmcnt(0)
2755; GFX900-NEXT:    s_setpc_b64 s[30:31]
2756;
2757; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1:
2758; GFX90A:       ; %bb.0:
2759; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2760; GFX90A-NEXT:    ;;#ASMSTART
2761; GFX90A-NEXT:    ; def v[0:5]
2762; GFX90A-NEXT:    ;;#ASMEND
2763; GFX90A-NEXT:    ;;#ASMSTART
2764; GFX90A-NEXT:    ; def v[4:9]
2765; GFX90A-NEXT:    ;;#ASMEND
2766; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
2767; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
2768; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
2769; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
2770; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
2771; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2772; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
2773; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2774; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2775;
2776; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_1_1:
2777; GFX940:       ; %bb.0:
2778; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2779; GFX940-NEXT:    ;;#ASMSTART
2780; GFX940-NEXT:    ; def v[0:5]
2781; GFX940-NEXT:    ;;#ASMEND
2782; GFX940-NEXT:    v_mov_b32_e32 v10, 0
2783; GFX940-NEXT:    ;;#ASMSTART
2784; GFX940-NEXT:    ; def v[4:9]
2785; GFX940-NEXT:    ;;#ASMEND
2786; GFX940-NEXT:    s_nop 0
2787; GFX940-NEXT:    v_mov_b32_e32 v4, v2
2788; GFX940-NEXT:    v_mov_b32_e32 v5, v3
2789; GFX940-NEXT:    v_mov_b32_e32 v0, v8
2790; GFX940-NEXT:    v_mov_b32_e32 v1, v9
2791; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2792; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
2793; GFX940-NEXT:    s_waitcnt vmcnt(0)
2794; GFX940-NEXT:    s_setpc_b64 s[30:31]
2795  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2796  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2797  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
2798  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2799  ret void
2800}
2801
; Shuffle two <3 x i64> inline-asm values into a <4 x i64> and store it to %ptr.
; Mask <5, poison, 1, 1>: lane 0 reads %vec1[2], lane 1 is unconstrained,
; lanes 2 and 3 read %vec0[1] (mask indices 0-2 select %vec0, 3-5 select %vec1).
2802define void @v_shuffle_v4i64_v3i64__5_u_1_1(ptr addrspace(1) inreg %ptr) {
2803; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1:
2804; GFX900:       ; %bb.0:
2805; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2806; GFX900-NEXT:    ;;#ASMSTART
2807; GFX900-NEXT:    ; def v[0:5]
2808; GFX900-NEXT:    ;;#ASMEND
2809; GFX900-NEXT:    ;;#ASMSTART
2810; GFX900-NEXT:    ; def v[4:9]
2811; GFX900-NEXT:    ;;#ASMEND
2812; GFX900-NEXT:    v_mov_b32_e32 v10, 0
2813; GFX900-NEXT:    v_mov_b32_e32 v4, v2
2814; GFX900-NEXT:    v_mov_b32_e32 v5, v3
2815; GFX900-NEXT:    v_mov_b32_e32 v0, v8
2816; GFX900-NEXT:    v_mov_b32_e32 v1, v9
2817; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2818; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
2819; GFX900-NEXT:    s_waitcnt vmcnt(0)
2820; GFX900-NEXT:    s_setpc_b64 s[30:31]
2821;
2822; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1:
2823; GFX90A:       ; %bb.0:
2824; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2825; GFX90A-NEXT:    ;;#ASMSTART
2826; GFX90A-NEXT:    ; def v[0:5]
2827; GFX90A-NEXT:    ;;#ASMEND
2828; GFX90A-NEXT:    ;;#ASMSTART
2829; GFX90A-NEXT:    ; def v[4:9]
2830; GFX90A-NEXT:    ;;#ASMEND
2831; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
2832; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
2833; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
2834; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
2835; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
2836; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2837; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
2838; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2839; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2840;
2841; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_1_1:
2842; GFX940:       ; %bb.0:
2843; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2844; GFX940-NEXT:    ;;#ASMSTART
2845; GFX940-NEXT:    ; def v[0:5]
2846; GFX940-NEXT:    ;;#ASMEND
2847; GFX940-NEXT:    v_mov_b32_e32 v10, 0
2848; GFX940-NEXT:    ;;#ASMSTART
2849; GFX940-NEXT:    ; def v[4:9]
2850; GFX940-NEXT:    ;;#ASMEND
2851; GFX940-NEXT:    s_nop 0
2852; GFX940-NEXT:    v_mov_b32_e32 v4, v2
2853; GFX940-NEXT:    v_mov_b32_e32 v5, v3
2854; GFX940-NEXT:    v_mov_b32_e32 v0, v8
2855; GFX940-NEXT:    v_mov_b32_e32 v1, v9
2856; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2857; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
2858; GFX940-NEXT:    s_waitcnt vmcnt(0)
2859; GFX940-NEXT:    s_setpc_b64 s[30:31]
2860  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2861  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2862  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
2863  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2864  ret void
2865}
2866
; Shuffle two <3 x i64> inline-asm values into a <4 x i64> and store it to %ptr.
; Mask <5, 0, 1, 1>: lane 0 reads %vec1[2], lane 1 reads %vec0[0],
; lanes 2 and 3 read %vec0[1] (mask indices 0-2 select %vec0, 3-5 select %vec1).
2867define void @v_shuffle_v4i64_v3i64__5_0_1_1(ptr addrspace(1) inreg %ptr) {
2868; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1:
2869; GFX900:       ; %bb.0:
2870; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2871; GFX900-NEXT:    ;;#ASMSTART
2872; GFX900-NEXT:    ; def v[0:5]
2873; GFX900-NEXT:    ;;#ASMEND
2874; GFX900-NEXT:    ;;#ASMSTART
2875; GFX900-NEXT:    ; def v[4:9]
2876; GFX900-NEXT:    ;;#ASMEND
2877; GFX900-NEXT:    v_mov_b32_e32 v10, 0
2878; GFX900-NEXT:    v_mov_b32_e32 v4, v2
2879; GFX900-NEXT:    v_mov_b32_e32 v5, v3
2880; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2881; GFX900-NEXT:    s_nop 0
2882; GFX900-NEXT:    v_mov_b32_e32 v2, v8
2883; GFX900-NEXT:    v_mov_b32_e32 v3, v9
2884; GFX900-NEXT:    v_mov_b32_e32 v4, v0
2885; GFX900-NEXT:    v_mov_b32_e32 v5, v1
2886; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
2887; GFX900-NEXT:    s_waitcnt vmcnt(0)
2888; GFX900-NEXT:    s_setpc_b64 s[30:31]
2889;
2890; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1:
2891; GFX90A:       ; %bb.0:
2892; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2893; GFX90A-NEXT:    ;;#ASMSTART
2894; GFX90A-NEXT:    ; def v[0:5]
2895; GFX90A-NEXT:    ;;#ASMEND
2896; GFX90A-NEXT:    ;;#ASMSTART
2897; GFX90A-NEXT:    ; def v[4:9]
2898; GFX90A-NEXT:    ;;#ASMEND
2899; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
2900; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
2901; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
2902; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
2903; GFX90A-NEXT:    s_nop 0
2904; GFX90A-NEXT:    v_mov_b32_e32 v2, v8
2905; GFX90A-NEXT:    v_mov_b32_e32 v3, v9
2906; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
2907; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
2908; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
2909; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2910; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2911;
2912; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_1_1:
2913; GFX940:       ; %bb.0:
2914; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2915; GFX940-NEXT:    ;;#ASMSTART
2916; GFX940-NEXT:    ; def v[0:5]
2917; GFX940-NEXT:    ;;#ASMEND
2918; GFX940-NEXT:    v_mov_b32_e32 v10, 0
2919; GFX940-NEXT:    ;;#ASMSTART
2920; GFX940-NEXT:    ; def v[4:9]
2921; GFX940-NEXT:    ;;#ASMEND
2922; GFX940-NEXT:    s_nop 0
2923; GFX940-NEXT:    v_mov_b32_e32 v4, v2
2924; GFX940-NEXT:    v_mov_b32_e32 v5, v3
2925; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
2926; GFX940-NEXT:    s_nop 1
2927; GFX940-NEXT:    v_mov_b32_e32 v2, v8
2928; GFX940-NEXT:    v_mov_b32_e32 v3, v9
2929; GFX940-NEXT:    v_mov_b32_e32 v4, v0
2930; GFX940-NEXT:    v_mov_b32_e32 v5, v1
2931; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
2932; GFX940-NEXT:    s_waitcnt vmcnt(0)
2933; GFX940-NEXT:    s_setpc_b64 s[30:31]
2934  %vec0 = call <3 x i64> asm "; def $0", "=v"()
2935  %vec1 = call <3 x i64> asm "; def $0", "=v"()
2936  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
2937  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
2938  ret void
2939}
2940
; Shuffle two <3 x i64> inline-asm values into a <4 x i64> and store it to %ptr.
; Mask <5, 2, 1, 1>: lane 0 reads %vec1[2], lane 1 reads %vec0[2],
; lanes 2 and 3 read %vec0[1] (mask indices 0-2 select %vec0, 3-5 select %vec1).
2941define void @v_shuffle_v4i64_v3i64__5_2_1_1(ptr addrspace(1) inreg %ptr) {
2942; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1:
2943; GFX900:       ; %bb.0:
2944; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2945; GFX900-NEXT:    ;;#ASMSTART
2946; GFX900-NEXT:    ; def v[0:5]
2947; GFX900-NEXT:    ;;#ASMEND
2948; GFX900-NEXT:    v_mov_b32_e32 v12, 0
2949; GFX900-NEXT:    v_mov_b32_e32 v0, v2
2950; GFX900-NEXT:    v_mov_b32_e32 v1, v3
2951; GFX900-NEXT:    ;;#ASMSTART
2952; GFX900-NEXT:    ; def v[6:11]
2953; GFX900-NEXT:    ;;#ASMEND
2954; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
2955; GFX900-NEXT:    s_nop 0
2956; GFX900-NEXT:    v_mov_b32_e32 v2, v10
2957; GFX900-NEXT:    v_mov_b32_e32 v3, v11
2958; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
2959; GFX900-NEXT:    s_waitcnt vmcnt(0)
2960; GFX900-NEXT:    s_setpc_b64 s[30:31]
2961;
2962; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1:
2963; GFX90A:       ; %bb.0:
2964; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2965; GFX90A-NEXT:    ;;#ASMSTART
2966; GFX90A-NEXT:    ; def v[0:5]
2967; GFX90A-NEXT:    ;;#ASMEND
2968; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
2969; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
2970; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
2971; GFX90A-NEXT:    ;;#ASMSTART
2972; GFX90A-NEXT:    ; def v[6:11]
2973; GFX90A-NEXT:    ;;#ASMEND
2974; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
2975; GFX90A-NEXT:    s_nop 0
2976; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
2977; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
2978; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
2979; GFX90A-NEXT:    s_waitcnt vmcnt(0)
2980; GFX90A-NEXT:    s_setpc_b64 s[30:31]
2981;
2982; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_1_1:
2983; GFX940:       ; %bb.0:
2984; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2985; GFX940-NEXT:    ;;#ASMSTART
2986; GFX940-NEXT:    ; def v[0:5]
2987; GFX940-NEXT:    ;;#ASMEND
2988; GFX940-NEXT:    v_mov_b32_e32 v12, 0
2989; GFX940-NEXT:    v_mov_b32_e32 v0, v2
2990; GFX940-NEXT:    v_mov_b32_e32 v1, v3
2991; GFX940-NEXT:    ;;#ASMSTART
2992; GFX940-NEXT:    ; def v[6:11]
2993; GFX940-NEXT:    ;;#ASMEND
2994; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
2995; GFX940-NEXT:    s_nop 1
2996; GFX940-NEXT:    v_mov_b32_e32 v2, v10
2997; GFX940-NEXT:    v_mov_b32_e32 v3, v11
2998; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
2999; GFX940-NEXT:    s_waitcnt vmcnt(0)
3000; GFX940-NEXT:    s_setpc_b64 s[30:31]
3001  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3002  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3003  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
3004  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3005  ret void
3006}
3007
; Two-input shuffle, mask <5, 3, 1, 1> (indices 0-2 pick from %vec0, 3-5 from
; %vec1): result = { %vec1[2], %vec1[0], %vec0[1], %vec0[1] }.
; Both inputs are opaque inline-asm results ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code writes result elements 2-3 with the offset:16 store and
; elements 0-1 with the store at offset 0.
3008define void @v_shuffle_v4i64_v3i64__5_3_1_1(ptr addrspace(1) inreg %ptr) {
3009; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1:
3010; GFX900:       ; %bb.0:
3011; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3012; GFX900-NEXT:    ;;#ASMSTART
3013; GFX900-NEXT:    ; def v[0:5]
3014; GFX900-NEXT:    ;;#ASMEND
3015; GFX900-NEXT:    v_mov_b32_e32 v12, 0
3016; GFX900-NEXT:    v_mov_b32_e32 v4, v2
3017; GFX900-NEXT:    v_mov_b32_e32 v5, v3
3018; GFX900-NEXT:    ;;#ASMSTART
3019; GFX900-NEXT:    ; def v[6:11]
3020; GFX900-NEXT:    ;;#ASMEND
3021; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3022; GFX900-NEXT:    v_mov_b32_e32 v0, v10
3023; GFX900-NEXT:    v_mov_b32_e32 v1, v11
3024; GFX900-NEXT:    v_mov_b32_e32 v2, v6
3025; GFX900-NEXT:    v_mov_b32_e32 v3, v7
3026; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17]
3027; GFX900-NEXT:    s_waitcnt vmcnt(0)
3028; GFX900-NEXT:    s_setpc_b64 s[30:31]
3029;
3030; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1:
3031; GFX90A:       ; %bb.0:
3032; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3033; GFX90A-NEXT:    ;;#ASMSTART
3034; GFX90A-NEXT:    ; def v[0:5]
3035; GFX90A-NEXT:    ;;#ASMEND
3036; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
3037; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
3038; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
3039; GFX90A-NEXT:    ;;#ASMSTART
3040; GFX90A-NEXT:    ; def v[6:11]
3041; GFX90A-NEXT:    ;;#ASMEND
3042; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3043; GFX90A-NEXT:    v_mov_b32_e32 v0, v10
3044; GFX90A-NEXT:    v_mov_b32_e32 v1, v11
3045; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
3046; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
3047; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17]
3048; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3049; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3050;
3051; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_1_1:
3052; GFX940:       ; %bb.0:
3053; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3054; GFX940-NEXT:    ;;#ASMSTART
3055; GFX940-NEXT:    ; def v[0:5]
3056; GFX940-NEXT:    ;;#ASMEND
3057; GFX940-NEXT:    v_mov_b32_e32 v12, 0
3058; GFX940-NEXT:    v_mov_b32_e32 v4, v2
3059; GFX940-NEXT:    v_mov_b32_e32 v5, v3
3060; GFX940-NEXT:    ;;#ASMSTART
3061; GFX940-NEXT:    ; def v[6:11]
3062; GFX940-NEXT:    ;;#ASMEND
3063; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
3064; GFX940-NEXT:    v_mov_b32_e32 v0, v10
3065; GFX940-NEXT:    v_mov_b32_e32 v1, v11
3066; GFX940-NEXT:    v_mov_b32_e32 v2, v6
3067; GFX940-NEXT:    v_mov_b32_e32 v3, v7
3068; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1
3069; GFX940-NEXT:    s_waitcnt vmcnt(0)
3070; GFX940-NEXT:    s_setpc_b64 s[30:31]
3071  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3072  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3073  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
3074  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3075  ret void
3076}
3077
; Two-input shuffle, mask <5, 4, 1, 1> (indices 0-2 pick from %vec0, 3-5 from
; %vec1): result = { %vec1[2], %vec1[1], %vec0[1], %vec0[1] }.
; Both inputs are opaque inline-asm results ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code writes result elements 2-3 with the offset:16 store and
; elements 0-1 with the store at offset 0. Neither %vec0[2] nor %vec1[0] is
; referenced by the mask.
3078define void @v_shuffle_v4i64_v3i64__5_4_1_1(ptr addrspace(1) inreg %ptr) {
3079; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1:
3080; GFX900:       ; %bb.0:
3081; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3082; GFX900-NEXT:    ;;#ASMSTART
3083; GFX900-NEXT:    ; def v[0:5]
3084; GFX900-NEXT:    ;;#ASMEND
3085; GFX900-NEXT:    ;;#ASMSTART
3086; GFX900-NEXT:    ; def v[4:9]
3087; GFX900-NEXT:    ;;#ASMEND
3088; GFX900-NEXT:    v_mov_b32_e32 v10, 0
3089; GFX900-NEXT:    v_mov_b32_e32 v4, v2
3090; GFX900-NEXT:    v_mov_b32_e32 v5, v3
3091; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
3092; GFX900-NEXT:    s_nop 0
3093; GFX900-NEXT:    v_mov_b32_e32 v4, v8
3094; GFX900-NEXT:    v_mov_b32_e32 v5, v9
3095; GFX900-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17]
3096; GFX900-NEXT:    s_waitcnt vmcnt(0)
3097; GFX900-NEXT:    s_setpc_b64 s[30:31]
3098;
3099; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1:
3100; GFX90A:       ; %bb.0:
3101; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3102; GFX90A-NEXT:    ;;#ASMSTART
3103; GFX90A-NEXT:    ; def v[0:5]
3104; GFX90A-NEXT:    ;;#ASMEND
3105; GFX90A-NEXT:    ;;#ASMSTART
3106; GFX90A-NEXT:    ; def v[4:9]
3107; GFX90A-NEXT:    ;;#ASMEND
3108; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
3109; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
3110; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
3111; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
3112; GFX90A-NEXT:    s_nop 0
3113; GFX90A-NEXT:    v_mov_b32_e32 v4, v8
3114; GFX90A-NEXT:    v_mov_b32_e32 v5, v9
3115; GFX90A-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17]
3116; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3117; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3118;
3119; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_1_1:
3120; GFX940:       ; %bb.0:
3121; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3122; GFX940-NEXT:    ;;#ASMSTART
3123; GFX940-NEXT:    ; def v[0:5]
3124; GFX940-NEXT:    ;;#ASMEND
3125; GFX940-NEXT:    v_mov_b32_e32 v10, 0
3126; GFX940-NEXT:    ;;#ASMSTART
3127; GFX940-NEXT:    ; def v[4:9]
3128; GFX940-NEXT:    ;;#ASMEND
3129; GFX940-NEXT:    s_nop 0
3130; GFX940-NEXT:    v_mov_b32_e32 v4, v2
3131; GFX940-NEXT:    v_mov_b32_e32 v5, v3
3132; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
3133; GFX940-NEXT:    s_nop 1
3134; GFX940-NEXT:    v_mov_b32_e32 v4, v8
3135; GFX940-NEXT:    v_mov_b32_e32 v5, v9
3136; GFX940-NEXT:    global_store_dwordx4 v10, v[4:7], s[0:1] sc0 sc1
3137; GFX940-NEXT:    s_waitcnt vmcnt(0)
3138; GFX940-NEXT:    s_setpc_b64 s[30:31]
3139  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3140  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3141  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
3142  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3143  ret void
3144}
3145
; Two-input shuffle, mask <5, 5, 1, 1> (indices 0-2 pick from %vec0, 3-5 from
; %vec1): result = { %vec1[2], %vec1[2], %vec0[1], %vec0[1] }.
; Both inputs are opaque inline-asm results ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code duplicates each selected element into an adjacent register
; pair, then writes result elements 2-3 with the offset:16 store and elements
; 0-1 with the store at offset 0.
3146define void @v_shuffle_v4i64_v3i64__5_5_1_1(ptr addrspace(1) inreg %ptr) {
3147; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1:
3148; GFX900:       ; %bb.0:
3149; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3150; GFX900-NEXT:    ;;#ASMSTART
3151; GFX900-NEXT:    ; def v[0:5]
3152; GFX900-NEXT:    ;;#ASMEND
3153; GFX900-NEXT:    ;;#ASMSTART
3154; GFX900-NEXT:    ; def v[4:9]
3155; GFX900-NEXT:    ;;#ASMEND
3156; GFX900-NEXT:    v_mov_b32_e32 v10, 0
3157; GFX900-NEXT:    v_mov_b32_e32 v4, v2
3158; GFX900-NEXT:    v_mov_b32_e32 v5, v3
3159; GFX900-NEXT:    v_mov_b32_e32 v6, v8
3160; GFX900-NEXT:    v_mov_b32_e32 v7, v9
3161; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
3162; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3163; GFX900-NEXT:    s_waitcnt vmcnt(0)
3164; GFX900-NEXT:    s_setpc_b64 s[30:31]
3165;
3166; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1:
3167; GFX90A:       ; %bb.0:
3168; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3169; GFX90A-NEXT:    ;;#ASMSTART
3170; GFX90A-NEXT:    ; def v[0:5]
3171; GFX90A-NEXT:    ;;#ASMEND
3172; GFX90A-NEXT:    ;;#ASMSTART
3173; GFX90A-NEXT:    ; def v[4:9]
3174; GFX90A-NEXT:    ;;#ASMEND
3175; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
3176; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
3177; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
3178; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
3179; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
3180; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
3181; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3182; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3183; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3184;
3185; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_1:
3186; GFX940:       ; %bb.0:
3187; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3188; GFX940-NEXT:    ;;#ASMSTART
3189; GFX940-NEXT:    ; def v[0:5]
3190; GFX940-NEXT:    ;;#ASMEND
3191; GFX940-NEXT:    v_mov_b32_e32 v10, 0
3192; GFX940-NEXT:    ;;#ASMSTART
3193; GFX940-NEXT:    ; def v[4:9]
3194; GFX940-NEXT:    ;;#ASMEND
3195; GFX940-NEXT:    s_nop 0
3196; GFX940-NEXT:    v_mov_b32_e32 v4, v2
3197; GFX940-NEXT:    v_mov_b32_e32 v5, v3
3198; GFX940-NEXT:    v_mov_b32_e32 v6, v8
3199; GFX940-NEXT:    v_mov_b32_e32 v7, v9
3200; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
3201; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3202; GFX940-NEXT:    s_waitcnt vmcnt(0)
3203; GFX940-NEXT:    s_setpc_b64 s[30:31]
3204  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3205  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3206  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
3207  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3208  ret void
3209}
3210
; Two-input shuffle, mask <5, 5, poison, 1> (indices 0-2 pick from %vec0, 3-5
; from %vec1): result = { %vec1[2], %vec1[2], <unspecified>, %vec0[1] }.
; Both inputs are opaque inline-asm results ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; Because element 2 is poison, the expected offset:16 store uses the first asm
; result's v[0:3] unchanged; only the low half (store at offset 0) needs copies
; to duplicate %vec1[2].
3211define void @v_shuffle_v4i64_v3i64__5_5_u_1(ptr addrspace(1) inreg %ptr) {
3212; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1:
3213; GFX900:       ; %bb.0:
3214; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3215; GFX900-NEXT:    ;;#ASMSTART
3216; GFX900-NEXT:    ; def v[0:5]
3217; GFX900-NEXT:    ;;#ASMEND
3218; GFX900-NEXT:    ;;#ASMSTART
3219; GFX900-NEXT:    ; def v[4:9]
3220; GFX900-NEXT:    ;;#ASMEND
3221; GFX900-NEXT:    v_mov_b32_e32 v10, 0
3222; GFX900-NEXT:    v_mov_b32_e32 v6, v8
3223; GFX900-NEXT:    v_mov_b32_e32 v7, v9
3224; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3225; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3226; GFX900-NEXT:    s_waitcnt vmcnt(0)
3227; GFX900-NEXT:    s_setpc_b64 s[30:31]
3228;
3229; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1:
3230; GFX90A:       ; %bb.0:
3231; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3232; GFX90A-NEXT:    ;;#ASMSTART
3233; GFX90A-NEXT:    ; def v[0:5]
3234; GFX90A-NEXT:    ;;#ASMEND
3235; GFX90A-NEXT:    ;;#ASMSTART
3236; GFX90A-NEXT:    ; def v[4:9]
3237; GFX90A-NEXT:    ;;#ASMEND
3238; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
3239; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
3240; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
3241; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3242; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3243; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3244; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3245;
3246; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_1:
3247; GFX940:       ; %bb.0:
3248; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3249; GFX940-NEXT:    ;;#ASMSTART
3250; GFX940-NEXT:    ; def v[0:5]
3251; GFX940-NEXT:    ;;#ASMEND
3252; GFX940-NEXT:    v_mov_b32_e32 v10, 0
3253; GFX940-NEXT:    ;;#ASMSTART
3254; GFX940-NEXT:    ; def v[4:9]
3255; GFX940-NEXT:    ;;#ASMEND
3256; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
3257; GFX940-NEXT:    v_mov_b32_e32 v6, v8
3258; GFX940-NEXT:    v_mov_b32_e32 v7, v9
3259; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3260; GFX940-NEXT:    s_waitcnt vmcnt(0)
3261; GFX940-NEXT:    s_setpc_b64 s[30:31]
3262  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3263  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3264  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
3265  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3266  ret void
3267}
3268
; Two-input shuffle, mask <5, 5, 0, 1> (indices 0-2 pick from %vec0, 3-5 from
; %vec1): result = { %vec1[2], %vec1[2], %vec0[0], %vec0[1] }.
; Both inputs are opaque inline-asm results ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; Result elements 2-3 are %vec0[0] and %vec0[1] in their original order, so the
; expected offset:16 store uses the first asm result's v[0:3] unchanged; only
; the low half (store at offset 0) needs copies to duplicate %vec1[2].
3269define void @v_shuffle_v4i64_v3i64__5_5_0_1(ptr addrspace(1) inreg %ptr) {
3270; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1:
3271; GFX900:       ; %bb.0:
3272; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3273; GFX900-NEXT:    ;;#ASMSTART
3274; GFX900-NEXT:    ; def v[0:5]
3275; GFX900-NEXT:    ;;#ASMEND
3276; GFX900-NEXT:    ;;#ASMSTART
3277; GFX900-NEXT:    ; def v[4:9]
3278; GFX900-NEXT:    ;;#ASMEND
3279; GFX900-NEXT:    v_mov_b32_e32 v10, 0
3280; GFX900-NEXT:    v_mov_b32_e32 v6, v8
3281; GFX900-NEXT:    v_mov_b32_e32 v7, v9
3282; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3283; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3284; GFX900-NEXT:    s_waitcnt vmcnt(0)
3285; GFX900-NEXT:    s_setpc_b64 s[30:31]
3286;
3287; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1:
3288; GFX90A:       ; %bb.0:
3289; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3290; GFX90A-NEXT:    ;;#ASMSTART
3291; GFX90A-NEXT:    ; def v[0:5]
3292; GFX90A-NEXT:    ;;#ASMEND
3293; GFX90A-NEXT:    ;;#ASMSTART
3294; GFX90A-NEXT:    ; def v[4:9]
3295; GFX90A-NEXT:    ;;#ASMEND
3296; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
3297; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
3298; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
3299; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3300; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3301; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3302; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3303;
3304; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_1:
3305; GFX940:       ; %bb.0:
3306; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3307; GFX940-NEXT:    ;;#ASMSTART
3308; GFX940-NEXT:    ; def v[0:5]
3309; GFX940-NEXT:    ;;#ASMEND
3310; GFX940-NEXT:    v_mov_b32_e32 v10, 0
3311; GFX940-NEXT:    ;;#ASMSTART
3312; GFX940-NEXT:    ; def v[4:9]
3313; GFX940-NEXT:    ;;#ASMEND
3314; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
3315; GFX940-NEXT:    v_mov_b32_e32 v6, v8
3316; GFX940-NEXT:    v_mov_b32_e32 v7, v9
3317; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3318; GFX940-NEXT:    s_waitcnt vmcnt(0)
3319; GFX940-NEXT:    s_setpc_b64 s[30:31]
3320  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3321  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3322  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
3323  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3324  ret void
3325}
3326
; Two-input shuffle, mask <5, 5, 2, 1> (indices 0-2 pick from %vec0, 3-5 from
; %vec1): result = { %vec1[2], %vec1[2], %vec0[2], %vec0[1] }.
; Both inputs are opaque inline-asm results ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code copies %vec0[2] in front of %vec0[1] for the offset:16
; store (result elements 2-3) and duplicates %vec1[2] for the store at offset 0
; (result elements 0-1).
3327define void @v_shuffle_v4i64_v3i64__5_5_2_1(ptr addrspace(1) inreg %ptr) {
3328; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1:
3329; GFX900:       ; %bb.0:
3330; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3331; GFX900-NEXT:    ;;#ASMSTART
3332; GFX900-NEXT:    ; def v[0:5]
3333; GFX900-NEXT:    ;;#ASMEND
3334; GFX900-NEXT:    ;;#ASMSTART
3335; GFX900-NEXT:    ; def v[6:11]
3336; GFX900-NEXT:    ;;#ASMEND
3337; GFX900-NEXT:    v_mov_b32_e32 v12, 0
3338; GFX900-NEXT:    v_mov_b32_e32 v0, v4
3339; GFX900-NEXT:    v_mov_b32_e32 v1, v5
3340; GFX900-NEXT:    v_mov_b32_e32 v8, v10
3341; GFX900-NEXT:    v_mov_b32_e32 v9, v11
3342; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
3343; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
3344; GFX900-NEXT:    s_waitcnt vmcnt(0)
3345; GFX900-NEXT:    s_setpc_b64 s[30:31]
3346;
3347; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1:
3348; GFX90A:       ; %bb.0:
3349; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3350; GFX90A-NEXT:    ;;#ASMSTART
3351; GFX90A-NEXT:    ; def v[0:5]
3352; GFX90A-NEXT:    ;;#ASMEND
3353; GFX90A-NEXT:    ;;#ASMSTART
3354; GFX90A-NEXT:    ; def v[6:11]
3355; GFX90A-NEXT:    ;;#ASMEND
3356; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
3357; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
3358; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
3359; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
3360; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
3361; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
3362; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
3363; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3364; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3365;
3366; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_1:
3367; GFX940:       ; %bb.0:
3368; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3369; GFX940-NEXT:    ;;#ASMSTART
3370; GFX940-NEXT:    ; def v[0:5]
3371; GFX940-NEXT:    ;;#ASMEND
3372; GFX940-NEXT:    ;;#ASMSTART
3373; GFX940-NEXT:    ; def v[6:11]
3374; GFX940-NEXT:    ;;#ASMEND
3375; GFX940-NEXT:    v_mov_b32_e32 v12, 0
3376; GFX940-NEXT:    v_mov_b32_e32 v0, v4
3377; GFX940-NEXT:    v_mov_b32_e32 v1, v5
3378; GFX940-NEXT:    v_mov_b32_e32 v8, v10
3379; GFX940-NEXT:    v_mov_b32_e32 v9, v11
3380; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
3381; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
3382; GFX940-NEXT:    s_waitcnt vmcnt(0)
3383; GFX940-NEXT:    s_setpc_b64 s[30:31]
3384  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3385  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3386  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
3387  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3388  ret void
3389}
3390
; Two-input shuffle, mask <5, 5, 3, 1> (indices 0-2 pick from %vec0, 3-5 from
; %vec1): result = { %vec1[2], %vec1[2], %vec1[0], %vec0[1] }.
; Both inputs are opaque inline-asm results ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code places %vec0[1] after %vec1[0] for the offset:16 store
; (result elements 2-3), then reuses v[6:7] to duplicate %vec1[2] for the store
; at offset 0 (result elements 0-1).
3391define void @v_shuffle_v4i64_v3i64__5_5_3_1(ptr addrspace(1) inreg %ptr) {
3392; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1:
3393; GFX900:       ; %bb.0:
3394; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3395; GFX900-NEXT:    ;;#ASMSTART
3396; GFX900-NEXT:    ; def v[0:5]
3397; GFX900-NEXT:    ;;#ASMEND
3398; GFX900-NEXT:    ;;#ASMSTART
3399; GFX900-NEXT:    ; def v[4:9]
3400; GFX900-NEXT:    ;;#ASMEND
3401; GFX900-NEXT:    v_mov_b32_e32 v10, 0
3402; GFX900-NEXT:    v_mov_b32_e32 v6, v2
3403; GFX900-NEXT:    v_mov_b32_e32 v7, v3
3404; GFX900-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
3405; GFX900-NEXT:    s_nop 0
3406; GFX900-NEXT:    v_mov_b32_e32 v6, v8
3407; GFX900-NEXT:    v_mov_b32_e32 v7, v9
3408; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3409; GFX900-NEXT:    s_waitcnt vmcnt(0)
3410; GFX900-NEXT:    s_setpc_b64 s[30:31]
3411;
3412; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1:
3413; GFX90A:       ; %bb.0:
3414; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3415; GFX90A-NEXT:    ;;#ASMSTART
3416; GFX90A-NEXT:    ; def v[0:5]
3417; GFX90A-NEXT:    ;;#ASMEND
3418; GFX90A-NEXT:    ;;#ASMSTART
3419; GFX90A-NEXT:    ; def v[4:9]
3420; GFX90A-NEXT:    ;;#ASMEND
3421; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
3422; GFX90A-NEXT:    v_mov_b32_e32 v6, v2
3423; GFX90A-NEXT:    v_mov_b32_e32 v7, v3
3424; GFX90A-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
3425; GFX90A-NEXT:    s_nop 0
3426; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
3427; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
3428; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3429; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3430; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3431;
3432; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_1:
3433; GFX940:       ; %bb.0:
3434; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3435; GFX940-NEXT:    ;;#ASMSTART
3436; GFX940-NEXT:    ; def v[0:5]
3437; GFX940-NEXT:    ;;#ASMEND
3438; GFX940-NEXT:    v_mov_b32_e32 v10, 0
3439; GFX940-NEXT:    ;;#ASMSTART
3440; GFX940-NEXT:    ; def v[4:9]
3441; GFX940-NEXT:    ;;#ASMEND
3442; GFX940-NEXT:    s_nop 0
3443; GFX940-NEXT:    v_mov_b32_e32 v6, v2
3444; GFX940-NEXT:    v_mov_b32_e32 v7, v3
3445; GFX940-NEXT:    global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1
3446; GFX940-NEXT:    s_nop 1
3447; GFX940-NEXT:    v_mov_b32_e32 v6, v8
3448; GFX940-NEXT:    v_mov_b32_e32 v7, v9
3449; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3450; GFX940-NEXT:    s_waitcnt vmcnt(0)
3451; GFX940-NEXT:    s_setpc_b64 s[30:31]
3452  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3453  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3454  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
3455  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3456  ret void
3457}
3458
; Two-input shuffle, mask <5, 5, 4, 1> (indices 0-2 pick from %vec0, 3-5 from
; %vec1): result = { %vec1[2], %vec1[2], %vec1[1], %vec0[1] }.
; Both inputs are opaque inline-asm results ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code copies %vec1[1] in front of %vec0[1] for the offset:16
; store (result elements 2-3) and duplicates %vec1[2] for the store at offset 0
; (result elements 0-1).
3459define void @v_shuffle_v4i64_v3i64__5_5_4_1(ptr addrspace(1) inreg %ptr) {
3460; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1:
3461; GFX900:       ; %bb.0:
3462; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3463; GFX900-NEXT:    ;;#ASMSTART
3464; GFX900-NEXT:    ; def v[0:5]
3465; GFX900-NEXT:    ;;#ASMEND
3466; GFX900-NEXT:    ;;#ASMSTART
3467; GFX900-NEXT:    ; def v[4:9]
3468; GFX900-NEXT:    ;;#ASMEND
3469; GFX900-NEXT:    v_mov_b32_e32 v10, 0
3470; GFX900-NEXT:    v_mov_b32_e32 v0, v6
3471; GFX900-NEXT:    v_mov_b32_e32 v1, v7
3472; GFX900-NEXT:    v_mov_b32_e32 v6, v8
3473; GFX900-NEXT:    v_mov_b32_e32 v7, v9
3474; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3475; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3476; GFX900-NEXT:    s_waitcnt vmcnt(0)
3477; GFX900-NEXT:    s_setpc_b64 s[30:31]
3478;
3479; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1:
3480; GFX90A:       ; %bb.0:
3481; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3482; GFX90A-NEXT:    ;;#ASMSTART
3483; GFX90A-NEXT:    ; def v[0:5]
3484; GFX90A-NEXT:    ;;#ASMEND
3485; GFX90A-NEXT:    ;;#ASMSTART
3486; GFX90A-NEXT:    ; def v[4:9]
3487; GFX90A-NEXT:    ;;#ASMEND
3488; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
3489; GFX90A-NEXT:    v_mov_b32_e32 v0, v6
3490; GFX90A-NEXT:    v_mov_b32_e32 v1, v7
3491; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
3492; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
3493; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17] offset:16
3494; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
3495; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3496; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3497;
3498; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_1:
3499; GFX940:       ; %bb.0:
3500; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3501; GFX940-NEXT:    ;;#ASMSTART
3502; GFX940-NEXT:    ; def v[0:5]
3503; GFX940-NEXT:    ;;#ASMEND
3504; GFX940-NEXT:    v_mov_b32_e32 v10, 0
3505; GFX940-NEXT:    ;;#ASMSTART
3506; GFX940-NEXT:    ; def v[4:9]
3507; GFX940-NEXT:    ;;#ASMEND
3508; GFX940-NEXT:    s_nop 0
3509; GFX940-NEXT:    v_mov_b32_e32 v0, v6
3510; GFX940-NEXT:    v_mov_b32_e32 v1, v7
3511; GFX940-NEXT:    v_mov_b32_e32 v6, v8
3512; GFX940-NEXT:    v_mov_b32_e32 v7, v9
3513; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] offset:16 sc0 sc1
3514; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
3515; GFX940-NEXT:    s_waitcnt vmcnt(0)
3516; GFX940-NEXT:    s_setpc_b64 s[30:31]
3517  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3518  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3519  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
3520  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3521  ret void
3522}
3523
; Single-input shuffle (second operand is poison), mask <poison, 2, 2, 2>:
; result = { <unspecified>, %vec0[2], %vec0[2], %vec0[2] }.
; The input is an opaque inline-asm result ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code copies %vec0[2] next to itself once and then uses the same
; v[2:5] tuple for both the offset:16 store and the store at offset 0.
3524define void @v_shuffle_v4i64_v3i64__u_2_2_2(ptr addrspace(1) inreg %ptr) {
3525; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2:
3526; GFX900:       ; %bb.0:
3527; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3528; GFX900-NEXT:    ;;#ASMSTART
3529; GFX900-NEXT:    ; def v[0:5]
3530; GFX900-NEXT:    ;;#ASMEND
3531; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3532; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3533; GFX900-NEXT:    v_mov_b32_e32 v3, v5
3534; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3535; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
3536; GFX900-NEXT:    s_waitcnt vmcnt(0)
3537; GFX900-NEXT:    s_setpc_b64 s[30:31]
3538;
3539; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2:
3540; GFX90A:       ; %bb.0:
3541; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3542; GFX90A-NEXT:    ;;#ASMSTART
3543; GFX90A-NEXT:    ; def v[0:5]
3544; GFX90A-NEXT:    ;;#ASMEND
3545; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
3546; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3547; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
3548; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3549; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
3550; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3551; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3552;
3553; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_2_2_2:
3554; GFX940:       ; %bb.0:
3555; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3556; GFX940-NEXT:    ;;#ASMSTART
3557; GFX940-NEXT:    ; def v[0:5]
3558; GFX940-NEXT:    ;;#ASMEND
3559; GFX940-NEXT:    v_mov_b32_e32 v6, 0
3560; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3561; GFX940-NEXT:    v_mov_b32_e32 v3, v5
3562; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
3563; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
3564; GFX940-NEXT:    s_waitcnt vmcnt(0)
3565; GFX940-NEXT:    s_setpc_b64 s[30:31]
3566  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3567  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
3568  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3569  ret void
3570}
3571
; Single-input shuffle (second operand is poison), mask <0, 2, 2, 2>:
; result = { %vec0[0], %vec0[2], %vec0[2], %vec0[2] }.
; The input is an opaque inline-asm result ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code overwrites %vec0[1] with a copy of %vec0[2], so v[2:5]
; serves the offset:16 store (result elements 2-3) and v[0:3] serves the store
; at offset 0 (result elements 0-1).
3572define void @v_shuffle_v4i64_v3i64__0_2_2_2(ptr addrspace(1) inreg %ptr) {
3573; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2:
3574; GFX900:       ; %bb.0:
3575; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3576; GFX900-NEXT:    ;;#ASMSTART
3577; GFX900-NEXT:    ; def v[0:5]
3578; GFX900-NEXT:    ;;#ASMEND
3579; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3580; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3581; GFX900-NEXT:    v_mov_b32_e32 v3, v5
3582; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3583; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
3584; GFX900-NEXT:    s_waitcnt vmcnt(0)
3585; GFX900-NEXT:    s_setpc_b64 s[30:31]
3586;
3587; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2:
3588; GFX90A:       ; %bb.0:
3589; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3590; GFX90A-NEXT:    ;;#ASMSTART
3591; GFX90A-NEXT:    ; def v[0:5]
3592; GFX90A-NEXT:    ;;#ASMEND
3593; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
3594; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3595; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
3596; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3597; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
3598; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3599; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3600;
3601; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_2_2_2:
3602; GFX940:       ; %bb.0:
3603; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3604; GFX940-NEXT:    ;;#ASMSTART
3605; GFX940-NEXT:    ; def v[0:5]
3606; GFX940-NEXT:    ;;#ASMEND
3607; GFX940-NEXT:    v_mov_b32_e32 v6, 0
3608; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3609; GFX940-NEXT:    v_mov_b32_e32 v3, v5
3610; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
3611; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
3612; GFX940-NEXT:    s_waitcnt vmcnt(0)
3613; GFX940-NEXT:    s_setpc_b64 s[30:31]
3614  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3615  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
3616  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3617  ret void
3618}
3619
; Single-input shuffle (second operand is poison), mask <1, 2, 2, 2>:
; result = { %vec0[1], %vec0[2], %vec0[2], %vec0[2] }.
; The input is an opaque inline-asm result ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; Result elements 0-1 are %vec0[1] and %vec0[2] in their original order, so the
; expected store at offset 0 uses v[2:5] unchanged; the offset:16 store
; (result elements 2-3) uses a fresh v[6:9] tuple holding two copies of
; %vec0[2].
3620define void @v_shuffle_v4i64_v3i64__1_2_2_2(ptr addrspace(1) inreg %ptr) {
3621; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2:
3622; GFX900:       ; %bb.0:
3623; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3624; GFX900-NEXT:    v_mov_b32_e32 v10, 0
3625; GFX900-NEXT:    ;;#ASMSTART
3626; GFX900-NEXT:    ; def v[0:5]
3627; GFX900-NEXT:    ;;#ASMEND
3628; GFX900-NEXT:    v_mov_b32_e32 v6, v4
3629; GFX900-NEXT:    v_mov_b32_e32 v7, v5
3630; GFX900-NEXT:    v_mov_b32_e32 v8, v4
3631; GFX900-NEXT:    v_mov_b32_e32 v9, v5
3632; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
3633; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
3634; GFX900-NEXT:    s_waitcnt vmcnt(0)
3635; GFX900-NEXT:    s_setpc_b64 s[30:31]
3636;
3637; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2:
3638; GFX90A:       ; %bb.0:
3639; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3640; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
3641; GFX90A-NEXT:    ;;#ASMSTART
3642; GFX90A-NEXT:    ; def v[0:5]
3643; GFX90A-NEXT:    ;;#ASMEND
3644; GFX90A-NEXT:    v_mov_b32_e32 v6, v4
3645; GFX90A-NEXT:    v_mov_b32_e32 v7, v5
3646; GFX90A-NEXT:    v_mov_b32_e32 v8, v4
3647; GFX90A-NEXT:    v_mov_b32_e32 v9, v5
3648; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
3649; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
3650; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3651; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3652;
3653; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_2_2_2:
3654; GFX940:       ; %bb.0:
3655; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3656; GFX940-NEXT:    v_mov_b32_e32 v10, 0
3657; GFX940-NEXT:    ;;#ASMSTART
3658; GFX940-NEXT:    ; def v[0:5]
3659; GFX940-NEXT:    ;;#ASMEND
3660; GFX940-NEXT:    s_nop 0
3661; GFX940-NEXT:    v_mov_b32_e32 v6, v4
3662; GFX940-NEXT:    v_mov_b32_e32 v7, v5
3663; GFX940-NEXT:    v_mov_b32_e32 v8, v4
3664; GFX940-NEXT:    v_mov_b32_e32 v9, v5
3665; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
3666; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
3667; GFX940-NEXT:    s_waitcnt vmcnt(0)
3668; GFX940-NEXT:    s_setpc_b64 s[30:31]
3669  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3670  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
3671  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3672  ret void
3673}
3674
; Single-input shuffle (second operand is poison), mask <2, 2, 2, 2>: a splat
; of %vec0[2] across all four result elements.
; The input is an opaque inline-asm result ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code copies %vec0[2] next to itself once and then uses the same
; v[2:5] tuple for both the offset:16 store and the store at offset 0.
3675define void @v_shuffle_v4i64_v3i64__2_2_2_2(ptr addrspace(1) inreg %ptr) {
3676; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2:
3677; GFX900:       ; %bb.0:
3678; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3679; GFX900-NEXT:    ;;#ASMSTART
3680; GFX900-NEXT:    ; def v[0:5]
3681; GFX900-NEXT:    ;;#ASMEND
3682; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3683; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3684; GFX900-NEXT:    v_mov_b32_e32 v3, v5
3685; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3686; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
3687; GFX900-NEXT:    s_waitcnt vmcnt(0)
3688; GFX900-NEXT:    s_setpc_b64 s[30:31]
3689;
3690; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2:
3691; GFX90A:       ; %bb.0:
3692; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3693; GFX90A-NEXT:    ;;#ASMSTART
3694; GFX90A-NEXT:    ; def v[0:5]
3695; GFX90A-NEXT:    ;;#ASMEND
3696; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
3697; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3698; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
3699; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3700; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
3701; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3702; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3703;
3704; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_2_2_2:
3705; GFX940:       ; %bb.0:
3706; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3707; GFX940-NEXT:    ;;#ASMSTART
3708; GFX940-NEXT:    ; def v[0:5]
3709; GFX940-NEXT:    ;;#ASMEND
3710; GFX940-NEXT:    v_mov_b32_e32 v6, 0
3711; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3712; GFX940-NEXT:    v_mov_b32_e32 v3, v5
3713; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
3714; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
3715; GFX940-NEXT:    s_waitcnt vmcnt(0)
3716; GFX940-NEXT:    s_setpc_b64 s[30:31]
3717  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3718  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
3719  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3720  ret void
3721}
3722
; Single-input shuffle (second operand is poison), mask <3, 2, 2, 2>.
; Index 3 selects element 0 of the poison operand, so result element 0 carries
; no defined value; elements 1-3 are all %vec0[2].
; The input is an opaque inline-asm result ("=v" constraint), and the shuffled
; <4 x i64> is stored through the inreg global pointer %ptr.
; The expected code copies %vec0[2] next to itself once and then uses the same
; v[2:5] tuple for both the offset:16 store and the store at offset 0.
3723define void @v_shuffle_v4i64_v3i64__3_2_2_2(ptr addrspace(1) inreg %ptr) {
3724; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2:
3725; GFX900:       ; %bb.0:
3726; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3727; GFX900-NEXT:    ;;#ASMSTART
3728; GFX900-NEXT:    ; def v[0:5]
3729; GFX900-NEXT:    ;;#ASMEND
3730; GFX900-NEXT:    v_mov_b32_e32 v6, 0
3731; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3732; GFX900-NEXT:    v_mov_b32_e32 v3, v5
3733; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3734; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
3735; GFX900-NEXT:    s_waitcnt vmcnt(0)
3736; GFX900-NEXT:    s_setpc_b64 s[30:31]
3737;
3738; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2:
3739; GFX90A:       ; %bb.0:
3740; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3741; GFX90A-NEXT:    ;;#ASMSTART
3742; GFX90A-NEXT:    ; def v[0:5]
3743; GFX90A-NEXT:    ;;#ASMEND
3744; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
3745; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3746; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
3747; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
3748; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
3749; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3750; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3751;
3752; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_2_2_2:
3753; GFX940:       ; %bb.0:
3754; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3755; GFX940-NEXT:    ;;#ASMSTART
3756; GFX940-NEXT:    ; def v[0:5]
3757; GFX940-NEXT:    ;;#ASMEND
3758; GFX940-NEXT:    v_mov_b32_e32 v6, 0
3759; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3760; GFX940-NEXT:    v_mov_b32_e32 v3, v5
3761; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
3762; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
3763; GFX940-NEXT:    s_waitcnt vmcnt(0)
3764; GFX940-NEXT:    s_setpc_b64 s[30:31]
3765  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3766  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
3767  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3768  ret void
3769}
3770
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <4, 2, 2, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lane 0 is element 1
; of %vec1 and lanes 1-3 are element 2 of %vec0.
3771define void @v_shuffle_v4i64_v3i64__4_2_2_2(ptr addrspace(1) inreg %ptr) {
3772; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2:
3773; GFX900:       ; %bb.0:
3774; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3775; GFX900-NEXT:    ;;#ASMSTART
3776; GFX900-NEXT:    ; def v[0:5]
3777; GFX900-NEXT:    ;;#ASMEND
3778; GFX900-NEXT:    ;;#ASMSTART
3779; GFX900-NEXT:    ; def v[6:11]
3780; GFX900-NEXT:    ;;#ASMEND
3781; GFX900-NEXT:    v_mov_b32_e32 v12, 0
3782; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3783; GFX900-NEXT:    v_mov_b32_e32 v3, v5
3784; GFX900-NEXT:    v_mov_b32_e32 v10, v4
3785; GFX900-NEXT:    v_mov_b32_e32 v11, v5
3786; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3787; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
3788; GFX900-NEXT:    s_waitcnt vmcnt(0)
3789; GFX900-NEXT:    s_setpc_b64 s[30:31]
3790;
3791; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2:
3792; GFX90A:       ; %bb.0:
3793; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3794; GFX90A-NEXT:    ;;#ASMSTART
3795; GFX90A-NEXT:    ; def v[0:5]
3796; GFX90A-NEXT:    ;;#ASMEND
3797; GFX90A-NEXT:    ;;#ASMSTART
3798; GFX90A-NEXT:    ; def v[6:11]
3799; GFX90A-NEXT:    ;;#ASMEND
3800; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
3801; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3802; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
3803; GFX90A-NEXT:    v_mov_b32_e32 v10, v4
3804; GFX90A-NEXT:    v_mov_b32_e32 v11, v5
3805; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3806; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
3807; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3808; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3809;
3810; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_2_2_2:
3811; GFX940:       ; %bb.0:
3812; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3813; GFX940-NEXT:    ;;#ASMSTART
3814; GFX940-NEXT:    ; def v[0:5]
3815; GFX940-NEXT:    ;;#ASMEND
3816; GFX940-NEXT:    ;;#ASMSTART
3817; GFX940-NEXT:    ; def v[6:11]
3818; GFX940-NEXT:    ;;#ASMEND
3819; GFX940-NEXT:    v_mov_b32_e32 v12, 0
3820; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3821; GFX940-NEXT:    v_mov_b32_e32 v3, v5
3822; GFX940-NEXT:    v_mov_b32_e32 v10, v4
3823; GFX940-NEXT:    v_mov_b32_e32 v11, v5
3824; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
3825; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
3826; GFX940-NEXT:    s_waitcnt vmcnt(0)
3827; GFX940-NEXT:    s_setpc_b64 s[30:31]
3828  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3829  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3830  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
3831  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3832  ret void
3833}
3834
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, 2, 2, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lane 0 is element 2
; of %vec1 and lanes 1-3 are element 2 of %vec0.
3835define void @v_shuffle_v4i64_v3i64__5_2_2_2(ptr addrspace(1) inreg %ptr) {
3836; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2:
3837; GFX900:       ; %bb.0:
3838; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3839; GFX900-NEXT:    ;;#ASMSTART
3840; GFX900-NEXT:    ; def v[0:5]
3841; GFX900-NEXT:    ;;#ASMEND
3842; GFX900-NEXT:    v_mov_b32_e32 v12, 0
3843; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3844; GFX900-NEXT:    v_mov_b32_e32 v3, v5
3845; GFX900-NEXT:    ;;#ASMSTART
3846; GFX900-NEXT:    ; def v[6:11]
3847; GFX900-NEXT:    ;;#ASMEND
3848; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3849; GFX900-NEXT:    s_nop 0
3850; GFX900-NEXT:    v_mov_b32_e32 v2, v10
3851; GFX900-NEXT:    v_mov_b32_e32 v3, v11
3852; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
3853; GFX900-NEXT:    s_waitcnt vmcnt(0)
3854; GFX900-NEXT:    s_setpc_b64 s[30:31]
3855;
3856; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2:
3857; GFX90A:       ; %bb.0:
3858; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3859; GFX90A-NEXT:    ;;#ASMSTART
3860; GFX90A-NEXT:    ; def v[0:5]
3861; GFX90A-NEXT:    ;;#ASMEND
3862; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
3863; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3864; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
3865; GFX90A-NEXT:    ;;#ASMSTART
3866; GFX90A-NEXT:    ; def v[6:11]
3867; GFX90A-NEXT:    ;;#ASMEND
3868; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3869; GFX90A-NEXT:    s_nop 0
3870; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
3871; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
3872; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
3873; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3874; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3875;
3876; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_2_2:
3877; GFX940:       ; %bb.0:
3878; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3879; GFX940-NEXT:    ;;#ASMSTART
3880; GFX940-NEXT:    ; def v[0:5]
3881; GFX940-NEXT:    ;;#ASMEND
3882; GFX940-NEXT:    v_mov_b32_e32 v12, 0
3883; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3884; GFX940-NEXT:    v_mov_b32_e32 v3, v5
3885; GFX940-NEXT:    ;;#ASMSTART
3886; GFX940-NEXT:    ; def v[6:11]
3887; GFX940-NEXT:    ;;#ASMEND
3888; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
3889; GFX940-NEXT:    s_nop 1
3890; GFX940-NEXT:    v_mov_b32_e32 v2, v10
3891; GFX940-NEXT:    v_mov_b32_e32 v3, v11
3892; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
3893; GFX940-NEXT:    s_waitcnt vmcnt(0)
3894; GFX940-NEXT:    s_setpc_b64 s[30:31]
3895  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3896  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3897  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
3898  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3899  ret void
3900}
3901
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, poison, 2, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lane 0 is element 2
; of %vec1, lane 1 is unconstrained (poison mask entry), and lanes 2-3 are
; element 2 of %vec0.
3902define void @v_shuffle_v4i64_v3i64__5_u_2_2(ptr addrspace(1) inreg %ptr) {
3903; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2:
3904; GFX900:       ; %bb.0:
3905; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3906; GFX900-NEXT:    ;;#ASMSTART
3907; GFX900-NEXT:    ; def v[0:5]
3908; GFX900-NEXT:    ;;#ASMEND
3909; GFX900-NEXT:    v_mov_b32_e32 v12, 0
3910; GFX900-NEXT:    ;;#ASMSTART
3911; GFX900-NEXT:    ; def v[6:11]
3912; GFX900-NEXT:    ;;#ASMEND
3913; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3914; GFX900-NEXT:    v_mov_b32_e32 v3, v5
3915; GFX900-NEXT:    v_mov_b32_e32 v0, v10
3916; GFX900-NEXT:    v_mov_b32_e32 v1, v11
3917; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3918; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17]
3919; GFX900-NEXT:    s_waitcnt vmcnt(0)
3920; GFX900-NEXT:    s_setpc_b64 s[30:31]
3921;
3922; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2:
3923; GFX90A:       ; %bb.0:
3924; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3925; GFX90A-NEXT:    ;;#ASMSTART
3926; GFX90A-NEXT:    ; def v[0:5]
3927; GFX90A-NEXT:    ;;#ASMEND
3928; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
3929; GFX90A-NEXT:    ;;#ASMSTART
3930; GFX90A-NEXT:    ; def v[6:11]
3931; GFX90A-NEXT:    ;;#ASMEND
3932; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3933; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
3934; GFX90A-NEXT:    v_mov_b32_e32 v0, v10
3935; GFX90A-NEXT:    v_mov_b32_e32 v1, v11
3936; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3937; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17]
3938; GFX90A-NEXT:    s_waitcnt vmcnt(0)
3939; GFX90A-NEXT:    s_setpc_b64 s[30:31]
3940;
3941; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_2_2:
3942; GFX940:       ; %bb.0:
3943; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3944; GFX940-NEXT:    ;;#ASMSTART
3945; GFX940-NEXT:    ; def v[0:5]
3946; GFX940-NEXT:    ;;#ASMEND
3947; GFX940-NEXT:    v_mov_b32_e32 v12, 0
3948; GFX940-NEXT:    ;;#ASMSTART
3949; GFX940-NEXT:    ; def v[6:11]
3950; GFX940-NEXT:    ;;#ASMEND
3951; GFX940-NEXT:    v_mov_b32_e32 v2, v4
3952; GFX940-NEXT:    v_mov_b32_e32 v3, v5
3953; GFX940-NEXT:    v_mov_b32_e32 v0, v10
3954; GFX940-NEXT:    v_mov_b32_e32 v1, v11
3955; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
3956; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1
3957; GFX940-NEXT:    s_waitcnt vmcnt(0)
3958; GFX940-NEXT:    s_setpc_b64 s[30:31]
3959  %vec0 = call <3 x i64> asm "; def $0", "=v"()
3960  %vec1 = call <3 x i64> asm "; def $0", "=v"()
3961  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
3962  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
3963  ret void
3964}
3965
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, 0, 2, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lane 0 is element 2
; of %vec1, lane 1 is element 0 of %vec0, and lanes 2-3 are element 2 of %vec0.
3966define void @v_shuffle_v4i64_v3i64__5_0_2_2(ptr addrspace(1) inreg %ptr) {
3967; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2:
3968; GFX900:       ; %bb.0:
3969; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3970; GFX900-NEXT:    ;;#ASMSTART
3971; GFX900-NEXT:    ; def v[0:5]
3972; GFX900-NEXT:    ;;#ASMEND
3973; GFX900-NEXT:    v_mov_b32_e32 v12, 0
3974; GFX900-NEXT:    v_mov_b32_e32 v2, v4
3975; GFX900-NEXT:    v_mov_b32_e32 v3, v5
3976; GFX900-NEXT:    ;;#ASMSTART
3977; GFX900-NEXT:    ; def v[6:11]
3978; GFX900-NEXT:    ;;#ASMEND
3979; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
3980; GFX900-NEXT:    s_nop 0
3981; GFX900-NEXT:    v_mov_b32_e32 v2, v10
3982; GFX900-NEXT:    v_mov_b32_e32 v3, v11
3983; GFX900-NEXT:    v_mov_b32_e32 v4, v0
3984; GFX900-NEXT:    v_mov_b32_e32 v5, v1
3985; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
3986; GFX900-NEXT:    s_waitcnt vmcnt(0)
3987; GFX900-NEXT:    s_setpc_b64 s[30:31]
3988;
3989; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2:
3990; GFX90A:       ; %bb.0:
3991; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3992; GFX90A-NEXT:    ;;#ASMSTART
3993; GFX90A-NEXT:    ; def v[0:5]
3994; GFX90A-NEXT:    ;;#ASMEND
3995; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
3996; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
3997; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
3998; GFX90A-NEXT:    ;;#ASMSTART
3999; GFX90A-NEXT:    ; def v[6:11]
4000; GFX90A-NEXT:    ;;#ASMEND
4001; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4002; GFX90A-NEXT:    s_nop 0
4003; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
4004; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
4005; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
4006; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
4007; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
4008; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4009; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4010;
4011; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_2_2:
4012; GFX940:       ; %bb.0:
4013; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4014; GFX940-NEXT:    ;;#ASMSTART
4015; GFX940-NEXT:    ; def v[0:5]
4016; GFX940-NEXT:    ;;#ASMEND
4017; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4018; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4019; GFX940-NEXT:    v_mov_b32_e32 v3, v5
4020; GFX940-NEXT:    ;;#ASMSTART
4021; GFX940-NEXT:    ; def v[6:11]
4022; GFX940-NEXT:    ;;#ASMEND
4023; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4024; GFX940-NEXT:    s_nop 1
4025; GFX940-NEXT:    v_mov_b32_e32 v2, v10
4026; GFX940-NEXT:    v_mov_b32_e32 v3, v11
4027; GFX940-NEXT:    v_mov_b32_e32 v4, v0
4028; GFX940-NEXT:    v_mov_b32_e32 v5, v1
4029; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
4030; GFX940-NEXT:    s_waitcnt vmcnt(0)
4031; GFX940-NEXT:    s_setpc_b64 s[30:31]
4032  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4033  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4034  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
4035  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4036  ret void
4037}
4038
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, 1, 2, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lane 0 is element 2
; of %vec1, lane 1 is element 1 of %vec0, and lanes 2-3 are element 2 of %vec0.
4039define void @v_shuffle_v4i64_v3i64__5_1_2_2(ptr addrspace(1) inreg %ptr) {
4040; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2:
4041; GFX900:       ; %bb.0:
4042; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4043; GFX900-NEXT:    ;;#ASMSTART
4044; GFX900-NEXT:    ; def v[0:5]
4045; GFX900-NEXT:    ;;#ASMEND
4046; GFX900-NEXT:    ;;#ASMSTART
4047; GFX900-NEXT:    ; def v[6:11]
4048; GFX900-NEXT:    ;;#ASMEND
4049; GFX900-NEXT:    v_mov_b32_e32 v12, 0
4050; GFX900-NEXT:    v_mov_b32_e32 v6, v4
4051; GFX900-NEXT:    v_mov_b32_e32 v7, v5
4052; GFX900-NEXT:    v_mov_b32_e32 v8, v4
4053; GFX900-NEXT:    v_mov_b32_e32 v9, v5
4054; GFX900-NEXT:    v_mov_b32_e32 v0, v10
4055; GFX900-NEXT:    v_mov_b32_e32 v1, v11
4056; GFX900-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
4057; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17]
4058; GFX900-NEXT:    s_waitcnt vmcnt(0)
4059; GFX900-NEXT:    s_setpc_b64 s[30:31]
4060;
4061; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2:
4062; GFX90A:       ; %bb.0:
4063; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4064; GFX90A-NEXT:    ;;#ASMSTART
4065; GFX90A-NEXT:    ; def v[0:5]
4066; GFX90A-NEXT:    ;;#ASMEND
4067; GFX90A-NEXT:    ;;#ASMSTART
4068; GFX90A-NEXT:    ; def v[6:11]
4069; GFX90A-NEXT:    ;;#ASMEND
4070; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
4071; GFX90A-NEXT:    v_mov_b32_e32 v6, v4
4072; GFX90A-NEXT:    v_mov_b32_e32 v7, v5
4073; GFX90A-NEXT:    v_mov_b32_e32 v8, v4
4074; GFX90A-NEXT:    v_mov_b32_e32 v9, v5
4075; GFX90A-NEXT:    v_mov_b32_e32 v0, v10
4076; GFX90A-NEXT:    v_mov_b32_e32 v1, v11
4077; GFX90A-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
4078; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17]
4079; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4080; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4081;
4082; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_2_2:
4083; GFX940:       ; %bb.0:
4084; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4085; GFX940-NEXT:    ;;#ASMSTART
4086; GFX940-NEXT:    ; def v[0:5]
4087; GFX940-NEXT:    ;;#ASMEND
4088; GFX940-NEXT:    ;;#ASMSTART
4089; GFX940-NEXT:    ; def v[6:11]
4090; GFX940-NEXT:    ;;#ASMEND
4091; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4092; GFX940-NEXT:    v_mov_b32_e32 v6, v4
4093; GFX940-NEXT:    v_mov_b32_e32 v7, v5
4094; GFX940-NEXT:    v_mov_b32_e32 v8, v4
4095; GFX940-NEXT:    v_mov_b32_e32 v9, v5
4096; GFX940-NEXT:    v_mov_b32_e32 v0, v10
4097; GFX940-NEXT:    v_mov_b32_e32 v1, v11
4098; GFX940-NEXT:    global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
4099; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1
4100; GFX940-NEXT:    s_waitcnt vmcnt(0)
4101; GFX940-NEXT:    s_setpc_b64 s[30:31]
4102  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4103  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4104  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
4105  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4106  ret void
4107}
4108
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, 3, 2, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lane 0 is element 2
; of %vec1, lane 1 is element 0 of %vec1, and lanes 2-3 are element 2 of %vec0.
4109define void @v_shuffle_v4i64_v3i64__5_3_2_2(ptr addrspace(1) inreg %ptr) {
4110; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2:
4111; GFX900:       ; %bb.0:
4112; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4113; GFX900-NEXT:    ;;#ASMSTART
4114; GFX900-NEXT:    ; def v[0:5]
4115; GFX900-NEXT:    ;;#ASMEND
4116; GFX900-NEXT:    v_mov_b32_e32 v12, 0
4117; GFX900-NEXT:    v_mov_b32_e32 v2, v4
4118; GFX900-NEXT:    v_mov_b32_e32 v3, v5
4119; GFX900-NEXT:    ;;#ASMSTART
4120; GFX900-NEXT:    ; def v[6:11]
4121; GFX900-NEXT:    ;;#ASMEND
4122; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4123; GFX900-NEXT:    v_mov_b32_e32 v0, v10
4124; GFX900-NEXT:    v_mov_b32_e32 v1, v11
4125; GFX900-NEXT:    v_mov_b32_e32 v2, v6
4126; GFX900-NEXT:    v_mov_b32_e32 v3, v7
4127; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17]
4128; GFX900-NEXT:    s_waitcnt vmcnt(0)
4129; GFX900-NEXT:    s_setpc_b64 s[30:31]
4130;
4131; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2:
4132; GFX90A:       ; %bb.0:
4133; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4134; GFX90A-NEXT:    ;;#ASMSTART
4135; GFX90A-NEXT:    ; def v[0:5]
4136; GFX90A-NEXT:    ;;#ASMEND
4137; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
4138; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4139; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
4140; GFX90A-NEXT:    ;;#ASMSTART
4141; GFX90A-NEXT:    ; def v[6:11]
4142; GFX90A-NEXT:    ;;#ASMEND
4143; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4144; GFX90A-NEXT:    v_mov_b32_e32 v0, v10
4145; GFX90A-NEXT:    v_mov_b32_e32 v1, v11
4146; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
4147; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
4148; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17]
4149; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4150; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4151;
4152; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_2_2:
4153; GFX940:       ; %bb.0:
4154; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4155; GFX940-NEXT:    ;;#ASMSTART
4156; GFX940-NEXT:    ; def v[0:5]
4157; GFX940-NEXT:    ;;#ASMEND
4158; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4159; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4160; GFX940-NEXT:    v_mov_b32_e32 v3, v5
4161; GFX940-NEXT:    ;;#ASMSTART
4162; GFX940-NEXT:    ; def v[6:11]
4163; GFX940-NEXT:    ;;#ASMEND
4164; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4165; GFX940-NEXT:    v_mov_b32_e32 v0, v10
4166; GFX940-NEXT:    v_mov_b32_e32 v1, v11
4167; GFX940-NEXT:    v_mov_b32_e32 v2, v6
4168; GFX940-NEXT:    v_mov_b32_e32 v3, v7
4169; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] sc0 sc1
4170; GFX940-NEXT:    s_waitcnt vmcnt(0)
4171; GFX940-NEXT:    s_setpc_b64 s[30:31]
4172  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4173  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4174  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
4175  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4176  ret void
4177}
4178
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, 4, 2, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lane 0 is element 2
; of %vec1, lane 1 is element 1 of %vec1, and lanes 2-3 are element 2 of %vec0.
4179define void @v_shuffle_v4i64_v3i64__5_4_2_2(ptr addrspace(1) inreg %ptr) {
4180; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2:
4181; GFX900:       ; %bb.0:
4182; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4183; GFX900-NEXT:    ;;#ASMSTART
4184; GFX900-NEXT:    ; def v[0:5]
4185; GFX900-NEXT:    ;;#ASMEND
4186; GFX900-NEXT:    ;;#ASMSTART
4187; GFX900-NEXT:    ; def v[6:11]
4188; GFX900-NEXT:    ;;#ASMEND
4189; GFX900-NEXT:    v_mov_b32_e32 v12, 0
4190; GFX900-NEXT:    v_mov_b32_e32 v2, v4
4191; GFX900-NEXT:    v_mov_b32_e32 v3, v5
4192; GFX900-NEXT:    v_mov_b32_e32 v6, v10
4193; GFX900-NEXT:    v_mov_b32_e32 v7, v11
4194; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4195; GFX900-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17]
4196; GFX900-NEXT:    s_waitcnt vmcnt(0)
4197; GFX900-NEXT:    s_setpc_b64 s[30:31]
4198;
4199; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2:
4200; GFX90A:       ; %bb.0:
4201; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4202; GFX90A-NEXT:    ;;#ASMSTART
4203; GFX90A-NEXT:    ; def v[0:5]
4204; GFX90A-NEXT:    ;;#ASMEND
4205; GFX90A-NEXT:    ;;#ASMSTART
4206; GFX90A-NEXT:    ; def v[6:11]
4207; GFX90A-NEXT:    ;;#ASMEND
4208; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
4209; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4210; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
4211; GFX90A-NEXT:    v_mov_b32_e32 v6, v10
4212; GFX90A-NEXT:    v_mov_b32_e32 v7, v11
4213; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4214; GFX90A-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17]
4215; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4216; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4217;
4218; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_2_2:
4219; GFX940:       ; %bb.0:
4220; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4221; GFX940-NEXT:    ;;#ASMSTART
4222; GFX940-NEXT:    ; def v[0:5]
4223; GFX940-NEXT:    ;;#ASMEND
4224; GFX940-NEXT:    ;;#ASMSTART
4225; GFX940-NEXT:    ; def v[6:11]
4226; GFX940-NEXT:    ;;#ASMEND
4227; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4228; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4229; GFX940-NEXT:    v_mov_b32_e32 v3, v5
4230; GFX940-NEXT:    v_mov_b32_e32 v6, v10
4231; GFX940-NEXT:    v_mov_b32_e32 v7, v11
4232; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4233; GFX940-NEXT:    global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1
4234; GFX940-NEXT:    s_waitcnt vmcnt(0)
4235; GFX940-NEXT:    s_setpc_b64 s[30:31]
4236  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4237  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4238  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
4239  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4240  ret void
4241}
4242
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, 5, 2, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lanes 0-1 are
; element 2 of %vec1 and lanes 2-3 are element 2 of %vec0.
4243define void @v_shuffle_v4i64_v3i64__5_5_2_2(ptr addrspace(1) inreg %ptr) {
4244; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2:
4245; GFX900:       ; %bb.0:
4246; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4247; GFX900-NEXT:    ;;#ASMSTART
4248; GFX900-NEXT:    ; def v[0:5]
4249; GFX900-NEXT:    ;;#ASMEND
4250; GFX900-NEXT:    ;;#ASMSTART
4251; GFX900-NEXT:    ; def v[6:11]
4252; GFX900-NEXT:    ;;#ASMEND
4253; GFX900-NEXT:    v_mov_b32_e32 v12, 0
4254; GFX900-NEXT:    v_mov_b32_e32 v2, v4
4255; GFX900-NEXT:    v_mov_b32_e32 v3, v5
4256; GFX900-NEXT:    v_mov_b32_e32 v8, v10
4257; GFX900-NEXT:    v_mov_b32_e32 v9, v11
4258; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4259; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4260; GFX900-NEXT:    s_waitcnt vmcnt(0)
4261; GFX900-NEXT:    s_setpc_b64 s[30:31]
4262;
4263; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2:
4264; GFX90A:       ; %bb.0:
4265; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4266; GFX90A-NEXT:    ;;#ASMSTART
4267; GFX90A-NEXT:    ; def v[0:5]
4268; GFX90A-NEXT:    ;;#ASMEND
4269; GFX90A-NEXT:    ;;#ASMSTART
4270; GFX90A-NEXT:    ; def v[6:11]
4271; GFX90A-NEXT:    ;;#ASMEND
4272; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
4273; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4274; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
4275; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
4276; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
4277; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4278; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4279; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4280; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4281;
4282; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_2:
4283; GFX940:       ; %bb.0:
4284; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4285; GFX940-NEXT:    ;;#ASMSTART
4286; GFX940-NEXT:    ; def v[0:5]
4287; GFX940-NEXT:    ;;#ASMEND
4288; GFX940-NEXT:    ;;#ASMSTART
4289; GFX940-NEXT:    ; def v[6:11]
4290; GFX940-NEXT:    ;;#ASMEND
4291; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4292; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4293; GFX940-NEXT:    v_mov_b32_e32 v3, v5
4294; GFX940-NEXT:    v_mov_b32_e32 v8, v10
4295; GFX940-NEXT:    v_mov_b32_e32 v9, v11
4296; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4297; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4298; GFX940-NEXT:    s_waitcnt vmcnt(0)
4299; GFX940-NEXT:    s_setpc_b64 s[30:31]
4300  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4301  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4302  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
4303  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4304  ret void
4305}
4306
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, 5, poison, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lanes 0-1 are
; element 2 of %vec1, lane 2 is unconstrained (poison mask entry), and lane 3
; is element 2 of %vec0.
4307define void @v_shuffle_v4i64_v3i64__5_5_u_2(ptr addrspace(1) inreg %ptr) {
4308; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2:
4309; GFX900:       ; %bb.0:
4310; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4311; GFX900-NEXT:    ;;#ASMSTART
4312; GFX900-NEXT:    ; def v[6:11]
4313; GFX900-NEXT:    ;;#ASMEND
4314; GFX900-NEXT:    v_mov_b32_e32 v12, 0
4315; GFX900-NEXT:    v_mov_b32_e32 v8, v10
4316; GFX900-NEXT:    v_mov_b32_e32 v9, v11
4317; GFX900-NEXT:    ;;#ASMSTART
4318; GFX900-NEXT:    ; def v[0:5]
4319; GFX900-NEXT:    ;;#ASMEND
4320; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4321; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4322; GFX900-NEXT:    s_waitcnt vmcnt(0)
4323; GFX900-NEXT:    s_setpc_b64 s[30:31]
4324;
4325; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2:
4326; GFX90A:       ; %bb.0:
4327; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4328; GFX90A-NEXT:    ;;#ASMSTART
4329; GFX90A-NEXT:    ; def v[6:11]
4330; GFX90A-NEXT:    ;;#ASMEND
4331; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
4332; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
4333; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
4334; GFX90A-NEXT:    ;;#ASMSTART
4335; GFX90A-NEXT:    ; def v[0:5]
4336; GFX90A-NEXT:    ;;#ASMEND
4337; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4338; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4339; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4340; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4341;
4342; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_2:
4343; GFX940:       ; %bb.0:
4344; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4345; GFX940-NEXT:    ;;#ASMSTART
4346; GFX940-NEXT:    ; def v[6:11]
4347; GFX940-NEXT:    ;;#ASMEND
4348; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4349; GFX940-NEXT:    v_mov_b32_e32 v8, v10
4350; GFX940-NEXT:    v_mov_b32_e32 v9, v11
4351; GFX940-NEXT:    ;;#ASMSTART
4352; GFX940-NEXT:    ; def v[0:5]
4353; GFX940-NEXT:    ;;#ASMEND
4354; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4355; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4356; GFX940-NEXT:    s_waitcnt vmcnt(0)
4357; GFX940-NEXT:    s_setpc_b64 s[30:31]
4358  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4359  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4360  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
4361  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4362  ret void
4363}
4364
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, 5, 0, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lanes 0-1 are
; element 2 of %vec1, lane 2 is element 0 of %vec0, and lane 3 is element 2 of
; %vec0.
4365define void @v_shuffle_v4i64_v3i64__5_5_0_2(ptr addrspace(1) inreg %ptr) {
4366; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2:
4367; GFX900:       ; %bb.0:
4368; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4369; GFX900-NEXT:    ;;#ASMSTART
4370; GFX900-NEXT:    ; def v[0:5]
4371; GFX900-NEXT:    ;;#ASMEND
4372; GFX900-NEXT:    ;;#ASMSTART
4373; GFX900-NEXT:    ; def v[6:11]
4374; GFX900-NEXT:    ;;#ASMEND
4375; GFX900-NEXT:    v_mov_b32_e32 v12, 0
4376; GFX900-NEXT:    v_mov_b32_e32 v2, v4
4377; GFX900-NEXT:    v_mov_b32_e32 v3, v5
4378; GFX900-NEXT:    v_mov_b32_e32 v8, v10
4379; GFX900-NEXT:    v_mov_b32_e32 v9, v11
4380; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
4381; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4382; GFX900-NEXT:    s_waitcnt vmcnt(0)
4383; GFX900-NEXT:    s_setpc_b64 s[30:31]
4384;
4385; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2:
4386; GFX90A:       ; %bb.0:
4387; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4388; GFX90A-NEXT:    ;;#ASMSTART
4389; GFX90A-NEXT:    ; def v[0:5]
4390; GFX90A-NEXT:    ;;#ASMEND
4391; GFX90A-NEXT:    ;;#ASMSTART
4392; GFX90A-NEXT:    ; def v[6:11]
4393; GFX90A-NEXT:    ;;#ASMEND
4394; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
4395; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4396; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
4397; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
4398; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
4399; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
4400; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4401; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4402; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4403;
4404; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_2:
4405; GFX940:       ; %bb.0:
4406; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4407; GFX940-NEXT:    ;;#ASMSTART
4408; GFX940-NEXT:    ; def v[0:5]
4409; GFX940-NEXT:    ;;#ASMEND
4410; GFX940-NEXT:    ;;#ASMSTART
4411; GFX940-NEXT:    ; def v[6:11]
4412; GFX940-NEXT:    ;;#ASMEND
4413; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4414; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4415; GFX940-NEXT:    v_mov_b32_e32 v3, v5
4416; GFX940-NEXT:    v_mov_b32_e32 v8, v10
4417; GFX940-NEXT:    v_mov_b32_e32 v9, v11
4418; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
4419; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4420; GFX940-NEXT:    s_waitcnt vmcnt(0)
4421; GFX940-NEXT:    s_setpc_b64 s[30:31]
4422  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4423  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4424  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
4425  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4426  ret void
4427}
4428
; Shuffle two inline-asm-defined <3 x i64> values into a <4 x i64> using mask
; <5, 5, 1, 2>, then store the result to %ptr.
; Mask indices 0-2 address %vec0 and 3-5 address %vec1, so lanes 0-1 are
; element 2 of %vec1 and lanes 2-3 are elements 1 and 2 of %vec0. Those two
; elements are already adjacent, so the expected output stores v[2:5] to the
; upper half without copying.
4429define void @v_shuffle_v4i64_v3i64__5_5_1_2(ptr addrspace(1) inreg %ptr) {
4430; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2:
4431; GFX900:       ; %bb.0:
4432; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4433; GFX900-NEXT:    ;;#ASMSTART
4434; GFX900-NEXT:    ; def v[6:11]
4435; GFX900-NEXT:    ;;#ASMEND
4436; GFX900-NEXT:    v_mov_b32_e32 v12, 0
4437; GFX900-NEXT:    v_mov_b32_e32 v8, v10
4438; GFX900-NEXT:    v_mov_b32_e32 v9, v11
4439; GFX900-NEXT:    ;;#ASMSTART
4440; GFX900-NEXT:    ; def v[0:5]
4441; GFX900-NEXT:    ;;#ASMEND
4442; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4443; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4444; GFX900-NEXT:    s_waitcnt vmcnt(0)
4445; GFX900-NEXT:    s_setpc_b64 s[30:31]
4446;
4447; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2:
4448; GFX90A:       ; %bb.0:
4449; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4450; GFX90A-NEXT:    ;;#ASMSTART
4451; GFX90A-NEXT:    ; def v[6:11]
4452; GFX90A-NEXT:    ;;#ASMEND
4453; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
4454; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
4455; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
4456; GFX90A-NEXT:    ;;#ASMSTART
4457; GFX90A-NEXT:    ; def v[0:5]
4458; GFX90A-NEXT:    ;;#ASMEND
4459; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4460; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4461; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4462; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4463;
4464; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_2:
4465; GFX940:       ; %bb.0:
4466; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4467; GFX940-NEXT:    ;;#ASMSTART
4468; GFX940-NEXT:    ; def v[6:11]
4469; GFX940-NEXT:    ;;#ASMEND
4470; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4471; GFX940-NEXT:    v_mov_b32_e32 v8, v10
4472; GFX940-NEXT:    v_mov_b32_e32 v9, v11
4473; GFX940-NEXT:    ;;#ASMSTART
4474; GFX940-NEXT:    ; def v[0:5]
4475; GFX940-NEXT:    ;;#ASMEND
4476; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4477; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4478; GFX940-NEXT:    s_waitcnt vmcnt(0)
4479; GFX940-NEXT:    s_setpc_b64 s[30:31]
4480  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4481  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4482  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
4483  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4484  ret void
4485}
4486
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <5, 5, 3, 2>; the <4 x i64> result is stored through the inreg global
; pointer %ptr. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so the
; result is { %vec1[2], %vec1[2], %vec1[0], %vec0[2] }.
; Expected code: two global_store_dwordx4; the one at offset:16 carries result
; elements 2-3 and the one without an offset carries elements 0-1. The pointer
; is s[16:17] on gfx900/gfx90a and s[0:1] (stores tagged sc0 sc1) on gfx940.
; NOTE(review): the s_nop entries look like hazard padding (each precedes a
; v_mov touching registers of the previous store or asm def) - confirm.
4487define void @v_shuffle_v4i64_v3i64__5_5_3_2(ptr addrspace(1) inreg %ptr) {
4488; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2:
4489; GFX900:       ; %bb.0:
4490; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4491; GFX900-NEXT:    ;;#ASMSTART
4492; GFX900-NEXT:    ; def v[6:11]
4493; GFX900-NEXT:    ;;#ASMEND
4494; GFX900-NEXT:    v_mov_b32_e32 v12, 0
4495; GFX900-NEXT:    ;;#ASMSTART
4496; GFX900-NEXT:    ; def v[0:5]
4497; GFX900-NEXT:    ;;#ASMEND
4498; GFX900-NEXT:    v_mov_b32_e32 v8, v4
4499; GFX900-NEXT:    v_mov_b32_e32 v9, v5
4500; GFX900-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
4501; GFX900-NEXT:    s_nop 0
4502; GFX900-NEXT:    v_mov_b32_e32 v8, v10
4503; GFX900-NEXT:    v_mov_b32_e32 v9, v11
4504; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4505; GFX900-NEXT:    s_waitcnt vmcnt(0)
4506; GFX900-NEXT:    s_setpc_b64 s[30:31]
4507;
4508; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2:
4509; GFX90A:       ; %bb.0:
4510; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4511; GFX90A-NEXT:    ;;#ASMSTART
4512; GFX90A-NEXT:    ; def v[6:11]
4513; GFX90A-NEXT:    ;;#ASMEND
4514; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
4515; GFX90A-NEXT:    ;;#ASMSTART
4516; GFX90A-NEXT:    ; def v[0:5]
4517; GFX90A-NEXT:    ;;#ASMEND
4518; GFX90A-NEXT:    v_mov_b32_e32 v8, v4
4519; GFX90A-NEXT:    v_mov_b32_e32 v9, v5
4520; GFX90A-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
4521; GFX90A-NEXT:    s_nop 0
4522; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
4523; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
4524; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4525; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4526; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4527;
4528; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_2:
4529; GFX940:       ; %bb.0:
4530; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4531; GFX940-NEXT:    ;;#ASMSTART
4532; GFX940-NEXT:    ; def v[6:11]
4533; GFX940-NEXT:    ;;#ASMEND
4534; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4535; GFX940-NEXT:    ;;#ASMSTART
4536; GFX940-NEXT:    ; def v[0:5]
4537; GFX940-NEXT:    ;;#ASMEND
4538; GFX940-NEXT:    s_nop 0
4539; GFX940-NEXT:    v_mov_b32_e32 v8, v4
4540; GFX940-NEXT:    v_mov_b32_e32 v9, v5
4541; GFX940-NEXT:    global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
4542; GFX940-NEXT:    s_nop 1
4543; GFX940-NEXT:    v_mov_b32_e32 v8, v10
4544; GFX940-NEXT:    v_mov_b32_e32 v9, v11
4545; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4546; GFX940-NEXT:    s_waitcnt vmcnt(0)
4547; GFX940-NEXT:    s_setpc_b64 s[30:31]
4548  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4549  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4550  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
4551  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4552  ret void
4553}
4554
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <5, 5, 4, 2>; the <4 x i64> result is stored through the inreg global
; pointer %ptr. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so the
; result is { %vec1[2], %vec1[2], %vec1[1], %vec0[2] }.
; Expected code: the asm defs land in v[0:5] and v[6:11]; four v_mov copies
; assemble the two 128-bit halves, then two global_store_dwordx4 follow
; (offset:16 carries result elements 2-3, no offset carries elements 0-1).
; The pointer is s[16:17] on gfx900/gfx90a and s[0:1] (stores tagged sc0 sc1)
; on gfx940; the instruction sequence is otherwise the same on all three.
4555define void @v_shuffle_v4i64_v3i64__5_5_4_2(ptr addrspace(1) inreg %ptr) {
4556; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2:
4557; GFX900:       ; %bb.0:
4558; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4559; GFX900-NEXT:    ;;#ASMSTART
4560; GFX900-NEXT:    ; def v[0:5]
4561; GFX900-NEXT:    ;;#ASMEND
4562; GFX900-NEXT:    ;;#ASMSTART
4563; GFX900-NEXT:    ; def v[6:11]
4564; GFX900-NEXT:    ;;#ASMEND
4565; GFX900-NEXT:    v_mov_b32_e32 v12, 0
4566; GFX900-NEXT:    v_mov_b32_e32 v2, v8
4567; GFX900-NEXT:    v_mov_b32_e32 v3, v9
4568; GFX900-NEXT:    v_mov_b32_e32 v8, v10
4569; GFX900-NEXT:    v_mov_b32_e32 v9, v11
4570; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4571; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4572; GFX900-NEXT:    s_waitcnt vmcnt(0)
4573; GFX900-NEXT:    s_setpc_b64 s[30:31]
4574;
4575; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2:
4576; GFX90A:       ; %bb.0:
4577; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4578; GFX90A-NEXT:    ;;#ASMSTART
4579; GFX90A-NEXT:    ; def v[0:5]
4580; GFX90A-NEXT:    ;;#ASMEND
4581; GFX90A-NEXT:    ;;#ASMSTART
4582; GFX90A-NEXT:    ; def v[6:11]
4583; GFX90A-NEXT:    ;;#ASMEND
4584; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
4585; GFX90A-NEXT:    v_mov_b32_e32 v2, v8
4586; GFX90A-NEXT:    v_mov_b32_e32 v3, v9
4587; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
4588; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
4589; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17] offset:16
4590; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
4591; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4592; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4593;
4594; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_2:
4595; GFX940:       ; %bb.0:
4596; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4597; GFX940-NEXT:    ;;#ASMSTART
4598; GFX940-NEXT:    ; def v[0:5]
4599; GFX940-NEXT:    ;;#ASMEND
4600; GFX940-NEXT:    ;;#ASMSTART
4601; GFX940-NEXT:    ; def v[6:11]
4602; GFX940-NEXT:    ;;#ASMEND
4603; GFX940-NEXT:    v_mov_b32_e32 v12, 0
4604; GFX940-NEXT:    v_mov_b32_e32 v2, v8
4605; GFX940-NEXT:    v_mov_b32_e32 v3, v9
4606; GFX940-NEXT:    v_mov_b32_e32 v8, v10
4607; GFX940-NEXT:    v_mov_b32_e32 v9, v11
4608; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] offset:16 sc0 sc1
4609; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
4610; GFX940-NEXT:    s_waitcnt vmcnt(0)
4611; GFX940-NEXT:    s_setpc_b64 s[30:31]
4612  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4613  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4614  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
4615  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4616  ret void
4617}
4618
; Test: shufflevector of one asm-defined <3 x i64> vector against a poison
; second operand, with mask <poison, 3, 3, 3>. Index 3 addresses element 0 of
; the poison operand, so no result element reads %vec0.
; Expected code (checked once under the common GFX9 prefix shared by all three
; RUN lines): only the entry s_waitcnt and the s_setpc_b64 return - neither an
; asm def nor a store appears.
4619define void @v_shuffle_v4i64_v3i64__u_3_3_3(ptr addrspace(1) inreg %ptr) {
4620; GFX9-LABEL: v_shuffle_v4i64_v3i64__u_3_3_3:
4621; GFX9:       ; %bb.0:
4622; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4623; GFX9-NEXT:    s_setpc_b64 s[30:31]
4624  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4625  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
4626  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4627  ret void
4628}
4629
; Test: shufflevector of one asm-defined <3 x i64> vector against a poison
; second operand, with mask <0, 3, 3, 3>. Only result element 0 (%vec0[0]) is
; defined; index 3 addresses element 0 of the poison operand.
; Expected code: the asm def lands in v[0:5] and is stored without any copies:
; v[0:3] at offset 0 (v[0:1] is %vec0[0]) and v[4:7] at offset:16, whose
; contents correspond to poison result elements. The pointer is s[16:17] on
; gfx900/gfx90a and s[0:1] (stores tagged sc0 sc1) on gfx940.
4630define void @v_shuffle_v4i64_v3i64__0_3_3_3(ptr addrspace(1) inreg %ptr) {
4631; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3:
4632; GFX900:       ; %bb.0:
4633; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4634; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4635; GFX900-NEXT:    ;;#ASMSTART
4636; GFX900-NEXT:    ; def v[0:5]
4637; GFX900-NEXT:    ;;#ASMEND
4638; GFX900-NEXT:    global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
4639; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4640; GFX900-NEXT:    s_waitcnt vmcnt(0)
4641; GFX900-NEXT:    s_setpc_b64 s[30:31]
4642;
4643; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3:
4644; GFX90A:       ; %bb.0:
4645; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4646; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
4647; GFX90A-NEXT:    ;;#ASMSTART
4648; GFX90A-NEXT:    ; def v[0:5]
4649; GFX90A-NEXT:    ;;#ASMEND
4650; GFX90A-NEXT:    global_store_dwordx4 v6, v[4:7], s[16:17] offset:16
4651; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4652; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4653; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4654;
4655; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_3_3_3:
4656; GFX940:       ; %bb.0:
4657; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4658; GFX940-NEXT:    v_mov_b32_e32 v6, 0
4659; GFX940-NEXT:    ;;#ASMSTART
4660; GFX940-NEXT:    ; def v[0:5]
4661; GFX940-NEXT:    ;;#ASMEND
4662; GFX940-NEXT:    global_store_dwordx4 v6, v[4:7], s[0:1] offset:16 sc0 sc1
4663; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
4664; GFX940-NEXT:    s_waitcnt vmcnt(0)
4665; GFX940-NEXT:    s_setpc_b64 s[30:31]
4666  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4667  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
4668  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4669  ret void
4670}
4671
; Test: shufflevector of one asm-defined <3 x i64> vector against a poison
; second operand, with mask <1, 3, 3, 3>. Only result element 0 (%vec0[1]) is
; defined; index 3 addresses element 0 of the poison operand.
; Expected code: the asm def lands in v[0:5] and a single global_store_dwordx4
; writes v[2:5] at offset 0 (v[2:3] is %vec0[1]); no offset:16 store is
; expected. The pointer is s[16:17] on gfx900/gfx90a and s[0:1] (store tagged
; sc0 sc1) on gfx940.
4672define void @v_shuffle_v4i64_v3i64__1_3_3_3(ptr addrspace(1) inreg %ptr) {
4673; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3:
4674; GFX900:       ; %bb.0:
4675; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4676; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4677; GFX900-NEXT:    ;;#ASMSTART
4678; GFX900-NEXT:    ; def v[0:5]
4679; GFX900-NEXT:    ;;#ASMEND
4680; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
4681; GFX900-NEXT:    s_waitcnt vmcnt(0)
4682; GFX900-NEXT:    s_setpc_b64 s[30:31]
4683;
4684; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3:
4685; GFX90A:       ; %bb.0:
4686; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4687; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
4688; GFX90A-NEXT:    ;;#ASMSTART
4689; GFX90A-NEXT:    ; def v[0:5]
4690; GFX90A-NEXT:    ;;#ASMEND
4691; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
4692; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4693; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4694;
4695; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_3_3_3:
4696; GFX940:       ; %bb.0:
4697; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4698; GFX940-NEXT:    v_mov_b32_e32 v6, 0
4699; GFX940-NEXT:    ;;#ASMSTART
4700; GFX940-NEXT:    ; def v[0:5]
4701; GFX940-NEXT:    ;;#ASMEND
4702; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
4703; GFX940-NEXT:    s_waitcnt vmcnt(0)
4704; GFX940-NEXT:    s_setpc_b64 s[30:31]
4705  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4706  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
4707  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4708  ret void
4709}
4710
; Test: shufflevector of one asm-defined <3 x i64> vector against a poison
; second operand, with mask <2, 3, 3, 3>. Only result element 0 (%vec0[2]) is
; defined; index 3 addresses element 0 of the poison operand.
; Expected code: the asm def lands in v[0:5]; v[4:5] (%vec0[2]) is copied into
; v[0:1] and a single global_store_dwordx4 writes v[0:3] at offset 0. No
; offset:16 store is expected. The pointer is s[16:17] on gfx900/gfx90a and
; s[0:1] (store tagged sc0 sc1) on gfx940.
4711define void @v_shuffle_v4i64_v3i64__2_3_3_3(ptr addrspace(1) inreg %ptr) {
4712; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
4713; GFX900:       ; %bb.0:
4714; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4715; GFX900-NEXT:    ;;#ASMSTART
4716; GFX900-NEXT:    ; def v[0:5]
4717; GFX900-NEXT:    ;;#ASMEND
4718; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4719; GFX900-NEXT:    v_mov_b32_e32 v0, v4
4720; GFX900-NEXT:    v_mov_b32_e32 v1, v5
4721; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4722; GFX900-NEXT:    s_waitcnt vmcnt(0)
4723; GFX900-NEXT:    s_setpc_b64 s[30:31]
4724;
4725; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
4726; GFX90A:       ; %bb.0:
4727; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4728; GFX90A-NEXT:    ;;#ASMSTART
4729; GFX90A-NEXT:    ; def v[0:5]
4730; GFX90A-NEXT:    ;;#ASMEND
4731; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
4732; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
4733; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
4734; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4735; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4736; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4737;
4738; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_3_3_3:
4739; GFX940:       ; %bb.0:
4740; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4741; GFX940-NEXT:    ;;#ASMSTART
4742; GFX940-NEXT:    ; def v[0:5]
4743; GFX940-NEXT:    ;;#ASMEND
4744; GFX940-NEXT:    v_mov_b32_e32 v6, 0
4745; GFX940-NEXT:    v_mov_b32_e32 v0, v4
4746; GFX940-NEXT:    v_mov_b32_e32 v1, v5
4747; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
4748; GFX940-NEXT:    s_waitcnt vmcnt(0)
4749; GFX940-NEXT:    s_setpc_b64 s[30:31]
4750  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4751  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
4752  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4753  ret void
4754}
4755
; Test: shufflevector of one asm-defined <3 x i64> vector against a poison
; second operand, with mask <3, 3, 3, 3>. Every index addresses element 0 of
; the poison operand, so no result element reads %vec0.
; Expected code (checked once under the common GFX9 prefix shared by all three
; RUN lines): only the entry s_waitcnt and the s_setpc_b64 return - neither an
; asm def nor a store appears.
4756define void @v_shuffle_v4i64_v3i64__3_3_3_3(ptr addrspace(1) inreg %ptr) {
4757; GFX9-LABEL: v_shuffle_v4i64_v3i64__3_3_3_3:
4758; GFX9:       ; %bb.0:
4759; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4760; GFX9-NEXT:    s_setpc_b64 s[30:31]
4761  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4762  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
4763  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4764  ret void
4765}
4766
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <4, 3, 3, 3>. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so the
; result is { %vec1[1], %vec1[0], %vec1[0], %vec1[0] }.
; Expected code: only one asm def (v[0:5]) appears - presumably because every
; mask index selects from %vec1, leaving %vec0 unused. v[0:1] is duplicated
; into v[4:5] and v[6:7]; v[4:7] is stored at offset:16 (result elements 2-3)
; and v[2:5] at offset 0 (elements 0-1). The pointer is s[16:17] on
; gfx900/gfx90a and s[0:1] (stores tagged sc0 sc1) on gfx940.
4767define void @v_shuffle_v4i64_v3i64__4_3_3_3(ptr addrspace(1) inreg %ptr) {
4768; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3:
4769; GFX900:       ; %bb.0:
4770; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4771; GFX900-NEXT:    ;;#ASMSTART
4772; GFX900-NEXT:    ; def v[0:5]
4773; GFX900-NEXT:    ;;#ASMEND
4774; GFX900-NEXT:    v_mov_b32_e32 v8, 0
4775; GFX900-NEXT:    v_mov_b32_e32 v4, v0
4776; GFX900-NEXT:    v_mov_b32_e32 v5, v1
4777; GFX900-NEXT:    v_mov_b32_e32 v6, v0
4778; GFX900-NEXT:    v_mov_b32_e32 v7, v1
4779; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
4780; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
4781; GFX900-NEXT:    s_waitcnt vmcnt(0)
4782; GFX900-NEXT:    s_setpc_b64 s[30:31]
4783;
4784; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3:
4785; GFX90A:       ; %bb.0:
4786; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4787; GFX90A-NEXT:    ;;#ASMSTART
4788; GFX90A-NEXT:    ; def v[0:5]
4789; GFX90A-NEXT:    ;;#ASMEND
4790; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
4791; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
4792; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
4793; GFX90A-NEXT:    v_mov_b32_e32 v6, v0
4794; GFX90A-NEXT:    v_mov_b32_e32 v7, v1
4795; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
4796; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
4797; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4798; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4799;
4800; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_3_3_3:
4801; GFX940:       ; %bb.0:
4802; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4803; GFX940-NEXT:    ;;#ASMSTART
4804; GFX940-NEXT:    ; def v[0:5]
4805; GFX940-NEXT:    ;;#ASMEND
4806; GFX940-NEXT:    v_mov_b32_e32 v8, 0
4807; GFX940-NEXT:    v_mov_b32_e32 v4, v0
4808; GFX940-NEXT:    v_mov_b32_e32 v5, v1
4809; GFX940-NEXT:    v_mov_b32_e32 v6, v0
4810; GFX940-NEXT:    v_mov_b32_e32 v7, v1
4811; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
4812; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
4813; GFX940-NEXT:    s_waitcnt vmcnt(0)
4814; GFX940-NEXT:    s_setpc_b64 s[30:31]
4815  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4816  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4817  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
4818  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4819  ret void
4820}
4821
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <5, 3, 3, 3>. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so the
; result is { %vec1[2], %vec1[0], %vec1[0], %vec1[0] }.
; Expected code: only one asm def (v[0:5]) appears - presumably because every
; mask index selects from %vec1, leaving %vec0 unused. v[0:3] is stored at
; offset:16 (result elements 2-3), then v[2:5] is rebuilt and stored at
; offset 0 (elements 0-1). The pointer is s[16:17] on gfx900/gfx90a and s[0:1]
; (stores tagged sc0 sc1) on gfx940.
; NOTE(review): the s_nop after the first store looks like hazard padding
; before v[2:3] (part of that store's data) is overwritten - confirm.
4822define void @v_shuffle_v4i64_v3i64__5_3_3_3(ptr addrspace(1) inreg %ptr) {
4823; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3:
4824; GFX900:       ; %bb.0:
4825; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4826; GFX900-NEXT:    ;;#ASMSTART
4827; GFX900-NEXT:    ; def v[0:5]
4828; GFX900-NEXT:    ;;#ASMEND
4829; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4830; GFX900-NEXT:    v_mov_b32_e32 v2, v0
4831; GFX900-NEXT:    v_mov_b32_e32 v3, v1
4832; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
4833; GFX900-NEXT:    s_nop 0
4834; GFX900-NEXT:    v_mov_b32_e32 v2, v4
4835; GFX900-NEXT:    v_mov_b32_e32 v3, v5
4836; GFX900-NEXT:    v_mov_b32_e32 v4, v0
4837; GFX900-NEXT:    v_mov_b32_e32 v5, v1
4838; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
4839; GFX900-NEXT:    s_waitcnt vmcnt(0)
4840; GFX900-NEXT:    s_setpc_b64 s[30:31]
4841;
4842; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3:
4843; GFX90A:       ; %bb.0:
4844; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4845; GFX90A-NEXT:    ;;#ASMSTART
4846; GFX90A-NEXT:    ; def v[0:5]
4847; GFX90A-NEXT:    ;;#ASMEND
4848; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
4849; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
4850; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
4851; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
4852; GFX90A-NEXT:    s_nop 0
4853; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
4854; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
4855; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
4856; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
4857; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
4858; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4859; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4860;
4861; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_3_3:
4862; GFX940:       ; %bb.0:
4863; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4864; GFX940-NEXT:    ;;#ASMSTART
4865; GFX940-NEXT:    ; def v[0:5]
4866; GFX940-NEXT:    ;;#ASMEND
4867; GFX940-NEXT:    v_mov_b32_e32 v6, 0
4868; GFX940-NEXT:    v_mov_b32_e32 v2, v0
4869; GFX940-NEXT:    v_mov_b32_e32 v3, v1
4870; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
4871; GFX940-NEXT:    s_nop 1
4872; GFX940-NEXT:    v_mov_b32_e32 v2, v4
4873; GFX940-NEXT:    v_mov_b32_e32 v3, v5
4874; GFX940-NEXT:    v_mov_b32_e32 v4, v0
4875; GFX940-NEXT:    v_mov_b32_e32 v5, v1
4876; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
4877; GFX940-NEXT:    s_waitcnt vmcnt(0)
4878; GFX940-NEXT:    s_setpc_b64 s[30:31]
4879  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4880  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4881  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
4882  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4883  ret void
4884}
4885
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <5, poison, 3, 3>. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so
; the result is { %vec1[2], poison, %vec1[0], %vec1[0] }.
; Expected code: only one asm def (v[0:5]) appears - presumably because no
; mask index selects from %vec0. v[0:3] is stored at offset:16 (result
; elements 2-3), then v[4:5] is copied into v[0:1] and v[0:3] is stored again
; at offset 0; v[2:3] is left as-is for the poison element. The pointer is
; s[16:17] on gfx900/gfx90a and s[0:1] (stores tagged sc0 sc1) on gfx940.
; NOTE(review): the s_nop after the first store looks like hazard padding
; before v[0:1] (part of that store's data) is overwritten - confirm.
4886define void @v_shuffle_v4i64_v3i64__5_u_3_3(ptr addrspace(1) inreg %ptr) {
4887; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3:
4888; GFX900:       ; %bb.0:
4889; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4890; GFX900-NEXT:    ;;#ASMSTART
4891; GFX900-NEXT:    ; def v[0:5]
4892; GFX900-NEXT:    ;;#ASMEND
4893; GFX900-NEXT:    v_mov_b32_e32 v6, 0
4894; GFX900-NEXT:    v_mov_b32_e32 v2, v0
4895; GFX900-NEXT:    v_mov_b32_e32 v3, v1
4896; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
4897; GFX900-NEXT:    s_nop 0
4898; GFX900-NEXT:    v_mov_b32_e32 v0, v4
4899; GFX900-NEXT:    v_mov_b32_e32 v1, v5
4900; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4901; GFX900-NEXT:    s_waitcnt vmcnt(0)
4902; GFX900-NEXT:    s_setpc_b64 s[30:31]
4903;
4904; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3:
4905; GFX90A:       ; %bb.0:
4906; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4907; GFX90A-NEXT:    ;;#ASMSTART
4908; GFX90A-NEXT:    ; def v[0:5]
4909; GFX90A-NEXT:    ;;#ASMEND
4910; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
4911; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
4912; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
4913; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
4914; GFX90A-NEXT:    s_nop 0
4915; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
4916; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
4917; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
4918; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4919; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4920;
4921; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_3_3:
4922; GFX940:       ; %bb.0:
4923; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4924; GFX940-NEXT:    ;;#ASMSTART
4925; GFX940-NEXT:    ; def v[0:5]
4926; GFX940-NEXT:    ;;#ASMEND
4927; GFX940-NEXT:    v_mov_b32_e32 v6, 0
4928; GFX940-NEXT:    v_mov_b32_e32 v2, v0
4929; GFX940-NEXT:    v_mov_b32_e32 v3, v1
4930; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
4931; GFX940-NEXT:    s_nop 1
4932; GFX940-NEXT:    v_mov_b32_e32 v0, v4
4933; GFX940-NEXT:    v_mov_b32_e32 v1, v5
4934; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
4935; GFX940-NEXT:    s_waitcnt vmcnt(0)
4936; GFX940-NEXT:    s_setpc_b64 s[30:31]
4937  %vec0 = call <3 x i64> asm "; def $0", "=v"()
4938  %vec1 = call <3 x i64> asm "; def $0", "=v"()
4939  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
4940  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
4941  ret void
4942}
4943
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <5, 0, 3, 3>. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so the
; result is { %vec1[2], %vec0[0], %vec1[0], %vec1[0] }.
; Expected code: the two asm defs use overlapping ranges, v[0:5] then v[2:7];
; only v[0:1] of the first def is read afterwards. v[2:5] is stored at
; offset:16 (result elements 2-3), then rebuilt from v[6:7] and v[0:1] and
; stored at offset 0 (elements 0-1). The pointer is s[16:17] on gfx900/gfx90a
; and s[0:1] (stores tagged sc0 sc1) on gfx940, where the zero-offset v_mov is
; also scheduled between the two asm defs.
; NOTE(review): the s_nop entries look like hazard padding (each precedes a
; v_mov touching registers of the previous store or asm def) - confirm.
4944define void @v_shuffle_v4i64_v3i64__5_0_3_3(ptr addrspace(1) inreg %ptr) {
4945; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3:
4946; GFX900:       ; %bb.0:
4947; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4948; GFX900-NEXT:    ;;#ASMSTART
4949; GFX900-NEXT:    ; def v[0:5]
4950; GFX900-NEXT:    ;;#ASMEND
4951; GFX900-NEXT:    ;;#ASMSTART
4952; GFX900-NEXT:    ; def v[2:7]
4953; GFX900-NEXT:    ;;#ASMEND
4954; GFX900-NEXT:    v_mov_b32_e32 v8, 0
4955; GFX900-NEXT:    v_mov_b32_e32 v4, v2
4956; GFX900-NEXT:    v_mov_b32_e32 v5, v3
4957; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
4958; GFX900-NEXT:    s_nop 0
4959; GFX900-NEXT:    v_mov_b32_e32 v2, v6
4960; GFX900-NEXT:    v_mov_b32_e32 v3, v7
4961; GFX900-NEXT:    v_mov_b32_e32 v4, v0
4962; GFX900-NEXT:    v_mov_b32_e32 v5, v1
4963; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
4964; GFX900-NEXT:    s_waitcnt vmcnt(0)
4965; GFX900-NEXT:    s_setpc_b64 s[30:31]
4966;
4967; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3:
4968; GFX90A:       ; %bb.0:
4969; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4970; GFX90A-NEXT:    ;;#ASMSTART
4971; GFX90A-NEXT:    ; def v[0:5]
4972; GFX90A-NEXT:    ;;#ASMEND
4973; GFX90A-NEXT:    ;;#ASMSTART
4974; GFX90A-NEXT:    ; def v[2:7]
4975; GFX90A-NEXT:    ;;#ASMEND
4976; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
4977; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
4978; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
4979; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
4980; GFX90A-NEXT:    s_nop 0
4981; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
4982; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
4983; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
4984; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
4985; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
4986; GFX90A-NEXT:    s_waitcnt vmcnt(0)
4987; GFX90A-NEXT:    s_setpc_b64 s[30:31]
4988;
4989; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_3_3:
4990; GFX940:       ; %bb.0:
4991; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4992; GFX940-NEXT:    ;;#ASMSTART
4993; GFX940-NEXT:    ; def v[0:5]
4994; GFX940-NEXT:    ;;#ASMEND
4995; GFX940-NEXT:    v_mov_b32_e32 v8, 0
4996; GFX940-NEXT:    ;;#ASMSTART
4997; GFX940-NEXT:    ; def v[2:7]
4998; GFX940-NEXT:    ;;#ASMEND
4999; GFX940-NEXT:    s_nop 0
5000; GFX940-NEXT:    v_mov_b32_e32 v4, v2
5001; GFX940-NEXT:    v_mov_b32_e32 v5, v3
5002; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
5003; GFX940-NEXT:    s_nop 1
5004; GFX940-NEXT:    v_mov_b32_e32 v2, v6
5005; GFX940-NEXT:    v_mov_b32_e32 v3, v7
5006; GFX940-NEXT:    v_mov_b32_e32 v4, v0
5007; GFX940-NEXT:    v_mov_b32_e32 v5, v1
5008; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
5009; GFX940-NEXT:    s_waitcnt vmcnt(0)
5010; GFX940-NEXT:    s_setpc_b64 s[30:31]
5011  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5012  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5013  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
5014  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5015  ret void
5016}
5017
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <5, 1, 3, 3>. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so the
; result is { %vec1[2], %vec0[1], %vec1[0], %vec1[0] }.
; Expected code: the two asm defs use overlapping ranges, v[0:5] then v[4:9];
; only v[2:3] of the first def is read afterwards. v[4:7] is stored at
; offset:16 (result elements 2-3) and v[0:3] at offset 0 (elements 0-1, with
; v[8:9] copied into v[0:1]). The pointer is s[16:17] on gfx900/gfx90a and
; s[0:1] (stores tagged sc0 sc1) on gfx940, where the zero-offset v_mov is
; also scheduled between the two asm defs.
; NOTE(review): the gfx940-only s_nop after the second asm def looks like
; hazard padding before the v_mov that reads it - confirm.
5018define void @v_shuffle_v4i64_v3i64__5_1_3_3(ptr addrspace(1) inreg %ptr) {
5019; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3:
5020; GFX900:       ; %bb.0:
5021; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5022; GFX900-NEXT:    ;;#ASMSTART
5023; GFX900-NEXT:    ; def v[0:5]
5024; GFX900-NEXT:    ;;#ASMEND
5025; GFX900-NEXT:    ;;#ASMSTART
5026; GFX900-NEXT:    ; def v[4:9]
5027; GFX900-NEXT:    ;;#ASMEND
5028; GFX900-NEXT:    v_mov_b32_e32 v10, 0
5029; GFX900-NEXT:    v_mov_b32_e32 v6, v4
5030; GFX900-NEXT:    v_mov_b32_e32 v7, v5
5031; GFX900-NEXT:    v_mov_b32_e32 v0, v8
5032; GFX900-NEXT:    v_mov_b32_e32 v1, v9
5033; GFX900-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
5034; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
5035; GFX900-NEXT:    s_waitcnt vmcnt(0)
5036; GFX900-NEXT:    s_setpc_b64 s[30:31]
5037;
5038; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3:
5039; GFX90A:       ; %bb.0:
5040; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5041; GFX90A-NEXT:    ;;#ASMSTART
5042; GFX90A-NEXT:    ; def v[0:5]
5043; GFX90A-NEXT:    ;;#ASMEND
5044; GFX90A-NEXT:    ;;#ASMSTART
5045; GFX90A-NEXT:    ; def v[4:9]
5046; GFX90A-NEXT:    ;;#ASMEND
5047; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
5048; GFX90A-NEXT:    v_mov_b32_e32 v6, v4
5049; GFX90A-NEXT:    v_mov_b32_e32 v7, v5
5050; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
5051; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
5052; GFX90A-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
5053; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
5054; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5055; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5056;
5057; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_3_3:
5058; GFX940:       ; %bb.0:
5059; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5060; GFX940-NEXT:    ;;#ASMSTART
5061; GFX940-NEXT:    ; def v[0:5]
5062; GFX940-NEXT:    ;;#ASMEND
5063; GFX940-NEXT:    v_mov_b32_e32 v10, 0
5064; GFX940-NEXT:    ;;#ASMSTART
5065; GFX940-NEXT:    ; def v[4:9]
5066; GFX940-NEXT:    ;;#ASMEND
5067; GFX940-NEXT:    s_nop 0
5068; GFX940-NEXT:    v_mov_b32_e32 v6, v4
5069; GFX940-NEXT:    v_mov_b32_e32 v7, v5
5070; GFX940-NEXT:    v_mov_b32_e32 v0, v8
5071; GFX940-NEXT:    v_mov_b32_e32 v1, v9
5072; GFX940-NEXT:    global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1
5073; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
5074; GFX940-NEXT:    s_waitcnt vmcnt(0)
5075; GFX940-NEXT:    s_setpc_b64 s[30:31]
5076  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5077  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5078  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
5079  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5080  ret void
5081}
5082
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <5, 2, 3, 3>. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so the
; result is { %vec1[2], %vec0[2], %vec1[0], %vec1[0] }.
; Expected code: the asm defs land in v[0:5] and v[6:11]. v[6:7] is duplicated
; into v[8:9] and v[6:9] is stored at offset:16 (result elements 2-3);
; v[10:11] is copied into v[2:3] and v[2:5] is stored at offset 0 (elements
; 0-1). The pointer is s[16:17] on gfx900/gfx90a and s[0:1] (stores tagged
; sc0 sc1) on gfx940; the sequence is otherwise the same on all three.
5083define void @v_shuffle_v4i64_v3i64__5_2_3_3(ptr addrspace(1) inreg %ptr) {
5084; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3:
5085; GFX900:       ; %bb.0:
5086; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5087; GFX900-NEXT:    ;;#ASMSTART
5088; GFX900-NEXT:    ; def v[0:5]
5089; GFX900-NEXT:    ;;#ASMEND
5090; GFX900-NEXT:    ;;#ASMSTART
5091; GFX900-NEXT:    ; def v[6:11]
5092; GFX900-NEXT:    ;;#ASMEND
5093; GFX900-NEXT:    v_mov_b32_e32 v12, 0
5094; GFX900-NEXT:    v_mov_b32_e32 v8, v6
5095; GFX900-NEXT:    v_mov_b32_e32 v9, v7
5096; GFX900-NEXT:    v_mov_b32_e32 v2, v10
5097; GFX900-NEXT:    v_mov_b32_e32 v3, v11
5098; GFX900-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
5099; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
5100; GFX900-NEXT:    s_waitcnt vmcnt(0)
5101; GFX900-NEXT:    s_setpc_b64 s[30:31]
5102;
5103; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3:
5104; GFX90A:       ; %bb.0:
5105; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5106; GFX90A-NEXT:    ;;#ASMSTART
5107; GFX90A-NEXT:    ; def v[0:5]
5108; GFX90A-NEXT:    ;;#ASMEND
5109; GFX90A-NEXT:    ;;#ASMSTART
5110; GFX90A-NEXT:    ; def v[6:11]
5111; GFX90A-NEXT:    ;;#ASMEND
5112; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
5113; GFX90A-NEXT:    v_mov_b32_e32 v8, v6
5114; GFX90A-NEXT:    v_mov_b32_e32 v9, v7
5115; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
5116; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
5117; GFX90A-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
5118; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
5119; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5120; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5121;
5122; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_3_3:
5123; GFX940:       ; %bb.0:
5124; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5125; GFX940-NEXT:    ;;#ASMSTART
5126; GFX940-NEXT:    ; def v[0:5]
5127; GFX940-NEXT:    ;;#ASMEND
5128; GFX940-NEXT:    ;;#ASMSTART
5129; GFX940-NEXT:    ; def v[6:11]
5130; GFX940-NEXT:    ;;#ASMEND
5131; GFX940-NEXT:    v_mov_b32_e32 v12, 0
5132; GFX940-NEXT:    v_mov_b32_e32 v8, v6
5133; GFX940-NEXT:    v_mov_b32_e32 v9, v7
5134; GFX940-NEXT:    v_mov_b32_e32 v2, v10
5135; GFX940-NEXT:    v_mov_b32_e32 v3, v11
5136; GFX940-NEXT:    global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
5137; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
5138; GFX940-NEXT:    s_waitcnt vmcnt(0)
5139; GFX940-NEXT:    s_setpc_b64 s[30:31]
5140  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5141  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5142  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
5143  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5144  ret void
5145}
5146
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <5, 4, 3, 3>. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so the
; result is { %vec1[2], %vec1[1], %vec1[0], %vec1[0] }.
; Expected code: only one asm def (v[0:5]) appears - presumably because every
; mask index selects from %vec1, leaving %vec0 unused. v[0:1] is duplicated
; into v[6:7] and v[8:9] and stored at offset:16 (result elements 2-3);
; v[4:5] is then copied into v[0:1] and v[0:3] is stored at offset 0
; (elements 0-1). The pointer is s[16:17] on gfx900/gfx90a and s[0:1] (stores
; tagged sc0 sc1) on gfx940.
5147define void @v_shuffle_v4i64_v3i64__5_4_3_3(ptr addrspace(1) inreg %ptr) {
5148; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3:
5149; GFX900:       ; %bb.0:
5150; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5151; GFX900-NEXT:    ;;#ASMSTART
5152; GFX900-NEXT:    ; def v[0:5]
5153; GFX900-NEXT:    ;;#ASMEND
5154; GFX900-NEXT:    v_mov_b32_e32 v10, 0
5155; GFX900-NEXT:    v_mov_b32_e32 v6, v0
5156; GFX900-NEXT:    v_mov_b32_e32 v7, v1
5157; GFX900-NEXT:    v_mov_b32_e32 v8, v0
5158; GFX900-NEXT:    v_mov_b32_e32 v9, v1
5159; GFX900-NEXT:    v_mov_b32_e32 v0, v4
5160; GFX900-NEXT:    v_mov_b32_e32 v1, v5
5161; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5162; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
5163; GFX900-NEXT:    s_waitcnt vmcnt(0)
5164; GFX900-NEXT:    s_setpc_b64 s[30:31]
5165;
5166; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3:
5167; GFX90A:       ; %bb.0:
5168; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5169; GFX90A-NEXT:    ;;#ASMSTART
5170; GFX90A-NEXT:    ; def v[0:5]
5171; GFX90A-NEXT:    ;;#ASMEND
5172; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
5173; GFX90A-NEXT:    v_mov_b32_e32 v6, v0
5174; GFX90A-NEXT:    v_mov_b32_e32 v7, v1
5175; GFX90A-NEXT:    v_mov_b32_e32 v8, v0
5176; GFX90A-NEXT:    v_mov_b32_e32 v9, v1
5177; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
5178; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
5179; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5180; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
5181; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5182; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5183;
5184; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_3_3:
5185; GFX940:       ; %bb.0:
5186; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5187; GFX940-NEXT:    ;;#ASMSTART
5188; GFX940-NEXT:    ; def v[0:5]
5189; GFX940-NEXT:    ;;#ASMEND
5190; GFX940-NEXT:    v_mov_b32_e32 v10, 0
5191; GFX940-NEXT:    v_mov_b32_e32 v6, v0
5192; GFX940-NEXT:    v_mov_b32_e32 v7, v1
5193; GFX940-NEXT:    v_mov_b32_e32 v8, v0
5194; GFX940-NEXT:    v_mov_b32_e32 v9, v1
5195; GFX940-NEXT:    v_mov_b32_e32 v0, v4
5196; GFX940-NEXT:    v_mov_b32_e32 v1, v5
5197; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
5198; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
5199; GFX940-NEXT:    s_waitcnt vmcnt(0)
5200; GFX940-NEXT:    s_setpc_b64 s[30:31]
5201  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5202  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5203  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
5204  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5205  ret void
5206}
5207
; Test: shufflevector of two asm-defined <3 x i64> vectors with mask
; <5, 5, 3, 3>. Mask indices 0-2 address %vec0 and 3-5 address %vec1, so the
; result is { %vec1[2], %vec1[2], %vec1[0], %vec1[0] }.
; Expected code: only one asm def (v[0:5]) appears - presumably because every
; mask index selects from %vec1, leaving %vec0 unused. v[0:1] is duplicated
; into v[2:3] and v[0:3] is stored at offset:16 (result elements 2-3); v[4:5]
; is then duplicated into v[2:3] and v[2:5] is stored at offset 0 (elements
; 0-1). The pointer is s[16:17] on gfx900/gfx90a and s[0:1] (stores tagged
; sc0 sc1) on gfx940.
; NOTE(review): the s_nop after the first store looks like hazard padding
; before v[2:3] (part of that store's data) is overwritten - confirm.
5208define void @v_shuffle_v4i64_v3i64__5_5_3_3(ptr addrspace(1) inreg %ptr) {
5209; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3:
5210; GFX900:       ; %bb.0:
5211; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5212; GFX900-NEXT:    ;;#ASMSTART
5213; GFX900-NEXT:    ; def v[0:5]
5214; GFX900-NEXT:    ;;#ASMEND
5215; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5216; GFX900-NEXT:    v_mov_b32_e32 v2, v0
5217; GFX900-NEXT:    v_mov_b32_e32 v3, v1
5218; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5219; GFX900-NEXT:    s_nop 0
5220; GFX900-NEXT:    v_mov_b32_e32 v2, v4
5221; GFX900-NEXT:    v_mov_b32_e32 v3, v5
5222; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
5223; GFX900-NEXT:    s_waitcnt vmcnt(0)
5224; GFX900-NEXT:    s_setpc_b64 s[30:31]
5225;
5226; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3:
5227; GFX90A:       ; %bb.0:
5228; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5229; GFX90A-NEXT:    ;;#ASMSTART
5230; GFX90A-NEXT:    ; def v[0:5]
5231; GFX90A-NEXT:    ;;#ASMEND
5232; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
5233; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
5234; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
5235; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5236; GFX90A-NEXT:    s_nop 0
5237; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
5238; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
5239; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
5240; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5241; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5242;
5243; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_3:
5244; GFX940:       ; %bb.0:
5245; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5246; GFX940-NEXT:    ;;#ASMSTART
5247; GFX940-NEXT:    ; def v[0:5]
5248; GFX940-NEXT:    ;;#ASMEND
5249; GFX940-NEXT:    v_mov_b32_e32 v6, 0
5250; GFX940-NEXT:    v_mov_b32_e32 v2, v0
5251; GFX940-NEXT:    v_mov_b32_e32 v3, v1
5252; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
5253; GFX940-NEXT:    s_nop 1
5254; GFX940-NEXT:    v_mov_b32_e32 v2, v4
5255; GFX940-NEXT:    v_mov_b32_e32 v3, v5
5256; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
5257; GFX940-NEXT:    s_waitcnt vmcnt(0)
5258; GFX940-NEXT:    s_setpc_b64 s[30:31]
5259  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5260  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5261  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
5262  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5263  ret void
5264}
5265
; Shuffle mask <5, 5, poison, 3> over two <3 x i64> inputs (mask indices 3-5
; select from %vec1): lanes 0-1 are %vec1[2], lane 2 is poison, lane 3 is
; %vec1[0]. %vec0 is never selected and the expected output contains a single
; "; def" asm block.
; Expected code: v[0:1] is duplicated into v[2:3] and v[0:3] is stored at
; offset:16, so the poison lane simply carries v[0:1]; after an s_nop, v[4:5] is
; copied into v[2:3] and v[2:5] is stored at the base address.
5266define void @v_shuffle_v4i64_v3i64__5_5_u_3(ptr addrspace(1) inreg %ptr) {
5267; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3:
5268; GFX900:       ; %bb.0:
5269; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5270; GFX900-NEXT:    ;;#ASMSTART
5271; GFX900-NEXT:    ; def v[0:5]
5272; GFX900-NEXT:    ;;#ASMEND
5273; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5274; GFX900-NEXT:    v_mov_b32_e32 v2, v0
5275; GFX900-NEXT:    v_mov_b32_e32 v3, v1
5276; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5277; GFX900-NEXT:    s_nop 0
5278; GFX900-NEXT:    v_mov_b32_e32 v2, v4
5279; GFX900-NEXT:    v_mov_b32_e32 v3, v5
5280; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
5281; GFX900-NEXT:    s_waitcnt vmcnt(0)
5282; GFX900-NEXT:    s_setpc_b64 s[30:31]
5283;
5284; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3:
5285; GFX90A:       ; %bb.0:
5286; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5287; GFX90A-NEXT:    ;;#ASMSTART
5288; GFX90A-NEXT:    ; def v[0:5]
5289; GFX90A-NEXT:    ;;#ASMEND
5290; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
5291; GFX90A-NEXT:    v_mov_b32_e32 v2, v0
5292; GFX90A-NEXT:    v_mov_b32_e32 v3, v1
5293; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5294; GFX90A-NEXT:    s_nop 0
5295; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
5296; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
5297; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
5298; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5299; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5300;
5301; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_3:
5302; GFX940:       ; %bb.0:
5303; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5304; GFX940-NEXT:    ;;#ASMSTART
5305; GFX940-NEXT:    ; def v[0:5]
5306; GFX940-NEXT:    ;;#ASMEND
5307; GFX940-NEXT:    v_mov_b32_e32 v6, 0
5308; GFX940-NEXT:    v_mov_b32_e32 v2, v0
5309; GFX940-NEXT:    v_mov_b32_e32 v3, v1
5310; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
5311; GFX940-NEXT:    s_nop 1
5312; GFX940-NEXT:    v_mov_b32_e32 v2, v4
5313; GFX940-NEXT:    v_mov_b32_e32 v3, v5
5314; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
5315; GFX940-NEXT:    s_waitcnt vmcnt(0)
5316; GFX940-NEXT:    s_setpc_b64 s[30:31]
5317  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5318  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5319  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
5320  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5321  ret void
5322}
5323
; Shuffle mask <5, 5, 0, 3> over two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): lanes 0-1 are %vec1[2], lane 2 is %vec0[0],
; lane 3 is %vec1[0].
; Expected code: two "; def" asm blocks with overlapping ranges v[0:5] and
; v[2:7] (presumably legal because only the first element of the first
; definition is still needed -- confirm against the register allocator). v[0:3]
; is stored unchanged at offset:16; v[6:7] is copied into v[4:5] and v[4:7] is
; stored at the base address. On gfx940 the zero offset register is set between
; the asm blocks and the first store is issued before the copies.
5324define void @v_shuffle_v4i64_v3i64__5_5_0_3(ptr addrspace(1) inreg %ptr) {
5325; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3:
5326; GFX900:       ; %bb.0:
5327; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5328; GFX900-NEXT:    ;;#ASMSTART
5329; GFX900-NEXT:    ; def v[0:5]
5330; GFX900-NEXT:    ;;#ASMEND
5331; GFX900-NEXT:    ;;#ASMSTART
5332; GFX900-NEXT:    ; def v[2:7]
5333; GFX900-NEXT:    ;;#ASMEND
5334; GFX900-NEXT:    v_mov_b32_e32 v8, 0
5335; GFX900-NEXT:    v_mov_b32_e32 v4, v6
5336; GFX900-NEXT:    v_mov_b32_e32 v5, v7
5337; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
5338; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
5339; GFX900-NEXT:    s_waitcnt vmcnt(0)
5340; GFX900-NEXT:    s_setpc_b64 s[30:31]
5341;
5342; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3:
5343; GFX90A:       ; %bb.0:
5344; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5345; GFX90A-NEXT:    ;;#ASMSTART
5346; GFX90A-NEXT:    ; def v[0:5]
5347; GFX90A-NEXT:    ;;#ASMEND
5348; GFX90A-NEXT:    ;;#ASMSTART
5349; GFX90A-NEXT:    ; def v[2:7]
5350; GFX90A-NEXT:    ;;#ASMEND
5351; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
5352; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
5353; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
5354; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
5355; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
5356; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5357; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5358;
5359; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_3:
5360; GFX940:       ; %bb.0:
5361; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5362; GFX940-NEXT:    ;;#ASMSTART
5363; GFX940-NEXT:    ; def v[0:5]
5364; GFX940-NEXT:    ;;#ASMEND
5365; GFX940-NEXT:    v_mov_b32_e32 v8, 0
5366; GFX940-NEXT:    ;;#ASMSTART
5367; GFX940-NEXT:    ; def v[2:7]
5368; GFX940-NEXT:    ;;#ASMEND
5369; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
5370; GFX940-NEXT:    v_mov_b32_e32 v4, v6
5371; GFX940-NEXT:    v_mov_b32_e32 v5, v7
5372; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
5373; GFX940-NEXT:    s_waitcnt vmcnt(0)
5374; GFX940-NEXT:    s_setpc_b64 s[30:31]
5375  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5376  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5377  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
5378  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5379  ret void
5380}
5381
; Shuffle mask <5, 5, 1, 3> over two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): lanes 0-1 are %vec1[2], lane 2 is %vec0[1],
; lane 3 is %vec1[0].
; Expected code: two "; def" asm blocks with overlapping ranges v[0:5] and
; v[4:9], so that v[2:5] already holds lanes 2-3 and is stored at offset:16
; without any copies; v[8:9] is copied into v[6:7] and v[6:9] is stored at the
; base address. On gfx940 the zero offset register is set between the asm blocks
; and the first store is issued before the copies.
5382define void @v_shuffle_v4i64_v3i64__5_5_1_3(ptr addrspace(1) inreg %ptr) {
5383; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3:
5384; GFX900:       ; %bb.0:
5385; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5386; GFX900-NEXT:    ;;#ASMSTART
5387; GFX900-NEXT:    ; def v[0:5]
5388; GFX900-NEXT:    ;;#ASMEND
5389; GFX900-NEXT:    ;;#ASMSTART
5390; GFX900-NEXT:    ; def v[4:9]
5391; GFX900-NEXT:    ;;#ASMEND
5392; GFX900-NEXT:    v_mov_b32_e32 v10, 0
5393; GFX900-NEXT:    v_mov_b32_e32 v6, v8
5394; GFX900-NEXT:    v_mov_b32_e32 v7, v9
5395; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
5396; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
5397; GFX900-NEXT:    s_waitcnt vmcnt(0)
5398; GFX900-NEXT:    s_setpc_b64 s[30:31]
5399;
5400; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3:
5401; GFX90A:       ; %bb.0:
5402; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5403; GFX90A-NEXT:    ;;#ASMSTART
5404; GFX90A-NEXT:    ; def v[0:5]
5405; GFX90A-NEXT:    ;;#ASMEND
5406; GFX90A-NEXT:    ;;#ASMSTART
5407; GFX90A-NEXT:    ; def v[4:9]
5408; GFX90A-NEXT:    ;;#ASMEND
5409; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
5410; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
5411; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
5412; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
5413; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
5414; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5415; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5416;
5417; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_3:
5418; GFX940:       ; %bb.0:
5419; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5420; GFX940-NEXT:    ;;#ASMSTART
5421; GFX940-NEXT:    ; def v[0:5]
5422; GFX940-NEXT:    ;;#ASMEND
5423; GFX940-NEXT:    v_mov_b32_e32 v10, 0
5424; GFX940-NEXT:    ;;#ASMSTART
5425; GFX940-NEXT:    ; def v[4:9]
5426; GFX940-NEXT:    ;;#ASMEND
5427; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
5428; GFX940-NEXT:    v_mov_b32_e32 v6, v8
5429; GFX940-NEXT:    v_mov_b32_e32 v7, v9
5430; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
5431; GFX940-NEXT:    s_waitcnt vmcnt(0)
5432; GFX940-NEXT:    s_setpc_b64 s[30:31]
5433  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5434  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5435  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
5436  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5437  ret void
5438}
5439
; Shuffle mask <5, 5, 2, 3> over two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): lanes 0-1 are %vec1[2], lane 2 is %vec0[2],
; lane 3 is %vec1[0].
; Expected code: two "; def" asm blocks with disjoint ranges v[0:5] and v[6:11].
; Lanes 2-3 are assembled in v[0:3] by copying v[4:5] and v[6:7] and stored at
; offset:16; v[10:11] is copied into v[8:9] and v[8:11] is stored at the base
; address. The instruction sequence is the same on all three targets apart from
; the scalar address pair and the sc0/sc1 store modifiers on gfx940.
5440define void @v_shuffle_v4i64_v3i64__5_5_2_3(ptr addrspace(1) inreg %ptr) {
5441; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3:
5442; GFX900:       ; %bb.0:
5443; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5444; GFX900-NEXT:    ;;#ASMSTART
5445; GFX900-NEXT:    ; def v[0:5]
5446; GFX900-NEXT:    ;;#ASMEND
5447; GFX900-NEXT:    ;;#ASMSTART
5448; GFX900-NEXT:    ; def v[6:11]
5449; GFX900-NEXT:    ;;#ASMEND
5450; GFX900-NEXT:    v_mov_b32_e32 v12, 0
5451; GFX900-NEXT:    v_mov_b32_e32 v0, v4
5452; GFX900-NEXT:    v_mov_b32_e32 v1, v5
5453; GFX900-NEXT:    v_mov_b32_e32 v2, v6
5454; GFX900-NEXT:    v_mov_b32_e32 v3, v7
5455; GFX900-NEXT:    v_mov_b32_e32 v8, v10
5456; GFX900-NEXT:    v_mov_b32_e32 v9, v11
5457; GFX900-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
5458; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
5459; GFX900-NEXT:    s_waitcnt vmcnt(0)
5460; GFX900-NEXT:    s_setpc_b64 s[30:31]
5461;
5462; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3:
5463; GFX90A:       ; %bb.0:
5464; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5465; GFX90A-NEXT:    ;;#ASMSTART
5466; GFX90A-NEXT:    ; def v[0:5]
5467; GFX90A-NEXT:    ;;#ASMEND
5468; GFX90A-NEXT:    ;;#ASMSTART
5469; GFX90A-NEXT:    ; def v[6:11]
5470; GFX90A-NEXT:    ;;#ASMEND
5471; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
5472; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
5473; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
5474; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
5475; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
5476; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
5477; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
5478; GFX90A-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:16
5479; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
5480; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5481; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5482;
5483; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_3:
5484; GFX940:       ; %bb.0:
5485; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5486; GFX940-NEXT:    ;;#ASMSTART
5487; GFX940-NEXT:    ; def v[0:5]
5488; GFX940-NEXT:    ;;#ASMEND
5489; GFX940-NEXT:    ;;#ASMSTART
5490; GFX940-NEXT:    ; def v[6:11]
5491; GFX940-NEXT:    ;;#ASMEND
5492; GFX940-NEXT:    v_mov_b32_e32 v12, 0
5493; GFX940-NEXT:    v_mov_b32_e32 v0, v4
5494; GFX940-NEXT:    v_mov_b32_e32 v1, v5
5495; GFX940-NEXT:    v_mov_b32_e32 v2, v6
5496; GFX940-NEXT:    v_mov_b32_e32 v3, v7
5497; GFX940-NEXT:    v_mov_b32_e32 v8, v10
5498; GFX940-NEXT:    v_mov_b32_e32 v9, v11
5499; GFX940-NEXT:    global_store_dwordx4 v12, v[0:3], s[0:1] offset:16 sc0 sc1
5500; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
5501; GFX940-NEXT:    s_waitcnt vmcnt(0)
5502; GFX940-NEXT:    s_setpc_b64 s[30:31]
5503  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5504  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5505  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
5506  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5507  ret void
5508}
5509
; Shuffle mask <5, 5, 4, 3> over two <3 x i64> inputs (mask indices 3-5 select
; from %vec1): lanes 0-1 are %vec1[2], lane 2 is %vec1[1], lane 3 is %vec1[0].
; %vec0 is never selected and the expected output contains a single "; def"
; asm block.
; Expected code: lanes 2-3 are assembled in v[6:9] from v[2:3] and v[0:1] and
; stored at offset:16; v[4:5] is copied into v[2:3] and v[2:5] is stored at the
; base address. All copies are emitted before both stores.
5510define void @v_shuffle_v4i64_v3i64__5_5_4_3(ptr addrspace(1) inreg %ptr) {
5511; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3:
5512; GFX900:       ; %bb.0:
5513; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5514; GFX900-NEXT:    ;;#ASMSTART
5515; GFX900-NEXT:    ; def v[0:5]
5516; GFX900-NEXT:    ;;#ASMEND
5517; GFX900-NEXT:    v_mov_b32_e32 v10, 0
5518; GFX900-NEXT:    v_mov_b32_e32 v6, v2
5519; GFX900-NEXT:    v_mov_b32_e32 v7, v3
5520; GFX900-NEXT:    v_mov_b32_e32 v8, v0
5521; GFX900-NEXT:    v_mov_b32_e32 v9, v1
5522; GFX900-NEXT:    v_mov_b32_e32 v2, v4
5523; GFX900-NEXT:    v_mov_b32_e32 v3, v5
5524; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5525; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
5526; GFX900-NEXT:    s_waitcnt vmcnt(0)
5527; GFX900-NEXT:    s_setpc_b64 s[30:31]
5528;
5529; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3:
5530; GFX90A:       ; %bb.0:
5531; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5532; GFX90A-NEXT:    ;;#ASMSTART
5533; GFX90A-NEXT:    ; def v[0:5]
5534; GFX90A-NEXT:    ;;#ASMEND
5535; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
5536; GFX90A-NEXT:    v_mov_b32_e32 v6, v2
5537; GFX90A-NEXT:    v_mov_b32_e32 v7, v3
5538; GFX90A-NEXT:    v_mov_b32_e32 v8, v0
5539; GFX90A-NEXT:    v_mov_b32_e32 v9, v1
5540; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
5541; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
5542; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5543; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
5544; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5545; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5546;
5547; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_3:
5548; GFX940:       ; %bb.0:
5549; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5550; GFX940-NEXT:    ;;#ASMSTART
5551; GFX940-NEXT:    ; def v[0:5]
5552; GFX940-NEXT:    ;;#ASMEND
5553; GFX940-NEXT:    v_mov_b32_e32 v10, 0
5554; GFX940-NEXT:    v_mov_b32_e32 v6, v2
5555; GFX940-NEXT:    v_mov_b32_e32 v7, v3
5556; GFX940-NEXT:    v_mov_b32_e32 v8, v0
5557; GFX940-NEXT:    v_mov_b32_e32 v9, v1
5558; GFX940-NEXT:    v_mov_b32_e32 v2, v4
5559; GFX940-NEXT:    v_mov_b32_e32 v3, v5
5560; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
5561; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
5562; GFX940-NEXT:    s_waitcnt vmcnt(0)
5563; GFX940-NEXT:    s_setpc_b64 s[30:31]
5564  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5565  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5566  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
5567  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5568  ret void
5569}
5570
; Shuffle mask <poison, 4, 4, 4> over two <3 x i64> inputs (mask indices 3-5
; select from %vec1): lane 0 is poison and lanes 1-3 are all %vec1[1]. %vec0 is
; never selected and the expected output contains a single "; def" asm block.
; Expected code: v[2:3] is copied into v[4:5]; v[2:5] is stored at offset:16 and
; v[0:3] at the base address, so the poison lane simply carries v[0:1].
5571define void @v_shuffle_v4i64_v3i64__u_4_4_4(ptr addrspace(1) inreg %ptr) {
5572; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4:
5573; GFX900:       ; %bb.0:
5574; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5575; GFX900-NEXT:    ;;#ASMSTART
5576; GFX900-NEXT:    ; def v[0:5]
5577; GFX900-NEXT:    ;;#ASMEND
5578; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5579; GFX900-NEXT:    v_mov_b32_e32 v4, v2
5580; GFX900-NEXT:    v_mov_b32_e32 v5, v3
5581; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5582; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5583; GFX900-NEXT:    s_waitcnt vmcnt(0)
5584; GFX900-NEXT:    s_setpc_b64 s[30:31]
5585;
5586; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4:
5587; GFX90A:       ; %bb.0:
5588; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5589; GFX90A-NEXT:    ;;#ASMSTART
5590; GFX90A-NEXT:    ; def v[0:5]
5591; GFX90A-NEXT:    ;;#ASMEND
5592; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
5593; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
5594; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
5595; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5596; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5597; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5598; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5599;
5600; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_4_4_4:
5601; GFX940:       ; %bb.0:
5602; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5603; GFX940-NEXT:    ;;#ASMSTART
5604; GFX940-NEXT:    ; def v[0:5]
5605; GFX940-NEXT:    ;;#ASMEND
5606; GFX940-NEXT:    v_mov_b32_e32 v6, 0
5607; GFX940-NEXT:    v_mov_b32_e32 v4, v2
5608; GFX940-NEXT:    v_mov_b32_e32 v5, v3
5609; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
5610; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
5611; GFX940-NEXT:    s_waitcnt vmcnt(0)
5612; GFX940-NEXT:    s_setpc_b64 s[30:31]
5613  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5614  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5615  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
5616  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5617  ret void
5618}
5619
; Shuffle mask <0, 4, 4, 4> over two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): lane 0 is %vec0[0] and lanes 1-3 are all
; %vec1[1].
; Expected code: two "; def" asm blocks with overlapping ranges v[0:5] and
; v[2:7]. v[4:5] is copied into both v[6:7] and v[2:3]; v[4:7] is stored at
; offset:16 and v[0:3] at the base address. On gfx940 the zero offset register
; is set between the asm blocks and an extra s_nop 0 follows the second asm
; block (presumably a hazard workaround -- confirm against the hazard
; recognizer).
5620define void @v_shuffle_v4i64_v3i64__0_4_4_4(ptr addrspace(1) inreg %ptr) {
5621; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4:
5622; GFX900:       ; %bb.0:
5623; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5624; GFX900-NEXT:    ;;#ASMSTART
5625; GFX900-NEXT:    ; def v[0:5]
5626; GFX900-NEXT:    ;;#ASMEND
5627; GFX900-NEXT:    ;;#ASMSTART
5628; GFX900-NEXT:    ; def v[2:7]
5629; GFX900-NEXT:    ;;#ASMEND
5630; GFX900-NEXT:    v_mov_b32_e32 v8, 0
5631; GFX900-NEXT:    v_mov_b32_e32 v6, v4
5632; GFX900-NEXT:    v_mov_b32_e32 v7, v5
5633; GFX900-NEXT:    v_mov_b32_e32 v2, v4
5634; GFX900-NEXT:    v_mov_b32_e32 v3, v5
5635; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
5636; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17]
5637; GFX900-NEXT:    s_waitcnt vmcnt(0)
5638; GFX900-NEXT:    s_setpc_b64 s[30:31]
5639;
5640; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4:
5641; GFX90A:       ; %bb.0:
5642; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5643; GFX90A-NEXT:    ;;#ASMSTART
5644; GFX90A-NEXT:    ; def v[0:5]
5645; GFX90A-NEXT:    ;;#ASMEND
5646; GFX90A-NEXT:    ;;#ASMSTART
5647; GFX90A-NEXT:    ; def v[2:7]
5648; GFX90A-NEXT:    ;;#ASMEND
5649; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
5650; GFX90A-NEXT:    v_mov_b32_e32 v6, v4
5651; GFX90A-NEXT:    v_mov_b32_e32 v7, v5
5652; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
5653; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
5654; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
5655; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17]
5656; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5657; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5658;
5659; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_4_4_4:
5660; GFX940:       ; %bb.0:
5661; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5662; GFX940-NEXT:    ;;#ASMSTART
5663; GFX940-NEXT:    ; def v[0:5]
5664; GFX940-NEXT:    ;;#ASMEND
5665; GFX940-NEXT:    v_mov_b32_e32 v8, 0
5666; GFX940-NEXT:    ;;#ASMSTART
5667; GFX940-NEXT:    ; def v[2:7]
5668; GFX940-NEXT:    ;;#ASMEND
5669; GFX940-NEXT:    s_nop 0
5670; GFX940-NEXT:    v_mov_b32_e32 v6, v4
5671; GFX940-NEXT:    v_mov_b32_e32 v7, v5
5672; GFX940-NEXT:    v_mov_b32_e32 v2, v4
5673; GFX940-NEXT:    v_mov_b32_e32 v3, v5
5674; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
5675; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
5676; GFX940-NEXT:    s_waitcnt vmcnt(0)
5677; GFX940-NEXT:    s_setpc_b64 s[30:31]
5678  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5679  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5680  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
5681  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5682  ret void
5683}
5684
; Shuffle mask <1, 4, 4, 4> over two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): lane 0 is %vec0[1] and lanes 1-3 are all
; %vec1[1].
; Expected code: two "; def" asm blocks with overlapping ranges v[0:5] and
; v[4:9]. v[6:7] is copied into both v[8:9] and v[4:5]; v[6:9] is stored at
; offset:16 and v[2:5] at the base address. On gfx940 the zero offset register
; is set between the asm blocks and an extra s_nop 0 follows the second asm
; block (presumably a hazard workaround -- confirm against the hazard
; recognizer).
5685define void @v_shuffle_v4i64_v3i64__1_4_4_4(ptr addrspace(1) inreg %ptr) {
5686; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4:
5687; GFX900:       ; %bb.0:
5688; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5689; GFX900-NEXT:    ;;#ASMSTART
5690; GFX900-NEXT:    ; def v[0:5]
5691; GFX900-NEXT:    ;;#ASMEND
5692; GFX900-NEXT:    ;;#ASMSTART
5693; GFX900-NEXT:    ; def v[4:9]
5694; GFX900-NEXT:    ;;#ASMEND
5695; GFX900-NEXT:    v_mov_b32_e32 v10, 0
5696; GFX900-NEXT:    v_mov_b32_e32 v8, v6
5697; GFX900-NEXT:    v_mov_b32_e32 v9, v7
5698; GFX900-NEXT:    v_mov_b32_e32 v4, v6
5699; GFX900-NEXT:    v_mov_b32_e32 v5, v7
5700; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5701; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
5702; GFX900-NEXT:    s_waitcnt vmcnt(0)
5703; GFX900-NEXT:    s_setpc_b64 s[30:31]
5704;
5705; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4:
5706; GFX90A:       ; %bb.0:
5707; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5708; GFX90A-NEXT:    ;;#ASMSTART
5709; GFX90A-NEXT:    ; def v[0:5]
5710; GFX90A-NEXT:    ;;#ASMEND
5711; GFX90A-NEXT:    ;;#ASMSTART
5712; GFX90A-NEXT:    ; def v[4:9]
5713; GFX90A-NEXT:    ;;#ASMEND
5714; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
5715; GFX90A-NEXT:    v_mov_b32_e32 v8, v6
5716; GFX90A-NEXT:    v_mov_b32_e32 v9, v7
5717; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
5718; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
5719; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
5720; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
5721; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5722; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5723;
5724; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_4_4_4:
5725; GFX940:       ; %bb.0:
5726; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5727; GFX940-NEXT:    ;;#ASMSTART
5728; GFX940-NEXT:    ; def v[0:5]
5729; GFX940-NEXT:    ;;#ASMEND
5730; GFX940-NEXT:    v_mov_b32_e32 v10, 0
5731; GFX940-NEXT:    ;;#ASMSTART
5732; GFX940-NEXT:    ; def v[4:9]
5733; GFX940-NEXT:    ;;#ASMEND
5734; GFX940-NEXT:    s_nop 0
5735; GFX940-NEXT:    v_mov_b32_e32 v8, v6
5736; GFX940-NEXT:    v_mov_b32_e32 v9, v7
5737; GFX940-NEXT:    v_mov_b32_e32 v4, v6
5738; GFX940-NEXT:    v_mov_b32_e32 v5, v7
5739; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
5740; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
5741; GFX940-NEXT:    s_waitcnt vmcnt(0)
5742; GFX940-NEXT:    s_setpc_b64 s[30:31]
5743  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5744  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5745  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
5746  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5747  ret void
5748}
5749
; Shuffle mask <2, 4, 4, 4> over two <3 x i64> inputs (mask indices 0-2 select
; from %vec0, 3-5 from %vec1): lane 0 is %vec0[2] and lanes 1-3 are all
; %vec1[1].
; Expected code: two "; def" asm blocks with disjoint ranges, v[6:11] first and
; v[0:5] second, with the zero offset register set between them. Judging by the
; register use, v[0:5] holds %vec0 and v[6:11] holds %vec1, i.e. the asm blocks
; appear in the opposite order from the IR -- TODO confirm. v[8:9] is copied
; into v[10:11] and v[4:5] into v[6:7]; v[8:11] is stored at offset:16 and
; v[6:9] at the base address.
5750define void @v_shuffle_v4i64_v3i64__2_4_4_4(ptr addrspace(1) inreg %ptr) {
5751; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4:
5752; GFX900:       ; %bb.0:
5753; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5754; GFX900-NEXT:    ;;#ASMSTART
5755; GFX900-NEXT:    ; def v[6:11]
5756; GFX900-NEXT:    ;;#ASMEND
5757; GFX900-NEXT:    v_mov_b32_e32 v12, 0
5758; GFX900-NEXT:    ;;#ASMSTART
5759; GFX900-NEXT:    ; def v[0:5]
5760; GFX900-NEXT:    ;;#ASMEND
5761; GFX900-NEXT:    v_mov_b32_e32 v10, v8
5762; GFX900-NEXT:    v_mov_b32_e32 v11, v9
5763; GFX900-NEXT:    v_mov_b32_e32 v6, v4
5764; GFX900-NEXT:    v_mov_b32_e32 v7, v5
5765; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
5766; GFX900-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17]
5767; GFX900-NEXT:    s_waitcnt vmcnt(0)
5768; GFX900-NEXT:    s_setpc_b64 s[30:31]
5769;
5770; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4:
5771; GFX90A:       ; %bb.0:
5772; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5773; GFX90A-NEXT:    ;;#ASMSTART
5774; GFX90A-NEXT:    ; def v[6:11]
5775; GFX90A-NEXT:    ;;#ASMEND
5776; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
5777; GFX90A-NEXT:    ;;#ASMSTART
5778; GFX90A-NEXT:    ; def v[0:5]
5779; GFX90A-NEXT:    ;;#ASMEND
5780; GFX90A-NEXT:    v_mov_b32_e32 v10, v8
5781; GFX90A-NEXT:    v_mov_b32_e32 v11, v9
5782; GFX90A-NEXT:    v_mov_b32_e32 v6, v4
5783; GFX90A-NEXT:    v_mov_b32_e32 v7, v5
5784; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
5785; GFX90A-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17]
5786; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5787; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5788;
5789; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_4_4_4:
5790; GFX940:       ; %bb.0:
5791; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5792; GFX940-NEXT:    ;;#ASMSTART
5793; GFX940-NEXT:    ; def v[6:11]
5794; GFX940-NEXT:    ;;#ASMEND
5795; GFX940-NEXT:    v_mov_b32_e32 v12, 0
5796; GFX940-NEXT:    ;;#ASMSTART
5797; GFX940-NEXT:    ; def v[0:5]
5798; GFX940-NEXT:    ;;#ASMEND
5799; GFX940-NEXT:    v_mov_b32_e32 v10, v8
5800; GFX940-NEXT:    v_mov_b32_e32 v11, v9
5801; GFX940-NEXT:    v_mov_b32_e32 v6, v4
5802; GFX940-NEXT:    v_mov_b32_e32 v7, v5
5803; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1
5804; GFX940-NEXT:    global_store_dwordx4 v12, v[6:9], s[0:1] sc0 sc1
5805; GFX940-NEXT:    s_waitcnt vmcnt(0)
5806; GFX940-NEXT:    s_setpc_b64 s[30:31]
5807  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5808  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5809  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
5810  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5811  ret void
5812}
5813
; Shuffle mask <3, 4, 4, 4> over two <3 x i64> inputs (mask indices 3-5 select
; from %vec1): lane 0 is %vec1[0] and lanes 1-3 are all %vec1[1]. %vec0 is never
; selected and the expected output contains a single "; def" asm block.
; Expected code: v[2:3] is copied into v[4:5]; v[2:5] is stored at offset:16 and
; v[0:3], which already holds lanes 0-1, is stored at the base address.
5814define void @v_shuffle_v4i64_v3i64__3_4_4_4(ptr addrspace(1) inreg %ptr) {
5815; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4:
5816; GFX900:       ; %bb.0:
5817; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5818; GFX900-NEXT:    ;;#ASMSTART
5819; GFX900-NEXT:    ; def v[0:5]
5820; GFX900-NEXT:    ;;#ASMEND
5821; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5822; GFX900-NEXT:    v_mov_b32_e32 v4, v2
5823; GFX900-NEXT:    v_mov_b32_e32 v5, v3
5824; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5825; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5826; GFX900-NEXT:    s_waitcnt vmcnt(0)
5827; GFX900-NEXT:    s_setpc_b64 s[30:31]
5828;
5829; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4:
5830; GFX90A:       ; %bb.0:
5831; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5832; GFX90A-NEXT:    ;;#ASMSTART
5833; GFX90A-NEXT:    ; def v[0:5]
5834; GFX90A-NEXT:    ;;#ASMEND
5835; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
5836; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
5837; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
5838; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5839; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5840; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5841; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5842;
5843; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_4_4_4:
5844; GFX940:       ; %bb.0:
5845; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5846; GFX940-NEXT:    ;;#ASMSTART
5847; GFX940-NEXT:    ; def v[0:5]
5848; GFX940-NEXT:    ;;#ASMEND
5849; GFX940-NEXT:    v_mov_b32_e32 v6, 0
5850; GFX940-NEXT:    v_mov_b32_e32 v4, v2
5851; GFX940-NEXT:    v_mov_b32_e32 v5, v3
5852; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
5853; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
5854; GFX940-NEXT:    s_waitcnt vmcnt(0)
5855; GFX940-NEXT:    s_setpc_b64 s[30:31]
5856  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5857  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5858  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
5859  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5860  ret void
5861}
5862
; Shuffle mask <4, 4, 4, 4> over two <3 x i64> inputs (mask indices 3-5 select
; from %vec1): every result lane is %vec1[1], i.e. a splat of that element.
; %vec0 is never selected and the expected output contains a single "; def" asm
; block.
; Expected code: v[2:3] is copied into v[4:5] and the same v[2:5] range is
; stored twice, once at offset:16 and once at the base address.
5863define void @v_shuffle_v4i64_v3i64__4_4_4_4(ptr addrspace(1) inreg %ptr) {
5864; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4:
5865; GFX900:       ; %bb.0:
5866; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5867; GFX900-NEXT:    ;;#ASMSTART
5868; GFX900-NEXT:    ; def v[0:5]
5869; GFX900-NEXT:    ;;#ASMEND
5870; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5871; GFX900-NEXT:    v_mov_b32_e32 v4, v2
5872; GFX900-NEXT:    v_mov_b32_e32 v5, v3
5873; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5874; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
5875; GFX900-NEXT:    s_waitcnt vmcnt(0)
5876; GFX900-NEXT:    s_setpc_b64 s[30:31]
5877;
5878; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4:
5879; GFX90A:       ; %bb.0:
5880; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5881; GFX90A-NEXT:    ;;#ASMSTART
5882; GFX90A-NEXT:    ; def v[0:5]
5883; GFX90A-NEXT:    ;;#ASMEND
5884; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
5885; GFX90A-NEXT:    v_mov_b32_e32 v4, v2
5886; GFX90A-NEXT:    v_mov_b32_e32 v5, v3
5887; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
5888; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
5889; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5890; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5891;
5892; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_4_4_4:
5893; GFX940:       ; %bb.0:
5894; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5895; GFX940-NEXT:    ;;#ASMSTART
5896; GFX940-NEXT:    ; def v[0:5]
5897; GFX940-NEXT:    ;;#ASMEND
5898; GFX940-NEXT:    v_mov_b32_e32 v6, 0
5899; GFX940-NEXT:    v_mov_b32_e32 v4, v2
5900; GFX940-NEXT:    v_mov_b32_e32 v5, v3
5901; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
5902; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
5903; GFX940-NEXT:    s_waitcnt vmcnt(0)
5904; GFX940-NEXT:    s_setpc_b64 s[30:31]
5905  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5906  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5907  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
5908  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5909  ret void
5910}
5911
; Shuffle mask <5, 4, 4, 4> over two <3 x i64> inputs (mask indices 3-5 select
; from %vec1): lane 0 is %vec1[2] and lanes 1-3 are all %vec1[1]. %vec0 is never
; selected and the expected output contains a single "; def" asm block.
; Expected code: v[2:3] is copied into v[0:1] and v[0:3] is stored at offset:16;
; after an s_nop, v[4:5] is copied into v[0:1] and the same v[0:3] range is
; stored at the base address.
5912define void @v_shuffle_v4i64_v3i64__5_4_4_4(ptr addrspace(1) inreg %ptr) {
5913; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4:
5914; GFX900:       ; %bb.0:
5915; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5916; GFX900-NEXT:    ;;#ASMSTART
5917; GFX900-NEXT:    ; def v[0:5]
5918; GFX900-NEXT:    ;;#ASMEND
5919; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5920; GFX900-NEXT:    v_mov_b32_e32 v0, v2
5921; GFX900-NEXT:    v_mov_b32_e32 v1, v3
5922; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5923; GFX900-NEXT:    s_nop 0
5924; GFX900-NEXT:    v_mov_b32_e32 v0, v4
5925; GFX900-NEXT:    v_mov_b32_e32 v1, v5
5926; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5927; GFX900-NEXT:    s_waitcnt vmcnt(0)
5928; GFX900-NEXT:    s_setpc_b64 s[30:31]
5929;
5930; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4:
5931; GFX90A:       ; %bb.0:
5932; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5933; GFX90A-NEXT:    ;;#ASMSTART
5934; GFX90A-NEXT:    ; def v[0:5]
5935; GFX90A-NEXT:    ;;#ASMEND
5936; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
5937; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
5938; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
5939; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5940; GFX90A-NEXT:    s_nop 0
5941; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
5942; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
5943; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5944; GFX90A-NEXT:    s_waitcnt vmcnt(0)
5945; GFX90A-NEXT:    s_setpc_b64 s[30:31]
5946;
5947; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_4_4:
5948; GFX940:       ; %bb.0:
5949; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5950; GFX940-NEXT:    ;;#ASMSTART
5951; GFX940-NEXT:    ; def v[0:5]
5952; GFX940-NEXT:    ;;#ASMEND
5953; GFX940-NEXT:    v_mov_b32_e32 v6, 0
5954; GFX940-NEXT:    v_mov_b32_e32 v0, v2
5955; GFX940-NEXT:    v_mov_b32_e32 v1, v3
5956; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
5957; GFX940-NEXT:    s_nop 1
5958; GFX940-NEXT:    v_mov_b32_e32 v0, v4
5959; GFX940-NEXT:    v_mov_b32_e32 v1, v5
5960; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
5961; GFX940-NEXT:    s_waitcnt vmcnt(0)
5962; GFX940-NEXT:    s_setpc_b64 s[30:31]
5963  %vec0 = call <3 x i64> asm "; def $0", "=v"()
5964  %vec1 = call <3 x i64> asm "; def $0", "=v"()
5965  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
5966  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
5967  ret void
5968}
5969
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, poison, 4, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 is %vec1[2],
; lane 1 is poison and lanes 2-3 are both %vec1[1]; %vec0 is never selected.
; The expected output shows a single "; def" asm block and two
; global_store_dwordx4 instructions (one at offset:16, one at the base address).
; The check lines are autogenerated (see the NOTE on the first line of the file).
5970define void @v_shuffle_v4i64_v3i64__5_u_4_4(ptr addrspace(1) inreg %ptr) {
5971; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4:
5972; GFX900:       ; %bb.0:
5973; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5974; GFX900-NEXT:    ;;#ASMSTART
5975; GFX900-NEXT:    ; def v[0:5]
5976; GFX900-NEXT:    ;;#ASMEND
5977; GFX900-NEXT:    v_mov_b32_e32 v6, 0
5978; GFX900-NEXT:    v_mov_b32_e32 v0, v2
5979; GFX900-NEXT:    v_mov_b32_e32 v1, v3
5980; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5981; GFX900-NEXT:    s_nop 0
5982; GFX900-NEXT:    v_mov_b32_e32 v0, v4
5983; GFX900-NEXT:    v_mov_b32_e32 v1, v5
5984; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
5985; GFX900-NEXT:    s_waitcnt vmcnt(0)
5986; GFX900-NEXT:    s_setpc_b64 s[30:31]
5987;
5988; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4:
5989; GFX90A:       ; %bb.0:
5990; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5991; GFX90A-NEXT:    ;;#ASMSTART
5992; GFX90A-NEXT:    ; def v[0:5]
5993; GFX90A-NEXT:    ;;#ASMEND
5994; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
5995; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
5996; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
5997; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
5998; GFX90A-NEXT:    s_nop 0
5999; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
6000; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
6001; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
6002; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6003; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6004;
6005; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_4_4:
6006; GFX940:       ; %bb.0:
6007; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6008; GFX940-NEXT:    ;;#ASMSTART
6009; GFX940-NEXT:    ; def v[0:5]
6010; GFX940-NEXT:    ;;#ASMEND
6011; GFX940-NEXT:    v_mov_b32_e32 v6, 0
6012; GFX940-NEXT:    v_mov_b32_e32 v0, v2
6013; GFX940-NEXT:    v_mov_b32_e32 v1, v3
6014; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
6015; GFX940-NEXT:    s_nop 1
6016; GFX940-NEXT:    v_mov_b32_e32 v0, v4
6017; GFX940-NEXT:    v_mov_b32_e32 v1, v5
6018; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
6019; GFX940-NEXT:    s_waitcnt vmcnt(0)
6020; GFX940-NEXT:    s_setpc_b64 s[30:31]
6021  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6022  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6023  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
6024  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6025  ret void
6026}
6027
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 0, 4, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 is %vec1[2],
; lane 1 is %vec0[0] and lanes 2-3 are both %vec1[1].
; Both inputs are selected, and the expected output shows two "; def" asm
; blocks followed by two global_store_dwordx4 instructions.
; The check lines are autogenerated (see the NOTE on the first line of the file).
6028define void @v_shuffle_v4i64_v3i64__5_0_4_4(ptr addrspace(1) inreg %ptr) {
6029; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4:
6030; GFX900:       ; %bb.0:
6031; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6032; GFX900-NEXT:    ;;#ASMSTART
6033; GFX900-NEXT:    ; def v[0:5]
6034; GFX900-NEXT:    ;;#ASMEND
6035; GFX900-NEXT:    ;;#ASMSTART
6036; GFX900-NEXT:    ; def v[2:7]
6037; GFX900-NEXT:    ;;#ASMEND
6038; GFX900-NEXT:    v_mov_b32_e32 v8, 0
6039; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6040; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6041; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
6042; GFX900-NEXT:    s_nop 0
6043; GFX900-NEXT:    v_mov_b32_e32 v2, v6
6044; GFX900-NEXT:    v_mov_b32_e32 v3, v7
6045; GFX900-NEXT:    v_mov_b32_e32 v4, v0
6046; GFX900-NEXT:    v_mov_b32_e32 v5, v1
6047; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
6048; GFX900-NEXT:    s_waitcnt vmcnt(0)
6049; GFX900-NEXT:    s_setpc_b64 s[30:31]
6050;
6051; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4:
6052; GFX90A:       ; %bb.0:
6053; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6054; GFX90A-NEXT:    ;;#ASMSTART
6055; GFX90A-NEXT:    ; def v[0:5]
6056; GFX90A-NEXT:    ;;#ASMEND
6057; GFX90A-NEXT:    ;;#ASMSTART
6058; GFX90A-NEXT:    ; def v[2:7]
6059; GFX90A-NEXT:    ;;#ASMEND
6060; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
6061; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6062; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
6063; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17] offset:16
6064; GFX90A-NEXT:    s_nop 0
6065; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
6066; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
6067; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
6068; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
6069; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
6070; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6071; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6072;
6073; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_4_4:
6074; GFX940:       ; %bb.0:
6075; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6076; GFX940-NEXT:    ;;#ASMSTART
6077; GFX940-NEXT:    ; def v[0:5]
6078; GFX940-NEXT:    ;;#ASMEND
6079; GFX940-NEXT:    v_mov_b32_e32 v8, 0
6080; GFX940-NEXT:    ;;#ASMSTART
6081; GFX940-NEXT:    ; def v[2:7]
6082; GFX940-NEXT:    ;;#ASMEND
6083; GFX940-NEXT:    s_nop 0
6084; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6085; GFX940-NEXT:    v_mov_b32_e32 v3, v5
6086; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] offset:16 sc0 sc1
6087; GFX940-NEXT:    s_nop 1
6088; GFX940-NEXT:    v_mov_b32_e32 v2, v6
6089; GFX940-NEXT:    v_mov_b32_e32 v3, v7
6090; GFX940-NEXT:    v_mov_b32_e32 v4, v0
6091; GFX940-NEXT:    v_mov_b32_e32 v5, v1
6092; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
6093; GFX940-NEXT:    s_waitcnt vmcnt(0)
6094; GFX940-NEXT:    s_setpc_b64 s[30:31]
6095  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6096  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6097  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
6098  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6099  ret void
6100}
6101
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 1, 4, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 is %vec1[2],
; lane 1 is %vec0[1] and lanes 2-3 are both %vec1[1].
; Both inputs are selected, and the expected output shows two "; def" asm
; blocks followed by two global_store_dwordx4 instructions.
; The check lines are autogenerated (see the NOTE on the first line of the file).
6102define void @v_shuffle_v4i64_v3i64__5_1_4_4(ptr addrspace(1) inreg %ptr) {
6103; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4:
6104; GFX900:       ; %bb.0:
6105; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6106; GFX900-NEXT:    ;;#ASMSTART
6107; GFX900-NEXT:    ; def v[0:5]
6108; GFX900-NEXT:    ;;#ASMEND
6109; GFX900-NEXT:    ;;#ASMSTART
6110; GFX900-NEXT:    ; def v[4:9]
6111; GFX900-NEXT:    ;;#ASMEND
6112; GFX900-NEXT:    v_mov_b32_e32 v10, 0
6113; GFX900-NEXT:    v_mov_b32_e32 v4, v6
6114; GFX900-NEXT:    v_mov_b32_e32 v5, v7
6115; GFX900-NEXT:    v_mov_b32_e32 v0, v8
6116; GFX900-NEXT:    v_mov_b32_e32 v1, v9
6117; GFX900-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
6118; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
6119; GFX900-NEXT:    s_waitcnt vmcnt(0)
6120; GFX900-NEXT:    s_setpc_b64 s[30:31]
6121;
6122; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4:
6123; GFX90A:       ; %bb.0:
6124; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6125; GFX90A-NEXT:    ;;#ASMSTART
6126; GFX90A-NEXT:    ; def v[0:5]
6127; GFX90A-NEXT:    ;;#ASMEND
6128; GFX90A-NEXT:    ;;#ASMSTART
6129; GFX90A-NEXT:    ; def v[4:9]
6130; GFX90A-NEXT:    ;;#ASMEND
6131; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
6132; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
6133; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
6134; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
6135; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
6136; GFX90A-NEXT:    global_store_dwordx4 v10, v[4:7], s[16:17] offset:16
6137; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
6138; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6139; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6140;
6141; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_4_4:
6142; GFX940:       ; %bb.0:
6143; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6144; GFX940-NEXT:    ;;#ASMSTART
6145; GFX940-NEXT:    ; def v[0:5]
6146; GFX940-NEXT:    ;;#ASMEND
6147; GFX940-NEXT:    v_mov_b32_e32 v10, 0
6148; GFX940-NEXT:    ;;#ASMSTART
6149; GFX940-NEXT:    ; def v[4:9]
6150; GFX940-NEXT:    ;;#ASMEND
6151; GFX940-NEXT:    s_nop 0
6152; GFX940-NEXT:    v_mov_b32_e32 v4, v6
6153; GFX940-NEXT:    v_mov_b32_e32 v5, v7
6154; GFX940-NEXT:    v_mov_b32_e32 v0, v8
6155; GFX940-NEXT:    v_mov_b32_e32 v1, v9
6156; GFX940-NEXT:    global_store_dwordx4 v10, v[4:7], s[0:1] offset:16 sc0 sc1
6157; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
6158; GFX940-NEXT:    s_waitcnt vmcnt(0)
6159; GFX940-NEXT:    s_setpc_b64 s[30:31]
6160  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6161  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6162  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
6163  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6164  ret void
6165}
6166
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 2, 4, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 is %vec1[2],
; lane 1 is %vec0[2] and lanes 2-3 are both %vec1[1].
; Both inputs are selected, and the expected output shows two "; def" asm
; blocks followed by two global_store_dwordx4 instructions.
; The check lines are autogenerated (see the NOTE on the first line of the file).
6167define void @v_shuffle_v4i64_v3i64__5_2_4_4(ptr addrspace(1) inreg %ptr) {
6168; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4:
6169; GFX900:       ; %bb.0:
6170; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6171; GFX900-NEXT:    ;;#ASMSTART
6172; GFX900-NEXT:    ; def v[0:5]
6173; GFX900-NEXT:    ;;#ASMEND
6174; GFX900-NEXT:    ;;#ASMSTART
6175; GFX900-NEXT:    ; def v[6:11]
6176; GFX900-NEXT:    ;;#ASMEND
6177; GFX900-NEXT:    v_mov_b32_e32 v12, 0
6178; GFX900-NEXT:    v_mov_b32_e32 v6, v8
6179; GFX900-NEXT:    v_mov_b32_e32 v7, v9
6180; GFX900-NEXT:    v_mov_b32_e32 v2, v10
6181; GFX900-NEXT:    v_mov_b32_e32 v3, v11
6182; GFX900-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
6183; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
6184; GFX900-NEXT:    s_waitcnt vmcnt(0)
6185; GFX900-NEXT:    s_setpc_b64 s[30:31]
6186;
6187; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4:
6188; GFX90A:       ; %bb.0:
6189; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6190; GFX90A-NEXT:    ;;#ASMSTART
6191; GFX90A-NEXT:    ; def v[0:5]
6192; GFX90A-NEXT:    ;;#ASMEND
6193; GFX90A-NEXT:    ;;#ASMSTART
6194; GFX90A-NEXT:    ; def v[6:11]
6195; GFX90A-NEXT:    ;;#ASMEND
6196; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
6197; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
6198; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
6199; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
6200; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
6201; GFX90A-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
6202; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
6203; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6204; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6205;
6206; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_4_4:
6207; GFX940:       ; %bb.0:
6208; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6209; GFX940-NEXT:    ;;#ASMSTART
6210; GFX940-NEXT:    ; def v[0:5]
6211; GFX940-NEXT:    ;;#ASMEND
6212; GFX940-NEXT:    ;;#ASMSTART
6213; GFX940-NEXT:    ; def v[6:11]
6214; GFX940-NEXT:    ;;#ASMEND
6215; GFX940-NEXT:    v_mov_b32_e32 v12, 0
6216; GFX940-NEXT:    v_mov_b32_e32 v6, v8
6217; GFX940-NEXT:    v_mov_b32_e32 v7, v9
6218; GFX940-NEXT:    v_mov_b32_e32 v2, v10
6219; GFX940-NEXT:    v_mov_b32_e32 v3, v11
6220; GFX940-NEXT:    global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
6221; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
6222; GFX940-NEXT:    s_waitcnt vmcnt(0)
6223; GFX940-NEXT:    s_setpc_b64 s[30:31]
6224  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6225  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6226  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
6227  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6228  ret void
6229}
6230
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 3, 4, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 is %vec1[2],
; lane 1 is %vec1[0] and lanes 2-3 are both %vec1[1]; %vec0 is never selected.
; The expected output shows a single "; def" asm block and two
; global_store_dwordx4 instructions (one at offset:16, one at the base address).
; The check lines are autogenerated (see the NOTE on the first line of the file).
6231define void @v_shuffle_v4i64_v3i64__5_3_4_4(ptr addrspace(1) inreg %ptr) {
6232; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4:
6233; GFX900:       ; %bb.0:
6234; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6235; GFX900-NEXT:    ;;#ASMSTART
6236; GFX900-NEXT:    ; def v[0:5]
6237; GFX900-NEXT:    ;;#ASMEND
6238; GFX900-NEXT:    v_mov_b32_e32 v10, 0
6239; GFX900-NEXT:    v_mov_b32_e32 v6, v2
6240; GFX900-NEXT:    v_mov_b32_e32 v7, v3
6241; GFX900-NEXT:    v_mov_b32_e32 v8, v2
6242; GFX900-NEXT:    v_mov_b32_e32 v9, v3
6243; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6244; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6245; GFX900-NEXT:    v_mov_b32_e32 v4, v0
6246; GFX900-NEXT:    v_mov_b32_e32 v5, v1
6247; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6248; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
6249; GFX900-NEXT:    s_waitcnt vmcnt(0)
6250; GFX900-NEXT:    s_setpc_b64 s[30:31]
6251;
6252; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4:
6253; GFX90A:       ; %bb.0:
6254; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6255; GFX90A-NEXT:    ;;#ASMSTART
6256; GFX90A-NEXT:    ; def v[0:5]
6257; GFX90A-NEXT:    ;;#ASMEND
6258; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
6259; GFX90A-NEXT:    v_mov_b32_e32 v6, v2
6260; GFX90A-NEXT:    v_mov_b32_e32 v7, v3
6261; GFX90A-NEXT:    v_mov_b32_e32 v8, v2
6262; GFX90A-NEXT:    v_mov_b32_e32 v9, v3
6263; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6264; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
6265; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
6266; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
6267; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6268; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
6269; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6270; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6271;
6272; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_4_4:
6273; GFX940:       ; %bb.0:
6274; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6275; GFX940-NEXT:    ;;#ASMSTART
6276; GFX940-NEXT:    ; def v[0:5]
6277; GFX940-NEXT:    ;;#ASMEND
6278; GFX940-NEXT:    v_mov_b32_e32 v10, 0
6279; GFX940-NEXT:    v_mov_b32_e32 v6, v2
6280; GFX940-NEXT:    v_mov_b32_e32 v7, v3
6281; GFX940-NEXT:    v_mov_b32_e32 v8, v2
6282; GFX940-NEXT:    v_mov_b32_e32 v9, v3
6283; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6284; GFX940-NEXT:    v_mov_b32_e32 v3, v5
6285; GFX940-NEXT:    v_mov_b32_e32 v4, v0
6286; GFX940-NEXT:    v_mov_b32_e32 v5, v1
6287; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
6288; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
6289; GFX940-NEXT:    s_waitcnt vmcnt(0)
6290; GFX940-NEXT:    s_setpc_b64 s[30:31]
6291  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6292  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6293  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
6294  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6295  ret void
6296}
6297
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 5, 4, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 are both
; %vec1[2] and lanes 2-3 are both %vec1[1]; %vec0 is never selected.
; The expected output shows a single "; def" asm block and two
; global_store_dwordx4 instructions (one at offset:16, one at the base address).
; The check lines are autogenerated (see the NOTE on the first line of the file).
6298define void @v_shuffle_v4i64_v3i64__5_5_4_4(ptr addrspace(1) inreg %ptr) {
6299; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4:
6300; GFX900:       ; %bb.0:
6301; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6302; GFX900-NEXT:    ;;#ASMSTART
6303; GFX900-NEXT:    ; def v[0:5]
6304; GFX900-NEXT:    ;;#ASMEND
6305; GFX900-NEXT:    v_mov_b32_e32 v6, 0
6306; GFX900-NEXT:    v_mov_b32_e32 v0, v2
6307; GFX900-NEXT:    v_mov_b32_e32 v1, v3
6308; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6309; GFX900-NEXT:    s_nop 0
6310; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6311; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6312; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
6313; GFX900-NEXT:    s_waitcnt vmcnt(0)
6314; GFX900-NEXT:    s_setpc_b64 s[30:31]
6315;
6316; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4:
6317; GFX90A:       ; %bb.0:
6318; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6319; GFX90A-NEXT:    ;;#ASMSTART
6320; GFX90A-NEXT:    ; def v[0:5]
6321; GFX90A-NEXT:    ;;#ASMEND
6322; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
6323; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
6324; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
6325; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6326; GFX90A-NEXT:    s_nop 0
6327; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6328; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
6329; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
6330; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6331; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6332;
6333; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_4:
6334; GFX940:       ; %bb.0:
6335; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6336; GFX940-NEXT:    ;;#ASMSTART
6337; GFX940-NEXT:    ; def v[0:5]
6338; GFX940-NEXT:    ;;#ASMEND
6339; GFX940-NEXT:    v_mov_b32_e32 v6, 0
6340; GFX940-NEXT:    v_mov_b32_e32 v0, v2
6341; GFX940-NEXT:    v_mov_b32_e32 v1, v3
6342; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
6343; GFX940-NEXT:    s_nop 1
6344; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6345; GFX940-NEXT:    v_mov_b32_e32 v3, v5
6346; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
6347; GFX940-NEXT:    s_waitcnt vmcnt(0)
6348; GFX940-NEXT:    s_setpc_b64 s[30:31]
6349  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6350  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6351  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
6352  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6353  ret void
6354}
6355
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 5, poison, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 are both
; %vec1[2], lane 2 is poison and lane 3 is %vec1[1]; %vec0 is never selected.
; The expected output shows a single "; def" asm block and two
; global_store_dwordx4 instructions (one at offset:16, one at the base address).
; The check lines are autogenerated (see the NOTE on the first line of the file).
6356define void @v_shuffle_v4i64_v3i64__5_5_u_4(ptr addrspace(1) inreg %ptr) {
6357; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4:
6358; GFX900:       ; %bb.0:
6359; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6360; GFX900-NEXT:    v_mov_b32_e32 v6, 0
6361; GFX900-NEXT:    ;;#ASMSTART
6362; GFX900-NEXT:    ; def v[0:5]
6363; GFX900-NEXT:    ;;#ASMEND
6364; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6365; GFX900-NEXT:    s_nop 0
6366; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6367; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6368; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
6369; GFX900-NEXT:    s_waitcnt vmcnt(0)
6370; GFX900-NEXT:    s_setpc_b64 s[30:31]
6371;
6372; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4:
6373; GFX90A:       ; %bb.0:
6374; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6375; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
6376; GFX90A-NEXT:    ;;#ASMSTART
6377; GFX90A-NEXT:    ; def v[0:5]
6378; GFX90A-NEXT:    ;;#ASMEND
6379; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6380; GFX90A-NEXT:    s_nop 0
6381; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6382; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
6383; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
6384; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6385; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6386;
6387; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_4:
6388; GFX940:       ; %bb.0:
6389; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6390; GFX940-NEXT:    v_mov_b32_e32 v6, 0
6391; GFX940-NEXT:    ;;#ASMSTART
6392; GFX940-NEXT:    ; def v[0:5]
6393; GFX940-NEXT:    ;;#ASMEND
6394; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
6395; GFX940-NEXT:    s_nop 1
6396; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6397; GFX940-NEXT:    v_mov_b32_e32 v3, v5
6398; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
6399; GFX940-NEXT:    s_waitcnt vmcnt(0)
6400; GFX940-NEXT:    s_setpc_b64 s[30:31]
6401  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6402  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6403  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
6404  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6405  ret void
6406}
6407
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 5, 0, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 are both
; %vec1[2], lane 2 is %vec0[0] and lane 3 is %vec1[1].
; Both inputs are selected, and the expected output shows two "; def" asm
; blocks followed by two global_store_dwordx4 instructions.
; The check lines are autogenerated (see the NOTE on the first line of the file).
6408define void @v_shuffle_v4i64_v3i64__5_5_0_4(ptr addrspace(1) inreg %ptr) {
6409; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4:
6410; GFX900:       ; %bb.0:
6411; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6412; GFX900-NEXT:    ;;#ASMSTART
6413; GFX900-NEXT:    ; def v[0:5]
6414; GFX900-NEXT:    ;;#ASMEND
6415; GFX900-NEXT:    ;;#ASMSTART
6416; GFX900-NEXT:    ; def v[2:7]
6417; GFX900-NEXT:    ;;#ASMEND
6418; GFX900-NEXT:    v_mov_b32_e32 v8, 0
6419; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6420; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6421; GFX900-NEXT:    v_mov_b32_e32 v4, v6
6422; GFX900-NEXT:    v_mov_b32_e32 v5, v7
6423; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
6424; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
6425; GFX900-NEXT:    s_waitcnt vmcnt(0)
6426; GFX900-NEXT:    s_setpc_b64 s[30:31]
6427;
6428; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4:
6429; GFX90A:       ; %bb.0:
6430; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6431; GFX90A-NEXT:    ;;#ASMSTART
6432; GFX90A-NEXT:    ; def v[0:5]
6433; GFX90A-NEXT:    ;;#ASMEND
6434; GFX90A-NEXT:    ;;#ASMSTART
6435; GFX90A-NEXT:    ; def v[2:7]
6436; GFX90A-NEXT:    ;;#ASMEND
6437; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
6438; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6439; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
6440; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
6441; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
6442; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
6443; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
6444; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6445; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6446;
6447; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_4:
6448; GFX940:       ; %bb.0:
6449; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6450; GFX940-NEXT:    ;;#ASMSTART
6451; GFX940-NEXT:    ; def v[0:5]
6452; GFX940-NEXT:    ;;#ASMEND
6453; GFX940-NEXT:    v_mov_b32_e32 v8, 0
6454; GFX940-NEXT:    ;;#ASMSTART
6455; GFX940-NEXT:    ; def v[2:7]
6456; GFX940-NEXT:    ;;#ASMEND
6457; GFX940-NEXT:    s_nop 0
6458; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6459; GFX940-NEXT:    v_mov_b32_e32 v3, v5
6460; GFX940-NEXT:    v_mov_b32_e32 v4, v6
6461; GFX940-NEXT:    v_mov_b32_e32 v5, v7
6462; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
6463; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
6464; GFX940-NEXT:    s_waitcnt vmcnt(0)
6465; GFX940-NEXT:    s_setpc_b64 s[30:31]
6466  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6467  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6468  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
6469  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6470  ret void
6471}
6472
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 5, 1, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 are both
; %vec1[2], lane 2 is %vec0[1] and lane 3 is %vec1[1].
; Both inputs are selected, and the expected output shows two "; def" asm
; blocks followed by two global_store_dwordx4 instructions.
; The check lines are autogenerated (see the NOTE on the first line of the file).
6473define void @v_shuffle_v4i64_v3i64__5_5_1_4(ptr addrspace(1) inreg %ptr) {
6474; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4:
6475; GFX900:       ; %bb.0:
6476; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6477; GFX900-NEXT:    ;;#ASMSTART
6478; GFX900-NEXT:    ; def v[0:5]
6479; GFX900-NEXT:    ;;#ASMEND
6480; GFX900-NEXT:    ;;#ASMSTART
6481; GFX900-NEXT:    ; def v[4:9]
6482; GFX900-NEXT:    ;;#ASMEND
6483; GFX900-NEXT:    v_mov_b32_e32 v10, 0
6484; GFX900-NEXT:    v_mov_b32_e32 v4, v6
6485; GFX900-NEXT:    v_mov_b32_e32 v5, v7
6486; GFX900-NEXT:    v_mov_b32_e32 v6, v8
6487; GFX900-NEXT:    v_mov_b32_e32 v7, v9
6488; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
6489; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
6490; GFX900-NEXT:    s_waitcnt vmcnt(0)
6491; GFX900-NEXT:    s_setpc_b64 s[30:31]
6492;
6493; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4:
6494; GFX90A:       ; %bb.0:
6495; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6496; GFX90A-NEXT:    ;;#ASMSTART
6497; GFX90A-NEXT:    ; def v[0:5]
6498; GFX90A-NEXT:    ;;#ASMEND
6499; GFX90A-NEXT:    ;;#ASMSTART
6500; GFX90A-NEXT:    ; def v[4:9]
6501; GFX90A-NEXT:    ;;#ASMEND
6502; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
6503; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
6504; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
6505; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
6506; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
6507; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
6508; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
6509; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6510; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6511;
6512; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_4:
6513; GFX940:       ; %bb.0:
6514; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6515; GFX940-NEXT:    ;;#ASMSTART
6516; GFX940-NEXT:    ; def v[0:5]
6517; GFX940-NEXT:    ;;#ASMEND
6518; GFX940-NEXT:    v_mov_b32_e32 v10, 0
6519; GFX940-NEXT:    ;;#ASMSTART
6520; GFX940-NEXT:    ; def v[4:9]
6521; GFX940-NEXT:    ;;#ASMEND
6522; GFX940-NEXT:    s_nop 0
6523; GFX940-NEXT:    v_mov_b32_e32 v4, v6
6524; GFX940-NEXT:    v_mov_b32_e32 v5, v7
6525; GFX940-NEXT:    v_mov_b32_e32 v6, v8
6526; GFX940-NEXT:    v_mov_b32_e32 v7, v9
6527; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
6528; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
6529; GFX940-NEXT:    s_waitcnt vmcnt(0)
6530; GFX940-NEXT:    s_setpc_b64 s[30:31]
6531  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6532  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6533  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
6534  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6535  ret void
6536}
6537
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 5, 2, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 are both
; %vec1[2], lane 2 is %vec0[2] and lane 3 is %vec1[1].
; Both inputs are selected, and the expected output shows two "; def" asm
; blocks (v[6:11] first, then v[0:5]) and two global_store_dwordx4 instructions.
; The check lines are autogenerated (see the NOTE on the first line of the file).
6538define void @v_shuffle_v4i64_v3i64__5_5_2_4(ptr addrspace(1) inreg %ptr) {
6539; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4:
6540; GFX900:       ; %bb.0:
6541; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6542; GFX900-NEXT:    ;;#ASMSTART
6543; GFX900-NEXT:    ; def v[6:11]
6544; GFX900-NEXT:    ;;#ASMEND
6545; GFX900-NEXT:    v_mov_b32_e32 v12, 0
6546; GFX900-NEXT:    ;;#ASMSTART
6547; GFX900-NEXT:    ; def v[0:5]
6548; GFX900-NEXT:    ;;#ASMEND
6549; GFX900-NEXT:    v_mov_b32_e32 v6, v4
6550; GFX900-NEXT:    v_mov_b32_e32 v7, v5
6551; GFX900-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
6552; GFX900-NEXT:    s_nop 0
6553; GFX900-NEXT:    v_mov_b32_e32 v8, v10
6554; GFX900-NEXT:    v_mov_b32_e32 v9, v11
6555; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
6556; GFX900-NEXT:    s_waitcnt vmcnt(0)
6557; GFX900-NEXT:    s_setpc_b64 s[30:31]
6558;
6559; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4:
6560; GFX90A:       ; %bb.0:
6561; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6562; GFX90A-NEXT:    ;;#ASMSTART
6563; GFX90A-NEXT:    ; def v[6:11]
6564; GFX90A-NEXT:    ;;#ASMEND
6565; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
6566; GFX90A-NEXT:    ;;#ASMSTART
6567; GFX90A-NEXT:    ; def v[0:5]
6568; GFX90A-NEXT:    ;;#ASMEND
6569; GFX90A-NEXT:    v_mov_b32_e32 v6, v4
6570; GFX90A-NEXT:    v_mov_b32_e32 v7, v5
6571; GFX90A-NEXT:    global_store_dwordx4 v12, v[6:9], s[16:17] offset:16
6572; GFX90A-NEXT:    s_nop 0
6573; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
6574; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
6575; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
6576; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6577; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6578;
6579; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_4:
6580; GFX940:       ; %bb.0:
6581; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6582; GFX940-NEXT:    ;;#ASMSTART
6583; GFX940-NEXT:    ; def v[6:11]
6584; GFX940-NEXT:    ;;#ASMEND
6585; GFX940-NEXT:    v_mov_b32_e32 v12, 0
6586; GFX940-NEXT:    ;;#ASMSTART
6587; GFX940-NEXT:    ; def v[0:5]
6588; GFX940-NEXT:    ;;#ASMEND
6589; GFX940-NEXT:    s_nop 0
6590; GFX940-NEXT:    v_mov_b32_e32 v6, v4
6591; GFX940-NEXT:    v_mov_b32_e32 v7, v5
6592; GFX940-NEXT:    global_store_dwordx4 v12, v[6:9], s[0:1] offset:16 sc0 sc1
6593; GFX940-NEXT:    s_nop 1
6594; GFX940-NEXT:    v_mov_b32_e32 v8, v10
6595; GFX940-NEXT:    v_mov_b32_e32 v9, v11
6596; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
6597; GFX940-NEXT:    s_waitcnt vmcnt(0)
6598; GFX940-NEXT:    s_setpc_b64 s[30:31]
6599  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6600  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6601  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
6602  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6603  ret void
6604}
6605
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <5, 5, 3, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 are both
; %vec1[2], lane 2 is %vec1[0] and lane 3 is %vec1[1]; %vec0 is never selected.
; The expected output shows a single "; def" asm block and two
; global_store_dwordx4 instructions (one at offset:16, one at the base address).
; The check lines are autogenerated (see the NOTE on the first line of the file).
6606define void @v_shuffle_v4i64_v3i64__5_5_3_4(ptr addrspace(1) inreg %ptr) {
6607; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4:
6608; GFX900:       ; %bb.0:
6609; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6610; GFX900-NEXT:    v_mov_b32_e32 v6, 0
6611; GFX900-NEXT:    ;;#ASMSTART
6612; GFX900-NEXT:    ; def v[0:5]
6613; GFX900-NEXT:    ;;#ASMEND
6614; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6615; GFX900-NEXT:    s_nop 0
6616; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6617; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6618; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
6619; GFX900-NEXT:    s_waitcnt vmcnt(0)
6620; GFX900-NEXT:    s_setpc_b64 s[30:31]
6621;
6622; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4:
6623; GFX90A:       ; %bb.0:
6624; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6625; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
6626; GFX90A-NEXT:    ;;#ASMSTART
6627; GFX90A-NEXT:    ; def v[0:5]
6628; GFX90A-NEXT:    ;;#ASMEND
6629; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
6630; GFX90A-NEXT:    s_nop 0
6631; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6632; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
6633; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
6634; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6635; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6636;
6637; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_4:
6638; GFX940:       ; %bb.0:
6639; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6640; GFX940-NEXT:    v_mov_b32_e32 v6, 0
6641; GFX940-NEXT:    ;;#ASMSTART
6642; GFX940-NEXT:    ; def v[0:5]
6643; GFX940-NEXT:    ;;#ASMEND
6644; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
6645; GFX940-NEXT:    s_nop 1
6646; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6647; GFX940-NEXT:    v_mov_b32_e32 v3, v5
6648; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
6649; GFX940-NEXT:    s_waitcnt vmcnt(0)
6650; GFX940-NEXT:    s_setpc_b64 s[30:31]
6651  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6652  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6653  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
6654  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6655  ret void
6656}
6657
; Shuffle of two <3 x i64> inputs into a <4 x i64> with mask <poison, 5, 5, 5>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 is poison and
; lanes 1-3 are all %vec1[2]; %vec0 is never selected.
; The expected output shows a single "; def" asm block and stores the same
; v[2:5] register range twice (at offset:16 and at the base address).
; The check lines are autogenerated (see the NOTE on the first line of the file).
6658define void @v_shuffle_v4i64_v3i64__u_5_5_5(ptr addrspace(1) inreg %ptr) {
6659; GFX900-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5:
6660; GFX900:       ; %bb.0:
6661; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6662; GFX900-NEXT:    ;;#ASMSTART
6663; GFX900-NEXT:    ; def v[0:5]
6664; GFX900-NEXT:    ;;#ASMEND
6665; GFX900-NEXT:    v_mov_b32_e32 v6, 0
6666; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6667; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6668; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
6669; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
6670; GFX900-NEXT:    s_waitcnt vmcnt(0)
6671; GFX900-NEXT:    s_setpc_b64 s[30:31]
6672;
6673; GFX90A-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5:
6674; GFX90A:       ; %bb.0:
6675; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6676; GFX90A-NEXT:    ;;#ASMSTART
6677; GFX90A-NEXT:    ; def v[0:5]
6678; GFX90A-NEXT:    ;;#ASMEND
6679; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
6680; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6681; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
6682; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
6683; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
6684; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6685; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6686;
6687; GFX940-LABEL: v_shuffle_v4i64_v3i64__u_5_5_5:
6688; GFX940:       ; %bb.0:
6689; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6690; GFX940-NEXT:    ;;#ASMSTART
6691; GFX940-NEXT:    ; def v[0:5]
6692; GFX940-NEXT:    ;;#ASMEND
6693; GFX940-NEXT:    v_mov_b32_e32 v6, 0
6694; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6695; GFX940-NEXT:    v_mov_b32_e32 v3, v5
6696; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
6697; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
6698; GFX940-NEXT:    s_waitcnt vmcnt(0)
6699; GFX940-NEXT:    s_setpc_b64 s[30:31]
6700  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6701  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6702  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
6703  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6704  ret void
6705}
6706
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <0, 5, 5, 5>: result lane 0 is %vec0[0] and lanes 1-3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1).
6707define void @v_shuffle_v4i64_v3i64__0_5_5_5(ptr addrspace(1) inreg %ptr) {
6708; GFX900-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5:
6709; GFX900:       ; %bb.0:
6710; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6711; GFX900-NEXT:    ;;#ASMSTART
6712; GFX900-NEXT:    ; def v[0:5]
6713; GFX900-NEXT:    ;;#ASMEND
6714; GFX900-NEXT:    ;;#ASMSTART
6715; GFX900-NEXT:    ; def v[2:7]
6716; GFX900-NEXT:    ;;#ASMEND
6717; GFX900-NEXT:    v_mov_b32_e32 v8, 0
6718; GFX900-NEXT:    v_mov_b32_e32 v4, v6
6719; GFX900-NEXT:    v_mov_b32_e32 v5, v7
6720; GFX900-NEXT:    v_mov_b32_e32 v2, v6
6721; GFX900-NEXT:    v_mov_b32_e32 v3, v7
6722; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
6723; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17]
6724; GFX900-NEXT:    s_waitcnt vmcnt(0)
6725; GFX900-NEXT:    s_setpc_b64 s[30:31]
6726;
6727; GFX90A-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5:
6728; GFX90A:       ; %bb.0:
6729; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6730; GFX90A-NEXT:    ;;#ASMSTART
6731; GFX90A-NEXT:    ; def v[0:5]
6732; GFX90A-NEXT:    ;;#ASMEND
6733; GFX90A-NEXT:    ;;#ASMSTART
6734; GFX90A-NEXT:    ; def v[2:7]
6735; GFX90A-NEXT:    ;;#ASMEND
6736; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
6737; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
6738; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
6739; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
6740; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
6741; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
6742; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17]
6743; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6744; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6745;
6746; GFX940-LABEL: v_shuffle_v4i64_v3i64__0_5_5_5:
6747; GFX940:       ; %bb.0:
6748; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6749; GFX940-NEXT:    ;;#ASMSTART
6750; GFX940-NEXT:    ; def v[0:5]
6751; GFX940-NEXT:    ;;#ASMEND
6752; GFX940-NEXT:    v_mov_b32_e32 v8, 0
6753; GFX940-NEXT:    ;;#ASMSTART
6754; GFX940-NEXT:    ; def v[2:7]
6755; GFX940-NEXT:    ;;#ASMEND
6756; GFX940-NEXT:    s_nop 0
6757; GFX940-NEXT:    v_mov_b32_e32 v4, v6
6758; GFX940-NEXT:    v_mov_b32_e32 v5, v7
6759; GFX940-NEXT:    v_mov_b32_e32 v2, v6
6760; GFX940-NEXT:    v_mov_b32_e32 v3, v7
6761; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
6762; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
6763; GFX940-NEXT:    s_waitcnt vmcnt(0)
6764; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are opaque inline-asm results, so the shuffle cannot be constant-folded.
  ; Both are selected by the mask; the expected output above has two "; def" ranges.
6765  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6766  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6767  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
6768  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6769  ret void
6770}
6771
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <1, 5, 5, 5>: result lane 0 is %vec0[1] and lanes 1-3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1).
6772define void @v_shuffle_v4i64_v3i64__1_5_5_5(ptr addrspace(1) inreg %ptr) {
6773; GFX900-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5:
6774; GFX900:       ; %bb.0:
6775; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6776; GFX900-NEXT:    ;;#ASMSTART
6777; GFX900-NEXT:    ; def v[0:5]
6778; GFX900-NEXT:    ;;#ASMEND
6779; GFX900-NEXT:    ;;#ASMSTART
6780; GFX900-NEXT:    ; def v[4:9]
6781; GFX900-NEXT:    ;;#ASMEND
6782; GFX900-NEXT:    v_mov_b32_e32 v10, 0
6783; GFX900-NEXT:    v_mov_b32_e32 v6, v8
6784; GFX900-NEXT:    v_mov_b32_e32 v7, v9
6785; GFX900-NEXT:    v_mov_b32_e32 v4, v8
6786; GFX900-NEXT:    v_mov_b32_e32 v5, v9
6787; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6788; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
6789; GFX900-NEXT:    s_waitcnt vmcnt(0)
6790; GFX900-NEXT:    s_setpc_b64 s[30:31]
6791;
6792; GFX90A-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5:
6793; GFX90A:       ; %bb.0:
6794; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6795; GFX90A-NEXT:    ;;#ASMSTART
6796; GFX90A-NEXT:    ; def v[0:5]
6797; GFX90A-NEXT:    ;;#ASMEND
6798; GFX90A-NEXT:    ;;#ASMSTART
6799; GFX90A-NEXT:    ; def v[4:9]
6800; GFX90A-NEXT:    ;;#ASMEND
6801; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
6802; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
6803; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
6804; GFX90A-NEXT:    v_mov_b32_e32 v4, v8
6805; GFX90A-NEXT:    v_mov_b32_e32 v5, v9
6806; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6807; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
6808; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6809; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6810;
6811; GFX940-LABEL: v_shuffle_v4i64_v3i64__1_5_5_5:
6812; GFX940:       ; %bb.0:
6813; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6814; GFX940-NEXT:    ;;#ASMSTART
6815; GFX940-NEXT:    ; def v[0:5]
6816; GFX940-NEXT:    ;;#ASMEND
6817; GFX940-NEXT:    v_mov_b32_e32 v10, 0
6818; GFX940-NEXT:    ;;#ASMSTART
6819; GFX940-NEXT:    ; def v[4:9]
6820; GFX940-NEXT:    ;;#ASMEND
6821; GFX940-NEXT:    s_nop 0
6822; GFX940-NEXT:    v_mov_b32_e32 v6, v8
6823; GFX940-NEXT:    v_mov_b32_e32 v7, v9
6824; GFX940-NEXT:    v_mov_b32_e32 v4, v8
6825; GFX940-NEXT:    v_mov_b32_e32 v5, v9
6826; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
6827; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
6828; GFX940-NEXT:    s_waitcnt vmcnt(0)
6829; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are opaque inline-asm results, so the shuffle cannot be constant-folded.
  ; Both are selected by the mask; the expected output above has two "; def" ranges.
6830  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6831  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6832  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
6833  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6834  ret void
6835}
6836
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <2, 5, 5, 5>: result lane 0 is %vec0[2] and lanes 1-3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1).
6837define void @v_shuffle_v4i64_v3i64__2_5_5_5(ptr addrspace(1) inreg %ptr) {
6838; GFX900-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5:
6839; GFX900:       ; %bb.0:
6840; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6841; GFX900-NEXT:    ;;#ASMSTART
6842; GFX900-NEXT:    ; def v[6:11]
6843; GFX900-NEXT:    ;;#ASMEND
6844; GFX900-NEXT:    v_mov_b32_e32 v12, 0
6845; GFX900-NEXT:    v_mov_b32_e32 v8, v10
6846; GFX900-NEXT:    v_mov_b32_e32 v9, v11
6847; GFX900-NEXT:    ;;#ASMSTART
6848; GFX900-NEXT:    ; def v[0:5]
6849; GFX900-NEXT:    ;;#ASMEND
6850; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
6851; GFX900-NEXT:    s_nop 0
6852; GFX900-NEXT:    v_mov_b32_e32 v8, v4
6853; GFX900-NEXT:    v_mov_b32_e32 v9, v5
6854; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
6855; GFX900-NEXT:    s_waitcnt vmcnt(0)
6856; GFX900-NEXT:    s_setpc_b64 s[30:31]
6857;
6858; GFX90A-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5:
6859; GFX90A:       ; %bb.0:
6860; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6861; GFX90A-NEXT:    ;;#ASMSTART
6862; GFX90A-NEXT:    ; def v[6:11]
6863; GFX90A-NEXT:    ;;#ASMEND
6864; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
6865; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
6866; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
6867; GFX90A-NEXT:    ;;#ASMSTART
6868; GFX90A-NEXT:    ; def v[0:5]
6869; GFX90A-NEXT:    ;;#ASMEND
6870; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
6871; GFX90A-NEXT:    s_nop 0
6872; GFX90A-NEXT:    v_mov_b32_e32 v8, v4
6873; GFX90A-NEXT:    v_mov_b32_e32 v9, v5
6874; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
6875; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6876; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6877;
6878; GFX940-LABEL: v_shuffle_v4i64_v3i64__2_5_5_5:
6879; GFX940:       ; %bb.0:
6880; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6881; GFX940-NEXT:    ;;#ASMSTART
6882; GFX940-NEXT:    ; def v[6:11]
6883; GFX940-NEXT:    ;;#ASMEND
6884; GFX940-NEXT:    v_mov_b32_e32 v12, 0
6885; GFX940-NEXT:    v_mov_b32_e32 v8, v10
6886; GFX940-NEXT:    v_mov_b32_e32 v9, v11
6887; GFX940-NEXT:    ;;#ASMSTART
6888; GFX940-NEXT:    ; def v[0:5]
6889; GFX940-NEXT:    ;;#ASMEND
6890; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1
6891; GFX940-NEXT:    s_nop 1
6892; GFX940-NEXT:    v_mov_b32_e32 v8, v4
6893; GFX940-NEXT:    v_mov_b32_e32 v9, v5
6894; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
6895; GFX940-NEXT:    s_waitcnt vmcnt(0)
6896; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are opaque inline-asm results, so the shuffle cannot be constant-folded.
  ; Both are selected by the mask; the expected output above has two "; def" ranges.
6897  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6898  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6899  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
6900  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6901  ret void
6902}
6903
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <3, 5, 5, 5>: result lane 0 is %vec1[0] and lanes 1-3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1). %vec0 is never selected.
6904define void @v_shuffle_v4i64_v3i64__3_5_5_5(ptr addrspace(1) inreg %ptr) {
6905; GFX900-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5:
6906; GFX900:       ; %bb.0:
6907; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6908; GFX900-NEXT:    ;;#ASMSTART
6909; GFX900-NEXT:    ; def v[0:5]
6910; GFX900-NEXT:    ;;#ASMEND
6911; GFX900-NEXT:    v_mov_b32_e32 v6, 0
6912; GFX900-NEXT:    v_mov_b32_e32 v2, v4
6913; GFX900-NEXT:    v_mov_b32_e32 v3, v5
6914; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
6915; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
6916; GFX900-NEXT:    s_waitcnt vmcnt(0)
6917; GFX900-NEXT:    s_setpc_b64 s[30:31]
6918;
6919; GFX90A-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5:
6920; GFX90A:       ; %bb.0:
6921; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6922; GFX90A-NEXT:    ;;#ASMSTART
6923; GFX90A-NEXT:    ; def v[0:5]
6924; GFX90A-NEXT:    ;;#ASMEND
6925; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
6926; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
6927; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
6928; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
6929; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
6930; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6931; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6932;
6933; GFX940-LABEL: v_shuffle_v4i64_v3i64__3_5_5_5:
6934; GFX940:       ; %bb.0:
6935; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6936; GFX940-NEXT:    ;;#ASMSTART
6937; GFX940-NEXT:    ; def v[0:5]
6938; GFX940-NEXT:    ;;#ASMEND
6939; GFX940-NEXT:    v_mov_b32_e32 v6, 0
6940; GFX940-NEXT:    v_mov_b32_e32 v2, v4
6941; GFX940-NEXT:    v_mov_b32_e32 v3, v5
6942; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
6943; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
6944; GFX940-NEXT:    s_waitcnt vmcnt(0)
6945; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Only %vec1 feeds the result; the expected output above has a single "; def" range
  ; (presumably the unused %vec0 asm call is dropped because it is not marked sideeffect).
6946  %vec0 = call <3 x i64> asm "; def $0", "=v"()
6947  %vec1 = call <3 x i64> asm "; def $0", "=v"()
6948  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
6949  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
6950  ret void
6951}
6952
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <4, 5, 5, 5>: result lane 0 is %vec1[1] and lanes 1-3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1). %vec0 is never selected.
6953define void @v_shuffle_v4i64_v3i64__4_5_5_5(ptr addrspace(1) inreg %ptr) {
6954; GFX900-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5:
6955; GFX900:       ; %bb.0:
6956; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6957; GFX900-NEXT:    v_mov_b32_e32 v10, 0
6958; GFX900-NEXT:    ;;#ASMSTART
6959; GFX900-NEXT:    ; def v[0:5]
6960; GFX900-NEXT:    ;;#ASMEND
6961; GFX900-NEXT:    v_mov_b32_e32 v6, v4
6962; GFX900-NEXT:    v_mov_b32_e32 v7, v5
6963; GFX900-NEXT:    v_mov_b32_e32 v8, v4
6964; GFX900-NEXT:    v_mov_b32_e32 v9, v5
6965; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6966; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
6967; GFX900-NEXT:    s_waitcnt vmcnt(0)
6968; GFX900-NEXT:    s_setpc_b64 s[30:31]
6969;
6970; GFX90A-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5:
6971; GFX90A:       ; %bb.0:
6972; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6973; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
6974; GFX90A-NEXT:    ;;#ASMSTART
6975; GFX90A-NEXT:    ; def v[0:5]
6976; GFX90A-NEXT:    ;;#ASMEND
6977; GFX90A-NEXT:    v_mov_b32_e32 v6, v4
6978; GFX90A-NEXT:    v_mov_b32_e32 v7, v5
6979; GFX90A-NEXT:    v_mov_b32_e32 v8, v4
6980; GFX90A-NEXT:    v_mov_b32_e32 v9, v5
6981; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
6982; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17]
6983; GFX90A-NEXT:    s_waitcnt vmcnt(0)
6984; GFX90A-NEXT:    s_setpc_b64 s[30:31]
6985;
6986; GFX940-LABEL: v_shuffle_v4i64_v3i64__4_5_5_5:
6987; GFX940:       ; %bb.0:
6988; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6989; GFX940-NEXT:    v_mov_b32_e32 v10, 0
6990; GFX940-NEXT:    ;;#ASMSTART
6991; GFX940-NEXT:    ; def v[0:5]
6992; GFX940-NEXT:    ;;#ASMEND
6993; GFX940-NEXT:    s_nop 0
6994; GFX940-NEXT:    v_mov_b32_e32 v6, v4
6995; GFX940-NEXT:    v_mov_b32_e32 v7, v5
6996; GFX940-NEXT:    v_mov_b32_e32 v8, v4
6997; GFX940-NEXT:    v_mov_b32_e32 v9, v5
6998; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
6999; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] sc0 sc1
7000; GFX940-NEXT:    s_waitcnt vmcnt(0)
7001; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Only %vec1 feeds the result; the expected output above has a single "; def" range
  ; (presumably the unused %vec0 asm call is dropped because it is not marked sideeffect).
7002  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7003  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7004  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
7005  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7006  ret void
7007}
7008
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <5, poison, 5, 5>: result lanes 0, 2 and 3 all repeat %vec1[2]; lane 1 is
; poison (the "u" in the test name). %vec0 is never selected.
7009define void @v_shuffle_v4i64_v3i64__5_u_5_5(ptr addrspace(1) inreg %ptr) {
7010; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5:
7011; GFX900:       ; %bb.0:
7012; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7013; GFX900-NEXT:    ;;#ASMSTART
7014; GFX900-NEXT:    ; def v[0:5]
7015; GFX900-NEXT:    ;;#ASMEND
7016; GFX900-NEXT:    v_mov_b32_e32 v6, 0
7017; GFX900-NEXT:    v_mov_b32_e32 v2, v4
7018; GFX900-NEXT:    v_mov_b32_e32 v3, v5
7019; GFX900-NEXT:    v_mov_b32_e32 v0, v4
7020; GFX900-NEXT:    v_mov_b32_e32 v1, v5
7021; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7022; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
7023; GFX900-NEXT:    s_waitcnt vmcnt(0)
7024; GFX900-NEXT:    s_setpc_b64 s[30:31]
7025;
7026; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5:
7027; GFX90A:       ; %bb.0:
7028; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7029; GFX90A-NEXT:    ;;#ASMSTART
7030; GFX90A-NEXT:    ; def v[0:5]
7031; GFX90A-NEXT:    ;;#ASMEND
7032; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
7033; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
7034; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
7035; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
7036; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
7037; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7038; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17]
7039; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7040; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7041;
7042; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_u_5_5:
7043; GFX940:       ; %bb.0:
7044; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7045; GFX940-NEXT:    ;;#ASMSTART
7046; GFX940-NEXT:    ; def v[0:5]
7047; GFX940-NEXT:    ;;#ASMEND
7048; GFX940-NEXT:    v_mov_b32_e32 v6, 0
7049; GFX940-NEXT:    v_mov_b32_e32 v2, v4
7050; GFX940-NEXT:    v_mov_b32_e32 v3, v5
7051; GFX940-NEXT:    v_mov_b32_e32 v0, v4
7052; GFX940-NEXT:    v_mov_b32_e32 v1, v5
7053; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
7054; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] sc0 sc1
7055; GFX940-NEXT:    s_waitcnt vmcnt(0)
7056; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Only %vec1 feeds the result; the expected output above has a single "; def" range
  ; (presumably the unused %vec0 asm call is dropped because it is not marked sideeffect).
7057  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7058  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7059  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
7060  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7061  ret void
7062}
7063
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <5, 0, 5, 5>: result lane 1 is %vec0[0]; lanes 0, 2 and 3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1).
7064define void @v_shuffle_v4i64_v3i64__5_0_5_5(ptr addrspace(1) inreg %ptr) {
7065; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5:
7066; GFX900:       ; %bb.0:
7067; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7068; GFX900-NEXT:    ;;#ASMSTART
7069; GFX900-NEXT:    ; def v[0:5]
7070; GFX900-NEXT:    ;;#ASMEND
7071; GFX900-NEXT:    ;;#ASMSTART
7072; GFX900-NEXT:    ; def v[2:7]
7073; GFX900-NEXT:    ;;#ASMEND
7074; GFX900-NEXT:    v_mov_b32_e32 v8, 0
7075; GFX900-NEXT:    v_mov_b32_e32 v4, v6
7076; GFX900-NEXT:    v_mov_b32_e32 v5, v7
7077; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
7078; GFX900-NEXT:    v_mov_b32_e32 v2, v6
7079; GFX900-NEXT:    v_mov_b32_e32 v3, v7
7080; GFX900-NEXT:    v_mov_b32_e32 v4, v0
7081; GFX900-NEXT:    v_mov_b32_e32 v5, v1
7082; GFX900-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
7083; GFX900-NEXT:    s_waitcnt vmcnt(0)
7084; GFX900-NEXT:    s_setpc_b64 s[30:31]
7085;
7086; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5:
7087; GFX90A:       ; %bb.0:
7088; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7089; GFX90A-NEXT:    ;;#ASMSTART
7090; GFX90A-NEXT:    ; def v[0:5]
7091; GFX90A-NEXT:    ;;#ASMEND
7092; GFX90A-NEXT:    ;;#ASMSTART
7093; GFX90A-NEXT:    ; def v[2:7]
7094; GFX90A-NEXT:    ;;#ASMEND
7095; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
7096; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
7097; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
7098; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17] offset:16
7099; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
7100; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
7101; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
7102; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
7103; GFX90A-NEXT:    global_store_dwordx4 v8, v[2:5], s[16:17]
7104; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7105; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7106;
7107; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_0_5_5:
7108; GFX940:       ; %bb.0:
7109; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7110; GFX940-NEXT:    ;;#ASMSTART
7111; GFX940-NEXT:    ; def v[0:5]
7112; GFX940-NEXT:    ;;#ASMEND
7113; GFX940-NEXT:    v_mov_b32_e32 v8, 0
7114; GFX940-NEXT:    ;;#ASMSTART
7115; GFX940-NEXT:    ; def v[2:7]
7116; GFX940-NEXT:    ;;#ASMEND
7117; GFX940-NEXT:    s_nop 0
7118; GFX940-NEXT:    v_mov_b32_e32 v4, v6
7119; GFX940-NEXT:    v_mov_b32_e32 v5, v7
7120; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] offset:16 sc0 sc1
7121; GFX940-NEXT:    v_mov_b32_e32 v2, v6
7122; GFX940-NEXT:    v_mov_b32_e32 v3, v7
7123; GFX940-NEXT:    v_mov_b32_e32 v4, v0
7124; GFX940-NEXT:    v_mov_b32_e32 v5, v1
7125; GFX940-NEXT:    global_store_dwordx4 v8, v[2:5], s[0:1] sc0 sc1
7126; GFX940-NEXT:    s_waitcnt vmcnt(0)
7127; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are opaque inline-asm results, so the shuffle cannot be constant-folded.
  ; Both are selected by the mask; the expected output above has two "; def" ranges.
7128  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7129  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7130  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
7131  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7132  ret void
7133}
7134
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <5, 1, 5, 5>: result lane 1 is %vec0[1]; lanes 0, 2 and 3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1).
7135define void @v_shuffle_v4i64_v3i64__5_1_5_5(ptr addrspace(1) inreg %ptr) {
7136; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5:
7137; GFX900:       ; %bb.0:
7138; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7139; GFX900-NEXT:    ;;#ASMSTART
7140; GFX900-NEXT:    ; def v[0:5]
7141; GFX900-NEXT:    ;;#ASMEND
7142; GFX900-NEXT:    ;;#ASMSTART
7143; GFX900-NEXT:    ; def v[4:9]
7144; GFX900-NEXT:    ;;#ASMEND
7145; GFX900-NEXT:    v_mov_b32_e32 v10, 0
7146; GFX900-NEXT:    v_mov_b32_e32 v6, v8
7147; GFX900-NEXT:    v_mov_b32_e32 v7, v9
7148; GFX900-NEXT:    v_mov_b32_e32 v0, v8
7149; GFX900-NEXT:    v_mov_b32_e32 v1, v9
7150; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
7151; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
7152; GFX900-NEXT:    s_waitcnt vmcnt(0)
7153; GFX900-NEXT:    s_setpc_b64 s[30:31]
7154;
7155; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5:
7156; GFX90A:       ; %bb.0:
7157; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7158; GFX90A-NEXT:    ;;#ASMSTART
7159; GFX90A-NEXT:    ; def v[0:5]
7160; GFX90A-NEXT:    ;;#ASMEND
7161; GFX90A-NEXT:    ;;#ASMSTART
7162; GFX90A-NEXT:    ; def v[4:9]
7163; GFX90A-NEXT:    ;;#ASMEND
7164; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
7165; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
7166; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
7167; GFX90A-NEXT:    v_mov_b32_e32 v0, v8
7168; GFX90A-NEXT:    v_mov_b32_e32 v1, v9
7169; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
7170; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
7171; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7172; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7173;
7174; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_1_5_5:
7175; GFX940:       ; %bb.0:
7176; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7177; GFX940-NEXT:    ;;#ASMSTART
7178; GFX940-NEXT:    ; def v[0:5]
7179; GFX940-NEXT:    ;;#ASMEND
7180; GFX940-NEXT:    v_mov_b32_e32 v10, 0
7181; GFX940-NEXT:    ;;#ASMSTART
7182; GFX940-NEXT:    ; def v[4:9]
7183; GFX940-NEXT:    ;;#ASMEND
7184; GFX940-NEXT:    s_nop 0
7185; GFX940-NEXT:    v_mov_b32_e32 v6, v8
7186; GFX940-NEXT:    v_mov_b32_e32 v7, v9
7187; GFX940-NEXT:    v_mov_b32_e32 v0, v8
7188; GFX940-NEXT:    v_mov_b32_e32 v1, v9
7189; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
7190; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
7191; GFX940-NEXT:    s_waitcnt vmcnt(0)
7192; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are opaque inline-asm results, so the shuffle cannot be constant-folded.
  ; Both are selected by the mask; the expected output above has two "; def" ranges.
7193  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7194  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7195  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
7196  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7197  ret void
7198}
7199
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <5, 2, 5, 5>: result lane 1 is %vec0[2]; lanes 0, 2 and 3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1).
7200define void @v_shuffle_v4i64_v3i64__5_2_5_5(ptr addrspace(1) inreg %ptr) {
7201; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5:
7202; GFX900:       ; %bb.0:
7203; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7204; GFX900-NEXT:    ;;#ASMSTART
7205; GFX900-NEXT:    ; def v[0:5]
7206; GFX900-NEXT:    ;;#ASMEND
7207; GFX900-NEXT:    ;;#ASMSTART
7208; GFX900-NEXT:    ; def v[6:11]
7209; GFX900-NEXT:    ;;#ASMEND
7210; GFX900-NEXT:    v_mov_b32_e32 v12, 0
7211; GFX900-NEXT:    v_mov_b32_e32 v8, v10
7212; GFX900-NEXT:    v_mov_b32_e32 v9, v11
7213; GFX900-NEXT:    v_mov_b32_e32 v2, v10
7214; GFX900-NEXT:    v_mov_b32_e32 v3, v11
7215; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
7216; GFX900-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
7217; GFX900-NEXT:    s_waitcnt vmcnt(0)
7218; GFX900-NEXT:    s_setpc_b64 s[30:31]
7219;
7220; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5:
7221; GFX90A:       ; %bb.0:
7222; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7223; GFX90A-NEXT:    ;;#ASMSTART
7224; GFX90A-NEXT:    ; def v[0:5]
7225; GFX90A-NEXT:    ;;#ASMEND
7226; GFX90A-NEXT:    ;;#ASMSTART
7227; GFX90A-NEXT:    ; def v[6:11]
7228; GFX90A-NEXT:    ;;#ASMEND
7229; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
7230; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
7231; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
7232; GFX90A-NEXT:    v_mov_b32_e32 v2, v10
7233; GFX90A-NEXT:    v_mov_b32_e32 v3, v11
7234; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
7235; GFX90A-NEXT:    global_store_dwordx4 v12, v[2:5], s[16:17]
7236; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7237; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7238;
7239; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_2_5_5:
7240; GFX940:       ; %bb.0:
7241; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7242; GFX940-NEXT:    ;;#ASMSTART
7243; GFX940-NEXT:    ; def v[0:5]
7244; GFX940-NEXT:    ;;#ASMEND
7245; GFX940-NEXT:    ;;#ASMSTART
7246; GFX940-NEXT:    ; def v[6:11]
7247; GFX940-NEXT:    ;;#ASMEND
7248; GFX940-NEXT:    v_mov_b32_e32 v12, 0
7249; GFX940-NEXT:    v_mov_b32_e32 v8, v10
7250; GFX940-NEXT:    v_mov_b32_e32 v9, v11
7251; GFX940-NEXT:    v_mov_b32_e32 v2, v10
7252; GFX940-NEXT:    v_mov_b32_e32 v3, v11
7253; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1
7254; GFX940-NEXT:    global_store_dwordx4 v12, v[2:5], s[0:1] sc0 sc1
7255; GFX940-NEXT:    s_waitcnt vmcnt(0)
7256; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Both inputs are opaque inline-asm results, so the shuffle cannot be constant-folded.
  ; Both are selected by the mask; the expected output above has two "; def" ranges.
7257  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7258  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7259  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
7260  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7261  ret void
7262}
7263
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <5, 3, 5, 5>: result lane 1 is %vec1[0]; lanes 0, 2 and 3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1). %vec0 is never selected.
7264define void @v_shuffle_v4i64_v3i64__5_3_5_5(ptr addrspace(1) inreg %ptr) {
7265; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5:
7266; GFX900:       ; %bb.0:
7267; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7268; GFX900-NEXT:    ;;#ASMSTART
7269; GFX900-NEXT:    ; def v[0:5]
7270; GFX900-NEXT:    ;;#ASMEND
7271; GFX900-NEXT:    v_mov_b32_e32 v6, 0
7272; GFX900-NEXT:    v_mov_b32_e32 v2, v4
7273; GFX900-NEXT:    v_mov_b32_e32 v3, v5
7274; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7275; GFX900-NEXT:    s_nop 0
7276; GFX900-NEXT:    v_mov_b32_e32 v4, v0
7277; GFX900-NEXT:    v_mov_b32_e32 v5, v1
7278; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
7279; GFX900-NEXT:    s_waitcnt vmcnt(0)
7280; GFX900-NEXT:    s_setpc_b64 s[30:31]
7281;
7282; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5:
7283; GFX90A:       ; %bb.0:
7284; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7285; GFX90A-NEXT:    ;;#ASMSTART
7286; GFX90A-NEXT:    ; def v[0:5]
7287; GFX90A-NEXT:    ;;#ASMEND
7288; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
7289; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
7290; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
7291; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7292; GFX90A-NEXT:    s_nop 0
7293; GFX90A-NEXT:    v_mov_b32_e32 v4, v0
7294; GFX90A-NEXT:    v_mov_b32_e32 v5, v1
7295; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
7296; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7297; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7298;
7299; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_3_5_5:
7300; GFX940:       ; %bb.0:
7301; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7302; GFX940-NEXT:    ;;#ASMSTART
7303; GFX940-NEXT:    ; def v[0:5]
7304; GFX940-NEXT:    ;;#ASMEND
7305; GFX940-NEXT:    v_mov_b32_e32 v6, 0
7306; GFX940-NEXT:    v_mov_b32_e32 v2, v4
7307; GFX940-NEXT:    v_mov_b32_e32 v3, v5
7308; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
7309; GFX940-NEXT:    s_nop 1
7310; GFX940-NEXT:    v_mov_b32_e32 v4, v0
7311; GFX940-NEXT:    v_mov_b32_e32 v5, v1
7312; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
7313; GFX940-NEXT:    s_waitcnt vmcnt(0)
7314; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Only %vec1 feeds the result; the expected output above has a single "; def" range
  ; (presumably the unused %vec0 asm call is dropped because it is not marked sideeffect).
7315  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7316  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7317  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
7318  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7319  ret void
7320}
7321
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <5, 4, 5, 5>: result lane 1 is %vec1[1]; lanes 0, 2 and 3 all repeat %vec1[2]
; (mask indices 0-2 address %vec0, 3-5 address %vec1). %vec0 is never selected.
7322define void @v_shuffle_v4i64_v3i64__5_4_5_5(ptr addrspace(1) inreg %ptr) {
7323; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5:
7324; GFX900:       ; %bb.0:
7325; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7326; GFX900-NEXT:    ;;#ASMSTART
7327; GFX900-NEXT:    ; def v[0:5]
7328; GFX900-NEXT:    ;;#ASMEND
7329; GFX900-NEXT:    v_mov_b32_e32 v10, 0
7330; GFX900-NEXT:    v_mov_b32_e32 v6, v4
7331; GFX900-NEXT:    v_mov_b32_e32 v7, v5
7332; GFX900-NEXT:    v_mov_b32_e32 v8, v4
7333; GFX900-NEXT:    v_mov_b32_e32 v9, v5
7334; GFX900-NEXT:    v_mov_b32_e32 v0, v4
7335; GFX900-NEXT:    v_mov_b32_e32 v1, v5
7336; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
7337; GFX900-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
7338; GFX900-NEXT:    s_waitcnt vmcnt(0)
7339; GFX900-NEXT:    s_setpc_b64 s[30:31]
7340;
7341; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5:
7342; GFX90A:       ; %bb.0:
7343; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7344; GFX90A-NEXT:    ;;#ASMSTART
7345; GFX90A-NEXT:    ; def v[0:5]
7346; GFX90A-NEXT:    ;;#ASMEND
7347; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
7348; GFX90A-NEXT:    v_mov_b32_e32 v6, v4
7349; GFX90A-NEXT:    v_mov_b32_e32 v7, v5
7350; GFX90A-NEXT:    v_mov_b32_e32 v8, v4
7351; GFX90A-NEXT:    v_mov_b32_e32 v9, v5
7352; GFX90A-NEXT:    v_mov_b32_e32 v0, v4
7353; GFX90A-NEXT:    v_mov_b32_e32 v1, v5
7354; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17] offset:16
7355; GFX90A-NEXT:    global_store_dwordx4 v10, v[0:3], s[16:17]
7356; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7357; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7358;
7359; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_4_5_5:
7360; GFX940:       ; %bb.0:
7361; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7362; GFX940-NEXT:    ;;#ASMSTART
7363; GFX940-NEXT:    ; def v[0:5]
7364; GFX940-NEXT:    ;;#ASMEND
7365; GFX940-NEXT:    v_mov_b32_e32 v10, 0
7366; GFX940-NEXT:    v_mov_b32_e32 v6, v4
7367; GFX940-NEXT:    v_mov_b32_e32 v7, v5
7368; GFX940-NEXT:    v_mov_b32_e32 v8, v4
7369; GFX940-NEXT:    v_mov_b32_e32 v9, v5
7370; GFX940-NEXT:    v_mov_b32_e32 v0, v4
7371; GFX940-NEXT:    v_mov_b32_e32 v1, v5
7372; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] offset:16 sc0 sc1
7373; GFX940-NEXT:    global_store_dwordx4 v10, v[0:3], s[0:1] sc0 sc1
7374; GFX940-NEXT:    s_waitcnt vmcnt(0)
7375; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Only %vec1 feeds the result; the expected output above has a single "; def" range
  ; (presumably the unused %vec0 asm call is dropped because it is not marked sideeffect).
7376  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7377  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7378  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
7379  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7380  ret void
7381}
7382
; Codegen check for a <3 x i64>,<3 x i64> -> <4 x i64> shufflevector with mask
; <5, 5, poison, 5>: result lanes 0, 1 and 3 all repeat %vec1[2]; lane 2 is
; poison (the "u" in the test name). %vec0 is never selected.
7383define void @v_shuffle_v4i64_v3i64__5_5_u_5(ptr addrspace(1) inreg %ptr) {
7384; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
7385; GFX900:       ; %bb.0:
7386; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7387; GFX900-NEXT:    v_mov_b32_e32 v6, 0
7388; GFX900-NEXT:    ;;#ASMSTART
7389; GFX900-NEXT:    ; def v[0:5]
7390; GFX900-NEXT:    ;;#ASMEND
7391; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7392; GFX900-NEXT:    s_nop 0
7393; GFX900-NEXT:    v_mov_b32_e32 v2, v4
7394; GFX900-NEXT:    v_mov_b32_e32 v3, v5
7395; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
7396; GFX900-NEXT:    s_waitcnt vmcnt(0)
7397; GFX900-NEXT:    s_setpc_b64 s[30:31]
7398;
7399; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
7400; GFX90A:       ; %bb.0:
7401; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7402; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
7403; GFX90A-NEXT:    ;;#ASMSTART
7404; GFX90A-NEXT:    ; def v[0:5]
7405; GFX90A-NEXT:    ;;#ASMEND
7406; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7407; GFX90A-NEXT:    s_nop 0
7408; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
7409; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
7410; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
7411; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7412; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7413;
7414; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_u_5:
7415; GFX940:       ; %bb.0:
7416; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7417; GFX940-NEXT:    v_mov_b32_e32 v6, 0
7418; GFX940-NEXT:    ;;#ASMSTART
7419; GFX940-NEXT:    ; def v[0:5]
7420; GFX940-NEXT:    ;;#ASMEND
7421; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
7422; GFX940-NEXT:    s_nop 1
7423; GFX940-NEXT:    v_mov_b32_e32 v2, v4
7424; GFX940-NEXT:    v_mov_b32_e32 v3, v5
7425; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
7426; GFX940-NEXT:    s_waitcnt vmcnt(0)
7427; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Only %vec1 feeds the result; the expected output above has a single "; def" range
  ; (presumably the unused %vec0 asm call is dropped because it is not marked sideeffect).
7428  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7429  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7430  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
  ; The 32-byte result is expected as two 16-byte stores (lanes 2-3 at offset:16, lanes 0-1 at offset 0).
7431  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7432  ret void
7433}
7434
; Two-source shuffle with mask <5, 5, 0, 5>: result elements 0, 1 and 3 take
; element 2 of %vec1 (mask index 5) and result element 2 takes element 0 of
; %vec0. Both inputs come from "=v" inline asm and the <4 x i64> result is
; stored to %ptr. The expected code for every target rebuilds the vector with
; v_mov_b32 copies and writes it with two global_store_dwordx4 instructions.
7435define void @v_shuffle_v4i64_v3i64__5_5_0_5(ptr addrspace(1) inreg %ptr) {
7436; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5:
7437; GFX900:       ; %bb.0:
7438; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7439; GFX900-NEXT:    ;;#ASMSTART
7440; GFX900-NEXT:    ; def v[0:5]
7441; GFX900-NEXT:    ;;#ASMEND
7442; GFX900-NEXT:    ;;#ASMSTART
7443; GFX900-NEXT:    ; def v[2:7]
7444; GFX900-NEXT:    ;;#ASMEND
7445; GFX900-NEXT:    v_mov_b32_e32 v8, 0
7446; GFX900-NEXT:    v_mov_b32_e32 v2, v6
7447; GFX900-NEXT:    v_mov_b32_e32 v3, v7
7448; GFX900-NEXT:    v_mov_b32_e32 v4, v6
7449; GFX900-NEXT:    v_mov_b32_e32 v5, v7
7450; GFX900-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
7451; GFX900-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
7452; GFX900-NEXT:    s_waitcnt vmcnt(0)
7453; GFX900-NEXT:    s_setpc_b64 s[30:31]
7454;
7455; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5:
7456; GFX90A:       ; %bb.0:
7457; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7458; GFX90A-NEXT:    ;;#ASMSTART
7459; GFX90A-NEXT:    ; def v[0:5]
7460; GFX90A-NEXT:    ;;#ASMEND
7461; GFX90A-NEXT:    ;;#ASMSTART
7462; GFX90A-NEXT:    ; def v[2:7]
7463; GFX90A-NEXT:    ;;#ASMEND
7464; GFX90A-NEXT:    v_mov_b32_e32 v8, 0
7465; GFX90A-NEXT:    v_mov_b32_e32 v2, v6
7466; GFX90A-NEXT:    v_mov_b32_e32 v3, v7
7467; GFX90A-NEXT:    v_mov_b32_e32 v4, v6
7468; GFX90A-NEXT:    v_mov_b32_e32 v5, v7
7469; GFX90A-NEXT:    global_store_dwordx4 v8, v[0:3], s[16:17] offset:16
7470; GFX90A-NEXT:    global_store_dwordx4 v8, v[4:7], s[16:17]
7471; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7472; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7473;
7474; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_0_5:
7475; GFX940:       ; %bb.0:
7476; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7477; GFX940-NEXT:    ;;#ASMSTART
7478; GFX940-NEXT:    ; def v[0:5]
7479; GFX940-NEXT:    ;;#ASMEND
7480; GFX940-NEXT:    v_mov_b32_e32 v8, 0
7481; GFX940-NEXT:    ;;#ASMSTART
7482; GFX940-NEXT:    ; def v[2:7]
7483; GFX940-NEXT:    ;;#ASMEND
7484; GFX940-NEXT:    s_nop 0
7485; GFX940-NEXT:    v_mov_b32_e32 v2, v6
7486; GFX940-NEXT:    v_mov_b32_e32 v3, v7
7487; GFX940-NEXT:    v_mov_b32_e32 v4, v6
7488; GFX940-NEXT:    v_mov_b32_e32 v5, v7
7489; GFX940-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
7490; GFX940-NEXT:    global_store_dwordx4 v8, v[4:7], s[0:1] sc0 sc1
7491; GFX940-NEXT:    s_waitcnt vmcnt(0)
7492; GFX940-NEXT:    s_setpc_b64 s[30:31]
7493  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7494  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7495  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
7496  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7497  ret void
7498}
7499
; Two-source shuffle with mask <5, 5, 1, 5>: result elements 0, 1 and 3 take
; element 2 of %vec1 (mask index 5) and result element 2 takes element 1 of
; %vec0. Both inputs come from "=v" inline asm and the <4 x i64> result is
; stored to %ptr. The expected code for every target contains two asm defs,
; four v_mov_b32 register copies and two global_store_dwordx4 instructions.
7500define void @v_shuffle_v4i64_v3i64__5_5_1_5(ptr addrspace(1) inreg %ptr) {
7501; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5:
7502; GFX900:       ; %bb.0:
7503; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7504; GFX900-NEXT:    ;;#ASMSTART
7505; GFX900-NEXT:    ; def v[0:5]
7506; GFX900-NEXT:    ;;#ASMEND
7507; GFX900-NEXT:    ;;#ASMSTART
7508; GFX900-NEXT:    ; def v[4:9]
7509; GFX900-NEXT:    ;;#ASMEND
7510; GFX900-NEXT:    v_mov_b32_e32 v10, 0
7511; GFX900-NEXT:    v_mov_b32_e32 v4, v8
7512; GFX900-NEXT:    v_mov_b32_e32 v5, v9
7513; GFX900-NEXT:    v_mov_b32_e32 v6, v8
7514; GFX900-NEXT:    v_mov_b32_e32 v7, v9
7515; GFX900-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
7516; GFX900-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
7517; GFX900-NEXT:    s_waitcnt vmcnt(0)
7518; GFX900-NEXT:    s_setpc_b64 s[30:31]
7519;
7520; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5:
7521; GFX90A:       ; %bb.0:
7522; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7523; GFX90A-NEXT:    ;;#ASMSTART
7524; GFX90A-NEXT:    ; def v[0:5]
7525; GFX90A-NEXT:    ;;#ASMEND
7526; GFX90A-NEXT:    ;;#ASMSTART
7527; GFX90A-NEXT:    ; def v[4:9]
7528; GFX90A-NEXT:    ;;#ASMEND
7529; GFX90A-NEXT:    v_mov_b32_e32 v10, 0
7530; GFX90A-NEXT:    v_mov_b32_e32 v4, v8
7531; GFX90A-NEXT:    v_mov_b32_e32 v5, v9
7532; GFX90A-NEXT:    v_mov_b32_e32 v6, v8
7533; GFX90A-NEXT:    v_mov_b32_e32 v7, v9
7534; GFX90A-NEXT:    global_store_dwordx4 v10, v[2:5], s[16:17] offset:16
7535; GFX90A-NEXT:    global_store_dwordx4 v10, v[6:9], s[16:17]
7536; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7537; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7538;
7539; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_1_5:
7540; GFX940:       ; %bb.0:
7541; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7542; GFX940-NEXT:    ;;#ASMSTART
7543; GFX940-NEXT:    ; def v[0:5]
7544; GFX940-NEXT:    ;;#ASMEND
7545; GFX940-NEXT:    v_mov_b32_e32 v10, 0
7546; GFX940-NEXT:    ;;#ASMSTART
7547; GFX940-NEXT:    ; def v[4:9]
7548; GFX940-NEXT:    ;;#ASMEND
7549; GFX940-NEXT:    s_nop 0
7550; GFX940-NEXT:    v_mov_b32_e32 v4, v8
7551; GFX940-NEXT:    v_mov_b32_e32 v5, v9
7552; GFX940-NEXT:    v_mov_b32_e32 v6, v8
7553; GFX940-NEXT:    v_mov_b32_e32 v7, v9
7554; GFX940-NEXT:    global_store_dwordx4 v10, v[2:5], s[0:1] offset:16 sc0 sc1
7555; GFX940-NEXT:    global_store_dwordx4 v10, v[6:9], s[0:1] sc0 sc1
7556; GFX940-NEXT:    s_waitcnt vmcnt(0)
7557; GFX940-NEXT:    s_setpc_b64 s[30:31]
7558  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7559  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7560  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
7561  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7562  ret void
7563}
7564
; Two-source shuffle with mask <5, 5, 2, 5>: result elements 0, 1 and 3 take
; element 2 of %vec1 (mask index 5) and result element 2 takes element 2 of
; %vec0. Both inputs come from "=v" inline asm and the <4 x i64> result is
; stored to %ptr. In the expected code the upper half (offset:16) is stored
; first, then v8/v9 are overwritten and the same v[8:11] range is stored again
; for the lower half.
7565define void @v_shuffle_v4i64_v3i64__5_5_2_5(ptr addrspace(1) inreg %ptr) {
7566; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5:
7567; GFX900:       ; %bb.0:
7568; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7569; GFX900-NEXT:    ;;#ASMSTART
7570; GFX900-NEXT:    ; def v[6:11]
7571; GFX900-NEXT:    ;;#ASMEND
7572; GFX900-NEXT:    v_mov_b32_e32 v12, 0
7573; GFX900-NEXT:    ;;#ASMSTART
7574; GFX900-NEXT:    ; def v[0:5]
7575; GFX900-NEXT:    ;;#ASMEND
7576; GFX900-NEXT:    v_mov_b32_e32 v8, v4
7577; GFX900-NEXT:    v_mov_b32_e32 v9, v5
7578; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
7579; GFX900-NEXT:    s_nop 0
7580; GFX900-NEXT:    v_mov_b32_e32 v8, v10
7581; GFX900-NEXT:    v_mov_b32_e32 v9, v11
7582; GFX900-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
7583; GFX900-NEXT:    s_waitcnt vmcnt(0)
7584; GFX900-NEXT:    s_setpc_b64 s[30:31]
7585;
7586; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5:
7587; GFX90A:       ; %bb.0:
7588; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7589; GFX90A-NEXT:    ;;#ASMSTART
7590; GFX90A-NEXT:    ; def v[6:11]
7591; GFX90A-NEXT:    ;;#ASMEND
7592; GFX90A-NEXT:    v_mov_b32_e32 v12, 0
7593; GFX90A-NEXT:    ;;#ASMSTART
7594; GFX90A-NEXT:    ; def v[0:5]
7595; GFX90A-NEXT:    ;;#ASMEND
7596; GFX90A-NEXT:    v_mov_b32_e32 v8, v4
7597; GFX90A-NEXT:    v_mov_b32_e32 v9, v5
7598; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17] offset:16
7599; GFX90A-NEXT:    s_nop 0
7600; GFX90A-NEXT:    v_mov_b32_e32 v8, v10
7601; GFX90A-NEXT:    v_mov_b32_e32 v9, v11
7602; GFX90A-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17]
7603; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7604; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7605;
7606; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_2_5:
7607; GFX940:       ; %bb.0:
7608; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7609; GFX940-NEXT:    ;;#ASMSTART
7610; GFX940-NEXT:    ; def v[6:11]
7611; GFX940-NEXT:    ;;#ASMEND
7612; GFX940-NEXT:    v_mov_b32_e32 v12, 0
7613; GFX940-NEXT:    ;;#ASMSTART
7614; GFX940-NEXT:    ; def v[0:5]
7615; GFX940-NEXT:    ;;#ASMEND
7616; GFX940-NEXT:    s_nop 0
7617; GFX940-NEXT:    v_mov_b32_e32 v8, v4
7618; GFX940-NEXT:    v_mov_b32_e32 v9, v5
7619; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] offset:16 sc0 sc1
7620; GFX940-NEXT:    s_nop 1
7621; GFX940-NEXT:    v_mov_b32_e32 v8, v10
7622; GFX940-NEXT:    v_mov_b32_e32 v9, v11
7623; GFX940-NEXT:    global_store_dwordx4 v12, v[8:11], s[0:1] sc0 sc1
7624; GFX940-NEXT:    s_waitcnt vmcnt(0)
7625; GFX940-NEXT:    s_setpc_b64 s[30:31]
7626  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7627  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7628  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
7629  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7630  ret void
7631}
7632
; Shuffle with mask <5, 5, 3, 5>: every mask index is >= 3, so all result
; elements come from %vec1 (elements 0, 1 and 3 from its element 2, element 2
; from its element 0). %vec0 is passed as an operand but never selected, and
; the expected code for every target contains only one asm def. The <4 x i64>
; result is stored to %ptr.
7633define void @v_shuffle_v4i64_v3i64__5_5_3_5(ptr addrspace(1) inreg %ptr) {
7634; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5:
7635; GFX900:       ; %bb.0:
7636; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7637; GFX900-NEXT:    ;;#ASMSTART
7638; GFX900-NEXT:    ; def v[0:5]
7639; GFX900-NEXT:    ;;#ASMEND
7640; GFX900-NEXT:    v_mov_b32_e32 v6, 0
7641; GFX900-NEXT:    v_mov_b32_e32 v2, v4
7642; GFX900-NEXT:    v_mov_b32_e32 v3, v5
7643; GFX900-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
7644; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
7645; GFX900-NEXT:    s_waitcnt vmcnt(0)
7646; GFX900-NEXT:    s_setpc_b64 s[30:31]
7647;
7648; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5:
7649; GFX90A:       ; %bb.0:
7650; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7651; GFX90A-NEXT:    ;;#ASMSTART
7652; GFX90A-NEXT:    ; def v[0:5]
7653; GFX90A-NEXT:    ;;#ASMEND
7654; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
7655; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
7656; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
7657; GFX90A-NEXT:    global_store_dwordx4 v6, v[0:3], s[16:17] offset:16
7658; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
7659; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7660; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7661;
7662; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_3_5:
7663; GFX940:       ; %bb.0:
7664; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7665; GFX940-NEXT:    ;;#ASMSTART
7666; GFX940-NEXT:    ; def v[0:5]
7667; GFX940-NEXT:    ;;#ASMEND
7668; GFX940-NEXT:    v_mov_b32_e32 v6, 0
7669; GFX940-NEXT:    v_mov_b32_e32 v2, v4
7670; GFX940-NEXT:    v_mov_b32_e32 v3, v5
7671; GFX940-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1] offset:16 sc0 sc1
7672; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
7673; GFX940-NEXT:    s_waitcnt vmcnt(0)
7674; GFX940-NEXT:    s_setpc_b64 s[30:31]
7675  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7676  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7677  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
7678  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7679  ret void
7680}
7681
; Shuffle with mask <5, 5, 4, 5>: every mask index is >= 3, so all result
; elements come from %vec1 (elements 0, 1 and 3 from its element 2, element 2
; from its element 1). %vec0 is never selected and the expected code for every
; target contains only one asm def. The upper half (offset:16) is stored
; straight from v[2:5]; v2/v3 are then overwritten and v[2:5] is stored again
; for the lower half.
7682define void @v_shuffle_v4i64_v3i64__5_5_4_5(ptr addrspace(1) inreg %ptr) {
7683; GFX900-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5:
7684; GFX900:       ; %bb.0:
7685; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7686; GFX900-NEXT:    v_mov_b32_e32 v6, 0
7687; GFX900-NEXT:    ;;#ASMSTART
7688; GFX900-NEXT:    ; def v[0:5]
7689; GFX900-NEXT:    ;;#ASMEND
7690; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7691; GFX900-NEXT:    s_nop 0
7692; GFX900-NEXT:    v_mov_b32_e32 v2, v4
7693; GFX900-NEXT:    v_mov_b32_e32 v3, v5
7694; GFX900-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
7695; GFX900-NEXT:    s_waitcnt vmcnt(0)
7696; GFX900-NEXT:    s_setpc_b64 s[30:31]
7697;
7698; GFX90A-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5:
7699; GFX90A:       ; %bb.0:
7700; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7701; GFX90A-NEXT:    v_mov_b32_e32 v6, 0
7702; GFX90A-NEXT:    ;;#ASMSTART
7703; GFX90A-NEXT:    ; def v[0:5]
7704; GFX90A-NEXT:    ;;#ASMEND
7705; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17] offset:16
7706; GFX90A-NEXT:    s_nop 0
7707; GFX90A-NEXT:    v_mov_b32_e32 v2, v4
7708; GFX90A-NEXT:    v_mov_b32_e32 v3, v5
7709; GFX90A-NEXT:    global_store_dwordx4 v6, v[2:5], s[16:17]
7710; GFX90A-NEXT:    s_waitcnt vmcnt(0)
7711; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7712;
7713; GFX940-LABEL: v_shuffle_v4i64_v3i64__5_5_4_5:
7714; GFX940:       ; %bb.0:
7715; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7716; GFX940-NEXT:    v_mov_b32_e32 v6, 0
7717; GFX940-NEXT:    ;;#ASMSTART
7718; GFX940-NEXT:    ; def v[0:5]
7719; GFX940-NEXT:    ;;#ASMEND
7720; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] offset:16 sc0 sc1
7721; GFX940-NEXT:    s_nop 1
7722; GFX940-NEXT:    v_mov_b32_e32 v2, v4
7723; GFX940-NEXT:    v_mov_b32_e32 v3, v5
7724; GFX940-NEXT:    global_store_dwordx4 v6, v[2:5], s[0:1] sc0 sc1
7725; GFX940-NEXT:    s_waitcnt vmcnt(0)
7726; GFX940-NEXT:    s_setpc_b64 s[30:31]
7727  %vec0 = call <3 x i64> asm "; def $0", "=v"()
7728  %vec1 = call <3 x i64> asm "; def $0", "=v"()
7729  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
7730  store <4 x i64> %shuf, ptr addrspace(1) %ptr, align 32
7731  ret void
7732}
7733
; First of the "=s" variants: the input is produced by "=s" inline asm and the
; <4 x i64> result is handed to a side-effecting asm pinned to s[8:15].
; Here the whole shuffle mask is poison, so no element of %vec0 is selected;
; the shared expected output contains only the "use" and no "def".
7734define void @s_shuffle_v4i64_v3i64__u_u_u_u() {
7735; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_u_u_u:
7736; GFX9:       ; %bb.0:
7737; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7738; GFX9-NEXT:    ;;#ASMSTART
7739; GFX9-NEXT:    ; use s[8:15]
7740; GFX9-NEXT:    ;;#ASMEND
7741; GFX9-NEXT:    s_setpc_b64 s[30:31]
7742  %vec0 = call <3 x i64> asm "; def $0", "=s"()
7743  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> poison
7744  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
7745  ret void
7746}
7747
; Single-source shuffle with mask <0, poison, poison, poison>: result element 0
; is element 0 of %vec0, the rest is unspecified. The result is consumed by an
; asm pinned to s[8:15]. In the expected output the def already lands in
; s[8:13], so no s_mov copies appear (gfx940 only has an s_nop between the def
; and the use).
7748define void @s_shuffle_v4i64_v3i64__0_u_u_u() {
7749; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u:
7750; GFX900:       ; %bb.0:
7751; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7752; GFX900-NEXT:    ;;#ASMSTART
7753; GFX900-NEXT:    ; def s[8:13]
7754; GFX900-NEXT:    ;;#ASMEND
7755; GFX900-NEXT:    ;;#ASMSTART
7756; GFX900-NEXT:    ; use s[8:15]
7757; GFX900-NEXT:    ;;#ASMEND
7758; GFX900-NEXT:    s_setpc_b64 s[30:31]
7759;
7760; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u:
7761; GFX90A:       ; %bb.0:
7762; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7763; GFX90A-NEXT:    ;;#ASMSTART
7764; GFX90A-NEXT:    ; def s[8:13]
7765; GFX90A-NEXT:    ;;#ASMEND
7766; GFX90A-NEXT:    ;;#ASMSTART
7767; GFX90A-NEXT:    ; use s[8:15]
7768; GFX90A-NEXT:    ;;#ASMEND
7769; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7770;
7771; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_u_u_u:
7772; GFX940:       ; %bb.0:
7773; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7774; GFX940-NEXT:    ;;#ASMSTART
7775; GFX940-NEXT:    ; def s[8:13]
7776; GFX940-NEXT:    ;;#ASMEND
7777; GFX940-NEXT:    s_nop 0
7778; GFX940-NEXT:    ;;#ASMSTART
7779; GFX940-NEXT:    ; use s[8:15]
7780; GFX940-NEXT:    ;;#ASMEND
7781; GFX940-NEXT:    s_setpc_b64 s[30:31]
7782  %vec0 = call <3 x i64> asm "; def $0", "=s"()
7783  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
7784  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
7785  ret void
7786}
7787
; Single-source shuffle with mask <1, poison, poison, poison>: result element 0
; is element 1 of %vec0, the rest is unspecified. The result is consumed by an
; asm pinned to s[8:15]. Every target is expected to copy the selected 64-bit
; element into s[8:9] with two s_mov_b32 instructions; only the source
; registers differ between targets.
7788define void @s_shuffle_v4i64_v3i64__1_u_u_u() {
7789; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
7790; GFX900:       ; %bb.0:
7791; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7792; GFX900-NEXT:    ;;#ASMSTART
7793; GFX900-NEXT:    ; def s[4:9]
7794; GFX900-NEXT:    ;;#ASMEND
7795; GFX900-NEXT:    s_mov_b32 s8, s6
7796; GFX900-NEXT:    s_mov_b32 s9, s7
7797; GFX900-NEXT:    ;;#ASMSTART
7798; GFX900-NEXT:    ; use s[8:15]
7799; GFX900-NEXT:    ;;#ASMEND
7800; GFX900-NEXT:    s_setpc_b64 s[30:31]
7801;
7802; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
7803; GFX90A:       ; %bb.0:
7804; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7805; GFX90A-NEXT:    ;;#ASMSTART
7806; GFX90A-NEXT:    ; def s[4:9]
7807; GFX90A-NEXT:    ;;#ASMEND
7808; GFX90A-NEXT:    s_mov_b32 s8, s6
7809; GFX90A-NEXT:    s_mov_b32 s9, s7
7810; GFX90A-NEXT:    ;;#ASMSTART
7811; GFX90A-NEXT:    ; use s[8:15]
7812; GFX90A-NEXT:    ;;#ASMEND
7813; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7814;
7815; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_u_u_u:
7816; GFX940:       ; %bb.0:
7817; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7818; GFX940-NEXT:    ;;#ASMSTART
7819; GFX940-NEXT:    ; def s[0:5]
7820; GFX940-NEXT:    ;;#ASMEND
7821; GFX940-NEXT:    s_mov_b32 s8, s2
7822; GFX940-NEXT:    s_mov_b32 s9, s3
7823; GFX940-NEXT:    ;;#ASMSTART
7824; GFX940-NEXT:    ; use s[8:15]
7825; GFX940-NEXT:    ;;#ASMEND
7826; GFX940-NEXT:    s_setpc_b64 s[30:31]
7827  %vec0 = call <3 x i64> asm "; def $0", "=s"()
7828  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
7829  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
7830  ret void
7831}
7832
; Single-source shuffle with mask <2, poison, poison, poison>: result element 0
; is element 2 of %vec0, the rest is unspecified. The result is consumed by an
; asm pinned to s[8:15]. For gfx900 and gfx90a the def is placed in s[4:9], so
; element 2 already sits in s[8:9] and no copies are expected; for gfx940 the
; def is in s[0:5] and s[4:5] is copied into s[8:9].
7833define void @s_shuffle_v4i64_v3i64__2_u_u_u() {
7834; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
7835; GFX900:       ; %bb.0:
7836; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7837; GFX900-NEXT:    ;;#ASMSTART
7838; GFX900-NEXT:    ; def s[4:9]
7839; GFX900-NEXT:    ;;#ASMEND
7840; GFX900-NEXT:    ;;#ASMSTART
7841; GFX900-NEXT:    ; use s[8:15]
7842; GFX900-NEXT:    ;;#ASMEND
7843; GFX900-NEXT:    s_setpc_b64 s[30:31]
7844;
7845; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
7846; GFX90A:       ; %bb.0:
7847; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7848; GFX90A-NEXT:    ;;#ASMSTART
7849; GFX90A-NEXT:    ; def s[4:9]
7850; GFX90A-NEXT:    ;;#ASMEND
7851; GFX90A-NEXT:    ;;#ASMSTART
7852; GFX90A-NEXT:    ; use s[8:15]
7853; GFX90A-NEXT:    ;;#ASMEND
7854; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7855;
7856; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_u_u_u:
7857; GFX940:       ; %bb.0:
7858; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7859; GFX940-NEXT:    ;;#ASMSTART
7860; GFX940-NEXT:    ; def s[0:5]
7861; GFX940-NEXT:    ;;#ASMEND
7862; GFX940-NEXT:    s_mov_b32 s8, s4
7863; GFX940-NEXT:    s_mov_b32 s9, s5
7864; GFX940-NEXT:    ;;#ASMSTART
7865; GFX940-NEXT:    ; use s[8:15]
7866; GFX940-NEXT:    ;;#ASMEND
7867; GFX940-NEXT:    s_setpc_b64 s[30:31]
7868  %vec0 = call <3 x i64> asm "; def $0", "=s"()
7869  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
7870  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
7871  ret void
7872}
7873
; Shuffle with mask <3, poison, poison, poison> where the second operand is
; poison: index 3 refers to element 0 of that poison operand, so nothing from
; %vec0 is selected. The shared expected output therefore contains only the
; "use" of s[8:15] and no "def".
7874define void @s_shuffle_v4i64_v3i64__3_u_u_u() {
7875; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_u_u_u:
7876; GFX9:       ; %bb.0:
7877; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7878; GFX9-NEXT:    ;;#ASMSTART
7879; GFX9-NEXT:    ; use s[8:15]
7880; GFX9-NEXT:    ;;#ASMEND
7881; GFX9-NEXT:    s_setpc_b64 s[30:31]
7882  %vec0 = call <3 x i64> asm "; def $0", "=s"()
7883  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
7884  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
7885  ret void
7886}
7887
; Two-operand shuffle with mask <4, poison, poison, poison>: result element 0
; is element 1 of %vec1 (mask index 4); %vec0 is never selected. The result is
; consumed by an asm pinned to s[8:15]. The expected output for every target
; has a single def followed by a two-instruction s_mov_b32 copy of the selected
; element into s[8:9].
7888define void @s_shuffle_v4i64_v3i64__4_u_u_u() {
7889; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
7890; GFX900:       ; %bb.0:
7891; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7892; GFX900-NEXT:    ;;#ASMSTART
7893; GFX900-NEXT:    ; def s[4:9]
7894; GFX900-NEXT:    ;;#ASMEND
7895; GFX900-NEXT:    s_mov_b32 s8, s6
7896; GFX900-NEXT:    s_mov_b32 s9, s7
7897; GFX900-NEXT:    ;;#ASMSTART
7898; GFX900-NEXT:    ; use s[8:15]
7899; GFX900-NEXT:    ;;#ASMEND
7900; GFX900-NEXT:    s_setpc_b64 s[30:31]
7901;
7902; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
7903; GFX90A:       ; %bb.0:
7904; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7905; GFX90A-NEXT:    ;;#ASMSTART
7906; GFX90A-NEXT:    ; def s[4:9]
7907; GFX90A-NEXT:    ;;#ASMEND
7908; GFX90A-NEXT:    s_mov_b32 s8, s6
7909; GFX90A-NEXT:    s_mov_b32 s9, s7
7910; GFX90A-NEXT:    ;;#ASMSTART
7911; GFX90A-NEXT:    ; use s[8:15]
7912; GFX90A-NEXT:    ;;#ASMEND
7913; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7914;
7915; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_u_u_u:
7916; GFX940:       ; %bb.0:
7917; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7918; GFX940-NEXT:    ;;#ASMSTART
7919; GFX940-NEXT:    ; def s[0:5]
7920; GFX940-NEXT:    ;;#ASMEND
7921; GFX940-NEXT:    s_mov_b32 s8, s2
7922; GFX940-NEXT:    s_mov_b32 s9, s3
7923; GFX940-NEXT:    ;;#ASMSTART
7924; GFX940-NEXT:    ; use s[8:15]
7925; GFX940-NEXT:    ;;#ASMEND
7926; GFX940-NEXT:    s_setpc_b64 s[30:31]
7927  %vec0 = call <3 x i64> asm "; def $0", "=s"()
7928  %vec1 = call <3 x i64> asm "; def $0", "=s"()
7929  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 poison, i32 poison, i32 poison>
7930  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
7931  ret void
7932}
7933
; Two-operand shuffle with mask <5, poison, poison, poison>: result element 0
; is element 2 of %vec1 (mask index 5); %vec0 is never selected. The result is
; consumed by an asm pinned to s[8:15]. For gfx900 and gfx90a the single def is
; placed in s[4:9], so the selected element already sits in s[8:9] and no
; copies are expected; for gfx940 the def is in s[0:5] and s[4:5] is copied
; into s[8:9].
7934define void @s_shuffle_v4i64_v3i64__5_u_u_u() {
7935; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
7936; GFX900:       ; %bb.0:
7937; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7938; GFX900-NEXT:    ;;#ASMSTART
7939; GFX900-NEXT:    ; def s[4:9]
7940; GFX900-NEXT:    ;;#ASMEND
7941; GFX900-NEXT:    ;;#ASMSTART
7942; GFX900-NEXT:    ; use s[8:15]
7943; GFX900-NEXT:    ;;#ASMEND
7944; GFX900-NEXT:    s_setpc_b64 s[30:31]
7945;
7946; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
7947; GFX90A:       ; %bb.0:
7948; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7949; GFX90A-NEXT:    ;;#ASMSTART
7950; GFX90A-NEXT:    ; def s[4:9]
7951; GFX90A-NEXT:    ;;#ASMEND
7952; GFX90A-NEXT:    ;;#ASMSTART
7953; GFX90A-NEXT:    ; use s[8:15]
7954; GFX90A-NEXT:    ;;#ASMEND
7955; GFX90A-NEXT:    s_setpc_b64 s[30:31]
7956;
7957; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_u_u:
7958; GFX940:       ; %bb.0:
7959; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7960; GFX940-NEXT:    ;;#ASMSTART
7961; GFX940-NEXT:    ; def s[0:5]
7962; GFX940-NEXT:    ;;#ASMEND
7963; GFX940-NEXT:    s_mov_b32 s8, s4
7964; GFX940-NEXT:    s_mov_b32 s9, s5
7965; GFX940-NEXT:    ;;#ASMSTART
7966; GFX940-NEXT:    ; use s[8:15]
7967; GFX940-NEXT:    ;;#ASMEND
7968; GFX940-NEXT:    s_setpc_b64 s[30:31]
7969  %vec0 = call <3 x i64> asm "; def $0", "=s"()
7970  %vec1 = call <3 x i64> asm "; def $0", "=s"()
7971  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 poison, i32 poison>
7972  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
7973  ret void
7974}
7975
; Two-source shuffle with mask <5, 0, poison, poison>: result element 0 is
; element 2 of %vec1 (mask index 5) and result element 1 is element 0 of
; %vec0. The result is consumed by an asm pinned to s[8:15], so element 0 must
; end up in s[8:9] and element 1 in s[10:11]. Both inputs are live, and the
; expected output for every target contains two defs.
7976define void @s_shuffle_v4i64_v3i64__5_0_u_u() {
7977; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u:
7978; GFX900:       ; %bb.0:
7979; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7980; GFX900-NEXT:    ;;#ASMSTART
7981; GFX900-NEXT:    ; def s[4:9]
7982; GFX900-NEXT:    ;;#ASMEND
7983; GFX900-NEXT:    ;;#ASMSTART
7984; GFX900-NEXT:    ; def s[8:13]
7985; GFX900-NEXT:    ;;#ASMEND
7986; GFX900-NEXT:    s_mov_b32 s8, s12
7987; GFX900-NEXT:    s_mov_b32 s9, s13
7988; GFX900-NEXT:    s_mov_b32 s10, s4
7989; GFX900-NEXT:    s_mov_b32 s11, s5
7990; GFX900-NEXT:    ;;#ASMSTART
7991; GFX900-NEXT:    ; use s[8:15]
7992; GFX900-NEXT:    ;;#ASMEND
7993; GFX900-NEXT:    s_setpc_b64 s[30:31]
7994;
7995; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u:
7996; GFX90A:       ; %bb.0:
7997; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
7998; GFX90A-NEXT:    ;;#ASMSTART
7999; GFX90A-NEXT:    ; def s[4:9]
8000; GFX90A-NEXT:    ;;#ASMEND
8001; GFX90A-NEXT:    ;;#ASMSTART
8002; GFX90A-NEXT:    ; def s[8:13]
8003; GFX90A-NEXT:    ;;#ASMEND
8004; GFX90A-NEXT:    s_mov_b32 s8, s12
8005; GFX90A-NEXT:    s_mov_b32 s9, s13
8006; GFX90A-NEXT:    s_mov_b32 s10, s4
8007; GFX90A-NEXT:    s_mov_b32 s11, s5
8008; GFX90A-NEXT:    ;;#ASMSTART
8009; GFX90A-NEXT:    ; use s[8:15]
8010; GFX90A-NEXT:    ;;#ASMEND
8011; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8012;
8013; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_u_u:
8014; GFX940:       ; %bb.0:
8015; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8016; GFX940-NEXT:    ;;#ASMSTART
8017; GFX940-NEXT:    ; def s[0:5]
8018; GFX940-NEXT:    ;;#ASMEND
8019; GFX940-NEXT:    s_mov_b32 s10, s0
8020; GFX940-NEXT:    ;;#ASMSTART
8021; GFX940-NEXT:    ; def s[4:9]
8022; GFX940-NEXT:    ;;#ASMEND
8023; GFX940-NEXT:    s_mov_b32 s11, s1
8024; GFX940-NEXT:    ;;#ASMSTART
8025; GFX940-NEXT:    ; use s[8:15]
8026; GFX940-NEXT:    ;;#ASMEND
8027; GFX940-NEXT:    s_setpc_b64 s[30:31]
8028  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8029  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8030  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 poison, i32 poison>
8031  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8032  ret void
8033}
8034
; Two-source shuffle with mask <5, 1, poison, poison>: result element 0 is
; element 2 of %vec1 (mask index 5) and result element 1 is element 1 of
; %vec0. The result is consumed by an asm pinned to s[8:15]. For gfx900 and
; gfx90a the two defs are placed in overlapping ranges (s[8:13], then s[4:9])
; and no s_mov copies are expected; for gfx940 the second def is in s[0:5] and
; s[4:5] is copied into s[8:9].
8035define void @s_shuffle_v4i64_v3i64__5_1_u_u() {
8036; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u:
8037; GFX900:       ; %bb.0:
8038; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8039; GFX900-NEXT:    ;;#ASMSTART
8040; GFX900-NEXT:    ; def s[8:13]
8041; GFX900-NEXT:    ;;#ASMEND
8042; GFX900-NEXT:    ;;#ASMSTART
8043; GFX900-NEXT:    ; def s[4:9]
8044; GFX900-NEXT:    ;;#ASMEND
8045; GFX900-NEXT:    ;;#ASMSTART
8046; GFX900-NEXT:    ; use s[8:15]
8047; GFX900-NEXT:    ;;#ASMEND
8048; GFX900-NEXT:    s_setpc_b64 s[30:31]
8049;
8050; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u:
8051; GFX90A:       ; %bb.0:
8052; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8053; GFX90A-NEXT:    ;;#ASMSTART
8054; GFX90A-NEXT:    ; def s[8:13]
8055; GFX90A-NEXT:    ;;#ASMEND
8056; GFX90A-NEXT:    ;;#ASMSTART
8057; GFX90A-NEXT:    ; def s[4:9]
8058; GFX90A-NEXT:    ;;#ASMEND
8059; GFX90A-NEXT:    ;;#ASMSTART
8060; GFX90A-NEXT:    ; use s[8:15]
8061; GFX90A-NEXT:    ;;#ASMEND
8062; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8063;
8064; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_u_u:
8065; GFX940:       ; %bb.0:
8066; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8067; GFX940-NEXT:    ;;#ASMSTART
8068; GFX940-NEXT:    ; def s[8:13]
8069; GFX940-NEXT:    ;;#ASMEND
8070; GFX940-NEXT:    ;;#ASMSTART
8071; GFX940-NEXT:    ; def s[0:5]
8072; GFX940-NEXT:    ;;#ASMEND
8073; GFX940-NEXT:    s_mov_b32 s8, s4
8074; GFX940-NEXT:    s_mov_b32 s9, s5
8075; GFX940-NEXT:    ;;#ASMSTART
8076; GFX940-NEXT:    ; use s[8:15]
8077; GFX940-NEXT:    ;;#ASMEND
8078; GFX940-NEXT:    s_setpc_b64 s[30:31]
8079  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8080  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8081  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 poison, i32 poison>
8082  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8083  ret void
8084}
8085
; Two-source shuffle with mask <5, 2, poison, poison>: result element 0 is
; element 2 of %vec1 (mask index 5) and result element 1 is element 2 of
; %vec0. The result is consumed by an asm pinned to s[8:15]. For gfx900 and
; gfx90a the second def (s[4:9]) overlaps the first (s[8:13]) and only one
; 64-bit copy, s[12:13] into s[10:11], is expected; for gfx940 the second def
; is in s[0:5] and two 64-bit copies are expected.
8086define void @s_shuffle_v4i64_v3i64__5_2_u_u() {
8087; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u:
8088; GFX900:       ; %bb.0:
8089; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8090; GFX900-NEXT:    ;;#ASMSTART
8091; GFX900-NEXT:    ; def s[8:13]
8092; GFX900-NEXT:    ;;#ASMEND
8093; GFX900-NEXT:    ;;#ASMSTART
8094; GFX900-NEXT:    ; def s[4:9]
8095; GFX900-NEXT:    ;;#ASMEND
8096; GFX900-NEXT:    s_mov_b32 s10, s12
8097; GFX900-NEXT:    s_mov_b32 s11, s13
8098; GFX900-NEXT:    ;;#ASMSTART
8099; GFX900-NEXT:    ; use s[8:15]
8100; GFX900-NEXT:    ;;#ASMEND
8101; GFX900-NEXT:    s_setpc_b64 s[30:31]
8102;
8103; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u:
8104; GFX90A:       ; %bb.0:
8105; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8106; GFX90A-NEXT:    ;;#ASMSTART
8107; GFX90A-NEXT:    ; def s[8:13]
8108; GFX90A-NEXT:    ;;#ASMEND
8109; GFX90A-NEXT:    ;;#ASMSTART
8110; GFX90A-NEXT:    ; def s[4:9]
8111; GFX90A-NEXT:    ;;#ASMEND
8112; GFX90A-NEXT:    s_mov_b32 s10, s12
8113; GFX90A-NEXT:    s_mov_b32 s11, s13
8114; GFX90A-NEXT:    ;;#ASMSTART
8115; GFX90A-NEXT:    ; use s[8:15]
8116; GFX90A-NEXT:    ;;#ASMEND
8117; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8118;
8119; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_u_u:
8120; GFX940:       ; %bb.0:
8121; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8122; GFX940-NEXT:    ;;#ASMSTART
8123; GFX940-NEXT:    ; def s[8:13]
8124; GFX940-NEXT:    ;;#ASMEND
8125; GFX940-NEXT:    ;;#ASMSTART
8126; GFX940-NEXT:    ; def s[0:5]
8127; GFX940-NEXT:    ;;#ASMEND
8128; GFX940-NEXT:    s_mov_b32 s8, s12
8129; GFX940-NEXT:    s_mov_b32 s9, s13
8130; GFX940-NEXT:    s_mov_b32 s10, s4
8131; GFX940-NEXT:    s_mov_b32 s11, s5
8132; GFX940-NEXT:    ;;#ASMSTART
8133; GFX940-NEXT:    ; use s[8:15]
8134; GFX940-NEXT:    ;;#ASMEND
8135; GFX940-NEXT:    s_setpc_b64 s[30:31]
8136  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8137  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8138  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 poison, i32 poison>
8139  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8140  ret void
8141}
8142
; Shuffle with mask <5, 3, poison, poison>: both defined result elements come
; from %vec1 (element 0 from its element 2, element 1 from its element 0);
; %vec0 is never selected and the expected output for every target contains a
; single def. The result is consumed by an asm pinned to s[8:15]. For gfx900
; and gfx90a the def is in s[4:9] and only s[4:5] is copied into s[10:11]; for
; gfx940 the def is in s[0:5] and both elements are copied.
8143define void @s_shuffle_v4i64_v3i64__5_3_u_u() {
8144; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
8145; GFX900:       ; %bb.0:
8146; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8147; GFX900-NEXT:    ;;#ASMSTART
8148; GFX900-NEXT:    ; def s[4:9]
8149; GFX900-NEXT:    ;;#ASMEND
8150; GFX900-NEXT:    s_mov_b32 s10, s4
8151; GFX900-NEXT:    s_mov_b32 s11, s5
8152; GFX900-NEXT:    ;;#ASMSTART
8153; GFX900-NEXT:    ; use s[8:15]
8154; GFX900-NEXT:    ;;#ASMEND
8155; GFX900-NEXT:    s_setpc_b64 s[30:31]
8156;
8157; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
8158; GFX90A:       ; %bb.0:
8159; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8160; GFX90A-NEXT:    ;;#ASMSTART
8161; GFX90A-NEXT:    ; def s[4:9]
8162; GFX90A-NEXT:    ;;#ASMEND
8163; GFX90A-NEXT:    s_mov_b32 s10, s4
8164; GFX90A-NEXT:    s_mov_b32 s11, s5
8165; GFX90A-NEXT:    ;;#ASMSTART
8166; GFX90A-NEXT:    ; use s[8:15]
8167; GFX90A-NEXT:    ;;#ASMEND
8168; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8169;
8170; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_u_u:
8171; GFX940:       ; %bb.0:
8172; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8173; GFX940-NEXT:    ;;#ASMSTART
8174; GFX940-NEXT:    ; def s[0:5]
8175; GFX940-NEXT:    ;;#ASMEND
8176; GFX940-NEXT:    s_mov_b32 s8, s4
8177; GFX940-NEXT:    s_mov_b32 s9, s5
8178; GFX940-NEXT:    s_mov_b32 s10, s0
8179; GFX940-NEXT:    s_mov_b32 s11, s1
8180; GFX940-NEXT:    ;;#ASMSTART
8181; GFX940-NEXT:    ; use s[8:15]
8182; GFX940-NEXT:    ;;#ASMEND
8183; GFX940-NEXT:    s_setpc_b64 s[30:31]
8184  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8185  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8186  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 poison, i32 poison>
8187  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8188  ret void
8189}
8190
; Shuffle with mask <5, 4, poison, poison>: both defined result elements come
; from %vec1 (element 0 from its element 2, element 1 from its element 1);
; %vec0 is never selected. The result is consumed by an asm pinned to s[8:15].
; All three targets share one expected sequence: a single def in s[8:13], which
; leaves element 1 already in s[10:11], plus a copy of s[12:13] into s[8:9].
8191define void @s_shuffle_v4i64_v3i64__5_4_u_u() {
8192; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_u_u:
8193; GFX9:       ; %bb.0:
8194; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8195; GFX9-NEXT:    ;;#ASMSTART
8196; GFX9-NEXT:    ; def s[8:13]
8197; GFX9-NEXT:    ;;#ASMEND
8198; GFX9-NEXT:    s_mov_b32 s8, s12
8199; GFX9-NEXT:    s_mov_b32 s9, s13
8200; GFX9-NEXT:    ;;#ASMSTART
8201; GFX9-NEXT:    ; use s[8:15]
8202; GFX9-NEXT:    ;;#ASMEND
8203; GFX9-NEXT:    s_setpc_b64 s[30:31]
8204  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8205  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8206  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 poison, i32 poison>
8207  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8208  ret void
8209}
8210
; Shuffle with mask <5, 5, poison, poison>: result elements 0 and 1 are both
; element 2 of %vec1 (mask index 5); %vec0 is never selected. The result is
; consumed by an asm pinned to s[8:15]. The expected output for every target
; has a single def and copies the same 64-bit source pair into both s[8:9] and
; s[10:11]; only the source registers differ between targets.
8211define void @s_shuffle_v4i64_v3i64__5_5_u_u() {
8212; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
8213; GFX900:       ; %bb.0:
8214; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8215; GFX900-NEXT:    ;;#ASMSTART
8216; GFX900-NEXT:    ; def s[8:13]
8217; GFX900-NEXT:    ;;#ASMEND
8218; GFX900-NEXT:    s_mov_b32 s8, s12
8219; GFX900-NEXT:    s_mov_b32 s9, s13
8220; GFX900-NEXT:    s_mov_b32 s10, s12
8221; GFX900-NEXT:    s_mov_b32 s11, s13
8222; GFX900-NEXT:    ;;#ASMSTART
8223; GFX900-NEXT:    ; use s[8:15]
8224; GFX900-NEXT:    ;;#ASMEND
8225; GFX900-NEXT:    s_setpc_b64 s[30:31]
8226;
8227; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
8228; GFX90A:       ; %bb.0:
8229; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8230; GFX90A-NEXT:    ;;#ASMSTART
8231; GFX90A-NEXT:    ; def s[8:13]
8232; GFX90A-NEXT:    ;;#ASMEND
8233; GFX90A-NEXT:    s_mov_b32 s8, s12
8234; GFX90A-NEXT:    s_mov_b32 s9, s13
8235; GFX90A-NEXT:    s_mov_b32 s10, s12
8236; GFX90A-NEXT:    s_mov_b32 s11, s13
8237; GFX90A-NEXT:    ;;#ASMSTART
8238; GFX90A-NEXT:    ; use s[8:15]
8239; GFX90A-NEXT:    ;;#ASMEND
8240; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8241;
8242; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_u:
8243; GFX940:       ; %bb.0:
8244; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8245; GFX940-NEXT:    ;;#ASMSTART
8246; GFX940-NEXT:    ; def s[0:5]
8247; GFX940-NEXT:    ;;#ASMEND
8248; GFX940-NEXT:    s_mov_b32 s8, s4
8249; GFX940-NEXT:    s_mov_b32 s9, s5
8250; GFX940-NEXT:    s_mov_b32 s10, s4
8251; GFX940-NEXT:    s_mov_b32 s11, s5
8252; GFX940-NEXT:    ;;#ASMSTART
8253; GFX940-NEXT:    ; use s[8:15]
8254; GFX940-NEXT:    ;;#ASMEND
8255; GFX940-NEXT:    s_setpc_b64 s[30:31]
8256  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8257  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8258  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 poison>
8259  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8260  ret void
8261}
8262
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 0, poison>: lanes 0 and 1 take element 2 of %vec1, lane 2 takes
; element 0 of %vec0, lane 3 is unspecified. The result is pinned to s[8:15]
; by the register constraint on the final "use" asm. Both inputs are
; referenced, so the expected output contains two "; def" blocks.
8263define void @s_shuffle_v4i64_v3i64__5_5_0_u() {
8264; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
8265; GFX900:       ; %bb.0:
8266; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8267; GFX900-NEXT:    ;;#ASMSTART
8268; GFX900-NEXT:    ; def s[4:9]
8269; GFX900-NEXT:    ;;#ASMEND
8270; GFX900-NEXT:    ;;#ASMSTART
8271; GFX900-NEXT:    ; def s[8:13]
8272; GFX900-NEXT:    ;;#ASMEND
8273; GFX900-NEXT:    s_mov_b32 s8, s12
8274; GFX900-NEXT:    s_mov_b32 s9, s13
8275; GFX900-NEXT:    s_mov_b32 s10, s12
8276; GFX900-NEXT:    s_mov_b32 s11, s13
8277; GFX900-NEXT:    s_mov_b32 s12, s4
8278; GFX900-NEXT:    s_mov_b32 s13, s5
8279; GFX900-NEXT:    ;;#ASMSTART
8280; GFX900-NEXT:    ; use s[8:15]
8281; GFX900-NEXT:    ;;#ASMEND
8282; GFX900-NEXT:    s_setpc_b64 s[30:31]
8283;
8284; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
8285; GFX90A:       ; %bb.0:
8286; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8287; GFX90A-NEXT:    ;;#ASMSTART
8288; GFX90A-NEXT:    ; def s[4:9]
8289; GFX90A-NEXT:    ;;#ASMEND
8290; GFX90A-NEXT:    ;;#ASMSTART
8291; GFX90A-NEXT:    ; def s[8:13]
8292; GFX90A-NEXT:    ;;#ASMEND
8293; GFX90A-NEXT:    s_mov_b32 s8, s12
8294; GFX90A-NEXT:    s_mov_b32 s9, s13
8295; GFX90A-NEXT:    s_mov_b32 s10, s12
8296; GFX90A-NEXT:    s_mov_b32 s11, s13
8297; GFX90A-NEXT:    s_mov_b32 s12, s4
8298; GFX90A-NEXT:    s_mov_b32 s13, s5
8299; GFX90A-NEXT:    ;;#ASMSTART
8300; GFX90A-NEXT:    ; use s[8:15]
8301; GFX90A-NEXT:    ;;#ASMEND
8302; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8303;
8304; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_u:
8305; GFX940:       ; %bb.0:
8306; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8307; GFX940-NEXT:    ;;#ASMSTART
8308; GFX940-NEXT:    ; def s[8:13]
8309; GFX940-NEXT:    ;;#ASMEND
8310; GFX940-NEXT:    ;;#ASMSTART
8311; GFX940-NEXT:    ; def s[0:5]
8312; GFX940-NEXT:    ;;#ASMEND
8313; GFX940-NEXT:    s_mov_b32 s8, s12
8314; GFX940-NEXT:    s_mov_b32 s9, s13
8315; GFX940-NEXT:    s_mov_b32 s10, s12
8316; GFX940-NEXT:    s_mov_b32 s11, s13
8317; GFX940-NEXT:    s_mov_b32 s12, s0
8318; GFX940-NEXT:    s_mov_b32 s13, s1
8319; GFX940-NEXT:    ;;#ASMSTART
8320; GFX940-NEXT:    ; use s[8:15]
8321; GFX940-NEXT:    ;;#ASMEND
8322; GFX940-NEXT:    s_setpc_b64 s[30:31]
8323  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8324  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8325  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 poison>
8326  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8327  ret void
8328}
8329
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 1, poison>: lanes 0 and 1 take element 2 of %vec1, lane 2 takes
; element 1 of %vec0, lane 3 is unspecified. The result is pinned to s[8:15]
; by the register constraint on the final "use" asm. Both inputs are
; referenced, so the expected output contains two "; def" blocks.
8330define void @s_shuffle_v4i64_v3i64__5_5_1_u() {
8331; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u:
8332; GFX900:       ; %bb.0:
8333; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8334; GFX900-NEXT:    ;;#ASMSTART
8335; GFX900-NEXT:    ; def s[4:9]
8336; GFX900-NEXT:    ;;#ASMEND
8337; GFX900-NEXT:    ;;#ASMSTART
8338; GFX900-NEXT:    ; def s[8:13]
8339; GFX900-NEXT:    ;;#ASMEND
8340; GFX900-NEXT:    s_mov_b32 s8, s12
8341; GFX900-NEXT:    s_mov_b32 s9, s13
8342; GFX900-NEXT:    s_mov_b32 s10, s12
8343; GFX900-NEXT:    s_mov_b32 s11, s13
8344; GFX900-NEXT:    s_mov_b32 s12, s6
8345; GFX900-NEXT:    s_mov_b32 s13, s7
8346; GFX900-NEXT:    ;;#ASMSTART
8347; GFX900-NEXT:    ; use s[8:15]
8348; GFX900-NEXT:    ;;#ASMEND
8349; GFX900-NEXT:    s_setpc_b64 s[30:31]
8350;
8351; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u:
8352; GFX90A:       ; %bb.0:
8353; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8354; GFX90A-NEXT:    ;;#ASMSTART
8355; GFX90A-NEXT:    ; def s[4:9]
8356; GFX90A-NEXT:    ;;#ASMEND
8357; GFX90A-NEXT:    ;;#ASMSTART
8358; GFX90A-NEXT:    ; def s[8:13]
8359; GFX90A-NEXT:    ;;#ASMEND
8360; GFX90A-NEXT:    s_mov_b32 s8, s12
8361; GFX90A-NEXT:    s_mov_b32 s9, s13
8362; GFX90A-NEXT:    s_mov_b32 s10, s12
8363; GFX90A-NEXT:    s_mov_b32 s11, s13
8364; GFX90A-NEXT:    s_mov_b32 s12, s6
8365; GFX90A-NEXT:    s_mov_b32 s13, s7
8366; GFX90A-NEXT:    ;;#ASMSTART
8367; GFX90A-NEXT:    ; use s[8:15]
8368; GFX90A-NEXT:    ;;#ASMEND
8369; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8370;
8371; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_u:
8372; GFX940:       ; %bb.0:
8373; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8374; GFX940-NEXT:    ;;#ASMSTART
8375; GFX940-NEXT:    ; def s[8:13]
8376; GFX940-NEXT:    ;;#ASMEND
8377; GFX940-NEXT:    ;;#ASMSTART
8378; GFX940-NEXT:    ; def s[0:5]
8379; GFX940-NEXT:    ;;#ASMEND
8380; GFX940-NEXT:    s_mov_b32 s8, s12
8381; GFX940-NEXT:    s_mov_b32 s9, s13
8382; GFX940-NEXT:    s_mov_b32 s10, s12
8383; GFX940-NEXT:    s_mov_b32 s11, s13
8384; GFX940-NEXT:    s_mov_b32 s12, s2
8385; GFX940-NEXT:    s_mov_b32 s13, s3
8386; GFX940-NEXT:    ;;#ASMSTART
8387; GFX940-NEXT:    ; use s[8:15]
8388; GFX940-NEXT:    ;;#ASMEND
8389; GFX940-NEXT:    s_setpc_b64 s[30:31]
8390  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8391  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8392  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 poison>
8393  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8394  ret void
8395}
8396
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 2, poison>: lanes 0 and 1 take element 2 of %vec1, lane 2 takes
; element 2 of %vec0, lane 3 is unspecified. The result is pinned to s[8:15]
; by the register constraint on the final "use" asm. The expected output has
; two "; def" blocks but only copies into s8-s11; no copy into s12/s13 is
; expected for lane 2.
8397define void @s_shuffle_v4i64_v3i64__5_5_2_u() {
8398; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u:
8399; GFX900:       ; %bb.0:
8400; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8401; GFX900-NEXT:    ;;#ASMSTART
8402; GFX900-NEXT:    ; def s[8:13]
8403; GFX900-NEXT:    ;;#ASMEND
8404; GFX900-NEXT:    ;;#ASMSTART
8405; GFX900-NEXT:    ; def s[16:21]
8406; GFX900-NEXT:    ;;#ASMEND
8407; GFX900-NEXT:    s_mov_b32 s8, s20
8408; GFX900-NEXT:    s_mov_b32 s9, s21
8409; GFX900-NEXT:    s_mov_b32 s10, s20
8410; GFX900-NEXT:    s_mov_b32 s11, s21
8411; GFX900-NEXT:    ;;#ASMSTART
8412; GFX900-NEXT:    ; use s[8:15]
8413; GFX900-NEXT:    ;;#ASMEND
8414; GFX900-NEXT:    s_setpc_b64 s[30:31]
8415;
8416; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u:
8417; GFX90A:       ; %bb.0:
8418; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8419; GFX90A-NEXT:    ;;#ASMSTART
8420; GFX90A-NEXT:    ; def s[8:13]
8421; GFX90A-NEXT:    ;;#ASMEND
8422; GFX90A-NEXT:    ;;#ASMSTART
8423; GFX90A-NEXT:    ; def s[16:21]
8424; GFX90A-NEXT:    ;;#ASMEND
8425; GFX90A-NEXT:    s_mov_b32 s8, s20
8426; GFX90A-NEXT:    s_mov_b32 s9, s21
8427; GFX90A-NEXT:    s_mov_b32 s10, s20
8428; GFX90A-NEXT:    s_mov_b32 s11, s21
8429; GFX90A-NEXT:    ;;#ASMSTART
8430; GFX90A-NEXT:    ; use s[8:15]
8431; GFX90A-NEXT:    ;;#ASMEND
8432; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8433;
8434; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_u:
8435; GFX940:       ; %bb.0:
8436; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8437; GFX940-NEXT:    ;;#ASMSTART
8438; GFX940-NEXT:    ; def s[8:13]
8439; GFX940-NEXT:    ;;#ASMEND
8440; GFX940-NEXT:    ;;#ASMSTART
8441; GFX940-NEXT:    ; def s[0:5]
8442; GFX940-NEXT:    ;;#ASMEND
8443; GFX940-NEXT:    s_mov_b32 s8, s4
8444; GFX940-NEXT:    s_mov_b32 s9, s5
8445; GFX940-NEXT:    s_mov_b32 s10, s4
8446; GFX940-NEXT:    s_mov_b32 s11, s5
8447; GFX940-NEXT:    ;;#ASMSTART
8448; GFX940-NEXT:    ; use s[8:15]
8449; GFX940-NEXT:    ;;#ASMEND
8450; GFX940-NEXT:    s_setpc_b64 s[30:31]
8451  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8452  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8453  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 poison>
8454  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8455  ret void
8456}
8457
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 3, poison>: lanes 0 and 1 take element 2 of %vec1, lane 2 takes
; element 0 of %vec1, lane 3 is unspecified. The result is pinned to s[8:15]
; by the register constraint on the final "use" asm. Only %vec1 is referenced
; by the mask, and the expected output contains a single "; def" block.
8458define void @s_shuffle_v4i64_v3i64__5_5_3_u() {
8459; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
8460; GFX900:       ; %bb.0:
8461; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8462; GFX900-NEXT:    ;;#ASMSTART
8463; GFX900-NEXT:    ; def s[12:17]
8464; GFX900-NEXT:    ;;#ASMEND
8465; GFX900-NEXT:    s_mov_b32 s8, s16
8466; GFX900-NEXT:    s_mov_b32 s9, s17
8467; GFX900-NEXT:    s_mov_b32 s10, s16
8468; GFX900-NEXT:    s_mov_b32 s11, s17
8469; GFX900-NEXT:    ;;#ASMSTART
8470; GFX900-NEXT:    ; use s[8:15]
8471; GFX900-NEXT:    ;;#ASMEND
8472; GFX900-NEXT:    s_setpc_b64 s[30:31]
8473;
8474; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
8475; GFX90A:       ; %bb.0:
8476; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8477; GFX90A-NEXT:    ;;#ASMSTART
8478; GFX90A-NEXT:    ; def s[12:17]
8479; GFX90A-NEXT:    ;;#ASMEND
8480; GFX90A-NEXT:    s_mov_b32 s8, s16
8481; GFX90A-NEXT:    s_mov_b32 s9, s17
8482; GFX90A-NEXT:    s_mov_b32 s10, s16
8483; GFX90A-NEXT:    s_mov_b32 s11, s17
8484; GFX90A-NEXT:    ;;#ASMSTART
8485; GFX90A-NEXT:    ; use s[8:15]
8486; GFX90A-NEXT:    ;;#ASMEND
8487; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8488;
8489; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_u:
8490; GFX940:       ; %bb.0:
8491; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8492; GFX940-NEXT:    ;;#ASMSTART
8493; GFX940-NEXT:    ; def s[0:5]
8494; GFX940-NEXT:    ;;#ASMEND
8495; GFX940-NEXT:    s_mov_b32 s8, s4
8496; GFX940-NEXT:    s_mov_b32 s9, s5
8497; GFX940-NEXT:    s_mov_b32 s10, s4
8498; GFX940-NEXT:    s_mov_b32 s11, s5
8499; GFX940-NEXT:    s_mov_b32 s12, s0
8500; GFX940-NEXT:    s_mov_b32 s13, s1
8501; GFX940-NEXT:    ;;#ASMSTART
8502; GFX940-NEXT:    ; use s[8:15]
8503; GFX940-NEXT:    ;;#ASMEND
8504; GFX940-NEXT:    s_setpc_b64 s[30:31]
8505  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8506  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8507  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 poison>
8508  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8509  ret void
8510}
8511
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 4, poison>: lanes 0 and 1 take element 2 of %vec1, lane 2 takes
; element 1 of %vec1, lane 3 is unspecified. The result is pinned to s[8:15]
; by the register constraint on the final "use" asm. Only %vec1 is referenced
; by the mask, and the expected output contains a single "; def" block.
8512define void @s_shuffle_v4i64_v3i64__5_5_4_u() {
8513; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
8514; GFX900:       ; %bb.0:
8515; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8516; GFX900-NEXT:    ;;#ASMSTART
8517; GFX900-NEXT:    ; def s[12:17]
8518; GFX900-NEXT:    ;;#ASMEND
8519; GFX900-NEXT:    s_mov_b32 s8, s16
8520; GFX900-NEXT:    s_mov_b32 s9, s17
8521; GFX900-NEXT:    s_mov_b32 s10, s16
8522; GFX900-NEXT:    s_mov_b32 s11, s17
8523; GFX900-NEXT:    s_mov_b32 s12, s14
8524; GFX900-NEXT:    s_mov_b32 s13, s15
8525; GFX900-NEXT:    ;;#ASMSTART
8526; GFX900-NEXT:    ; use s[8:15]
8527; GFX900-NEXT:    ;;#ASMEND
8528; GFX900-NEXT:    s_setpc_b64 s[30:31]
8529;
8530; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
8531; GFX90A:       ; %bb.0:
8532; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8533; GFX90A-NEXT:    ;;#ASMSTART
8534; GFX90A-NEXT:    ; def s[12:17]
8535; GFX90A-NEXT:    ;;#ASMEND
8536; GFX90A-NEXT:    s_mov_b32 s8, s16
8537; GFX90A-NEXT:    s_mov_b32 s9, s17
8538; GFX90A-NEXT:    s_mov_b32 s10, s16
8539; GFX90A-NEXT:    s_mov_b32 s11, s17
8540; GFX90A-NEXT:    s_mov_b32 s12, s14
8541; GFX90A-NEXT:    s_mov_b32 s13, s15
8542; GFX90A-NEXT:    ;;#ASMSTART
8543; GFX90A-NEXT:    ; use s[8:15]
8544; GFX90A-NEXT:    ;;#ASMEND
8545; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8546;
8547; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_u:
8548; GFX940:       ; %bb.0:
8549; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8550; GFX940-NEXT:    ;;#ASMSTART
8551; GFX940-NEXT:    ; def s[0:5]
8552; GFX940-NEXT:    ;;#ASMEND
8553; GFX940-NEXT:    s_mov_b32 s8, s4
8554; GFX940-NEXT:    s_mov_b32 s9, s5
8555; GFX940-NEXT:    s_mov_b32 s10, s4
8556; GFX940-NEXT:    s_mov_b32 s11, s5
8557; GFX940-NEXT:    s_mov_b32 s12, s2
8558; GFX940-NEXT:    s_mov_b32 s13, s3
8559; GFX940-NEXT:    ;;#ASMSTART
8560; GFX940-NEXT:    ; use s[8:15]
8561; GFX940-NEXT:    ;;#ASMEND
8562; GFX940-NEXT:    s_setpc_b64 s[30:31]
8563  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8564  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8565  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 poison>
8566  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8567  ret void
8568}
8569
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 5, poison>: lanes 0, 1 and 2 all take element 2 of %vec1, lane 3 is
; unspecified. The result is pinned to s[8:15] by the register constraint on
; the final "use" asm. The expectations below use the shared GFX9 prefix, so
; one set of checks covers every run configuration; they contain a single
; "; def" block and copies into s8-s11 only.
8570define void @s_shuffle_v4i64_v3i64__5_5_5_u() {
8571; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_u:
8572; GFX9:       ; %bb.0:
8573; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8574; GFX9-NEXT:    ;;#ASMSTART
8575; GFX9-NEXT:    ; def s[8:13]
8576; GFX9-NEXT:    ;;#ASMEND
8577; GFX9-NEXT:    s_mov_b32 s8, s12
8578; GFX9-NEXT:    s_mov_b32 s9, s13
8579; GFX9-NEXT:    s_mov_b32 s10, s12
8580; GFX9-NEXT:    s_mov_b32 s11, s13
8581; GFX9-NEXT:    ;;#ASMSTART
8582; GFX9-NEXT:    ; use s[8:15]
8583; GFX9-NEXT:    ;;#ASMEND
8584; GFX9-NEXT:    s_setpc_b64 s[30:31]
8585  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8586  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8587  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 poison>
8588  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8589  ret void
8590}
8591
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 5, 0>: lanes 0, 1 and 2 take element 2 of %vec1 and lane 3 takes
; element 0 of %vec0. The result is pinned to s[8:15] by the register
; constraint on the final "use" asm. Both inputs are referenced, so the
; expected output contains two "; def" blocks.
8592define void @s_shuffle_v4i64_v3i64__5_5_5_0() {
8593; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0:
8594; GFX900:       ; %bb.0:
8595; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8596; GFX900-NEXT:    ;;#ASMSTART
8597; GFX900-NEXT:    ; def s[4:9]
8598; GFX900-NEXT:    ;;#ASMEND
8599; GFX900-NEXT:    ;;#ASMSTART
8600; GFX900-NEXT:    ; def s[8:13]
8601; GFX900-NEXT:    ;;#ASMEND
8602; GFX900-NEXT:    s_mov_b32 s8, s12
8603; GFX900-NEXT:    s_mov_b32 s9, s13
8604; GFX900-NEXT:    s_mov_b32 s10, s12
8605; GFX900-NEXT:    s_mov_b32 s11, s13
8606; GFX900-NEXT:    s_mov_b32 s14, s4
8607; GFX900-NEXT:    s_mov_b32 s15, s5
8608; GFX900-NEXT:    ;;#ASMSTART
8609; GFX900-NEXT:    ; use s[8:15]
8610; GFX900-NEXT:    ;;#ASMEND
8611; GFX900-NEXT:    s_setpc_b64 s[30:31]
8612;
8613; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0:
8614; GFX90A:       ; %bb.0:
8615; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8616; GFX90A-NEXT:    ;;#ASMSTART
8617; GFX90A-NEXT:    ; def s[4:9]
8618; GFX90A-NEXT:    ;;#ASMEND
8619; GFX90A-NEXT:    ;;#ASMSTART
8620; GFX90A-NEXT:    ; def s[8:13]
8621; GFX90A-NEXT:    ;;#ASMEND
8622; GFX90A-NEXT:    s_mov_b32 s8, s12
8623; GFX90A-NEXT:    s_mov_b32 s9, s13
8624; GFX90A-NEXT:    s_mov_b32 s10, s12
8625; GFX90A-NEXT:    s_mov_b32 s11, s13
8626; GFX90A-NEXT:    s_mov_b32 s14, s4
8627; GFX90A-NEXT:    s_mov_b32 s15, s5
8628; GFX90A-NEXT:    ;;#ASMSTART
8629; GFX90A-NEXT:    ; use s[8:15]
8630; GFX90A-NEXT:    ;;#ASMEND
8631; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8632;
8633; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_0:
8634; GFX940:       ; %bb.0:
8635; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8636; GFX940-NEXT:    ;;#ASMSTART
8637; GFX940-NEXT:    ; def s[8:13]
8638; GFX940-NEXT:    ;;#ASMEND
8639; GFX940-NEXT:    ;;#ASMSTART
8640; GFX940-NEXT:    ; def s[0:5]
8641; GFX940-NEXT:    ;;#ASMEND
8642; GFX940-NEXT:    s_mov_b32 s8, s12
8643; GFX940-NEXT:    s_mov_b32 s9, s13
8644; GFX940-NEXT:    s_mov_b32 s10, s12
8645; GFX940-NEXT:    s_mov_b32 s11, s13
8646; GFX940-NEXT:    s_mov_b32 s14, s0
8647; GFX940-NEXT:    s_mov_b32 s15, s1
8648; GFX940-NEXT:    ;;#ASMSTART
8649; GFX940-NEXT:    ; use s[8:15]
8650; GFX940-NEXT:    ;;#ASMEND
8651; GFX940-NEXT:    s_setpc_b64 s[30:31]
8652  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8653  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8654  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 0>
8655  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8656  ret void
8657}
8658
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 5, 1>: lanes 0, 1 and 2 take element 2 of %vec1 and lane 3 takes
; element 1 of %vec0. The result is pinned to s[8:15] by the register
; constraint on the final "use" asm. Both inputs are referenced, so the
; expected output contains two "; def" blocks.
8659define void @s_shuffle_v4i64_v3i64__5_5_5_1() {
8660; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
8661; GFX900:       ; %bb.0:
8662; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8663; GFX900-NEXT:    ;;#ASMSTART
8664; GFX900-NEXT:    ; def s[4:9]
8665; GFX900-NEXT:    ;;#ASMEND
8666; GFX900-NEXT:    ;;#ASMSTART
8667; GFX900-NEXT:    ; def s[8:13]
8668; GFX900-NEXT:    ;;#ASMEND
8669; GFX900-NEXT:    s_mov_b32 s8, s12
8670; GFX900-NEXT:    s_mov_b32 s9, s13
8671; GFX900-NEXT:    s_mov_b32 s10, s12
8672; GFX900-NEXT:    s_mov_b32 s11, s13
8673; GFX900-NEXT:    s_mov_b32 s14, s6
8674; GFX900-NEXT:    s_mov_b32 s15, s7
8675; GFX900-NEXT:    ;;#ASMSTART
8676; GFX900-NEXT:    ; use s[8:15]
8677; GFX900-NEXT:    ;;#ASMEND
8678; GFX900-NEXT:    s_setpc_b64 s[30:31]
8679;
8680; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
8681; GFX90A:       ; %bb.0:
8682; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8683; GFX90A-NEXT:    ;;#ASMSTART
8684; GFX90A-NEXT:    ; def s[4:9]
8685; GFX90A-NEXT:    ;;#ASMEND
8686; GFX90A-NEXT:    ;;#ASMSTART
8687; GFX90A-NEXT:    ; def s[8:13]
8688; GFX90A-NEXT:    ;;#ASMEND
8689; GFX90A-NEXT:    s_mov_b32 s8, s12
8690; GFX90A-NEXT:    s_mov_b32 s9, s13
8691; GFX90A-NEXT:    s_mov_b32 s10, s12
8692; GFX90A-NEXT:    s_mov_b32 s11, s13
8693; GFX90A-NEXT:    s_mov_b32 s14, s6
8694; GFX90A-NEXT:    s_mov_b32 s15, s7
8695; GFX90A-NEXT:    ;;#ASMSTART
8696; GFX90A-NEXT:    ; use s[8:15]
8697; GFX90A-NEXT:    ;;#ASMEND
8698; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8699;
8700; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_1:
8701; GFX940:       ; %bb.0:
8702; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8703; GFX940-NEXT:    ;;#ASMSTART
8704; GFX940-NEXT:    ; def s[8:13]
8705; GFX940-NEXT:    ;;#ASMEND
8706; GFX940-NEXT:    ;;#ASMSTART
8707; GFX940-NEXT:    ; def s[0:5]
8708; GFX940-NEXT:    ;;#ASMEND
8709; GFX940-NEXT:    s_mov_b32 s8, s12
8710; GFX940-NEXT:    s_mov_b32 s9, s13
8711; GFX940-NEXT:    s_mov_b32 s10, s12
8712; GFX940-NEXT:    s_mov_b32 s11, s13
8713; GFX940-NEXT:    s_mov_b32 s14, s2
8714; GFX940-NEXT:    s_mov_b32 s15, s3
8715; GFX940-NEXT:    ;;#ASMSTART
8716; GFX940-NEXT:    ; use s[8:15]
8717; GFX940-NEXT:    ;;#ASMEND
8718; GFX940-NEXT:    s_setpc_b64 s[30:31]
8719  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8720  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8721  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 1>
8722  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8723  ret void
8724}
8725
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 5, 2>: lanes 0, 1 and 2 take element 2 of %vec1 and lane 3 takes
; element 2 of %vec0. The result is pinned to s[8:15] by the register
; constraint on the final "use" asm. Both inputs are referenced, so the
; expected output contains two "; def" blocks.
8726define void @s_shuffle_v4i64_v3i64__5_5_5_2() {
8727; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
8728; GFX900:       ; %bb.0:
8729; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8730; GFX900-NEXT:    ;;#ASMSTART
8731; GFX900-NEXT:    ; def s[12:17]
8732; GFX900-NEXT:    ;;#ASMEND
8733; GFX900-NEXT:    ;;#ASMSTART
8734; GFX900-NEXT:    ; def s[8:13]
8735; GFX900-NEXT:    ;;#ASMEND
8736; GFX900-NEXT:    s_mov_b32 s8, s12
8737; GFX900-NEXT:    s_mov_b32 s9, s13
8738; GFX900-NEXT:    s_mov_b32 s10, s12
8739; GFX900-NEXT:    s_mov_b32 s11, s13
8740; GFX900-NEXT:    s_mov_b32 s14, s16
8741; GFX900-NEXT:    s_mov_b32 s15, s17
8742; GFX900-NEXT:    ;;#ASMSTART
8743; GFX900-NEXT:    ; use s[8:15]
8744; GFX900-NEXT:    ;;#ASMEND
8745; GFX900-NEXT:    s_setpc_b64 s[30:31]
8746;
8747; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
8748; GFX90A:       ; %bb.0:
8749; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8750; GFX90A-NEXT:    ;;#ASMSTART
8751; GFX90A-NEXT:    ; def s[12:17]
8752; GFX90A-NEXT:    ;;#ASMEND
8753; GFX90A-NEXT:    ;;#ASMSTART
8754; GFX90A-NEXT:    ; def s[8:13]
8755; GFX90A-NEXT:    ;;#ASMEND
8756; GFX90A-NEXT:    s_mov_b32 s8, s12
8757; GFX90A-NEXT:    s_mov_b32 s9, s13
8758; GFX90A-NEXT:    s_mov_b32 s10, s12
8759; GFX90A-NEXT:    s_mov_b32 s11, s13
8760; GFX90A-NEXT:    s_mov_b32 s14, s16
8761; GFX90A-NEXT:    s_mov_b32 s15, s17
8762; GFX90A-NEXT:    ;;#ASMSTART
8763; GFX90A-NEXT:    ; use s[8:15]
8764; GFX90A-NEXT:    ;;#ASMEND
8765; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8766;
8767; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_2:
8768; GFX940:       ; %bb.0:
8769; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8770; GFX940-NEXT:    ;;#ASMSTART
8771; GFX940-NEXT:    ; def s[8:13]
8772; GFX940-NEXT:    ;;#ASMEND
8773; GFX940-NEXT:    ;;#ASMSTART
8774; GFX940-NEXT:    ; def s[0:5]
8775; GFX940-NEXT:    ;;#ASMEND
8776; GFX940-NEXT:    s_mov_b32 s8, s12
8777; GFX940-NEXT:    s_mov_b32 s9, s13
8778; GFX940-NEXT:    s_mov_b32 s10, s12
8779; GFX940-NEXT:    s_mov_b32 s11, s13
8780; GFX940-NEXT:    s_mov_b32 s14, s4
8781; GFX940-NEXT:    s_mov_b32 s15, s5
8782; GFX940-NEXT:    ;;#ASMSTART
8783; GFX940-NEXT:    ; use s[8:15]
8784; GFX940-NEXT:    ;;#ASMEND
8785; GFX940-NEXT:    s_setpc_b64 s[30:31]
8786  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8787  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8788  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 2>
8789  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8790  ret void
8791}
8792
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 5, 3>: lanes 0, 1 and 2 take element 2 of %vec1 and lane 3 takes
; element 0 of %vec1. The result is pinned to s[8:15] by the register
; constraint on the final "use" asm. Only %vec1 is referenced by the mask,
; and the expected output contains a single "; def" block.
8793define void @s_shuffle_v4i64_v3i64__5_5_5_3() {
8794; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
8795; GFX900:       ; %bb.0:
8796; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8797; GFX900-NEXT:    ;;#ASMSTART
8798; GFX900-NEXT:    ; def s[16:21]
8799; GFX900-NEXT:    ;;#ASMEND
8800; GFX900-NEXT:    s_mov_b32 s8, s20
8801; GFX900-NEXT:    s_mov_b32 s9, s21
8802; GFX900-NEXT:    s_mov_b32 s10, s20
8803; GFX900-NEXT:    s_mov_b32 s11, s21
8804; GFX900-NEXT:    s_mov_b32 s12, s20
8805; GFX900-NEXT:    s_mov_b32 s13, s21
8806; GFX900-NEXT:    s_mov_b32 s14, s16
8807; GFX900-NEXT:    s_mov_b32 s15, s17
8808; GFX900-NEXT:    ;;#ASMSTART
8809; GFX900-NEXT:    ; use s[8:15]
8810; GFX900-NEXT:    ;;#ASMEND
8811; GFX900-NEXT:    s_setpc_b64 s[30:31]
8812;
8813; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
8814; GFX90A:       ; %bb.0:
8815; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8816; GFX90A-NEXT:    ;;#ASMSTART
8817; GFX90A-NEXT:    ; def s[16:21]
8818; GFX90A-NEXT:    ;;#ASMEND
8819; GFX90A-NEXT:    s_mov_b32 s8, s20
8820; GFX90A-NEXT:    s_mov_b32 s9, s21
8821; GFX90A-NEXT:    s_mov_b32 s10, s20
8822; GFX90A-NEXT:    s_mov_b32 s11, s21
8823; GFX90A-NEXT:    s_mov_b32 s12, s20
8824; GFX90A-NEXT:    s_mov_b32 s13, s21
8825; GFX90A-NEXT:    s_mov_b32 s14, s16
8826; GFX90A-NEXT:    s_mov_b32 s15, s17
8827; GFX90A-NEXT:    ;;#ASMSTART
8828; GFX90A-NEXT:    ; use s[8:15]
8829; GFX90A-NEXT:    ;;#ASMEND
8830; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8831;
8832; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_3:
8833; GFX940:       ; %bb.0:
8834; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8835; GFX940-NEXT:    ;;#ASMSTART
8836; GFX940-NEXT:    ; def s[0:5]
8837; GFX940-NEXT:    ;;#ASMEND
8838; GFX940-NEXT:    s_mov_b32 s8, s4
8839; GFX940-NEXT:    s_mov_b32 s9, s5
8840; GFX940-NEXT:    s_mov_b32 s10, s4
8841; GFX940-NEXT:    s_mov_b32 s11, s5
8842; GFX940-NEXT:    s_mov_b32 s12, s4
8843; GFX940-NEXT:    s_mov_b32 s13, s5
8844; GFX940-NEXT:    s_mov_b32 s14, s0
8845; GFX940-NEXT:    s_mov_b32 s15, s1
8846; GFX940-NEXT:    ;;#ASMSTART
8847; GFX940-NEXT:    ; use s[8:15]
8848; GFX940-NEXT:    ;;#ASMEND
8849; GFX940-NEXT:    s_setpc_b64 s[30:31]
8850  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8851  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8852  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 3>
8853  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8854  ret void
8855}
8856
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 5, 4>: lanes 0, 1 and 2 take element 2 of %vec1 and lane 3 takes
; element 1 of %vec1. The result is pinned to s[8:15] by the register
; constraint on the final "use" asm. Only %vec1 is referenced by the mask,
; and the expected output contains a single "; def" block.
8857define void @s_shuffle_v4i64_v3i64__5_5_5_4() {
8858; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
8859; GFX900:       ; %bb.0:
8860; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8861; GFX900-NEXT:    ;;#ASMSTART
8862; GFX900-NEXT:    ; def s[12:17]
8863; GFX900-NEXT:    ;;#ASMEND
8864; GFX900-NEXT:    s_mov_b32 s8, s16
8865; GFX900-NEXT:    s_mov_b32 s9, s17
8866; GFX900-NEXT:    s_mov_b32 s10, s16
8867; GFX900-NEXT:    s_mov_b32 s11, s17
8868; GFX900-NEXT:    s_mov_b32 s12, s16
8869; GFX900-NEXT:    s_mov_b32 s13, s17
8870; GFX900-NEXT:    ;;#ASMSTART
8871; GFX900-NEXT:    ; use s[8:15]
8872; GFX900-NEXT:    ;;#ASMEND
8873; GFX900-NEXT:    s_setpc_b64 s[30:31]
8874;
8875; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
8876; GFX90A:       ; %bb.0:
8877; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8878; GFX90A-NEXT:    ;;#ASMSTART
8879; GFX90A-NEXT:    ; def s[12:17]
8880; GFX90A-NEXT:    ;;#ASMEND
8881; GFX90A-NEXT:    s_mov_b32 s8, s16
8882; GFX90A-NEXT:    s_mov_b32 s9, s17
8883; GFX90A-NEXT:    s_mov_b32 s10, s16
8884; GFX90A-NEXT:    s_mov_b32 s11, s17
8885; GFX90A-NEXT:    s_mov_b32 s12, s16
8886; GFX90A-NEXT:    s_mov_b32 s13, s17
8887; GFX90A-NEXT:    ;;#ASMSTART
8888; GFX90A-NEXT:    ; use s[8:15]
8889; GFX90A-NEXT:    ;;#ASMEND
8890; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8891;
8892; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_5_4:
8893; GFX940:       ; %bb.0:
8894; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8895; GFX940-NEXT:    ;;#ASMSTART
8896; GFX940-NEXT:    ; def s[0:5]
8897; GFX940-NEXT:    ;;#ASMEND
8898; GFX940-NEXT:    s_mov_b32 s8, s4
8899; GFX940-NEXT:    s_mov_b32 s9, s5
8900; GFX940-NEXT:    s_mov_b32 s10, s4
8901; GFX940-NEXT:    s_mov_b32 s11, s5
8902; GFX940-NEXT:    s_mov_b32 s12, s4
8903; GFX940-NEXT:    s_mov_b32 s13, s5
8904; GFX940-NEXT:    s_mov_b32 s14, s2
8905; GFX940-NEXT:    s_mov_b32 s15, s3
8906; GFX940-NEXT:    ;;#ASMSTART
8907; GFX940-NEXT:    ; use s[8:15]
8908; GFX940-NEXT:    ;;#ASMEND
8909; GFX940-NEXT:    s_setpc_b64 s[30:31]
8910  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8911  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8912  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 4>
8913  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8914  ret void
8915}
8916
; SGPR shuffle of two <3 x i64> inline-asm results into a <4 x i64> with mask
; <5, 5, 5, 5>: every result lane takes element 2 of %vec1 (a splat). The
; result is pinned to s[8:15] by the register constraint on the final "use"
; asm. The expectations below use the shared GFX9 prefix, so one set of checks
; covers every run configuration; they contain a single "; def" block and no
; copy into s12/s13.
8917define void @s_shuffle_v4i64_v3i64__5_5_5_5() {
8918; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_5_5_5:
8919; GFX9:       ; %bb.0:
8920; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8921; GFX9-NEXT:    ;;#ASMSTART
8922; GFX9-NEXT:    ; def s[8:13]
8923; GFX9-NEXT:    ;;#ASMEND
8924; GFX9-NEXT:    s_mov_b32 s8, s12
8925; GFX9-NEXT:    s_mov_b32 s9, s13
8926; GFX9-NEXT:    s_mov_b32 s10, s12
8927; GFX9-NEXT:    s_mov_b32 s11, s13
8928; GFX9-NEXT:    s_mov_b32 s14, s12
8929; GFX9-NEXT:    s_mov_b32 s15, s13
8930; GFX9-NEXT:    ;;#ASMSTART
8931; GFX9-NEXT:    ; use s[8:15]
8932; GFX9-NEXT:    ;;#ASMEND
8933; GFX9-NEXT:    s_setpc_b64 s[30:31]
8934  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8935  %vec1 = call <3 x i64> asm "; def $0", "=s"()
8936  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 5, i32 5>
8937  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8938  ret void
8939}
8940
; SGPR shuffle of a single <3 x i64> inline-asm result (second operand is
; poison) into a <4 x i64> with mask <poison, 0, 0, 0>: lane 0 is unspecified
; and lanes 1, 2 and 3 all take element 0 of %vec0. The result is pinned to
; s[8:15] by the register constraint on the final "use" asm. The expected
; output copies into s10-s15 only; no copy into s8/s9 is expected.
8941define void @s_shuffle_v4i64_v3i64__u_0_0_0() {
8942; GFX900-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
8943; GFX900:       ; %bb.0:
8944; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8945; GFX900-NEXT:    ;;#ASMSTART
8946; GFX900-NEXT:    ; def s[4:9]
8947; GFX900-NEXT:    ;;#ASMEND
8948; GFX900-NEXT:    s_mov_b32 s10, s4
8949; GFX900-NEXT:    s_mov_b32 s11, s5
8950; GFX900-NEXT:    s_mov_b32 s12, s4
8951; GFX900-NEXT:    s_mov_b32 s13, s5
8952; GFX900-NEXT:    s_mov_b32 s14, s4
8953; GFX900-NEXT:    s_mov_b32 s15, s5
8954; GFX900-NEXT:    ;;#ASMSTART
8955; GFX900-NEXT:    ; use s[8:15]
8956; GFX900-NEXT:    ;;#ASMEND
8957; GFX900-NEXT:    s_setpc_b64 s[30:31]
8958;
8959; GFX90A-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
8960; GFX90A:       ; %bb.0:
8961; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8962; GFX90A-NEXT:    ;;#ASMSTART
8963; GFX90A-NEXT:    ; def s[4:9]
8964; GFX90A-NEXT:    ;;#ASMEND
8965; GFX90A-NEXT:    s_mov_b32 s10, s4
8966; GFX90A-NEXT:    s_mov_b32 s11, s5
8967; GFX90A-NEXT:    s_mov_b32 s12, s4
8968; GFX90A-NEXT:    s_mov_b32 s13, s5
8969; GFX90A-NEXT:    s_mov_b32 s14, s4
8970; GFX90A-NEXT:    s_mov_b32 s15, s5
8971; GFX90A-NEXT:    ;;#ASMSTART
8972; GFX90A-NEXT:    ; use s[8:15]
8973; GFX90A-NEXT:    ;;#ASMEND
8974; GFX90A-NEXT:    s_setpc_b64 s[30:31]
8975;
8976; GFX940-LABEL: s_shuffle_v4i64_v3i64__u_0_0_0:
8977; GFX940:       ; %bb.0:
8978; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
8979; GFX940-NEXT:    ;;#ASMSTART
8980; GFX940-NEXT:    ; def s[0:5]
8981; GFX940-NEXT:    ;;#ASMEND
8982; GFX940-NEXT:    s_mov_b32 s10, s0
8983; GFX940-NEXT:    s_mov_b32 s11, s1
8984; GFX940-NEXT:    s_mov_b32 s12, s0
8985; GFX940-NEXT:    s_mov_b32 s13, s1
8986; GFX940-NEXT:    s_mov_b32 s14, s0
8987; GFX940-NEXT:    s_mov_b32 s15, s1
8988; GFX940-NEXT:    ;;#ASMSTART
8989; GFX940-NEXT:    ; use s[8:15]
8990; GFX940-NEXT:    ;;#ASMEND
8991; GFX940-NEXT:    s_setpc_b64 s[30:31]
8992  %vec0 = call <3 x i64> asm "; def $0", "=s"()
8993  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 0, i32 0, i32 0>
8994  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
8995  ret void
8996}
8997
; SGPR shuffle of a single <3 x i64> inline-asm result (second operand is
; poison) into a <4 x i64> with an all-zero mask: every result lane takes
; element 0 of %vec0 (a splat). The result is pinned to s[8:15] by the
; register constraint on the final "use" asm. The expectations below use the
; shared GFX9 prefix, so one set of checks covers every run configuration;
; they copy s8/s9 into s10-s15 and expect no copy into s8/s9 themselves.
8998define void @s_shuffle_v4i64_v3i64__0_0_0_0() {
8999; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_0_0_0:
9000; GFX9:       ; %bb.0:
9001; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9002; GFX9-NEXT:    ;;#ASMSTART
9003; GFX9-NEXT:    ; def s[8:13]
9004; GFX9-NEXT:    ;;#ASMEND
9005; GFX9-NEXT:    s_mov_b32 s10, s8
9006; GFX9-NEXT:    s_mov_b32 s11, s9
9007; GFX9-NEXT:    s_mov_b32 s12, s8
9008; GFX9-NEXT:    s_mov_b32 s13, s9
9009; GFX9-NEXT:    s_mov_b32 s14, s8
9010; GFX9-NEXT:    s_mov_b32 s15, s9
9011; GFX9-NEXT:    ;;#ASMSTART
9012; GFX9-NEXT:    ; use s[8:15]
9013; GFX9-NEXT:    ;;#ASMEND
9014; GFX9-NEXT:    s_setpc_b64 s[30:31]
9015  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9016  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> zeroinitializer
9017  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9018  ret void
9019}
9020
; SGPR shuffle of a single <3 x i64> inline-asm result (second operand is
; poison) into a <4 x i64> with mask <1, 0, 0, 0>: lane 0 takes element 1 of
; %vec0 and lanes 1, 2 and 3 take element 0 of %vec0. The result is pinned to
; s[8:15] by the register constraint on the final "use" asm, and the expected
; output copies into all eight registers s8-s15.
9021define void @s_shuffle_v4i64_v3i64__1_0_0_0() {
9022; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
9023; GFX900:       ; %bb.0:
9024; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9025; GFX900-NEXT:    ;;#ASMSTART
9026; GFX900-NEXT:    ; def s[4:9]
9027; GFX900-NEXT:    ;;#ASMEND
9028; GFX900-NEXT:    s_mov_b32 s8, s6
9029; GFX900-NEXT:    s_mov_b32 s9, s7
9030; GFX900-NEXT:    s_mov_b32 s10, s4
9031; GFX900-NEXT:    s_mov_b32 s11, s5
9032; GFX900-NEXT:    s_mov_b32 s12, s4
9033; GFX900-NEXT:    s_mov_b32 s13, s5
9034; GFX900-NEXT:    s_mov_b32 s14, s4
9035; GFX900-NEXT:    s_mov_b32 s15, s5
9036; GFX900-NEXT:    ;;#ASMSTART
9037; GFX900-NEXT:    ; use s[8:15]
9038; GFX900-NEXT:    ;;#ASMEND
9039; GFX900-NEXT:    s_setpc_b64 s[30:31]
9040;
9041; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
9042; GFX90A:       ; %bb.0:
9043; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9044; GFX90A-NEXT:    ;;#ASMSTART
9045; GFX90A-NEXT:    ; def s[4:9]
9046; GFX90A-NEXT:    ;;#ASMEND
9047; GFX90A-NEXT:    s_mov_b32 s8, s6
9048; GFX90A-NEXT:    s_mov_b32 s9, s7
9049; GFX90A-NEXT:    s_mov_b32 s10, s4
9050; GFX90A-NEXT:    s_mov_b32 s11, s5
9051; GFX90A-NEXT:    s_mov_b32 s12, s4
9052; GFX90A-NEXT:    s_mov_b32 s13, s5
9053; GFX90A-NEXT:    s_mov_b32 s14, s4
9054; GFX90A-NEXT:    s_mov_b32 s15, s5
9055; GFX90A-NEXT:    ;;#ASMSTART
9056; GFX90A-NEXT:    ; use s[8:15]
9057; GFX90A-NEXT:    ;;#ASMEND
9058; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9059;
9060; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_0_0_0:
9061; GFX940:       ; %bb.0:
9062; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9063; GFX940-NEXT:    ;;#ASMSTART
9064; GFX940-NEXT:    ; def s[0:5]
9065; GFX940-NEXT:    ;;#ASMEND
9066; GFX940-NEXT:    s_mov_b32 s8, s2
9067; GFX940-NEXT:    s_mov_b32 s9, s3
9068; GFX940-NEXT:    s_mov_b32 s10, s0
9069; GFX940-NEXT:    s_mov_b32 s11, s1
9070; GFX940-NEXT:    s_mov_b32 s12, s0
9071; GFX940-NEXT:    s_mov_b32 s13, s1
9072; GFX940-NEXT:    s_mov_b32 s14, s0
9073; GFX940-NEXT:    s_mov_b32 s15, s1
9074; GFX940-NEXT:    ;;#ASMSTART
9075; GFX940-NEXT:    ; use s[8:15]
9076; GFX940-NEXT:    ;;#ASMEND
9077; GFX940-NEXT:    s_setpc_b64 s[30:31]
9078  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9079  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
9080  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9081  ret void
9082}
9083
; Scalar-register ("=s") shuffle of one <3 x i64> source; the second operand is
; poison. Mask <2, 0, 0, 0>: result element 0 is %vec0[2], elements 1-3 are
; %vec0[0]. For the gfx900 and gfx90a runs the expected output has no s_mov
; into s8/s9 (the source is defined in s[4:9]); the gfx940 run copies s4/s5.
9084define void @s_shuffle_v4i64_v3i64__2_0_0_0() {
9085; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
9086; GFX900:       ; %bb.0:
9087; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9088; GFX900-NEXT:    ;;#ASMSTART
9089; GFX900-NEXT:    ; def s[4:9]
9090; GFX900-NEXT:    ;;#ASMEND
9091; GFX900-NEXT:    s_mov_b32 s10, s4
9092; GFX900-NEXT:    s_mov_b32 s11, s5
9093; GFX900-NEXT:    s_mov_b32 s12, s4
9094; GFX900-NEXT:    s_mov_b32 s13, s5
9095; GFX900-NEXT:    s_mov_b32 s14, s4
9096; GFX900-NEXT:    s_mov_b32 s15, s5
9097; GFX900-NEXT:    ;;#ASMSTART
9098; GFX900-NEXT:    ; use s[8:15]
9099; GFX900-NEXT:    ;;#ASMEND
9100; GFX900-NEXT:    s_setpc_b64 s[30:31]
9101;
9102; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
9103; GFX90A:       ; %bb.0:
9104; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9105; GFX90A-NEXT:    ;;#ASMSTART
9106; GFX90A-NEXT:    ; def s[4:9]
9107; GFX90A-NEXT:    ;;#ASMEND
9108; GFX90A-NEXT:    s_mov_b32 s10, s4
9109; GFX90A-NEXT:    s_mov_b32 s11, s5
9110; GFX90A-NEXT:    s_mov_b32 s12, s4
9111; GFX90A-NEXT:    s_mov_b32 s13, s5
9112; GFX90A-NEXT:    s_mov_b32 s14, s4
9113; GFX90A-NEXT:    s_mov_b32 s15, s5
9114; GFX90A-NEXT:    ;;#ASMSTART
9115; GFX90A-NEXT:    ; use s[8:15]
9116; GFX90A-NEXT:    ;;#ASMEND
9117; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9118;
9119; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_0_0_0:
9120; GFX940:       ; %bb.0:
9121; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9122; GFX940-NEXT:    ;;#ASMSTART
9123; GFX940-NEXT:    ; def s[0:5]
9124; GFX940-NEXT:    ;;#ASMEND
9125; GFX940-NEXT:    s_mov_b32 s8, s4
9126; GFX940-NEXT:    s_mov_b32 s9, s5
9127; GFX940-NEXT:    s_mov_b32 s10, s0
9128; GFX940-NEXT:    s_mov_b32 s11, s1
9129; GFX940-NEXT:    s_mov_b32 s12, s0
9130; GFX940-NEXT:    s_mov_b32 s13, s1
9131; GFX940-NEXT:    s_mov_b32 s14, s0
9132; GFX940-NEXT:    s_mov_b32 s15, s1
9133; GFX940-NEXT:    ;;#ASMSTART
9134; GFX940-NEXT:    ; use s[8:15]
9135; GFX940-NEXT:    ;;#ASMEND
9136; GFX940-NEXT:    s_setpc_b64 s[30:31]
9137  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9138  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 0, i32 0, i32 0>
9139  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9140  ret void
9141}
9142
; Scalar-register ("=s") shuffle with mask <3, 0, 0, 0>. Index 3 selects
; element 0 of the second operand, which is poison in this function, so result
; element 0 carries no defined value; the expected output never writes s8/s9.
; Elements 1-3 are %vec0[0].
9143define void @s_shuffle_v4i64_v3i64__3_0_0_0() {
9144; GFX900-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
9145; GFX900:       ; %bb.0:
9146; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9147; GFX900-NEXT:    ;;#ASMSTART
9148; GFX900-NEXT:    ; def s[4:9]
9149; GFX900-NEXT:    ;;#ASMEND
9150; GFX900-NEXT:    s_mov_b32 s10, s4
9151; GFX900-NEXT:    s_mov_b32 s11, s5
9152; GFX900-NEXT:    s_mov_b32 s12, s4
9153; GFX900-NEXT:    s_mov_b32 s13, s5
9154; GFX900-NEXT:    s_mov_b32 s14, s4
9155; GFX900-NEXT:    s_mov_b32 s15, s5
9156; GFX900-NEXT:    ;;#ASMSTART
9157; GFX900-NEXT:    ; use s[8:15]
9158; GFX900-NEXT:    ;;#ASMEND
9159; GFX900-NEXT:    s_setpc_b64 s[30:31]
9160;
9161; GFX90A-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
9162; GFX90A:       ; %bb.0:
9163; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9164; GFX90A-NEXT:    ;;#ASMSTART
9165; GFX90A-NEXT:    ; def s[4:9]
9166; GFX90A-NEXT:    ;;#ASMEND
9167; GFX90A-NEXT:    s_mov_b32 s10, s4
9168; GFX90A-NEXT:    s_mov_b32 s11, s5
9169; GFX90A-NEXT:    s_mov_b32 s12, s4
9170; GFX90A-NEXT:    s_mov_b32 s13, s5
9171; GFX90A-NEXT:    s_mov_b32 s14, s4
9172; GFX90A-NEXT:    s_mov_b32 s15, s5
9173; GFX90A-NEXT:    ;;#ASMSTART
9174; GFX90A-NEXT:    ; use s[8:15]
9175; GFX90A-NEXT:    ;;#ASMEND
9176; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9177;
9178; GFX940-LABEL: s_shuffle_v4i64_v3i64__3_0_0_0:
9179; GFX940:       ; %bb.0:
9180; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9181; GFX940-NEXT:    ;;#ASMSTART
9182; GFX940-NEXT:    ; def s[0:5]
9183; GFX940-NEXT:    ;;#ASMEND
9184; GFX940-NEXT:    s_mov_b32 s10, s0
9185; GFX940-NEXT:    s_mov_b32 s11, s1
9186; GFX940-NEXT:    s_mov_b32 s12, s0
9187; GFX940-NEXT:    s_mov_b32 s13, s1
9188; GFX940-NEXT:    s_mov_b32 s14, s0
9189; GFX940-NEXT:    s_mov_b32 s15, s1
9190; GFX940-NEXT:    ;;#ASMSTART
9191; GFX940-NEXT:    ; use s[8:15]
9192; GFX940-NEXT:    ;;#ASMEND
9193; GFX940-NEXT:    s_setpc_b64 s[30:31]
9194  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9195  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
9196  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9197  ret void
9198}
9199
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <4, 0, 0, 0>. Mask indices 3-5 address the second operand, so result
; element 0 is %vec1[1]; elements 1-3 are %vec0[0]. The "{s[8:15]}" constraint
; on the use asm pins the result register.
9200define void @s_shuffle_v4i64_v3i64__4_0_0_0() {
9201; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
9202; GFX900:       ; %bb.0:
9203; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9204; GFX900-NEXT:    ;;#ASMSTART
9205; GFX900-NEXT:    ; def s[4:9]
9206; GFX900-NEXT:    ;;#ASMEND
9207; GFX900-NEXT:    ;;#ASMSTART
9208; GFX900-NEXT:    ; def s[8:13]
9209; GFX900-NEXT:    ;;#ASMEND
9210; GFX900-NEXT:    s_mov_b32 s8, s10
9211; GFX900-NEXT:    s_mov_b32 s9, s11
9212; GFX900-NEXT:    s_mov_b32 s10, s4
9213; GFX900-NEXT:    s_mov_b32 s11, s5
9214; GFX900-NEXT:    s_mov_b32 s12, s4
9215; GFX900-NEXT:    s_mov_b32 s13, s5
9216; GFX900-NEXT:    s_mov_b32 s14, s4
9217; GFX900-NEXT:    s_mov_b32 s15, s5
9218; GFX900-NEXT:    ;;#ASMSTART
9219; GFX900-NEXT:    ; use s[8:15]
9220; GFX900-NEXT:    ;;#ASMEND
9221; GFX900-NEXT:    s_setpc_b64 s[30:31]
9222;
9223; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
9224; GFX90A:       ; %bb.0:
9225; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9226; GFX90A-NEXT:    ;;#ASMSTART
9227; GFX90A-NEXT:    ; def s[4:9]
9228; GFX90A-NEXT:    ;;#ASMEND
9229; GFX90A-NEXT:    ;;#ASMSTART
9230; GFX90A-NEXT:    ; def s[8:13]
9231; GFX90A-NEXT:    ;;#ASMEND
9232; GFX90A-NEXT:    s_mov_b32 s8, s10
9233; GFX90A-NEXT:    s_mov_b32 s9, s11
9234; GFX90A-NEXT:    s_mov_b32 s10, s4
9235; GFX90A-NEXT:    s_mov_b32 s11, s5
9236; GFX90A-NEXT:    s_mov_b32 s12, s4
9237; GFX90A-NEXT:    s_mov_b32 s13, s5
9238; GFX90A-NEXT:    s_mov_b32 s14, s4
9239; GFX90A-NEXT:    s_mov_b32 s15, s5
9240; GFX90A-NEXT:    ;;#ASMSTART
9241; GFX90A-NEXT:    ; use s[8:15]
9242; GFX90A-NEXT:    ;;#ASMEND
9243; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9244;
9245; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_0_0_0:
9246; GFX940:       ; %bb.0:
9247; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9248; GFX940-NEXT:    ;;#ASMSTART
9249; GFX940-NEXT:    ; def s[0:5]
9250; GFX940-NEXT:    ;;#ASMEND
9251; GFX940-NEXT:    s_mov_b32 s10, s0
9252; GFX940-NEXT:    ;;#ASMSTART
9253; GFX940-NEXT:    ; def s[4:9]
9254; GFX940-NEXT:    ;;#ASMEND
9255; GFX940-NEXT:    s_mov_b32 s8, s6
9256; GFX940-NEXT:    s_mov_b32 s9, s7
9257; GFX940-NEXT:    s_mov_b32 s11, s1
9258; GFX940-NEXT:    s_mov_b32 s12, s0
9259; GFX940-NEXT:    s_mov_b32 s13, s1
9260; GFX940-NEXT:    s_mov_b32 s14, s0
9261; GFX940-NEXT:    s_mov_b32 s15, s1
9262; GFX940-NEXT:    ;;#ASMSTART
9263; GFX940-NEXT:    ; use s[8:15]
9264; GFX940-NEXT:    ;;#ASMEND
9265; GFX940-NEXT:    s_setpc_b64 s[30:31]
9266  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9267  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9268  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 0, i32 0, i32 0>
9269  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9270  ret void
9271}
9272
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <5, 0, 0, 0>. Mask indices 3-5 address the second operand, so result
; element 0 is %vec1[2]; elements 1-3 are %vec0[0]. The "{s[8:15]}" constraint
; on the use asm pins the result register.
9273define void @s_shuffle_v4i64_v3i64__5_0_0_0() {
9274; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
9275; GFX900:       ; %bb.0:
9276; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9277; GFX900-NEXT:    ;;#ASMSTART
9278; GFX900-NEXT:    ; def s[4:9]
9279; GFX900-NEXT:    ;;#ASMEND
9280; GFX900-NEXT:    ;;#ASMSTART
9281; GFX900-NEXT:    ; def s[8:13]
9282; GFX900-NEXT:    ;;#ASMEND
9283; GFX900-NEXT:    s_mov_b32 s8, s12
9284; GFX900-NEXT:    s_mov_b32 s9, s13
9285; GFX900-NEXT:    s_mov_b32 s10, s4
9286; GFX900-NEXT:    s_mov_b32 s11, s5
9287; GFX900-NEXT:    s_mov_b32 s12, s4
9288; GFX900-NEXT:    s_mov_b32 s13, s5
9289; GFX900-NEXT:    s_mov_b32 s14, s4
9290; GFX900-NEXT:    s_mov_b32 s15, s5
9291; GFX900-NEXT:    ;;#ASMSTART
9292; GFX900-NEXT:    ; use s[8:15]
9293; GFX900-NEXT:    ;;#ASMEND
9294; GFX900-NEXT:    s_setpc_b64 s[30:31]
9295;
9296; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
9297; GFX90A:       ; %bb.0:
9298; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9299; GFX90A-NEXT:    ;;#ASMSTART
9300; GFX90A-NEXT:    ; def s[4:9]
9301; GFX90A-NEXT:    ;;#ASMEND
9302; GFX90A-NEXT:    ;;#ASMSTART
9303; GFX90A-NEXT:    ; def s[8:13]
9304; GFX90A-NEXT:    ;;#ASMEND
9305; GFX90A-NEXT:    s_mov_b32 s8, s12
9306; GFX90A-NEXT:    s_mov_b32 s9, s13
9307; GFX90A-NEXT:    s_mov_b32 s10, s4
9308; GFX90A-NEXT:    s_mov_b32 s11, s5
9309; GFX90A-NEXT:    s_mov_b32 s12, s4
9310; GFX90A-NEXT:    s_mov_b32 s13, s5
9311; GFX90A-NEXT:    s_mov_b32 s14, s4
9312; GFX90A-NEXT:    s_mov_b32 s15, s5
9313; GFX90A-NEXT:    ;;#ASMSTART
9314; GFX90A-NEXT:    ; use s[8:15]
9315; GFX90A-NEXT:    ;;#ASMEND
9316; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9317;
9318; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_0_0:
9319; GFX940:       ; %bb.0:
9320; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9321; GFX940-NEXT:    ;;#ASMSTART
9322; GFX940-NEXT:    ; def s[0:5]
9323; GFX940-NEXT:    ;;#ASMEND
9324; GFX940-NEXT:    s_mov_b32 s10, s0
9325; GFX940-NEXT:    ;;#ASMSTART
9326; GFX940-NEXT:    ; def s[4:9]
9327; GFX940-NEXT:    ;;#ASMEND
9328; GFX940-NEXT:    s_mov_b32 s11, s1
9329; GFX940-NEXT:    s_mov_b32 s12, s0
9330; GFX940-NEXT:    s_mov_b32 s13, s1
9331; GFX940-NEXT:    s_mov_b32 s14, s0
9332; GFX940-NEXT:    s_mov_b32 s15, s1
9333; GFX940-NEXT:    ;;#ASMSTART
9334; GFX940-NEXT:    ; use s[8:15]
9335; GFX940-NEXT:    ;;#ASMEND
9336; GFX940-NEXT:    s_setpc_b64 s[30:31]
9337  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9338  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9339  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 0, i32 0>
9340  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9341  ret void
9342}
9343
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <5, poison, 0, 0>: result element 0 is %vec1[2], element 1 is unspecified,
; elements 2-3 are %vec0[0]. The expected output has no s_mov into s10/s11.
9344define void @s_shuffle_v4i64_v3i64__5_u_0_0() {
9345; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
9346; GFX900:       ; %bb.0:
9347; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9348; GFX900-NEXT:    ;;#ASMSTART
9349; GFX900-NEXT:    ; def s[4:9]
9350; GFX900-NEXT:    ;;#ASMEND
9351; GFX900-NEXT:    ;;#ASMSTART
9352; GFX900-NEXT:    ; def s[8:13]
9353; GFX900-NEXT:    ;;#ASMEND
9354; GFX900-NEXT:    s_mov_b32 s8, s12
9355; GFX900-NEXT:    s_mov_b32 s9, s13
9356; GFX900-NEXT:    s_mov_b32 s12, s4
9357; GFX900-NEXT:    s_mov_b32 s13, s5
9358; GFX900-NEXT:    s_mov_b32 s14, s4
9359; GFX900-NEXT:    s_mov_b32 s15, s5
9360; GFX900-NEXT:    ;;#ASMSTART
9361; GFX900-NEXT:    ; use s[8:15]
9362; GFX900-NEXT:    ;;#ASMEND
9363; GFX900-NEXT:    s_setpc_b64 s[30:31]
9364;
9365; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
9366; GFX90A:       ; %bb.0:
9367; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9368; GFX90A-NEXT:    ;;#ASMSTART
9369; GFX90A-NEXT:    ; def s[4:9]
9370; GFX90A-NEXT:    ;;#ASMEND
9371; GFX90A-NEXT:    ;;#ASMSTART
9372; GFX90A-NEXT:    ; def s[8:13]
9373; GFX90A-NEXT:    ;;#ASMEND
9374; GFX90A-NEXT:    s_mov_b32 s8, s12
9375; GFX90A-NEXT:    s_mov_b32 s9, s13
9376; GFX90A-NEXT:    s_mov_b32 s12, s4
9377; GFX90A-NEXT:    s_mov_b32 s13, s5
9378; GFX90A-NEXT:    s_mov_b32 s14, s4
9379; GFX90A-NEXT:    s_mov_b32 s15, s5
9380; GFX90A-NEXT:    ;;#ASMSTART
9381; GFX90A-NEXT:    ; use s[8:15]
9382; GFX90A-NEXT:    ;;#ASMEND
9383; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9384;
9385; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_0_0:
9386; GFX940:       ; %bb.0:
9387; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9388; GFX940-NEXT:    ;;#ASMSTART
9389; GFX940-NEXT:    ; def s[0:5]
9390; GFX940-NEXT:    ;;#ASMEND
9391; GFX940-NEXT:    s_mov_b32 s12, s0
9392; GFX940-NEXT:    ;;#ASMSTART
9393; GFX940-NEXT:    ; def s[4:9]
9394; GFX940-NEXT:    ;;#ASMEND
9395; GFX940-NEXT:    s_mov_b32 s13, s1
9396; GFX940-NEXT:    s_mov_b32 s14, s0
9397; GFX940-NEXT:    s_mov_b32 s15, s1
9398; GFX940-NEXT:    ;;#ASMSTART
9399; GFX940-NEXT:    ; use s[8:15]
9400; GFX940-NEXT:    ;;#ASMEND
9401; GFX940-NEXT:    s_setpc_b64 s[30:31]
9402  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9403  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9404  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 0, i32 0>
9405  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9406  ret void
9407}
9408
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <5, 1, 0, 0>: result element 0 is %vec1[2], element 1 is %vec0[1],
; elements 2-3 are %vec0[0]. The "{s[8:15]}" constraint on the use asm pins
; the result register.
9409define void @s_shuffle_v4i64_v3i64__5_1_0_0() {
9410; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
9411; GFX900:       ; %bb.0:
9412; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9413; GFX900-NEXT:    ;;#ASMSTART
9414; GFX900-NEXT:    ; def s[4:9]
9415; GFX900-NEXT:    ;;#ASMEND
9416; GFX900-NEXT:    ;;#ASMSTART
9417; GFX900-NEXT:    ; def s[8:13]
9418; GFX900-NEXT:    ;;#ASMEND
9419; GFX900-NEXT:    s_mov_b32 s8, s12
9420; GFX900-NEXT:    s_mov_b32 s9, s13
9421; GFX900-NEXT:    s_mov_b32 s10, s6
9422; GFX900-NEXT:    s_mov_b32 s11, s7
9423; GFX900-NEXT:    s_mov_b32 s12, s4
9424; GFX900-NEXT:    s_mov_b32 s13, s5
9425; GFX900-NEXT:    s_mov_b32 s14, s4
9426; GFX900-NEXT:    s_mov_b32 s15, s5
9427; GFX900-NEXT:    ;;#ASMSTART
9428; GFX900-NEXT:    ; use s[8:15]
9429; GFX900-NEXT:    ;;#ASMEND
9430; GFX900-NEXT:    s_setpc_b64 s[30:31]
9431;
9432; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
9433; GFX90A:       ; %bb.0:
9434; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9435; GFX90A-NEXT:    ;;#ASMSTART
9436; GFX90A-NEXT:    ; def s[4:9]
9437; GFX90A-NEXT:    ;;#ASMEND
9438; GFX90A-NEXT:    ;;#ASMSTART
9439; GFX90A-NEXT:    ; def s[8:13]
9440; GFX90A-NEXT:    ;;#ASMEND
9441; GFX90A-NEXT:    s_mov_b32 s8, s12
9442; GFX90A-NEXT:    s_mov_b32 s9, s13
9443; GFX90A-NEXT:    s_mov_b32 s10, s6
9444; GFX90A-NEXT:    s_mov_b32 s11, s7
9445; GFX90A-NEXT:    s_mov_b32 s12, s4
9446; GFX90A-NEXT:    s_mov_b32 s13, s5
9447; GFX90A-NEXT:    s_mov_b32 s14, s4
9448; GFX90A-NEXT:    s_mov_b32 s15, s5
9449; GFX90A-NEXT:    ;;#ASMSTART
9450; GFX90A-NEXT:    ; use s[8:15]
9451; GFX90A-NEXT:    ;;#ASMEND
9452; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9453;
9454; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_0_0:
9455; GFX940:       ; %bb.0:
9456; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9457; GFX940-NEXT:    ;;#ASMSTART
9458; GFX940-NEXT:    ; def s[0:5]
9459; GFX940-NEXT:    ;;#ASMEND
9460; GFX940-NEXT:    s_mov_b32 s10, s2
9461; GFX940-NEXT:    ;;#ASMSTART
9462; GFX940-NEXT:    ; def s[4:9]
9463; GFX940-NEXT:    ;;#ASMEND
9464; GFX940-NEXT:    s_mov_b32 s11, s3
9465; GFX940-NEXT:    s_mov_b32 s12, s0
9466; GFX940-NEXT:    s_mov_b32 s13, s1
9467; GFX940-NEXT:    s_mov_b32 s14, s0
9468; GFX940-NEXT:    s_mov_b32 s15, s1
9469; GFX940-NEXT:    ;;#ASMSTART
9470; GFX940-NEXT:    ; use s[8:15]
9471; GFX940-NEXT:    ;;#ASMEND
9472; GFX940-NEXT:    s_setpc_b64 s[30:31]
9473  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9474  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9475  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 0, i32 0>
9476  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9477  ret void
9478}
9479
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <5, 2, 0, 0>: result element 0 is %vec1[2], element 1 is %vec0[2],
; elements 2-3 are %vec0[0].
; NOTE(review): the two "def" asm calls have no sideeffect marker, and the
; expected moves suggest the first def printed is not always %vec0 - confirm
; against the scheduler output when regenerating the assertions.
9480define void @s_shuffle_v4i64_v3i64__5_2_0_0() {
9481; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
9482; GFX900:       ; %bb.0:
9483; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9484; GFX900-NEXT:    ;;#ASMSTART
9485; GFX900-NEXT:    ; def s[16:21]
9486; GFX900-NEXT:    ;;#ASMEND
9487; GFX900-NEXT:    ;;#ASMSTART
9488; GFX900-NEXT:    ; def s[4:9]
9489; GFX900-NEXT:    ;;#ASMEND
9490; GFX900-NEXT:    s_mov_b32 s10, s20
9491; GFX900-NEXT:    s_mov_b32 s11, s21
9492; GFX900-NEXT:    s_mov_b32 s12, s16
9493; GFX900-NEXT:    s_mov_b32 s13, s17
9494; GFX900-NEXT:    s_mov_b32 s14, s16
9495; GFX900-NEXT:    s_mov_b32 s15, s17
9496; GFX900-NEXT:    ;;#ASMSTART
9497; GFX900-NEXT:    ; use s[8:15]
9498; GFX900-NEXT:    ;;#ASMEND
9499; GFX900-NEXT:    s_setpc_b64 s[30:31]
9500;
9501; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
9502; GFX90A:       ; %bb.0:
9503; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9504; GFX90A-NEXT:    ;;#ASMSTART
9505; GFX90A-NEXT:    ; def s[16:21]
9506; GFX90A-NEXT:    ;;#ASMEND
9507; GFX90A-NEXT:    ;;#ASMSTART
9508; GFX90A-NEXT:    ; def s[4:9]
9509; GFX90A-NEXT:    ;;#ASMEND
9510; GFX90A-NEXT:    s_mov_b32 s10, s20
9511; GFX90A-NEXT:    s_mov_b32 s11, s21
9512; GFX90A-NEXT:    s_mov_b32 s12, s16
9513; GFX90A-NEXT:    s_mov_b32 s13, s17
9514; GFX90A-NEXT:    s_mov_b32 s14, s16
9515; GFX90A-NEXT:    s_mov_b32 s15, s17
9516; GFX90A-NEXT:    ;;#ASMSTART
9517; GFX90A-NEXT:    ; use s[8:15]
9518; GFX90A-NEXT:    ;;#ASMEND
9519; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9520;
9521; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_0_0:
9522; GFX940:       ; %bb.0:
9523; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9524; GFX940-NEXT:    ;;#ASMSTART
9525; GFX940-NEXT:    ; def s[8:13]
9526; GFX940-NEXT:    ;;#ASMEND
9527; GFX940-NEXT:    ;;#ASMSTART
9528; GFX940-NEXT:    ; def s[0:5]
9529; GFX940-NEXT:    ;;#ASMEND
9530; GFX940-NEXT:    s_mov_b32 s8, s12
9531; GFX940-NEXT:    s_mov_b32 s9, s13
9532; GFX940-NEXT:    s_mov_b32 s10, s4
9533; GFX940-NEXT:    s_mov_b32 s11, s5
9534; GFX940-NEXT:    s_mov_b32 s12, s0
9535; GFX940-NEXT:    s_mov_b32 s13, s1
9536; GFX940-NEXT:    s_mov_b32 s14, s0
9537; GFX940-NEXT:    s_mov_b32 s15, s1
9538; GFX940-NEXT:    ;;#ASMSTART
9539; GFX940-NEXT:    ; use s[8:15]
9540; GFX940-NEXT:    ;;#ASMEND
9541; GFX940-NEXT:    s_setpc_b64 s[30:31]
9542  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9543  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9544  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 0, i32 0>
9545  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9546  ret void
9547}
9548
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <5, 3, 0, 0>: result element 0 is %vec1[2], element 1 is %vec1[0],
; elements 2-3 are %vec0[0]. The "{s[8:15]}" constraint on the use asm pins
; the result register.
9549define void @s_shuffle_v4i64_v3i64__5_3_0_0() {
9550; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
9551; GFX900:       ; %bb.0:
9552; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9553; GFX900-NEXT:    ;;#ASMSTART
9554; GFX900-NEXT:    ; def s[4:9]
9555; GFX900-NEXT:    ;;#ASMEND
9556; GFX900-NEXT:    ;;#ASMSTART
9557; GFX900-NEXT:    ; def s[12:17]
9558; GFX900-NEXT:    ;;#ASMEND
9559; GFX900-NEXT:    s_mov_b32 s8, s16
9560; GFX900-NEXT:    s_mov_b32 s9, s17
9561; GFX900-NEXT:    s_mov_b32 s10, s12
9562; GFX900-NEXT:    s_mov_b32 s11, s13
9563; GFX900-NEXT:    s_mov_b32 s12, s4
9564; GFX900-NEXT:    s_mov_b32 s13, s5
9565; GFX900-NEXT:    s_mov_b32 s14, s4
9566; GFX900-NEXT:    s_mov_b32 s15, s5
9567; GFX900-NEXT:    ;;#ASMSTART
9568; GFX900-NEXT:    ; use s[8:15]
9569; GFX900-NEXT:    ;;#ASMEND
9570; GFX900-NEXT:    s_setpc_b64 s[30:31]
9571;
9572; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
9573; GFX90A:       ; %bb.0:
9574; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9575; GFX90A-NEXT:    ;;#ASMSTART
9576; GFX90A-NEXT:    ; def s[4:9]
9577; GFX90A-NEXT:    ;;#ASMEND
9578; GFX90A-NEXT:    ;;#ASMSTART
9579; GFX90A-NEXT:    ; def s[12:17]
9580; GFX90A-NEXT:    ;;#ASMEND
9581; GFX90A-NEXT:    s_mov_b32 s8, s16
9582; GFX90A-NEXT:    s_mov_b32 s9, s17
9583; GFX90A-NEXT:    s_mov_b32 s10, s12
9584; GFX90A-NEXT:    s_mov_b32 s11, s13
9585; GFX90A-NEXT:    s_mov_b32 s12, s4
9586; GFX90A-NEXT:    s_mov_b32 s13, s5
9587; GFX90A-NEXT:    s_mov_b32 s14, s4
9588; GFX90A-NEXT:    s_mov_b32 s15, s5
9589; GFX90A-NEXT:    ;;#ASMSTART
9590; GFX90A-NEXT:    ; use s[8:15]
9591; GFX90A-NEXT:    ;;#ASMEND
9592; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9593;
9594; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_0_0:
9595; GFX940:       ; %bb.0:
9596; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9597; GFX940-NEXT:    ;;#ASMSTART
9598; GFX940-NEXT:    ; def s[0:5]
9599; GFX940-NEXT:    ;;#ASMEND
9600; GFX940-NEXT:    s_mov_b32 s12, s0
9601; GFX940-NEXT:    ;;#ASMSTART
9602; GFX940-NEXT:    ; def s[4:9]
9603; GFX940-NEXT:    ;;#ASMEND
9604; GFX940-NEXT:    s_mov_b32 s10, s4
9605; GFX940-NEXT:    s_mov_b32 s11, s5
9606; GFX940-NEXT:    s_mov_b32 s13, s1
9607; GFX940-NEXT:    s_mov_b32 s14, s0
9608; GFX940-NEXT:    s_mov_b32 s15, s1
9609; GFX940-NEXT:    ;;#ASMSTART
9610; GFX940-NEXT:    ; use s[8:15]
9611; GFX940-NEXT:    ;;#ASMEND
9612; GFX940-NEXT:    s_setpc_b64 s[30:31]
9613  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9614  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9615  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 0, i32 0>
9616  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9617  ret void
9618}
9619
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <5, 4, 0, 0>: result element 0 is %vec1[2], element 1 is %vec1[1],
; elements 2-3 are %vec0[0]. The expected output has no s_mov into s10/s11.
9620define void @s_shuffle_v4i64_v3i64__5_4_0_0() {
9621; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
9622; GFX900:       ; %bb.0:
9623; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9624; GFX900-NEXT:    ;;#ASMSTART
9625; GFX900-NEXT:    ; def s[4:9]
9626; GFX900-NEXT:    ;;#ASMEND
9627; GFX900-NEXT:    ;;#ASMSTART
9628; GFX900-NEXT:    ; def s[8:13]
9629; GFX900-NEXT:    ;;#ASMEND
9630; GFX900-NEXT:    s_mov_b32 s8, s12
9631; GFX900-NEXT:    s_mov_b32 s9, s13
9632; GFX900-NEXT:    s_mov_b32 s12, s4
9633; GFX900-NEXT:    s_mov_b32 s13, s5
9634; GFX900-NEXT:    s_mov_b32 s14, s4
9635; GFX900-NEXT:    s_mov_b32 s15, s5
9636; GFX900-NEXT:    ;;#ASMSTART
9637; GFX900-NEXT:    ; use s[8:15]
9638; GFX900-NEXT:    ;;#ASMEND
9639; GFX900-NEXT:    s_setpc_b64 s[30:31]
9640;
9641; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
9642; GFX90A:       ; %bb.0:
9643; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9644; GFX90A-NEXT:    ;;#ASMSTART
9645; GFX90A-NEXT:    ; def s[4:9]
9646; GFX90A-NEXT:    ;;#ASMEND
9647; GFX90A-NEXT:    ;;#ASMSTART
9648; GFX90A-NEXT:    ; def s[8:13]
9649; GFX90A-NEXT:    ;;#ASMEND
9650; GFX90A-NEXT:    s_mov_b32 s8, s12
9651; GFX90A-NEXT:    s_mov_b32 s9, s13
9652; GFX90A-NEXT:    s_mov_b32 s12, s4
9653; GFX90A-NEXT:    s_mov_b32 s13, s5
9654; GFX90A-NEXT:    s_mov_b32 s14, s4
9655; GFX90A-NEXT:    s_mov_b32 s15, s5
9656; GFX90A-NEXT:    ;;#ASMSTART
9657; GFX90A-NEXT:    ; use s[8:15]
9658; GFX90A-NEXT:    ;;#ASMEND
9659; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9660;
9661; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_0_0:
9662; GFX940:       ; %bb.0:
9663; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9664; GFX940-NEXT:    ;;#ASMSTART
9665; GFX940-NEXT:    ; def s[8:13]
9666; GFX940-NEXT:    ;;#ASMEND
9667; GFX940-NEXT:    ;;#ASMSTART
9668; GFX940-NEXT:    ; def s[0:5]
9669; GFX940-NEXT:    ;;#ASMEND
9670; GFX940-NEXT:    s_mov_b32 s8, s12
9671; GFX940-NEXT:    s_mov_b32 s9, s13
9672; GFX940-NEXT:    s_mov_b32 s12, s0
9673; GFX940-NEXT:    s_mov_b32 s13, s1
9674; GFX940-NEXT:    s_mov_b32 s14, s0
9675; GFX940-NEXT:    s_mov_b32 s15, s1
9676; GFX940-NEXT:    ;;#ASMSTART
9677; GFX940-NEXT:    ; use s[8:15]
9678; GFX940-NEXT:    ;;#ASMEND
9679; GFX940-NEXT:    s_setpc_b64 s[30:31]
9680  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9681  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9682  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 0, i32 0>
9683  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9684  ret void
9685}
9686
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <5, 5, 0, 0>: result elements 0-1 are both %vec1[2], elements 2-3 are both
; %vec0[0]. The "{s[8:15]}" constraint on the use asm pins the result register.
9687define void @s_shuffle_v4i64_v3i64__5_5_0_0() {
9688; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
9689; GFX900:       ; %bb.0:
9690; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9691; GFX900-NEXT:    ;;#ASMSTART
9692; GFX900-NEXT:    ; def s[4:9]
9693; GFX900-NEXT:    ;;#ASMEND
9694; GFX900-NEXT:    ;;#ASMSTART
9695; GFX900-NEXT:    ; def s[8:13]
9696; GFX900-NEXT:    ;;#ASMEND
9697; GFX900-NEXT:    s_mov_b32 s8, s12
9698; GFX900-NEXT:    s_mov_b32 s9, s13
9699; GFX900-NEXT:    s_mov_b32 s10, s12
9700; GFX900-NEXT:    s_mov_b32 s11, s13
9701; GFX900-NEXT:    s_mov_b32 s12, s4
9702; GFX900-NEXT:    s_mov_b32 s13, s5
9703; GFX900-NEXT:    s_mov_b32 s14, s4
9704; GFX900-NEXT:    s_mov_b32 s15, s5
9705; GFX900-NEXT:    ;;#ASMSTART
9706; GFX900-NEXT:    ; use s[8:15]
9707; GFX900-NEXT:    ;;#ASMEND
9708; GFX900-NEXT:    s_setpc_b64 s[30:31]
9709;
9710; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
9711; GFX90A:       ; %bb.0:
9712; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9713; GFX90A-NEXT:    ;;#ASMSTART
9714; GFX90A-NEXT:    ; def s[4:9]
9715; GFX90A-NEXT:    ;;#ASMEND
9716; GFX90A-NEXT:    ;;#ASMSTART
9717; GFX90A-NEXT:    ; def s[8:13]
9718; GFX90A-NEXT:    ;;#ASMEND
9719; GFX90A-NEXT:    s_mov_b32 s8, s12
9720; GFX90A-NEXT:    s_mov_b32 s9, s13
9721; GFX90A-NEXT:    s_mov_b32 s10, s12
9722; GFX90A-NEXT:    s_mov_b32 s11, s13
9723; GFX90A-NEXT:    s_mov_b32 s12, s4
9724; GFX90A-NEXT:    s_mov_b32 s13, s5
9725; GFX90A-NEXT:    s_mov_b32 s14, s4
9726; GFX90A-NEXT:    s_mov_b32 s15, s5
9727; GFX90A-NEXT:    ;;#ASMSTART
9728; GFX90A-NEXT:    ; use s[8:15]
9729; GFX90A-NEXT:    ;;#ASMEND
9730; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9731;
9732; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_0:
9733; GFX940:       ; %bb.0:
9734; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9735; GFX940-NEXT:    ;;#ASMSTART
9736; GFX940-NEXT:    ; def s[8:13]
9737; GFX940-NEXT:    ;;#ASMEND
9738; GFX940-NEXT:    ;;#ASMSTART
9739; GFX940-NEXT:    ; def s[0:5]
9740; GFX940-NEXT:    ;;#ASMEND
9741; GFX940-NEXT:    s_mov_b32 s8, s12
9742; GFX940-NEXT:    s_mov_b32 s9, s13
9743; GFX940-NEXT:    s_mov_b32 s10, s12
9744; GFX940-NEXT:    s_mov_b32 s11, s13
9745; GFX940-NEXT:    s_mov_b32 s12, s0
9746; GFX940-NEXT:    s_mov_b32 s13, s1
9747; GFX940-NEXT:    s_mov_b32 s14, s0
9748; GFX940-NEXT:    s_mov_b32 s15, s1
9749; GFX940-NEXT:    ;;#ASMSTART
9750; GFX940-NEXT:    ; use s[8:15]
9751; GFX940-NEXT:    ;;#ASMEND
9752; GFX940-NEXT:    s_setpc_b64 s[30:31]
9753  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9754  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9755  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 0>
9756  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9757  ret void
9758}
9759
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <5, 5, poison, 0>: result elements 0-1 are %vec1[2], element 2 is
; unspecified, element 3 is %vec0[0]. The expected output has no s_mov that
; sources s12/s13 from the %vec0 registers.
9760define void @s_shuffle_v4i64_v3i64__5_5_u_0() {
9761; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
9762; GFX900:       ; %bb.0:
9763; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9764; GFX900-NEXT:    ;;#ASMSTART
9765; GFX900-NEXT:    ; def s[4:9]
9766; GFX900-NEXT:    ;;#ASMEND
9767; GFX900-NEXT:    ;;#ASMSTART
9768; GFX900-NEXT:    ; def s[8:13]
9769; GFX900-NEXT:    ;;#ASMEND
9770; GFX900-NEXT:    s_mov_b32 s8, s12
9771; GFX900-NEXT:    s_mov_b32 s9, s13
9772; GFX900-NEXT:    s_mov_b32 s10, s12
9773; GFX900-NEXT:    s_mov_b32 s11, s13
9774; GFX900-NEXT:    s_mov_b32 s14, s4
9775; GFX900-NEXT:    s_mov_b32 s15, s5
9776; GFX900-NEXT:    ;;#ASMSTART
9777; GFX900-NEXT:    ; use s[8:15]
9778; GFX900-NEXT:    ;;#ASMEND
9779; GFX900-NEXT:    s_setpc_b64 s[30:31]
9780;
9781; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
9782; GFX90A:       ; %bb.0:
9783; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9784; GFX90A-NEXT:    ;;#ASMSTART
9785; GFX90A-NEXT:    ; def s[4:9]
9786; GFX90A-NEXT:    ;;#ASMEND
9787; GFX90A-NEXT:    ;;#ASMSTART
9788; GFX90A-NEXT:    ; def s[8:13]
9789; GFX90A-NEXT:    ;;#ASMEND
9790; GFX90A-NEXT:    s_mov_b32 s8, s12
9791; GFX90A-NEXT:    s_mov_b32 s9, s13
9792; GFX90A-NEXT:    s_mov_b32 s10, s12
9793; GFX90A-NEXT:    s_mov_b32 s11, s13
9794; GFX90A-NEXT:    s_mov_b32 s14, s4
9795; GFX90A-NEXT:    s_mov_b32 s15, s5
9796; GFX90A-NEXT:    ;;#ASMSTART
9797; GFX90A-NEXT:    ; use s[8:15]
9798; GFX90A-NEXT:    ;;#ASMEND
9799; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9800;
9801; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_0:
9802; GFX940:       ; %bb.0:
9803; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9804; GFX940-NEXT:    ;;#ASMSTART
9805; GFX940-NEXT:    ; def s[8:13]
9806; GFX940-NEXT:    ;;#ASMEND
9807; GFX940-NEXT:    ;;#ASMSTART
9808; GFX940-NEXT:    ; def s[0:5]
9809; GFX940-NEXT:    ;;#ASMEND
9810; GFX940-NEXT:    s_mov_b32 s8, s12
9811; GFX940-NEXT:    s_mov_b32 s9, s13
9812; GFX940-NEXT:    s_mov_b32 s10, s12
9813; GFX940-NEXT:    s_mov_b32 s11, s13
9814; GFX940-NEXT:    s_mov_b32 s14, s0
9815; GFX940-NEXT:    s_mov_b32 s15, s1
9816; GFX940-NEXT:    ;;#ASMSTART
9817; GFX940-NEXT:    ; use s[8:15]
9818; GFX940-NEXT:    ;;#ASMEND
9819; GFX940-NEXT:    s_setpc_b64 s[30:31]
9820  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9821  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9822  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 0>
9823  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9824  ret void
9825}
9826
; Scalar-register ("=s") shuffle of two <3 x i64> sources with mask
; <5, 5, 1, 0>: result elements 0-1 are %vec1[2], element 2 is %vec0[1],
; element 3 is %vec0[0]. The "{s[8:15]}" constraint on the use asm pins the
; result register.
9827define void @s_shuffle_v4i64_v3i64__5_5_1_0() {
9828; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
9829; GFX900:       ; %bb.0:
9830; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9831; GFX900-NEXT:    ;;#ASMSTART
9832; GFX900-NEXT:    ; def s[4:9]
9833; GFX900-NEXT:    ;;#ASMEND
9834; GFX900-NEXT:    ;;#ASMSTART
9835; GFX900-NEXT:    ; def s[8:13]
9836; GFX900-NEXT:    ;;#ASMEND
9837; GFX900-NEXT:    s_mov_b32 s8, s12
9838; GFX900-NEXT:    s_mov_b32 s9, s13
9839; GFX900-NEXT:    s_mov_b32 s10, s12
9840; GFX900-NEXT:    s_mov_b32 s11, s13
9841; GFX900-NEXT:    s_mov_b32 s12, s6
9842; GFX900-NEXT:    s_mov_b32 s13, s7
9843; GFX900-NEXT:    s_mov_b32 s14, s4
9844; GFX900-NEXT:    s_mov_b32 s15, s5
9845; GFX900-NEXT:    ;;#ASMSTART
9846; GFX900-NEXT:    ; use s[8:15]
9847; GFX900-NEXT:    ;;#ASMEND
9848; GFX900-NEXT:    s_setpc_b64 s[30:31]
9849;
9850; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
9851; GFX90A:       ; %bb.0:
9852; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9853; GFX90A-NEXT:    ;;#ASMSTART
9854; GFX90A-NEXT:    ; def s[4:9]
9855; GFX90A-NEXT:    ;;#ASMEND
9856; GFX90A-NEXT:    ;;#ASMSTART
9857; GFX90A-NEXT:    ; def s[8:13]
9858; GFX90A-NEXT:    ;;#ASMEND
9859; GFX90A-NEXT:    s_mov_b32 s8, s12
9860; GFX90A-NEXT:    s_mov_b32 s9, s13
9861; GFX90A-NEXT:    s_mov_b32 s10, s12
9862; GFX90A-NEXT:    s_mov_b32 s11, s13
9863; GFX90A-NEXT:    s_mov_b32 s12, s6
9864; GFX90A-NEXT:    s_mov_b32 s13, s7
9865; GFX90A-NEXT:    s_mov_b32 s14, s4
9866; GFX90A-NEXT:    s_mov_b32 s15, s5
9867; GFX90A-NEXT:    ;;#ASMSTART
9868; GFX90A-NEXT:    ; use s[8:15]
9869; GFX90A-NEXT:    ;;#ASMEND
9870; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9871;
9872; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_0:
9873; GFX940:       ; %bb.0:
9874; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9875; GFX940-NEXT:    ;;#ASMSTART
9876; GFX940-NEXT:    ; def s[8:13]
9877; GFX940-NEXT:    ;;#ASMEND
9878; GFX940-NEXT:    ;;#ASMSTART
9879; GFX940-NEXT:    ; def s[0:5]
9880; GFX940-NEXT:    ;;#ASMEND
9881; GFX940-NEXT:    s_mov_b32 s8, s12
9882; GFX940-NEXT:    s_mov_b32 s9, s13
9883; GFX940-NEXT:    s_mov_b32 s10, s12
9884; GFX940-NEXT:    s_mov_b32 s11, s13
9885; GFX940-NEXT:    s_mov_b32 s12, s2
9886; GFX940-NEXT:    s_mov_b32 s13, s3
9887; GFX940-NEXT:    s_mov_b32 s14, s0
9888; GFX940-NEXT:    s_mov_b32 s15, s1
9889; GFX940-NEXT:    ;;#ASMSTART
9890; GFX940-NEXT:    ; use s[8:15]
9891; GFX940-NEXT:    ;;#ASMEND
9892; GFX940-NEXT:    s_setpc_b64 s[30:31]
9893  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9894  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9895  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 0>
9896  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9897  ret void
9898}
9899
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, 5, 2, 0>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[2], %vec1[2], %vec0[2], %vec0[0]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; Every expected sequence below writes all of s8..s15 (eight s_mov_b32), and
; s[12:13] is read as a source before it is overwritten.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
9900define void @s_shuffle_v4i64_v3i64__5_5_2_0() {
9901; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
9902; GFX900:       ; %bb.0:
9903; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9904; GFX900-NEXT:    ;;#ASMSTART
9905; GFX900-NEXT:    ; def s[8:13]
9906; GFX900-NEXT:    ;;#ASMEND
9907; GFX900-NEXT:    ;;#ASMSTART
9908; GFX900-NEXT:    ; def s[16:21]
9909; GFX900-NEXT:    ;;#ASMEND
9910; GFX900-NEXT:    s_mov_b32 s8, s12
9911; GFX900-NEXT:    s_mov_b32 s9, s13
9912; GFX900-NEXT:    s_mov_b32 s10, s12
9913; GFX900-NEXT:    s_mov_b32 s11, s13
9914; GFX900-NEXT:    s_mov_b32 s12, s20
9915; GFX900-NEXT:    s_mov_b32 s13, s21
9916; GFX900-NEXT:    s_mov_b32 s14, s16
9917; GFX900-NEXT:    s_mov_b32 s15, s17
9918; GFX900-NEXT:    ;;#ASMSTART
9919; GFX900-NEXT:    ; use s[8:15]
9920; GFX900-NEXT:    ;;#ASMEND
9921; GFX900-NEXT:    s_setpc_b64 s[30:31]
9922;
9923; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
9924; GFX90A:       ; %bb.0:
9925; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9926; GFX90A-NEXT:    ;;#ASMSTART
9927; GFX90A-NEXT:    ; def s[8:13]
9928; GFX90A-NEXT:    ;;#ASMEND
9929; GFX90A-NEXT:    ;;#ASMSTART
9930; GFX90A-NEXT:    ; def s[16:21]
9931; GFX90A-NEXT:    ;;#ASMEND
9932; GFX90A-NEXT:    s_mov_b32 s8, s12
9933; GFX90A-NEXT:    s_mov_b32 s9, s13
9934; GFX90A-NEXT:    s_mov_b32 s10, s12
9935; GFX90A-NEXT:    s_mov_b32 s11, s13
9936; GFX90A-NEXT:    s_mov_b32 s12, s20
9937; GFX90A-NEXT:    s_mov_b32 s13, s21
9938; GFX90A-NEXT:    s_mov_b32 s14, s16
9939; GFX90A-NEXT:    s_mov_b32 s15, s17
9940; GFX90A-NEXT:    ;;#ASMSTART
9941; GFX90A-NEXT:    ; use s[8:15]
9942; GFX90A-NEXT:    ;;#ASMEND
9943; GFX90A-NEXT:    s_setpc_b64 s[30:31]
9944;
9945; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_0:
9946; GFX940:       ; %bb.0:
9947; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9948; GFX940-NEXT:    ;;#ASMSTART
9949; GFX940-NEXT:    ; def s[8:13]
9950; GFX940-NEXT:    ;;#ASMEND
9951; GFX940-NEXT:    ;;#ASMSTART
9952; GFX940-NEXT:    ; def s[0:5]
9953; GFX940-NEXT:    ;;#ASMEND
9954; GFX940-NEXT:    s_mov_b32 s8, s12
9955; GFX940-NEXT:    s_mov_b32 s9, s13
9956; GFX940-NEXT:    s_mov_b32 s10, s12
9957; GFX940-NEXT:    s_mov_b32 s11, s13
9958; GFX940-NEXT:    s_mov_b32 s12, s4
9959; GFX940-NEXT:    s_mov_b32 s13, s5
9960; GFX940-NEXT:    s_mov_b32 s14, s0
9961; GFX940-NEXT:    s_mov_b32 s15, s1
9962; GFX940-NEXT:    ;;#ASMSTART
9963; GFX940-NEXT:    ; use s[8:15]
9964; GFX940-NEXT:    ;;#ASMEND
9965; GFX940-NEXT:    s_setpc_b64 s[30:31]
9966  %vec0 = call <3 x i64> asm "; def $0", "=s"()
9967  %vec1 = call <3 x i64> asm "; def $0", "=s"()
9968  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 0>
9969  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
9970  ret void
9971}
9972
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, 5, 3, 0>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[2], %vec1[2], %vec1[0], %vec0[0]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; Only six s_mov_b32 copies are expected on every target: one asm def is
; placed in s[12:17], so its lane 0 already sits in s[12:13] (result lane 2)
; and needs no copy.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
9973define void @s_shuffle_v4i64_v3i64__5_5_3_0() {
9974; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
9975; GFX900:       ; %bb.0:
9976; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9977; GFX900-NEXT:    ;;#ASMSTART
9978; GFX900-NEXT:    ; def s[4:9]
9979; GFX900-NEXT:    ;;#ASMEND
9980; GFX900-NEXT:    ;;#ASMSTART
9981; GFX900-NEXT:    ; def s[12:17]
9982; GFX900-NEXT:    ;;#ASMEND
9983; GFX900-NEXT:    s_mov_b32 s8, s16
9984; GFX900-NEXT:    s_mov_b32 s9, s17
9985; GFX900-NEXT:    s_mov_b32 s10, s16
9986; GFX900-NEXT:    s_mov_b32 s11, s17
9987; GFX900-NEXT:    s_mov_b32 s14, s4
9988; GFX900-NEXT:    s_mov_b32 s15, s5
9989; GFX900-NEXT:    ;;#ASMSTART
9990; GFX900-NEXT:    ; use s[8:15]
9991; GFX900-NEXT:    ;;#ASMEND
9992; GFX900-NEXT:    s_setpc_b64 s[30:31]
9993;
9994; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
9995; GFX90A:       ; %bb.0:
9996; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9997; GFX90A-NEXT:    ;;#ASMSTART
9998; GFX90A-NEXT:    ; def s[4:9]
9999; GFX90A-NEXT:    ;;#ASMEND
10000; GFX90A-NEXT:    ;;#ASMSTART
10001; GFX90A-NEXT:    ; def s[12:17]
10002; GFX90A-NEXT:    ;;#ASMEND
10003; GFX90A-NEXT:    s_mov_b32 s8, s16
10004; GFX90A-NEXT:    s_mov_b32 s9, s17
10005; GFX90A-NEXT:    s_mov_b32 s10, s16
10006; GFX90A-NEXT:    s_mov_b32 s11, s17
10007; GFX90A-NEXT:    s_mov_b32 s14, s4
10008; GFX90A-NEXT:    s_mov_b32 s15, s5
10009; GFX90A-NEXT:    ;;#ASMSTART
10010; GFX90A-NEXT:    ; use s[8:15]
10011; GFX90A-NEXT:    ;;#ASMEND
10012; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10013;
10014; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_0:
10015; GFX940:       ; %bb.0:
10016; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10017; GFX940-NEXT:    ;;#ASMSTART
10018; GFX940-NEXT:    ; def s[12:17]
10019; GFX940-NEXT:    ;;#ASMEND
10020; GFX940-NEXT:    ;;#ASMSTART
10021; GFX940-NEXT:    ; def s[0:5]
10022; GFX940-NEXT:    ;;#ASMEND
10023; GFX940-NEXT:    s_mov_b32 s8, s16
10024; GFX940-NEXT:    s_mov_b32 s9, s17
10025; GFX940-NEXT:    s_mov_b32 s10, s16
10026; GFX940-NEXT:    s_mov_b32 s11, s17
10027; GFX940-NEXT:    s_mov_b32 s14, s0
10028; GFX940-NEXT:    s_mov_b32 s15, s1
10029; GFX940-NEXT:    ;;#ASMSTART
10030; GFX940-NEXT:    ; use s[8:15]
10031; GFX940-NEXT:    ;;#ASMEND
10032; GFX940-NEXT:    s_setpc_b64 s[30:31]
10033  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10034  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10035  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 0>
10036  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10037  ret void
10038}
10039
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, 5, 4, 0>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[2], %vec1[2], %vec1[1], %vec0[0]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; Eight s_mov_b32 copies are expected on every target; s[14:15] is copied into
; s[12:13] before s[14:15] itself is overwritten with the last lane.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10040define void @s_shuffle_v4i64_v3i64__5_5_4_0() {
10041; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
10042; GFX900:       ; %bb.0:
10043; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10044; GFX900-NEXT:    ;;#ASMSTART
10045; GFX900-NEXT:    ; def s[4:9]
10046; GFX900-NEXT:    ;;#ASMEND
10047; GFX900-NEXT:    ;;#ASMSTART
10048; GFX900-NEXT:    ; def s[12:17]
10049; GFX900-NEXT:    ;;#ASMEND
10050; GFX900-NEXT:    s_mov_b32 s8, s16
10051; GFX900-NEXT:    s_mov_b32 s9, s17
10052; GFX900-NEXT:    s_mov_b32 s10, s16
10053; GFX900-NEXT:    s_mov_b32 s11, s17
10054; GFX900-NEXT:    s_mov_b32 s12, s14
10055; GFX900-NEXT:    s_mov_b32 s13, s15
10056; GFX900-NEXT:    s_mov_b32 s14, s4
10057; GFX900-NEXT:    s_mov_b32 s15, s5
10058; GFX900-NEXT:    ;;#ASMSTART
10059; GFX900-NEXT:    ; use s[8:15]
10060; GFX900-NEXT:    ;;#ASMEND
10061; GFX900-NEXT:    s_setpc_b64 s[30:31]
10062;
10063; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
10064; GFX90A:       ; %bb.0:
10065; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10066; GFX90A-NEXT:    ;;#ASMSTART
10067; GFX90A-NEXT:    ; def s[4:9]
10068; GFX90A-NEXT:    ;;#ASMEND
10069; GFX90A-NEXT:    ;;#ASMSTART
10070; GFX90A-NEXT:    ; def s[12:17]
10071; GFX90A-NEXT:    ;;#ASMEND
10072; GFX90A-NEXT:    s_mov_b32 s8, s16
10073; GFX90A-NEXT:    s_mov_b32 s9, s17
10074; GFX90A-NEXT:    s_mov_b32 s10, s16
10075; GFX90A-NEXT:    s_mov_b32 s11, s17
10076; GFX90A-NEXT:    s_mov_b32 s12, s14
10077; GFX90A-NEXT:    s_mov_b32 s13, s15
10078; GFX90A-NEXT:    s_mov_b32 s14, s4
10079; GFX90A-NEXT:    s_mov_b32 s15, s5
10080; GFX90A-NEXT:    ;;#ASMSTART
10081; GFX90A-NEXT:    ; use s[8:15]
10082; GFX90A-NEXT:    ;;#ASMEND
10083; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10084;
10085; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_0:
10086; GFX940:       ; %bb.0:
10087; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10088; GFX940-NEXT:    ;;#ASMSTART
10089; GFX940-NEXT:    ; def s[12:17]
10090; GFX940-NEXT:    ;;#ASMEND
10091; GFX940-NEXT:    ;;#ASMSTART
10092; GFX940-NEXT:    ; def s[0:5]
10093; GFX940-NEXT:    ;;#ASMEND
10094; GFX940-NEXT:    s_mov_b32 s8, s16
10095; GFX940-NEXT:    s_mov_b32 s9, s17
10096; GFX940-NEXT:    s_mov_b32 s10, s16
10097; GFX940-NEXT:    s_mov_b32 s11, s17
10098; GFX940-NEXT:    s_mov_b32 s12, s14
10099; GFX940-NEXT:    s_mov_b32 s13, s15
10100; GFX940-NEXT:    s_mov_b32 s14, s0
10101; GFX940-NEXT:    s_mov_b32 s15, s1
10102; GFX940-NEXT:    ;;#ASMSTART
10103; GFX940-NEXT:    ; use s[8:15]
10104; GFX940-NEXT:    ;;#ASMEND
10105; GFX940-NEXT:    s_setpc_b64 s[30:31]
10106  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10107  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10108  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 0>
10109  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10110  ret void
10111}
10112
; SGPR shuffle test with a single source: %vec0 comes from inline asm ("=s"),
; the second shuffle operand is poison, and the mask is <poison, 1, 1, 1>.
; Lane 0 of the result is therefore unspecified and lanes 1-3 are all %vec0[1].
; The "{s[8:15]}" constraint pins the result to s[8:15], one SGPR pair per i64
; lane. The asm def lands in s[8:13], so %vec0[1] is already in s[10:11]
; (result lane 1) and only lanes 2 and 3 need copies (four s_mov_b32).
; All three RUN lines share one expected sequence via the common GFX9 prefix.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10113define void @s_shuffle_v4i64_v3i64__u_1_1_1() {
10114; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_1_1_1:
10115; GFX9:       ; %bb.0:
10116; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10117; GFX9-NEXT:    ;;#ASMSTART
10118; GFX9-NEXT:    ; def s[8:13]
10119; GFX9-NEXT:    ;;#ASMEND
10120; GFX9-NEXT:    s_mov_b32 s12, s10
10121; GFX9-NEXT:    s_mov_b32 s13, s11
10122; GFX9-NEXT:    s_mov_b32 s14, s10
10123; GFX9-NEXT:    s_mov_b32 s15, s11
10124; GFX9-NEXT:    ;;#ASMSTART
10125; GFX9-NEXT:    ; use s[8:15]
10126; GFX9-NEXT:    ;;#ASMEND
10127; GFX9-NEXT:    s_setpc_b64 s[30:31]
10128  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10129  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 1, i32 1, i32 1>
10130  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10131  ret void
10132}
10133
; SGPR shuffle test with a single source: %vec0 comes from inline asm ("=s"),
; the second shuffle operand is poison, and the mask is <0, 1, 1, 1>, giving
; <%vec0[0], %vec0[1], %vec0[1], %vec0[1]>.
; The "{s[8:15]}" constraint pins the result to s[8:15], one SGPR pair per i64
; lane. The asm def lands in s[8:13], so result lanes 0 and 1 are already in
; place and only lanes 2 and 3 need copies from s[10:11] (four s_mov_b32).
; All three RUN lines share one expected sequence via the common GFX9 prefix.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10134define void @s_shuffle_v4i64_v3i64__0_1_1_1() {
10135; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_1_1_1:
10136; GFX9:       ; %bb.0:
10137; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10138; GFX9-NEXT:    ;;#ASMSTART
10139; GFX9-NEXT:    ; def s[8:13]
10140; GFX9-NEXT:    ;;#ASMEND
10141; GFX9-NEXT:    s_mov_b32 s12, s10
10142; GFX9-NEXT:    s_mov_b32 s13, s11
10143; GFX9-NEXT:    s_mov_b32 s14, s10
10144; GFX9-NEXT:    s_mov_b32 s15, s11
10145; GFX9-NEXT:    ;;#ASMSTART
10146; GFX9-NEXT:    ; use s[8:15]
10147; GFX9-NEXT:    ;;#ASMEND
10148; GFX9-NEXT:    s_setpc_b64 s[30:31]
10149  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10150  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
10151  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10152  ret void
10153}
10154
; SGPR shuffle test with a single source: %vec0 comes from inline asm ("=s"),
; the second shuffle operand is poison, and the mask is <1, 1, 1, 1>, i.e. a
; splat of %vec0[1] into all four result lanes.
; The "{s[8:15]}" constraint pins the result to s[8:15], one SGPR pair per i64
; lane. The asm def lands in s[8:13], so %vec0[1] is already in s[10:11]
; (result lane 1); lanes 0, 2 and 3 are filled from it (six s_mov_b32).
; All three RUN lines share one expected sequence via the common GFX9 prefix.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10155define void @s_shuffle_v4i64_v3i64__1_1_1_1() {
10156; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_1_1_1:
10157; GFX9:       ; %bb.0:
10158; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10159; GFX9-NEXT:    ;;#ASMSTART
10160; GFX9-NEXT:    ; def s[8:13]
10161; GFX9-NEXT:    ;;#ASMEND
10162; GFX9-NEXT:    s_mov_b32 s8, s10
10163; GFX9-NEXT:    s_mov_b32 s9, s11
10164; GFX9-NEXT:    s_mov_b32 s12, s10
10165; GFX9-NEXT:    s_mov_b32 s13, s11
10166; GFX9-NEXT:    s_mov_b32 s14, s10
10167; GFX9-NEXT:    s_mov_b32 s15, s11
10168; GFX9-NEXT:    ;;#ASMSTART
10169; GFX9-NEXT:    ; use s[8:15]
10170; GFX9-NEXT:    ;;#ASMEND
10171; GFX9-NEXT:    s_setpc_b64 s[30:31]
10172  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10173  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
10174  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10175  ret void
10176}
10177
; SGPR shuffle test with a single source: %vec0 comes from inline asm ("=s"),
; the second shuffle operand is poison, and the mask is <2, 1, 1, 1>, giving
; <%vec0[2], %vec0[1], %vec0[1], %vec0[1]>.
; The "{s[8:15]}" constraint pins the result to s[8:15], one SGPR pair per i64
; lane. The asm def lands in s[8:13]; s[12:13] (%vec0[2]) is first copied to
; s[8:9] and only afterwards overwritten with %vec0[1] from s[10:11], so the
; order of the expected copies matters (six s_mov_b32 in total).
; All three RUN lines share one expected sequence via the common GFX9 prefix.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10178define void @s_shuffle_v4i64_v3i64__2_1_1_1() {
10179; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_1_1_1:
10180; GFX9:       ; %bb.0:
10181; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10182; GFX9-NEXT:    ;;#ASMSTART
10183; GFX9-NEXT:    ; def s[8:13]
10184; GFX9-NEXT:    ;;#ASMEND
10185; GFX9-NEXT:    s_mov_b32 s8, s12
10186; GFX9-NEXT:    s_mov_b32 s9, s13
10187; GFX9-NEXT:    s_mov_b32 s12, s10
10188; GFX9-NEXT:    s_mov_b32 s13, s11
10189; GFX9-NEXT:    s_mov_b32 s14, s10
10190; GFX9-NEXT:    s_mov_b32 s15, s11
10191; GFX9-NEXT:    ;;#ASMSTART
10192; GFX9-NEXT:    ; use s[8:15]
10193; GFX9-NEXT:    ;;#ASMEND
10194; GFX9-NEXT:    s_setpc_b64 s[30:31]
10195  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10196  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
10197  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10198  ret void
10199}
10200
; SGPR shuffle test with a single source: %vec0 comes from inline asm ("=s"),
; the second shuffle operand is poison, and the mask is <3, 1, 1, 1>.
; Mask index 3 refers to lane 0 of the second operand, which is poison here,
; so result lane 0 is unspecified; lanes 1-3 are all %vec0[1].
; The "{s[8:15]}" constraint pins the result to s[8:15], one SGPR pair per i64
; lane. Accordingly the expected code contains no copy into s[8:9]; with the
; asm def in s[8:13], only lanes 2 and 3 are filled from s[10:11]
; (four s_mov_b32).
; All three RUN lines share one expected sequence via the common GFX9 prefix.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10201define void @s_shuffle_v4i64_v3i64__3_1_1_1() {
10202; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_1_1_1:
10203; GFX9:       ; %bb.0:
10204; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10205; GFX9-NEXT:    ;;#ASMSTART
10206; GFX9-NEXT:    ; def s[8:13]
10207; GFX9-NEXT:    ;;#ASMEND
10208; GFX9-NEXT:    s_mov_b32 s12, s10
10209; GFX9-NEXT:    s_mov_b32 s13, s11
10210; GFX9-NEXT:    s_mov_b32 s14, s10
10211; GFX9-NEXT:    s_mov_b32 s15, s11
10212; GFX9-NEXT:    ;;#ASMSTART
10213; GFX9-NEXT:    ; use s[8:15]
10214; GFX9-NEXT:    ;;#ASMEND
10215; GFX9-NEXT:    s_setpc_b64 s[30:31]
10216  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10217  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
10218  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10219  ret void
10220}
10221
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <4, 1, 1, 1>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[1], %vec0[1], %vec0[1], %vec0[1]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; Six s_mov_b32 copies are expected on every target (lane 1 is already in
; s[10:11]). In the gfx900 and gfx90a sequences the two asm def ranges,
; s[8:13] and s[4:9], overlap in s[8:9]; the gfx940 sequence uses s[0:5] for
; the second def instead.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10222define void @s_shuffle_v4i64_v3i64__4_1_1_1() {
10223; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
10224; GFX900:       ; %bb.0:
10225; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10226; GFX900-NEXT:    ;;#ASMSTART
10227; GFX900-NEXT:    ; def s[8:13]
10228; GFX900-NEXT:    ;;#ASMEND
10229; GFX900-NEXT:    ;;#ASMSTART
10230; GFX900-NEXT:    ; def s[4:9]
10231; GFX900-NEXT:    ;;#ASMEND
10232; GFX900-NEXT:    s_mov_b32 s8, s6
10233; GFX900-NEXT:    s_mov_b32 s9, s7
10234; GFX900-NEXT:    s_mov_b32 s12, s10
10235; GFX900-NEXT:    s_mov_b32 s13, s11
10236; GFX900-NEXT:    s_mov_b32 s14, s10
10237; GFX900-NEXT:    s_mov_b32 s15, s11
10238; GFX900-NEXT:    ;;#ASMSTART
10239; GFX900-NEXT:    ; use s[8:15]
10240; GFX900-NEXT:    ;;#ASMEND
10241; GFX900-NEXT:    s_setpc_b64 s[30:31]
10242;
10243; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
10244; GFX90A:       ; %bb.0:
10245; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10246; GFX90A-NEXT:    ;;#ASMSTART
10247; GFX90A-NEXT:    ; def s[8:13]
10248; GFX90A-NEXT:    ;;#ASMEND
10249; GFX90A-NEXT:    ;;#ASMSTART
10250; GFX90A-NEXT:    ; def s[4:9]
10251; GFX90A-NEXT:    ;;#ASMEND
10252; GFX90A-NEXT:    s_mov_b32 s8, s6
10253; GFX90A-NEXT:    s_mov_b32 s9, s7
10254; GFX90A-NEXT:    s_mov_b32 s12, s10
10255; GFX90A-NEXT:    s_mov_b32 s13, s11
10256; GFX90A-NEXT:    s_mov_b32 s14, s10
10257; GFX90A-NEXT:    s_mov_b32 s15, s11
10258; GFX90A-NEXT:    ;;#ASMSTART
10259; GFX90A-NEXT:    ; use s[8:15]
10260; GFX90A-NEXT:    ;;#ASMEND
10261; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10262;
10263; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_1_1_1:
10264; GFX940:       ; %bb.0:
10265; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10266; GFX940-NEXT:    ;;#ASMSTART
10267; GFX940-NEXT:    ; def s[8:13]
10268; GFX940-NEXT:    ;;#ASMEND
10269; GFX940-NEXT:    ;;#ASMSTART
10270; GFX940-NEXT:    ; def s[0:5]
10271; GFX940-NEXT:    ;;#ASMEND
10272; GFX940-NEXT:    s_mov_b32 s8, s2
10273; GFX940-NEXT:    s_mov_b32 s9, s3
10274; GFX940-NEXT:    s_mov_b32 s12, s10
10275; GFX940-NEXT:    s_mov_b32 s13, s11
10276; GFX940-NEXT:    s_mov_b32 s14, s10
10277; GFX940-NEXT:    s_mov_b32 s15, s11
10278; GFX940-NEXT:    ;;#ASMSTART
10279; GFX940-NEXT:    ; use s[8:15]
10280; GFX940-NEXT:    ;;#ASMEND
10281; GFX940-NEXT:    s_setpc_b64 s[30:31]
10282  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10283  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10284  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 1, i32 1, i32 1>
10285  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10286  ret void
10287}
10288
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, 1, 1, 1>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[2], %vec0[1], %vec0[1], %vec0[1]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; The expected code differs per target: for gfx900 and gfx90a the second asm
; def is placed in s[4:9], so its lane 2 lands directly in s[8:9] and only four
; s_mov_b32 remain; for gfx940 the second def is in s[0:5] and s[4:5] is
; copied into s[8:9] explicitly (six s_mov_b32).
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10289define void @s_shuffle_v4i64_v3i64__5_1_1_1() {
10290; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
10291; GFX900:       ; %bb.0:
10292; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10293; GFX900-NEXT:    ;;#ASMSTART
10294; GFX900-NEXT:    ; def s[8:13]
10295; GFX900-NEXT:    ;;#ASMEND
10296; GFX900-NEXT:    ;;#ASMSTART
10297; GFX900-NEXT:    ; def s[4:9]
10298; GFX900-NEXT:    ;;#ASMEND
10299; GFX900-NEXT:    s_mov_b32 s12, s10
10300; GFX900-NEXT:    s_mov_b32 s13, s11
10301; GFX900-NEXT:    s_mov_b32 s14, s10
10302; GFX900-NEXT:    s_mov_b32 s15, s11
10303; GFX900-NEXT:    ;;#ASMSTART
10304; GFX900-NEXT:    ; use s[8:15]
10305; GFX900-NEXT:    ;;#ASMEND
10306; GFX900-NEXT:    s_setpc_b64 s[30:31]
10307;
10308; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
10309; GFX90A:       ; %bb.0:
10310; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10311; GFX90A-NEXT:    ;;#ASMSTART
10312; GFX90A-NEXT:    ; def s[8:13]
10313; GFX90A-NEXT:    ;;#ASMEND
10314; GFX90A-NEXT:    ;;#ASMSTART
10315; GFX90A-NEXT:    ; def s[4:9]
10316; GFX90A-NEXT:    ;;#ASMEND
10317; GFX90A-NEXT:    s_mov_b32 s12, s10
10318; GFX90A-NEXT:    s_mov_b32 s13, s11
10319; GFX90A-NEXT:    s_mov_b32 s14, s10
10320; GFX90A-NEXT:    s_mov_b32 s15, s11
10321; GFX90A-NEXT:    ;;#ASMSTART
10322; GFX90A-NEXT:    ; use s[8:15]
10323; GFX90A-NEXT:    ;;#ASMEND
10324; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10325;
10326; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_1_1:
10327; GFX940:       ; %bb.0:
10328; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10329; GFX940-NEXT:    ;;#ASMSTART
10330; GFX940-NEXT:    ; def s[8:13]
10331; GFX940-NEXT:    ;;#ASMEND
10332; GFX940-NEXT:    ;;#ASMSTART
10333; GFX940-NEXT:    ; def s[0:5]
10334; GFX940-NEXT:    ;;#ASMEND
10335; GFX940-NEXT:    s_mov_b32 s8, s4
10336; GFX940-NEXT:    s_mov_b32 s9, s5
10337; GFX940-NEXT:    s_mov_b32 s12, s10
10338; GFX940-NEXT:    s_mov_b32 s13, s11
10339; GFX940-NEXT:    s_mov_b32 s14, s10
10340; GFX940-NEXT:    s_mov_b32 s15, s11
10341; GFX940-NEXT:    ;;#ASMSTART
10342; GFX940-NEXT:    ; use s[8:15]
10343; GFX940-NEXT:    ;;#ASMEND
10344; GFX940-NEXT:    s_setpc_b64 s[30:31]
10345  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10346  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10347  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 1, i32 1>
10348  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10349  ret void
10350}
10351
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, poison, 1, 1>. Mask indices 0-2 select lanes of %vec0
; and 3-5 select lanes of %vec1, so the result is
; <%vec1[2], unspecified, %vec0[1], %vec0[1]>. The "{s[8:15]}" constraint pins
; the result to s[8:15], one SGPR pair per i64 lane.
; No expected sequence copies anything into s[10:11] (the poison lane). For
; gfx900 and gfx90a six s_mov_b32 are expected. For gfx940 only four are
; expected: the second asm def is placed in s[4:9], so its lane 2 lands
; directly in s[8:9], and one of the copies is scheduled between the two asm
; blocks.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10352define void @s_shuffle_v4i64_v3i64__5_u_1_1() {
10353; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
10354; GFX900:       ; %bb.0:
10355; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10356; GFX900-NEXT:    ;;#ASMSTART
10357; GFX900-NEXT:    ; def s[4:9]
10358; GFX900-NEXT:    ;;#ASMEND
10359; GFX900-NEXT:    ;;#ASMSTART
10360; GFX900-NEXT:    ; def s[8:13]
10361; GFX900-NEXT:    ;;#ASMEND
10362; GFX900-NEXT:    s_mov_b32 s8, s12
10363; GFX900-NEXT:    s_mov_b32 s9, s13
10364; GFX900-NEXT:    s_mov_b32 s12, s6
10365; GFX900-NEXT:    s_mov_b32 s13, s7
10366; GFX900-NEXT:    s_mov_b32 s14, s6
10367; GFX900-NEXT:    s_mov_b32 s15, s7
10368; GFX900-NEXT:    ;;#ASMSTART
10369; GFX900-NEXT:    ; use s[8:15]
10370; GFX900-NEXT:    ;;#ASMEND
10371; GFX900-NEXT:    s_setpc_b64 s[30:31]
10372;
10373; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
10374; GFX90A:       ; %bb.0:
10375; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10376; GFX90A-NEXT:    ;;#ASMSTART
10377; GFX90A-NEXT:    ; def s[4:9]
10378; GFX90A-NEXT:    ;;#ASMEND
10379; GFX90A-NEXT:    ;;#ASMSTART
10380; GFX90A-NEXT:    ; def s[8:13]
10381; GFX90A-NEXT:    ;;#ASMEND
10382; GFX90A-NEXT:    s_mov_b32 s8, s12
10383; GFX90A-NEXT:    s_mov_b32 s9, s13
10384; GFX90A-NEXT:    s_mov_b32 s12, s6
10385; GFX90A-NEXT:    s_mov_b32 s13, s7
10386; GFX90A-NEXT:    s_mov_b32 s14, s6
10387; GFX90A-NEXT:    s_mov_b32 s15, s7
10388; GFX90A-NEXT:    ;;#ASMSTART
10389; GFX90A-NEXT:    ; use s[8:15]
10390; GFX90A-NEXT:    ;;#ASMEND
10391; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10392;
10393; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_1_1:
10394; GFX940:       ; %bb.0:
10395; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10396; GFX940-NEXT:    ;;#ASMSTART
10397; GFX940-NEXT:    ; def s[0:5]
10398; GFX940-NEXT:    ;;#ASMEND
10399; GFX940-NEXT:    s_mov_b32 s12, s2
10400; GFX940-NEXT:    ;;#ASMSTART
10401; GFX940-NEXT:    ; def s[4:9]
10402; GFX940-NEXT:    ;;#ASMEND
10403; GFX940-NEXT:    s_mov_b32 s13, s3
10404; GFX940-NEXT:    s_mov_b32 s14, s2
10405; GFX940-NEXT:    s_mov_b32 s15, s3
10406; GFX940-NEXT:    ;;#ASMSTART
10407; GFX940-NEXT:    ; use s[8:15]
10408; GFX940-NEXT:    ;;#ASMEND
10409; GFX940-NEXT:    s_setpc_b64 s[30:31]
10410  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10411  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10412  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 1, i32 1>
10413  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10414  ret void
10415}
10416
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, 0, 1, 1>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[2], %vec0[0], %vec0[1], %vec0[1]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; For gfx900 and gfx90a all four lanes are copied (eight s_mov_b32). For
; gfx940 six are expected: the second asm def is placed in s[4:9], so its
; lane 2 lands directly in s[8:9], and one copy is scheduled between the two
; asm blocks.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10417define void @s_shuffle_v4i64_v3i64__5_0_1_1() {
10418; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
10419; GFX900:       ; %bb.0:
10420; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10421; GFX900-NEXT:    ;;#ASMSTART
10422; GFX900-NEXT:    ; def s[4:9]
10423; GFX900-NEXT:    ;;#ASMEND
10424; GFX900-NEXT:    ;;#ASMSTART
10425; GFX900-NEXT:    ; def s[8:13]
10426; GFX900-NEXT:    ;;#ASMEND
10427; GFX900-NEXT:    s_mov_b32 s8, s12
10428; GFX900-NEXT:    s_mov_b32 s9, s13
10429; GFX900-NEXT:    s_mov_b32 s10, s4
10430; GFX900-NEXT:    s_mov_b32 s11, s5
10431; GFX900-NEXT:    s_mov_b32 s12, s6
10432; GFX900-NEXT:    s_mov_b32 s13, s7
10433; GFX900-NEXT:    s_mov_b32 s14, s6
10434; GFX900-NEXT:    s_mov_b32 s15, s7
10435; GFX900-NEXT:    ;;#ASMSTART
10436; GFX900-NEXT:    ; use s[8:15]
10437; GFX900-NEXT:    ;;#ASMEND
10438; GFX900-NEXT:    s_setpc_b64 s[30:31]
10439;
10440; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
10441; GFX90A:       ; %bb.0:
10442; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10443; GFX90A-NEXT:    ;;#ASMSTART
10444; GFX90A-NEXT:    ; def s[4:9]
10445; GFX90A-NEXT:    ;;#ASMEND
10446; GFX90A-NEXT:    ;;#ASMSTART
10447; GFX90A-NEXT:    ; def s[8:13]
10448; GFX90A-NEXT:    ;;#ASMEND
10449; GFX90A-NEXT:    s_mov_b32 s8, s12
10450; GFX90A-NEXT:    s_mov_b32 s9, s13
10451; GFX90A-NEXT:    s_mov_b32 s10, s4
10452; GFX90A-NEXT:    s_mov_b32 s11, s5
10453; GFX90A-NEXT:    s_mov_b32 s12, s6
10454; GFX90A-NEXT:    s_mov_b32 s13, s7
10455; GFX90A-NEXT:    s_mov_b32 s14, s6
10456; GFX90A-NEXT:    s_mov_b32 s15, s7
10457; GFX90A-NEXT:    ;;#ASMSTART
10458; GFX90A-NEXT:    ; use s[8:15]
10459; GFX90A-NEXT:    ;;#ASMEND
10460; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10461;
10462; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_1_1:
10463; GFX940:       ; %bb.0:
10464; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10465; GFX940-NEXT:    ;;#ASMSTART
10466; GFX940-NEXT:    ; def s[0:5]
10467; GFX940-NEXT:    ;;#ASMEND
10468; GFX940-NEXT:    s_mov_b32 s10, s0
10469; GFX940-NEXT:    ;;#ASMSTART
10470; GFX940-NEXT:    ; def s[4:9]
10471; GFX940-NEXT:    ;;#ASMEND
10472; GFX940-NEXT:    s_mov_b32 s11, s1
10473; GFX940-NEXT:    s_mov_b32 s12, s2
10474; GFX940-NEXT:    s_mov_b32 s13, s3
10475; GFX940-NEXT:    s_mov_b32 s14, s2
10476; GFX940-NEXT:    s_mov_b32 s15, s3
10477; GFX940-NEXT:    ;;#ASMSTART
10478; GFX940-NEXT:    ; use s[8:15]
10479; GFX940-NEXT:    ;;#ASMEND
10480; GFX940-NEXT:    s_setpc_b64 s[30:31]
10481  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10482  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10483  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 1, i32 1>
10484  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10485  ret void
10486}
10487
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, 2, 1, 1>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[2], %vec0[2], %vec0[1], %vec0[1]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; For gfx900 and gfx90a only four s_mov_b32 are expected: the asm defs are
; placed in s[12:17] and s[4:9], which leaves result lane 0 (s[8:9]) and
; result lane 3 (s[14:15]) already populated. For gfx940 the defs are in
; s[8:13] and s[0:5] and all four lanes are copied (eight s_mov_b32).
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10488define void @s_shuffle_v4i64_v3i64__5_2_1_1() {
10489; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
10490; GFX900:       ; %bb.0:
10491; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10492; GFX900-NEXT:    ;;#ASMSTART
10493; GFX900-NEXT:    ; def s[12:17]
10494; GFX900-NEXT:    ;;#ASMEND
10495; GFX900-NEXT:    ;;#ASMSTART
10496; GFX900-NEXT:    ; def s[4:9]
10497; GFX900-NEXT:    ;;#ASMEND
10498; GFX900-NEXT:    s_mov_b32 s10, s16
10499; GFX900-NEXT:    s_mov_b32 s11, s17
10500; GFX900-NEXT:    s_mov_b32 s12, s14
10501; GFX900-NEXT:    s_mov_b32 s13, s15
10502; GFX900-NEXT:    ;;#ASMSTART
10503; GFX900-NEXT:    ; use s[8:15]
10504; GFX900-NEXT:    ;;#ASMEND
10505; GFX900-NEXT:    s_setpc_b64 s[30:31]
10506;
10507; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
10508; GFX90A:       ; %bb.0:
10509; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10510; GFX90A-NEXT:    ;;#ASMSTART
10511; GFX90A-NEXT:    ; def s[12:17]
10512; GFX90A-NEXT:    ;;#ASMEND
10513; GFX90A-NEXT:    ;;#ASMSTART
10514; GFX90A-NEXT:    ; def s[4:9]
10515; GFX90A-NEXT:    ;;#ASMEND
10516; GFX90A-NEXT:    s_mov_b32 s10, s16
10517; GFX90A-NEXT:    s_mov_b32 s11, s17
10518; GFX90A-NEXT:    s_mov_b32 s12, s14
10519; GFX90A-NEXT:    s_mov_b32 s13, s15
10520; GFX90A-NEXT:    ;;#ASMSTART
10521; GFX90A-NEXT:    ; use s[8:15]
10522; GFX90A-NEXT:    ;;#ASMEND
10523; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10524;
10525; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_1_1:
10526; GFX940:       ; %bb.0:
10527; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10528; GFX940-NEXT:    ;;#ASMSTART
10529; GFX940-NEXT:    ; def s[8:13]
10530; GFX940-NEXT:    ;;#ASMEND
10531; GFX940-NEXT:    ;;#ASMSTART
10532; GFX940-NEXT:    ; def s[0:5]
10533; GFX940-NEXT:    ;;#ASMEND
10534; GFX940-NEXT:    s_mov_b32 s8, s12
10535; GFX940-NEXT:    s_mov_b32 s9, s13
10536; GFX940-NEXT:    s_mov_b32 s10, s4
10537; GFX940-NEXT:    s_mov_b32 s11, s5
10538; GFX940-NEXT:    s_mov_b32 s12, s2
10539; GFX940-NEXT:    s_mov_b32 s13, s3
10540; GFX940-NEXT:    s_mov_b32 s14, s2
10541; GFX940-NEXT:    s_mov_b32 s15, s3
10542; GFX940-NEXT:    ;;#ASMSTART
10543; GFX940-NEXT:    ; use s[8:15]
10544; GFX940-NEXT:    ;;#ASMEND
10545; GFX940-NEXT:    s_setpc_b64 s[30:31]
10546  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10547  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10548  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 1, i32 1>
10549  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10550  ret void
10551}
10552
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, 3, 1, 1>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[2], %vec1[0], %vec0[1], %vec0[1]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; For gfx900 and gfx90a all four lanes are copied (eight s_mov_b32), with
; s[12:13] read into s[10:11] before it is overwritten. For gfx940 six are
; expected: the second asm def is placed in s[4:9], so its lane 2 lands
; directly in s[8:9], and one copy is scheduled between the two asm blocks.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10553define void @s_shuffle_v4i64_v3i64__5_3_1_1() {
10554; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
10555; GFX900:       ; %bb.0:
10556; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10557; GFX900-NEXT:    ;;#ASMSTART
10558; GFX900-NEXT:    ; def s[4:9]
10559; GFX900-NEXT:    ;;#ASMEND
10560; GFX900-NEXT:    ;;#ASMSTART
10561; GFX900-NEXT:    ; def s[12:17]
10562; GFX900-NEXT:    ;;#ASMEND
10563; GFX900-NEXT:    s_mov_b32 s8, s16
10564; GFX900-NEXT:    s_mov_b32 s9, s17
10565; GFX900-NEXT:    s_mov_b32 s10, s12
10566; GFX900-NEXT:    s_mov_b32 s11, s13
10567; GFX900-NEXT:    s_mov_b32 s12, s6
10568; GFX900-NEXT:    s_mov_b32 s13, s7
10569; GFX900-NEXT:    s_mov_b32 s14, s6
10570; GFX900-NEXT:    s_mov_b32 s15, s7
10571; GFX900-NEXT:    ;;#ASMSTART
10572; GFX900-NEXT:    ; use s[8:15]
10573; GFX900-NEXT:    ;;#ASMEND
10574; GFX900-NEXT:    s_setpc_b64 s[30:31]
10575;
10576; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
10577; GFX90A:       ; %bb.0:
10578; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10579; GFX90A-NEXT:    ;;#ASMSTART
10580; GFX90A-NEXT:    ; def s[4:9]
10581; GFX90A-NEXT:    ;;#ASMEND
10582; GFX90A-NEXT:    ;;#ASMSTART
10583; GFX90A-NEXT:    ; def s[12:17]
10584; GFX90A-NEXT:    ;;#ASMEND
10585; GFX90A-NEXT:    s_mov_b32 s8, s16
10586; GFX90A-NEXT:    s_mov_b32 s9, s17
10587; GFX90A-NEXT:    s_mov_b32 s10, s12
10588; GFX90A-NEXT:    s_mov_b32 s11, s13
10589; GFX90A-NEXT:    s_mov_b32 s12, s6
10590; GFX90A-NEXT:    s_mov_b32 s13, s7
10591; GFX90A-NEXT:    s_mov_b32 s14, s6
10592; GFX90A-NEXT:    s_mov_b32 s15, s7
10593; GFX90A-NEXT:    ;;#ASMSTART
10594; GFX90A-NEXT:    ; use s[8:15]
10595; GFX90A-NEXT:    ;;#ASMEND
10596; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10597;
10598; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_1_1:
10599; GFX940:       ; %bb.0:
10600; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10601; GFX940-NEXT:    ;;#ASMSTART
10602; GFX940-NEXT:    ; def s[0:5]
10603; GFX940-NEXT:    ;;#ASMEND
10604; GFX940-NEXT:    s_mov_b32 s12, s2
10605; GFX940-NEXT:    ;;#ASMSTART
10606; GFX940-NEXT:    ; def s[4:9]
10607; GFX940-NEXT:    ;;#ASMEND
10608; GFX940-NEXT:    s_mov_b32 s10, s4
10609; GFX940-NEXT:    s_mov_b32 s11, s5
10610; GFX940-NEXT:    s_mov_b32 s13, s3
10611; GFX940-NEXT:    s_mov_b32 s14, s2
10612; GFX940-NEXT:    s_mov_b32 s15, s3
10613; GFX940-NEXT:    ;;#ASMSTART
10614; GFX940-NEXT:    ; use s[8:15]
10615; GFX940-NEXT:    ;;#ASMEND
10616; GFX940-NEXT:    s_setpc_b64 s[30:31]
10617  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10618  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10619  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 1, i32 1>
10620  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10621  ret void
10622}
10623
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, 4, 1, 1>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[2], %vec1[1], %vec0[1], %vec0[1]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; Six s_mov_b32 copies are expected on every target: one asm def is placed in
; s[8:13], so its lane 1 is already in s[10:11] (result lane 1) and nothing is
; copied there. s[12:13] is read into s[8:9] before it is overwritten.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10624define void @s_shuffle_v4i64_v3i64__5_4_1_1() {
10625; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
10626; GFX900:       ; %bb.0:
10627; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10628; GFX900-NEXT:    ;;#ASMSTART
10629; GFX900-NEXT:    ; def s[4:9]
10630; GFX900-NEXT:    ;;#ASMEND
10631; GFX900-NEXT:    ;;#ASMSTART
10632; GFX900-NEXT:    ; def s[8:13]
10633; GFX900-NEXT:    ;;#ASMEND
10634; GFX900-NEXT:    s_mov_b32 s8, s12
10635; GFX900-NEXT:    s_mov_b32 s9, s13
10636; GFX900-NEXT:    s_mov_b32 s12, s6
10637; GFX900-NEXT:    s_mov_b32 s13, s7
10638; GFX900-NEXT:    s_mov_b32 s14, s6
10639; GFX900-NEXT:    s_mov_b32 s15, s7
10640; GFX900-NEXT:    ;;#ASMSTART
10641; GFX900-NEXT:    ; use s[8:15]
10642; GFX900-NEXT:    ;;#ASMEND
10643; GFX900-NEXT:    s_setpc_b64 s[30:31]
10644;
10645; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
10646; GFX90A:       ; %bb.0:
10647; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10648; GFX90A-NEXT:    ;;#ASMSTART
10649; GFX90A-NEXT:    ; def s[4:9]
10650; GFX90A-NEXT:    ;;#ASMEND
10651; GFX90A-NEXT:    ;;#ASMSTART
10652; GFX90A-NEXT:    ; def s[8:13]
10653; GFX90A-NEXT:    ;;#ASMEND
10654; GFX90A-NEXT:    s_mov_b32 s8, s12
10655; GFX90A-NEXT:    s_mov_b32 s9, s13
10656; GFX90A-NEXT:    s_mov_b32 s12, s6
10657; GFX90A-NEXT:    s_mov_b32 s13, s7
10658; GFX90A-NEXT:    s_mov_b32 s14, s6
10659; GFX90A-NEXT:    s_mov_b32 s15, s7
10660; GFX90A-NEXT:    ;;#ASMSTART
10661; GFX90A-NEXT:    ; use s[8:15]
10662; GFX90A-NEXT:    ;;#ASMEND
10663; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10664;
10665; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_1_1:
10666; GFX940:       ; %bb.0:
10667; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10668; GFX940-NEXT:    ;;#ASMSTART
10669; GFX940-NEXT:    ; def s[8:13]
10670; GFX940-NEXT:    ;;#ASMEND
10671; GFX940-NEXT:    ;;#ASMSTART
10672; GFX940-NEXT:    ; def s[0:5]
10673; GFX940-NEXT:    ;;#ASMEND
10674; GFX940-NEXT:    s_mov_b32 s8, s12
10675; GFX940-NEXT:    s_mov_b32 s9, s13
10676; GFX940-NEXT:    s_mov_b32 s12, s2
10677; GFX940-NEXT:    s_mov_b32 s13, s3
10678; GFX940-NEXT:    s_mov_b32 s14, s2
10679; GFX940-NEXT:    s_mov_b32 s15, s3
10680; GFX940-NEXT:    ;;#ASMSTART
10681; GFX940-NEXT:    ; use s[8:15]
10682; GFX940-NEXT:    ;;#ASMEND
10683; GFX940-NEXT:    s_setpc_b64 s[30:31]
10684  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10685  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10686  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 1, i32 1>
10687  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10688  ret void
10689}
10690
; SGPR shuffle test: two <3 x i64> values come from inline asm ("=s") and are
; combined with mask <5, 5, 1, 1>. Mask indices 0-2 select lanes of %vec0 and
; 3-5 select lanes of %vec1, so the result is
; <%vec1[2], %vec1[2], %vec0[1], %vec0[1]>. The "{s[8:15]}" constraint pins the
; result to s[8:15], one SGPR pair per i64 lane.
; Every expected sequence below writes all of s8..s15 (eight s_mov_b32);
; s[12:13] is read into s[8:9] and s[10:11] before it is overwritten.
; The assertions are autogenerated (see the header of this file); regenerate
; them with the update script instead of editing them by hand.
10691define void @s_shuffle_v4i64_v3i64__5_5_1_1() {
10692; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
10693; GFX900:       ; %bb.0:
10694; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10695; GFX900-NEXT:    ;;#ASMSTART
10696; GFX900-NEXT:    ; def s[4:9]
10697; GFX900-NEXT:    ;;#ASMEND
10698; GFX900-NEXT:    ;;#ASMSTART
10699; GFX900-NEXT:    ; def s[8:13]
10700; GFX900-NEXT:    ;;#ASMEND
10701; GFX900-NEXT:    s_mov_b32 s8, s12
10702; GFX900-NEXT:    s_mov_b32 s9, s13
10703; GFX900-NEXT:    s_mov_b32 s10, s12
10704; GFX900-NEXT:    s_mov_b32 s11, s13
10705; GFX900-NEXT:    s_mov_b32 s12, s6
10706; GFX900-NEXT:    s_mov_b32 s13, s7
10707; GFX900-NEXT:    s_mov_b32 s14, s6
10708; GFX900-NEXT:    s_mov_b32 s15, s7
10709; GFX900-NEXT:    ;;#ASMSTART
10710; GFX900-NEXT:    ; use s[8:15]
10711; GFX900-NEXT:    ;;#ASMEND
10712; GFX900-NEXT:    s_setpc_b64 s[30:31]
10713;
10714; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
10715; GFX90A:       ; %bb.0:
10716; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10717; GFX90A-NEXT:    ;;#ASMSTART
10718; GFX90A-NEXT:    ; def s[4:9]
10719; GFX90A-NEXT:    ;;#ASMEND
10720; GFX90A-NEXT:    ;;#ASMSTART
10721; GFX90A-NEXT:    ; def s[8:13]
10722; GFX90A-NEXT:    ;;#ASMEND
10723; GFX90A-NEXT:    s_mov_b32 s8, s12
10724; GFX90A-NEXT:    s_mov_b32 s9, s13
10725; GFX90A-NEXT:    s_mov_b32 s10, s12
10726; GFX90A-NEXT:    s_mov_b32 s11, s13
10727; GFX90A-NEXT:    s_mov_b32 s12, s6
10728; GFX90A-NEXT:    s_mov_b32 s13, s7
10729; GFX90A-NEXT:    s_mov_b32 s14, s6
10730; GFX90A-NEXT:    s_mov_b32 s15, s7
10731; GFX90A-NEXT:    ;;#ASMSTART
10732; GFX90A-NEXT:    ; use s[8:15]
10733; GFX90A-NEXT:    ;;#ASMEND
10734; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10735;
10736; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_1:
10737; GFX940:       ; %bb.0:
10738; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10739; GFX940-NEXT:    ;;#ASMSTART
10740; GFX940-NEXT:    ; def s[8:13]
10741; GFX940-NEXT:    ;;#ASMEND
10742; GFX940-NEXT:    ;;#ASMSTART
10743; GFX940-NEXT:    ; def s[0:5]
10744; GFX940-NEXT:    ;;#ASMEND
10745; GFX940-NEXT:    s_mov_b32 s8, s12
10746; GFX940-NEXT:    s_mov_b32 s9, s13
10747; GFX940-NEXT:    s_mov_b32 s10, s12
10748; GFX940-NEXT:    s_mov_b32 s11, s13
10749; GFX940-NEXT:    s_mov_b32 s12, s2
10750; GFX940-NEXT:    s_mov_b32 s13, s3
10751; GFX940-NEXT:    s_mov_b32 s14, s2
10752; GFX940-NEXT:    s_mov_b32 s15, s3
10753; GFX940-NEXT:    ;;#ASMSTART
10754; GFX940-NEXT:    ; use s[8:15]
10755; GFX940-NEXT:    ;;#ASMEND
10756; GFX940-NEXT:    s_setpc_b64 s[30:31]
10757  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10758  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10759  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 1>
10760  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10761  ret void
10762}
10763
; SGPR shuffle test, mask <5, 5, poison, 1>. Mask indices 0-2 pick lanes of
; %vec0 and 3-5 pick lanes of %vec1, so the result is {vec1[2], vec1[2],
; poison, vec0[1]}. None of the expectations contains a move into s12/s13,
; the registers that hold the poison lane. gfx900 and gfx90a expect the same
; sequence; gfx940 uses a different register assignment.
10764define void @s_shuffle_v4i64_v3i64__5_5_u_1() {
10765; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
10766; GFX900:       ; %bb.0:
10767; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10768; GFX900-NEXT:    ;;#ASMSTART
10769; GFX900-NEXT:    ; def s[4:9]
10770; GFX900-NEXT:    ;;#ASMEND
10771; GFX900-NEXT:    ;;#ASMSTART
10772; GFX900-NEXT:    ; def s[8:13]
10773; GFX900-NEXT:    ;;#ASMEND
10774; GFX900-NEXT:    s_mov_b32 s8, s12
10775; GFX900-NEXT:    s_mov_b32 s9, s13
10776; GFX900-NEXT:    s_mov_b32 s10, s12
10777; GFX900-NEXT:    s_mov_b32 s11, s13
10778; GFX900-NEXT:    s_mov_b32 s14, s6
10779; GFX900-NEXT:    s_mov_b32 s15, s7
10780; GFX900-NEXT:    ;;#ASMSTART
10781; GFX900-NEXT:    ; use s[8:15]
10782; GFX900-NEXT:    ;;#ASMEND
10783; GFX900-NEXT:    s_setpc_b64 s[30:31]
10784;
10785; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
10786; GFX90A:       ; %bb.0:
10787; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10788; GFX90A-NEXT:    ;;#ASMSTART
10789; GFX90A-NEXT:    ; def s[4:9]
10790; GFX90A-NEXT:    ;;#ASMEND
10791; GFX90A-NEXT:    ;;#ASMSTART
10792; GFX90A-NEXT:    ; def s[8:13]
10793; GFX90A-NEXT:    ;;#ASMEND
10794; GFX90A-NEXT:    s_mov_b32 s8, s12
10795; GFX90A-NEXT:    s_mov_b32 s9, s13
10796; GFX90A-NEXT:    s_mov_b32 s10, s12
10797; GFX90A-NEXT:    s_mov_b32 s11, s13
10798; GFX90A-NEXT:    s_mov_b32 s14, s6
10799; GFX90A-NEXT:    s_mov_b32 s15, s7
10800; GFX90A-NEXT:    ;;#ASMSTART
10801; GFX90A-NEXT:    ; use s[8:15]
10802; GFX90A-NEXT:    ;;#ASMEND
10803; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10804;
10805; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_1:
10806; GFX940:       ; %bb.0:
10807; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10808; GFX940-NEXT:    ;;#ASMSTART
10809; GFX940-NEXT:    ; def s[8:13]
10810; GFX940-NEXT:    ;;#ASMEND
10811; GFX940-NEXT:    ;;#ASMSTART
10812; GFX940-NEXT:    ; def s[0:5]
10813; GFX940-NEXT:    ;;#ASMEND
10814; GFX940-NEXT:    s_mov_b32 s8, s12
10815; GFX940-NEXT:    s_mov_b32 s9, s13
10816; GFX940-NEXT:    s_mov_b32 s10, s12
10817; GFX940-NEXT:    s_mov_b32 s11, s13
10818; GFX940-NEXT:    s_mov_b32 s14, s2
10819; GFX940-NEXT:    s_mov_b32 s15, s3
10820; GFX940-NEXT:    ;;#ASMSTART
10821; GFX940-NEXT:    ; use s[8:15]
10822; GFX940-NEXT:    ;;#ASMEND
10823; GFX940-NEXT:    s_setpc_b64 s[30:31]
10824  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10825  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10826  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 1>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
10827  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10828  ret void
10829}
10830
; SGPR shuffle test, mask <5, 5, 0, 1>. Mask indices 0-2 pick lanes of %vec0
; and 3-5 pick lanes of %vec1, so the result is {vec1[2], vec1[2], vec0[0],
; vec0[1]}. gfx900 and gfx90a expect the same sequence; gfx940 uses a
; different register assignment. Autogenerated checks - regenerate with the
; update script instead of editing them by hand.
10831define void @s_shuffle_v4i64_v3i64__5_5_0_1() {
10832; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
10833; GFX900:       ; %bb.0:
10834; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10835; GFX900-NEXT:    ;;#ASMSTART
10836; GFX900-NEXT:    ; def s[4:9]
10837; GFX900-NEXT:    ;;#ASMEND
10838; GFX900-NEXT:    ;;#ASMSTART
10839; GFX900-NEXT:    ; def s[8:13]
10840; GFX900-NEXT:    ;;#ASMEND
10841; GFX900-NEXT:    s_mov_b32 s8, s12
10842; GFX900-NEXT:    s_mov_b32 s9, s13
10843; GFX900-NEXT:    s_mov_b32 s10, s12
10844; GFX900-NEXT:    s_mov_b32 s11, s13
10845; GFX900-NEXT:    s_mov_b32 s12, s4
10846; GFX900-NEXT:    s_mov_b32 s13, s5
10847; GFX900-NEXT:    s_mov_b32 s14, s6
10848; GFX900-NEXT:    s_mov_b32 s15, s7
10849; GFX900-NEXT:    ;;#ASMSTART
10850; GFX900-NEXT:    ; use s[8:15]
10851; GFX900-NEXT:    ;;#ASMEND
10852; GFX900-NEXT:    s_setpc_b64 s[30:31]
10853;
10854; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
10855; GFX90A:       ; %bb.0:
10856; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10857; GFX90A-NEXT:    ;;#ASMSTART
10858; GFX90A-NEXT:    ; def s[4:9]
10859; GFX90A-NEXT:    ;;#ASMEND
10860; GFX90A-NEXT:    ;;#ASMSTART
10861; GFX90A-NEXT:    ; def s[8:13]
10862; GFX90A-NEXT:    ;;#ASMEND
10863; GFX90A-NEXT:    s_mov_b32 s8, s12
10864; GFX90A-NEXT:    s_mov_b32 s9, s13
10865; GFX90A-NEXT:    s_mov_b32 s10, s12
10866; GFX90A-NEXT:    s_mov_b32 s11, s13
10867; GFX90A-NEXT:    s_mov_b32 s12, s4
10868; GFX90A-NEXT:    s_mov_b32 s13, s5
10869; GFX90A-NEXT:    s_mov_b32 s14, s6
10870; GFX90A-NEXT:    s_mov_b32 s15, s7
10871; GFX90A-NEXT:    ;;#ASMSTART
10872; GFX90A-NEXT:    ; use s[8:15]
10873; GFX90A-NEXT:    ;;#ASMEND
10874; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10875;
10876; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_1:
10877; GFX940:       ; %bb.0:
10878; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10879; GFX940-NEXT:    ;;#ASMSTART
10880; GFX940-NEXT:    ; def s[8:13]
10881; GFX940-NEXT:    ;;#ASMEND
10882; GFX940-NEXT:    ;;#ASMSTART
10883; GFX940-NEXT:    ; def s[0:5]
10884; GFX940-NEXT:    ;;#ASMEND
10885; GFX940-NEXT:    s_mov_b32 s8, s12
10886; GFX940-NEXT:    s_mov_b32 s9, s13
10887; GFX940-NEXT:    s_mov_b32 s10, s12
10888; GFX940-NEXT:    s_mov_b32 s11, s13
10889; GFX940-NEXT:    s_mov_b32 s12, s0
10890; GFX940-NEXT:    s_mov_b32 s13, s1
10891; GFX940-NEXT:    s_mov_b32 s14, s2
10892; GFX940-NEXT:    s_mov_b32 s15, s3
10893; GFX940-NEXT:    ;;#ASMSTART
10894; GFX940-NEXT:    ; use s[8:15]
10895; GFX940-NEXT:    ;;#ASMEND
10896; GFX940-NEXT:    s_setpc_b64 s[30:31]
10897  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10898  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10899  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 1>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
10900  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10901  ret void
10902}
10903
; SGPR shuffle test, mask <5, 5, 2, 1>. Mask indices 0-2 pick lanes of %vec0
; and 3-5 pick lanes of %vec1, so the result is {vec1[2], vec1[2], vec0[2],
; vec0[1]}. gfx900 and gfx90a expect the same sequence; gfx940 uses a
; different register assignment.
; NOTE(review): in the gfx900/gfx90a expectations the two defs overlap in
; s12/s13 and nothing is moved into s14/s15 - presumably vec0[1] is already
; allocated there and vec0[0] is dead. Confirm by regenerating with the update
; script rather than editing the checks by hand.
10904define void @s_shuffle_v4i64_v3i64__5_5_2_1() {
10905; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
10906; GFX900:       ; %bb.0:
10907; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10908; GFX900-NEXT:    ;;#ASMSTART
10909; GFX900-NEXT:    ; def s[12:17]
10910; GFX900-NEXT:    ;;#ASMEND
10911; GFX900-NEXT:    ;;#ASMSTART
10912; GFX900-NEXT:    ; def s[8:13]
10913; GFX900-NEXT:    ;;#ASMEND
10914; GFX900-NEXT:    s_mov_b32 s8, s12
10915; GFX900-NEXT:    s_mov_b32 s9, s13
10916; GFX900-NEXT:    s_mov_b32 s10, s12
10917; GFX900-NEXT:    s_mov_b32 s11, s13
10918; GFX900-NEXT:    s_mov_b32 s12, s16
10919; GFX900-NEXT:    s_mov_b32 s13, s17
10920; GFX900-NEXT:    ;;#ASMSTART
10921; GFX900-NEXT:    ; use s[8:15]
10922; GFX900-NEXT:    ;;#ASMEND
10923; GFX900-NEXT:    s_setpc_b64 s[30:31]
10924;
10925; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
10926; GFX90A:       ; %bb.0:
10927; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10928; GFX90A-NEXT:    ;;#ASMSTART
10929; GFX90A-NEXT:    ; def s[12:17]
10930; GFX90A-NEXT:    ;;#ASMEND
10931; GFX90A-NEXT:    ;;#ASMSTART
10932; GFX90A-NEXT:    ; def s[8:13]
10933; GFX90A-NEXT:    ;;#ASMEND
10934; GFX90A-NEXT:    s_mov_b32 s8, s12
10935; GFX90A-NEXT:    s_mov_b32 s9, s13
10936; GFX90A-NEXT:    s_mov_b32 s10, s12
10937; GFX90A-NEXT:    s_mov_b32 s11, s13
10938; GFX90A-NEXT:    s_mov_b32 s12, s16
10939; GFX90A-NEXT:    s_mov_b32 s13, s17
10940; GFX90A-NEXT:    ;;#ASMSTART
10941; GFX90A-NEXT:    ; use s[8:15]
10942; GFX90A-NEXT:    ;;#ASMEND
10943; GFX90A-NEXT:    s_setpc_b64 s[30:31]
10944;
10945; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_1:
10946; GFX940:       ; %bb.0:
10947; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10948; GFX940-NEXT:    ;;#ASMSTART
10949; GFX940-NEXT:    ; def s[8:13]
10950; GFX940-NEXT:    ;;#ASMEND
10951; GFX940-NEXT:    ;;#ASMSTART
10952; GFX940-NEXT:    ; def s[0:5]
10953; GFX940-NEXT:    ;;#ASMEND
10954; GFX940-NEXT:    s_mov_b32 s8, s12
10955; GFX940-NEXT:    s_mov_b32 s9, s13
10956; GFX940-NEXT:    s_mov_b32 s10, s12
10957; GFX940-NEXT:    s_mov_b32 s11, s13
10958; GFX940-NEXT:    s_mov_b32 s12, s4
10959; GFX940-NEXT:    s_mov_b32 s13, s5
10960; GFX940-NEXT:    s_mov_b32 s14, s2
10961; GFX940-NEXT:    s_mov_b32 s15, s3
10962; GFX940-NEXT:    ;;#ASMSTART
10963; GFX940-NEXT:    ; use s[8:15]
10964; GFX940-NEXT:    ;;#ASMEND
10965; GFX940-NEXT:    s_setpc_b64 s[30:31]
10966  %vec0 = call <3 x i64> asm "; def $0", "=s"()
10967  %vec1 = call <3 x i64> asm "; def $0", "=s"()
10968  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 1>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
10969  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
10970  ret void
10971}
10972
; SGPR shuffle test, mask <5, 5, 3, 1>. Mask indices 0-2 pick lanes of %vec0
; and 3-5 pick lanes of %vec1, so the result is {vec1[2], vec1[2], vec1[0],
; vec0[1]}. None of the expectations contains a move into s12/s13; one of the
; two defs is expected to write s[12:17] directly. gfx900 and gfx90a expect
; the same sequence; gfx940 uses a different register assignment.
10973define void @s_shuffle_v4i64_v3i64__5_5_3_1() {
10974; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
10975; GFX900:       ; %bb.0:
10976; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10977; GFX900-NEXT:    ;;#ASMSTART
10978; GFX900-NEXT:    ; def s[4:9]
10979; GFX900-NEXT:    ;;#ASMEND
10980; GFX900-NEXT:    ;;#ASMSTART
10981; GFX900-NEXT:    ; def s[12:17]
10982; GFX900-NEXT:    ;;#ASMEND
10983; GFX900-NEXT:    s_mov_b32 s8, s16
10984; GFX900-NEXT:    s_mov_b32 s9, s17
10985; GFX900-NEXT:    s_mov_b32 s10, s16
10986; GFX900-NEXT:    s_mov_b32 s11, s17
10987; GFX900-NEXT:    s_mov_b32 s14, s6
10988; GFX900-NEXT:    s_mov_b32 s15, s7
10989; GFX900-NEXT:    ;;#ASMSTART
10990; GFX900-NEXT:    ; use s[8:15]
10991; GFX900-NEXT:    ;;#ASMEND
10992; GFX900-NEXT:    s_setpc_b64 s[30:31]
10993;
10994; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
10995; GFX90A:       ; %bb.0:
10996; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10997; GFX90A-NEXT:    ;;#ASMSTART
10998; GFX90A-NEXT:    ; def s[4:9]
10999; GFX90A-NEXT:    ;;#ASMEND
11000; GFX90A-NEXT:    ;;#ASMSTART
11001; GFX90A-NEXT:    ; def s[12:17]
11002; GFX90A-NEXT:    ;;#ASMEND
11003; GFX90A-NEXT:    s_mov_b32 s8, s16
11004; GFX90A-NEXT:    s_mov_b32 s9, s17
11005; GFX90A-NEXT:    s_mov_b32 s10, s16
11006; GFX90A-NEXT:    s_mov_b32 s11, s17
11007; GFX90A-NEXT:    s_mov_b32 s14, s6
11008; GFX90A-NEXT:    s_mov_b32 s15, s7
11009; GFX90A-NEXT:    ;;#ASMSTART
11010; GFX90A-NEXT:    ; use s[8:15]
11011; GFX90A-NEXT:    ;;#ASMEND
11012; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11013;
11014; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_1:
11015; GFX940:       ; %bb.0:
11016; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11017; GFX940-NEXT:    ;;#ASMSTART
11018; GFX940-NEXT:    ; def s[12:17]
11019; GFX940-NEXT:    ;;#ASMEND
11020; GFX940-NEXT:    ;;#ASMSTART
11021; GFX940-NEXT:    ; def s[0:5]
11022; GFX940-NEXT:    ;;#ASMEND
11023; GFX940-NEXT:    s_mov_b32 s8, s16
11024; GFX940-NEXT:    s_mov_b32 s9, s17
11025; GFX940-NEXT:    s_mov_b32 s10, s16
11026; GFX940-NEXT:    s_mov_b32 s11, s17
11027; GFX940-NEXT:    s_mov_b32 s14, s2
11028; GFX940-NEXT:    s_mov_b32 s15, s3
11029; GFX940-NEXT:    ;;#ASMSTART
11030; GFX940-NEXT:    ; use s[8:15]
11031; GFX940-NEXT:    ;;#ASMEND
11032; GFX940-NEXT:    s_setpc_b64 s[30:31]
11033  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11034  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11035  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 1>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11036  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11037  ret void
11038}
11039
; SGPR shuffle test, mask <5, 5, 4, 1>. Mask indices 0-2 pick lanes of %vec0
; and 3-5 pick lanes of %vec1, so the result is {vec1[2], vec1[2], vec1[1],
; vec0[1]}. gfx900 and gfx90a expect the same sequence; gfx940 uses a
; different register assignment. Autogenerated checks - regenerate with the
; update script instead of editing them by hand.
11040define void @s_shuffle_v4i64_v3i64__5_5_4_1() {
11041; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
11042; GFX900:       ; %bb.0:
11043; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11044; GFX900-NEXT:    ;;#ASMSTART
11045; GFX900-NEXT:    ; def s[4:9]
11046; GFX900-NEXT:    ;;#ASMEND
11047; GFX900-NEXT:    ;;#ASMSTART
11048; GFX900-NEXT:    ; def s[12:17]
11049; GFX900-NEXT:    ;;#ASMEND
11050; GFX900-NEXT:    s_mov_b32 s8, s16
11051; GFX900-NEXT:    s_mov_b32 s9, s17
11052; GFX900-NEXT:    s_mov_b32 s10, s16
11053; GFX900-NEXT:    s_mov_b32 s11, s17
11054; GFX900-NEXT:    s_mov_b32 s12, s14
11055; GFX900-NEXT:    s_mov_b32 s13, s15
11056; GFX900-NEXT:    s_mov_b32 s14, s6
11057; GFX900-NEXT:    s_mov_b32 s15, s7
11058; GFX900-NEXT:    ;;#ASMSTART
11059; GFX900-NEXT:    ; use s[8:15]
11060; GFX900-NEXT:    ;;#ASMEND
11061; GFX900-NEXT:    s_setpc_b64 s[30:31]
11062;
11063; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
11064; GFX90A:       ; %bb.0:
11065; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11066; GFX90A-NEXT:    ;;#ASMSTART
11067; GFX90A-NEXT:    ; def s[4:9]
11068; GFX90A-NEXT:    ;;#ASMEND
11069; GFX90A-NEXT:    ;;#ASMSTART
11070; GFX90A-NEXT:    ; def s[12:17]
11071; GFX90A-NEXT:    ;;#ASMEND
11072; GFX90A-NEXT:    s_mov_b32 s8, s16
11073; GFX90A-NEXT:    s_mov_b32 s9, s17
11074; GFX90A-NEXT:    s_mov_b32 s10, s16
11075; GFX90A-NEXT:    s_mov_b32 s11, s17
11076; GFX90A-NEXT:    s_mov_b32 s12, s14
11077; GFX90A-NEXT:    s_mov_b32 s13, s15
11078; GFX90A-NEXT:    s_mov_b32 s14, s6
11079; GFX90A-NEXT:    s_mov_b32 s15, s7
11080; GFX90A-NEXT:    ;;#ASMSTART
11081; GFX90A-NEXT:    ; use s[8:15]
11082; GFX90A-NEXT:    ;;#ASMEND
11083; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11084;
11085; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_1:
11086; GFX940:       ; %bb.0:
11087; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11088; GFX940-NEXT:    ;;#ASMSTART
11089; GFX940-NEXT:    ; def s[12:17]
11090; GFX940-NEXT:    ;;#ASMEND
11091; GFX940-NEXT:    ;;#ASMSTART
11092; GFX940-NEXT:    ; def s[0:5]
11093; GFX940-NEXT:    ;;#ASMEND
11094; GFX940-NEXT:    s_mov_b32 s8, s16
11095; GFX940-NEXT:    s_mov_b32 s9, s17
11096; GFX940-NEXT:    s_mov_b32 s10, s16
11097; GFX940-NEXT:    s_mov_b32 s11, s17
11098; GFX940-NEXT:    s_mov_b32 s12, s14
11099; GFX940-NEXT:    s_mov_b32 s13, s15
11100; GFX940-NEXT:    s_mov_b32 s14, s2
11101; GFX940-NEXT:    s_mov_b32 s15, s3
11102; GFX940-NEXT:    ;;#ASMSTART
11103; GFX940-NEXT:    ; use s[8:15]
11104; GFX940-NEXT:    ;;#ASMEND
11105; GFX940-NEXT:    s_setpc_b64 s[30:31]
11106  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11107  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11108  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 1>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11109  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11110  ret void
11111}
11112
; SGPR shuffle test with a single live source, mask <poison, 2, 2, 2>. The
; result is {poison, vec0[2], vec0[2], vec0[2]}. The source is expected to be
; defined directly in s[8:13], so vec0[2] already sits in s12/s13 and only
; lanes 1 and 3 need copies; nothing is written to s8/s9 for the poison lane.
; One check block under the common gfx9 prefix covers all three run lines.
11113define void @s_shuffle_v4i64_v3i64__u_2_2_2() {
11114; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_2_2_2:
11115; GFX9:       ; %bb.0:
11116; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11117; GFX9-NEXT:    ;;#ASMSTART
11118; GFX9-NEXT:    ; def s[8:13]
11119; GFX9-NEXT:    ;;#ASMEND
11120; GFX9-NEXT:    s_mov_b32 s10, s12
11121; GFX9-NEXT:    s_mov_b32 s11, s13
11122; GFX9-NEXT:    s_mov_b32 s14, s12
11123; GFX9-NEXT:    s_mov_b32 s15, s13
11124; GFX9-NEXT:    ;;#ASMSTART
11125; GFX9-NEXT:    ; use s[8:15]
11126; GFX9-NEXT:    ;;#ASMEND
11127; GFX9-NEXT:    s_setpc_b64 s[30:31]
11128  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11129  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 2, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11130  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11131  ret void
11132}
11133
; SGPR shuffle test with a single live source, mask <0, 2, 2, 2>. The result
; is {vec0[0], vec0[2], vec0[2], vec0[2]}. The source is expected to be
; defined directly in s[8:13], so lane 0 (s8/s9) and lane 2 (s12/s13) are
; already in place and only lanes 1 and 3 need copies.
; One check block under the common gfx9 prefix covers all three run lines.
11134define void @s_shuffle_v4i64_v3i64__0_2_2_2() {
11135; GFX9-LABEL: s_shuffle_v4i64_v3i64__0_2_2_2:
11136; GFX9:       ; %bb.0:
11137; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11138; GFX9-NEXT:    ;;#ASMSTART
11139; GFX9-NEXT:    ; def s[8:13]
11140; GFX9-NEXT:    ;;#ASMEND
11141; GFX9-NEXT:    s_mov_b32 s10, s12
11142; GFX9-NEXT:    s_mov_b32 s11, s13
11143; GFX9-NEXT:    s_mov_b32 s14, s12
11144; GFX9-NEXT:    s_mov_b32 s15, s13
11145; GFX9-NEXT:    ;;#ASMSTART
11146; GFX9-NEXT:    ; use s[8:15]
11147; GFX9-NEXT:    ;;#ASMEND
11148; GFX9-NEXT:    s_setpc_b64 s[30:31]
11149  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11150  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 2, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11151  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11152  ret void
11153}
11154
; SGPR shuffle test with a single live source, mask <1, 2, 2, 2>. The result
; is {vec0[1], vec0[2], vec0[2], vec0[2]}. The source is expected in s[8:13];
; the expected sequence copies s10/s11 down into s8/s9 before s10/s11 are
; overwritten with s12/s13, so the order of those moves matters.
; One check block under the common gfx9 prefix covers all three run lines.
11155define void @s_shuffle_v4i64_v3i64__1_2_2_2() {
11156; GFX9-LABEL: s_shuffle_v4i64_v3i64__1_2_2_2:
11157; GFX9:       ; %bb.0:
11158; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11159; GFX9-NEXT:    ;;#ASMSTART
11160; GFX9-NEXT:    ; def s[8:13]
11161; GFX9-NEXT:    ;;#ASMEND
11162; GFX9-NEXT:    s_mov_b32 s8, s10
11163; GFX9-NEXT:    s_mov_b32 s9, s11
11164; GFX9-NEXT:    s_mov_b32 s10, s12
11165; GFX9-NEXT:    s_mov_b32 s11, s13
11166; GFX9-NEXT:    s_mov_b32 s14, s12
11167; GFX9-NEXT:    s_mov_b32 s15, s13
11168; GFX9-NEXT:    ;;#ASMSTART
11169; GFX9-NEXT:    ; use s[8:15]
11170; GFX9-NEXT:    ;;#ASMEND
11171; GFX9-NEXT:    s_setpc_b64 s[30:31]
11172  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11173  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 2, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11174  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11175  ret void
11176}
11177
; SGPR splat test, mask <2, 2, 2, 2>. Every result lane is vec0[2]. The source
; is expected in s[8:13], so s12/s13 already hold the value for lane 2 and are
; copied into the three other lane pairs (s8/s9, s10/s11, s14/s15).
; One check block under the common gfx9 prefix covers all three run lines.
11178define void @s_shuffle_v4i64_v3i64__2_2_2_2() {
11179; GFX9-LABEL: s_shuffle_v4i64_v3i64__2_2_2_2:
11180; GFX9:       ; %bb.0:
11181; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11182; GFX9-NEXT:    ;;#ASMSTART
11183; GFX9-NEXT:    ; def s[8:13]
11184; GFX9-NEXT:    ;;#ASMEND
11185; GFX9-NEXT:    s_mov_b32 s8, s12
11186; GFX9-NEXT:    s_mov_b32 s9, s13
11187; GFX9-NEXT:    s_mov_b32 s10, s12
11188; GFX9-NEXT:    s_mov_b32 s11, s13
11189; GFX9-NEXT:    s_mov_b32 s14, s12
11190; GFX9-NEXT:    s_mov_b32 s15, s13
11191; GFX9-NEXT:    ;;#ASMSTART
11192; GFX9-NEXT:    ; use s[8:15]
11193; GFX9-NEXT:    ;;#ASMEND
11194; GFX9-NEXT:    s_setpc_b64 s[30:31]
11195  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11196  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11197  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11198  ret void
11199}
11200
; SGPR shuffle test, mask <3, 2, 2, 2>, where the second operand is poison.
; Mask index 3 names lane 0 of that poison operand, so result lane 0 carries
; no defined value and the expectations contain no move into s8/s9. The other
; three lanes are vec0[2], taken from s12/s13 of the s[8:13] def.
; One check block under the common gfx9 prefix covers all three run lines.
11201define void @s_shuffle_v4i64_v3i64__3_2_2_2() {
11202; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_2_2_2:
11203; GFX9:       ; %bb.0:
11204; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11205; GFX9-NEXT:    ;;#ASMSTART
11206; GFX9-NEXT:    ; def s[8:13]
11207; GFX9-NEXT:    ;;#ASMEND
11208; GFX9-NEXT:    s_mov_b32 s10, s12
11209; GFX9-NEXT:    s_mov_b32 s11, s13
11210; GFX9-NEXT:    s_mov_b32 s14, s12
11211; GFX9-NEXT:    s_mov_b32 s15, s13
11212; GFX9-NEXT:    ;;#ASMSTART
11213; GFX9-NEXT:    ; use s[8:15]
11214; GFX9-NEXT:    ;;#ASMEND
11215; GFX9-NEXT:    s_setpc_b64 s[30:31]
11216  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11217  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 2, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11218  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11219  ret void
11220}
11221
; SGPR shuffle test, mask <4, 2, 2, 2>. Mask indices 0-2 pick lanes of %vec0
; and 3-5 pick lanes of %vec1, so the result is {vec1[1], vec0[2], vec0[2],
; vec0[2]}. gfx900 and gfx90a expect the same sequence; gfx940 uses a
; different register assignment. Autogenerated checks - regenerate with the
; update script instead of editing them by hand.
11222define void @s_shuffle_v4i64_v3i64__4_2_2_2() {
11223; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
11224; GFX900:       ; %bb.0:
11225; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11226; GFX900-NEXT:    ;;#ASMSTART
11227; GFX900-NEXT:    ; def s[8:13]
11228; GFX900-NEXT:    ;;#ASMEND
11229; GFX900-NEXT:    ;;#ASMSTART
11230; GFX900-NEXT:    ; def s[4:9]
11231; GFX900-NEXT:    ;;#ASMEND
11232; GFX900-NEXT:    s_mov_b32 s8, s6
11233; GFX900-NEXT:    s_mov_b32 s9, s7
11234; GFX900-NEXT:    s_mov_b32 s10, s12
11235; GFX900-NEXT:    s_mov_b32 s11, s13
11236; GFX900-NEXT:    s_mov_b32 s14, s12
11237; GFX900-NEXT:    s_mov_b32 s15, s13
11238; GFX900-NEXT:    ;;#ASMSTART
11239; GFX900-NEXT:    ; use s[8:15]
11240; GFX900-NEXT:    ;;#ASMEND
11241; GFX900-NEXT:    s_setpc_b64 s[30:31]
11242;
11243; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
11244; GFX90A:       ; %bb.0:
11245; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11246; GFX90A-NEXT:    ;;#ASMSTART
11247; GFX90A-NEXT:    ; def s[8:13]
11248; GFX90A-NEXT:    ;;#ASMEND
11249; GFX90A-NEXT:    ;;#ASMSTART
11250; GFX90A-NEXT:    ; def s[4:9]
11251; GFX90A-NEXT:    ;;#ASMEND
11252; GFX90A-NEXT:    s_mov_b32 s8, s6
11253; GFX90A-NEXT:    s_mov_b32 s9, s7
11254; GFX90A-NEXT:    s_mov_b32 s10, s12
11255; GFX90A-NEXT:    s_mov_b32 s11, s13
11256; GFX90A-NEXT:    s_mov_b32 s14, s12
11257; GFX90A-NEXT:    s_mov_b32 s15, s13
11258; GFX90A-NEXT:    ;;#ASMSTART
11259; GFX90A-NEXT:    ; use s[8:15]
11260; GFX90A-NEXT:    ;;#ASMEND
11261; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11262;
11263; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_2_2_2:
11264; GFX940:       ; %bb.0:
11265; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11266; GFX940-NEXT:    ;;#ASMSTART
11267; GFX940-NEXT:    ; def s[8:13]
11268; GFX940-NEXT:    ;;#ASMEND
11269; GFX940-NEXT:    ;;#ASMSTART
11270; GFX940-NEXT:    ; def s[0:5]
11271; GFX940-NEXT:    ;;#ASMEND
11272; GFX940-NEXT:    s_mov_b32 s8, s2
11273; GFX940-NEXT:    s_mov_b32 s9, s3
11274; GFX940-NEXT:    s_mov_b32 s10, s12
11275; GFX940-NEXT:    s_mov_b32 s11, s13
11276; GFX940-NEXT:    s_mov_b32 s14, s12
11277; GFX940-NEXT:    s_mov_b32 s15, s13
11278; GFX940-NEXT:    ;;#ASMSTART
11279; GFX940-NEXT:    ; use s[8:15]
11280; GFX940-NEXT:    ;;#ASMEND
11281; GFX940-NEXT:    s_setpc_b64 s[30:31]
11282  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11283  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11284  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 2, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11285  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11286  ret void
11287}
11288
; SGPR shuffle test, mask <5, 2, 2, 2>. Mask indices 0-2 pick lanes of %vec0
; and 3-5 pick lanes of %vec1, so the result is {vec1[2], vec0[2], vec0[2],
; vec0[2]}. gfx900 and gfx90a expect the same sequence; gfx940 uses a
; different register assignment and needs an extra copy into s8/s9.
; NOTE(review): the gfx900/gfx90a expectations have no move into s8/s9 -
; presumably the second def (s[4:9]) is %vec1, leaving vec1[2] already in
; s8/s9. Confirm by regenerating with the update script.
11289define void @s_shuffle_v4i64_v3i64__5_2_2_2() {
11290; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
11291; GFX900:       ; %bb.0:
11292; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11293; GFX900-NEXT:    ;;#ASMSTART
11294; GFX900-NEXT:    ; def s[8:13]
11295; GFX900-NEXT:    ;;#ASMEND
11296; GFX900-NEXT:    ;;#ASMSTART
11297; GFX900-NEXT:    ; def s[4:9]
11298; GFX900-NEXT:    ;;#ASMEND
11299; GFX900-NEXT:    s_mov_b32 s10, s12
11300; GFX900-NEXT:    s_mov_b32 s11, s13
11301; GFX900-NEXT:    s_mov_b32 s14, s12
11302; GFX900-NEXT:    s_mov_b32 s15, s13
11303; GFX900-NEXT:    ;;#ASMSTART
11304; GFX900-NEXT:    ; use s[8:15]
11305; GFX900-NEXT:    ;;#ASMEND
11306; GFX900-NEXT:    s_setpc_b64 s[30:31]
11307;
11308; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
11309; GFX90A:       ; %bb.0:
11310; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11311; GFX90A-NEXT:    ;;#ASMSTART
11312; GFX90A-NEXT:    ; def s[8:13]
11313; GFX90A-NEXT:    ;;#ASMEND
11314; GFX90A-NEXT:    ;;#ASMSTART
11315; GFX90A-NEXT:    ; def s[4:9]
11316; GFX90A-NEXT:    ;;#ASMEND
11317; GFX90A-NEXT:    s_mov_b32 s10, s12
11318; GFX90A-NEXT:    s_mov_b32 s11, s13
11319; GFX90A-NEXT:    s_mov_b32 s14, s12
11320; GFX90A-NEXT:    s_mov_b32 s15, s13
11321; GFX90A-NEXT:    ;;#ASMSTART
11322; GFX90A-NEXT:    ; use s[8:15]
11323; GFX90A-NEXT:    ;;#ASMEND
11324; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11325;
11326; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_2_2:
11327; GFX940:       ; %bb.0:
11328; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11329; GFX940-NEXT:    ;;#ASMSTART
11330; GFX940-NEXT:    ; def s[8:13]
11331; GFX940-NEXT:    ;;#ASMEND
11332; GFX940-NEXT:    ;;#ASMSTART
11333; GFX940-NEXT:    ; def s[0:5]
11334; GFX940-NEXT:    ;;#ASMEND
11335; GFX940-NEXT:    s_mov_b32 s8, s4
11336; GFX940-NEXT:    s_mov_b32 s9, s5
11337; GFX940-NEXT:    s_mov_b32 s10, s12
11338; GFX940-NEXT:    s_mov_b32 s11, s13
11339; GFX940-NEXT:    s_mov_b32 s14, s12
11340; GFX940-NEXT:    s_mov_b32 s15, s13
11341; GFX940-NEXT:    ;;#ASMSTART
11342; GFX940-NEXT:    ; use s[8:15]
11343; GFX940-NEXT:    ;;#ASMEND
11344; GFX940-NEXT:    s_setpc_b64 s[30:31]
11345  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11346  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11347  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11348  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11349  ret void
11350}
11351
; SGPR shuffle test, mask <5, poison, 2, 2>. Mask indices 0-2 pick lanes of
; %vec0 and 3-5 pick lanes of %vec1, so the result is {vec1[2], poison,
; vec0[2], vec0[2]}. None of the expectations contains a move into s10/s11,
; the registers that hold the poison lane. gfx900 and gfx90a expect the same
; sequence; gfx940 uses a different register assignment.
11352define void @s_shuffle_v4i64_v3i64__5_u_2_2() {
11353; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
11354; GFX900:       ; %bb.0:
11355; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11356; GFX900-NEXT:    ;;#ASMSTART
11357; GFX900-NEXT:    ; def s[8:13]
11358; GFX900-NEXT:    ;;#ASMEND
11359; GFX900-NEXT:    ;;#ASMSTART
11360; GFX900-NEXT:    ; def s[4:9]
11361; GFX900-NEXT:    ;;#ASMEND
11362; GFX900-NEXT:    s_mov_b32 s14, s12
11363; GFX900-NEXT:    s_mov_b32 s15, s13
11364; GFX900-NEXT:    ;;#ASMSTART
11365; GFX900-NEXT:    ; use s[8:15]
11366; GFX900-NEXT:    ;;#ASMEND
11367; GFX900-NEXT:    s_setpc_b64 s[30:31]
11368;
11369; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
11370; GFX90A:       ; %bb.0:
11371; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11372; GFX90A-NEXT:    ;;#ASMSTART
11373; GFX90A-NEXT:    ; def s[8:13]
11374; GFX90A-NEXT:    ;;#ASMEND
11375; GFX90A-NEXT:    ;;#ASMSTART
11376; GFX90A-NEXT:    ; def s[4:9]
11377; GFX90A-NEXT:    ;;#ASMEND
11378; GFX90A-NEXT:    s_mov_b32 s14, s12
11379; GFX90A-NEXT:    s_mov_b32 s15, s13
11380; GFX90A-NEXT:    ;;#ASMSTART
11381; GFX90A-NEXT:    ; use s[8:15]
11382; GFX90A-NEXT:    ;;#ASMEND
11383; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11384;
11385; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_2_2:
11386; GFX940:       ; %bb.0:
11387; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11388; GFX940-NEXT:    ;;#ASMSTART
11389; GFX940-NEXT:    ; def s[8:13]
11390; GFX940-NEXT:    ;;#ASMEND
11391; GFX940-NEXT:    ;;#ASMSTART
11392; GFX940-NEXT:    ; def s[0:5]
11393; GFX940-NEXT:    ;;#ASMEND
11394; GFX940-NEXT:    s_mov_b32 s8, s4
11395; GFX940-NEXT:    s_mov_b32 s9, s5
11396; GFX940-NEXT:    s_mov_b32 s14, s12
11397; GFX940-NEXT:    s_mov_b32 s15, s13
11398; GFX940-NEXT:    ;;#ASMSTART
11399; GFX940-NEXT:    ; use s[8:15]
11400; GFX940-NEXT:    ;;#ASMEND
11401; GFX940-NEXT:    s_setpc_b64 s[30:31]
11402  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11403  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11404  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11405  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11406  ret void
11407}
11408
; SGPR shuffle test, mask <5, 0, 2, 2>. Mask indices 0-2 pick lanes of %vec0
; and 3-5 pick lanes of %vec1, so the result is {vec1[2], vec0[0], vec0[2],
; vec0[2]}. gfx900 and gfx90a expect the same six-move sequence; gfx940 uses a
; different register assignment and needs eight moves.
; NOTE(review): the roles of the two defs appear swapped between the
; gfx900/gfx90a and gfx940 expectations (the inline asm has no side effects,
; so presumably it may be emitted in either order) - confirm by regenerating
; with the update script rather than editing the checks by hand.
11409define void @s_shuffle_v4i64_v3i64__5_0_2_2() {
11410; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
11411; GFX900:       ; %bb.0:
11412; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11413; GFX900-NEXT:    ;;#ASMSTART
11414; GFX900-NEXT:    ; def s[12:17]
11415; GFX900-NEXT:    ;;#ASMEND
11416; GFX900-NEXT:    ;;#ASMSTART
11417; GFX900-NEXT:    ; def s[4:9]
11418; GFX900-NEXT:    ;;#ASMEND
11419; GFX900-NEXT:    s_mov_b32 s10, s12
11420; GFX900-NEXT:    s_mov_b32 s11, s13
11421; GFX900-NEXT:    s_mov_b32 s12, s16
11422; GFX900-NEXT:    s_mov_b32 s13, s17
11423; GFX900-NEXT:    s_mov_b32 s14, s16
11424; GFX900-NEXT:    s_mov_b32 s15, s17
11425; GFX900-NEXT:    ;;#ASMSTART
11426; GFX900-NEXT:    ; use s[8:15]
11427; GFX900-NEXT:    ;;#ASMEND
11428; GFX900-NEXT:    s_setpc_b64 s[30:31]
11429;
11430; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
11431; GFX90A:       ; %bb.0:
11432; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11433; GFX90A-NEXT:    ;;#ASMSTART
11434; GFX90A-NEXT:    ; def s[12:17]
11435; GFX90A-NEXT:    ;;#ASMEND
11436; GFX90A-NEXT:    ;;#ASMSTART
11437; GFX90A-NEXT:    ; def s[4:9]
11438; GFX90A-NEXT:    ;;#ASMEND
11439; GFX90A-NEXT:    s_mov_b32 s10, s12
11440; GFX90A-NEXT:    s_mov_b32 s11, s13
11441; GFX90A-NEXT:    s_mov_b32 s12, s16
11442; GFX90A-NEXT:    s_mov_b32 s13, s17
11443; GFX90A-NEXT:    s_mov_b32 s14, s16
11444; GFX90A-NEXT:    s_mov_b32 s15, s17
11445; GFX90A-NEXT:    ;;#ASMSTART
11446; GFX90A-NEXT:    ; use s[8:15]
11447; GFX90A-NEXT:    ;;#ASMEND
11448; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11449;
11450; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_2_2:
11451; GFX940:       ; %bb.0:
11452; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11453; GFX940-NEXT:    ;;#ASMSTART
11454; GFX940-NEXT:    ; def s[8:13]
11455; GFX940-NEXT:    ;;#ASMEND
11456; GFX940-NEXT:    ;;#ASMSTART
11457; GFX940-NEXT:    ; def s[0:5]
11458; GFX940-NEXT:    ;;#ASMEND
11459; GFX940-NEXT:    s_mov_b32 s8, s12
11460; GFX940-NEXT:    s_mov_b32 s9, s13
11461; GFX940-NEXT:    s_mov_b32 s10, s0
11462; GFX940-NEXT:    s_mov_b32 s11, s1
11463; GFX940-NEXT:    s_mov_b32 s12, s4
11464; GFX940-NEXT:    s_mov_b32 s13, s5
11465; GFX940-NEXT:    s_mov_b32 s14, s4
11466; GFX940-NEXT:    s_mov_b32 s15, s5
11467; GFX940-NEXT:    ;;#ASMSTART
11468; GFX940-NEXT:    ; use s[8:15]
11469; GFX940-NEXT:    ;;#ASMEND
11470; GFX940-NEXT:    s_setpc_b64 s[30:31]
11471  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11472  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11473  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11474  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11475  ret void
11476}
11477
; SGPR shuffle test, mask <5, 1, 2, 2>. Mask indices 0-2 pick lanes of %vec0
; and 3-5 pick lanes of %vec1, so the result is {vec1[2], vec0[1], vec0[2],
; vec0[2]}. gfx900 and gfx90a expect only the copy into s14/s15; gfx940 uses a
; different register assignment and also copies into s8/s9.
; NOTE(review): lanes 1 and 2 need no moves in any expectation - presumably
; %vec0 is the s[8:13] def, so vec0[1] and vec0[2] are already in s10/s11 and
; s12/s13. Confirm by regenerating with the update script.
11478define void @s_shuffle_v4i64_v3i64__5_1_2_2() {
11479; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
11480; GFX900:       ; %bb.0:
11481; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11482; GFX900-NEXT:    ;;#ASMSTART
11483; GFX900-NEXT:    ; def s[8:13]
11484; GFX900-NEXT:    ;;#ASMEND
11485; GFX900-NEXT:    ;;#ASMSTART
11486; GFX900-NEXT:    ; def s[4:9]
11487; GFX900-NEXT:    ;;#ASMEND
11488; GFX900-NEXT:    s_mov_b32 s14, s12
11489; GFX900-NEXT:    s_mov_b32 s15, s13
11490; GFX900-NEXT:    ;;#ASMSTART
11491; GFX900-NEXT:    ; use s[8:15]
11492; GFX900-NEXT:    ;;#ASMEND
11493; GFX900-NEXT:    s_setpc_b64 s[30:31]
11494;
11495; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
11496; GFX90A:       ; %bb.0:
11497; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11498; GFX90A-NEXT:    ;;#ASMSTART
11499; GFX90A-NEXT:    ; def s[8:13]
11500; GFX90A-NEXT:    ;;#ASMEND
11501; GFX90A-NEXT:    ;;#ASMSTART
11502; GFX90A-NEXT:    ; def s[4:9]
11503; GFX90A-NEXT:    ;;#ASMEND
11504; GFX90A-NEXT:    s_mov_b32 s14, s12
11505; GFX90A-NEXT:    s_mov_b32 s15, s13
11506; GFX90A-NEXT:    ;;#ASMSTART
11507; GFX90A-NEXT:    ; use s[8:15]
11508; GFX90A-NEXT:    ;;#ASMEND
11509; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11510;
11511; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_2_2:
11512; GFX940:       ; %bb.0:
11513; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11514; GFX940-NEXT:    ;;#ASMSTART
11515; GFX940-NEXT:    ; def s[8:13]
11516; GFX940-NEXT:    ;;#ASMEND
11517; GFX940-NEXT:    ;;#ASMSTART
11518; GFX940-NEXT:    ; def s[0:5]
11519; GFX940-NEXT:    ;;#ASMEND
11520; GFX940-NEXT:    s_mov_b32 s8, s4
11521; GFX940-NEXT:    s_mov_b32 s9, s5
11522; GFX940-NEXT:    s_mov_b32 s14, s12
11523; GFX940-NEXT:    s_mov_b32 s15, s13
11524; GFX940-NEXT:    ;;#ASMSTART
11525; GFX940-NEXT:    ; use s[8:15]
11526; GFX940-NEXT:    ;;#ASMEND
11527; GFX940-NEXT:    s_setpc_b64 s[30:31]
11528  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11529  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11530  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 2, i32 2>
  ; The "{s[8:15]}" constraint pins the shuffled value to those eight SGPRs.
11531  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11532  ret void
11533}
11534
; SGPR shuffle with mask <5, 3, 2, 2>. Mask indices 0-2 address %vec0 and
; 3-5 address %vec1, so the result is
; { %vec1[2], %vec1[0], %vec0[2], %vec0[2] }.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
11535define void @s_shuffle_v4i64_v3i64__5_3_2_2() {
11536; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
11537; GFX900:       ; %bb.0:
11538; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11539; GFX900-NEXT:    ;;#ASMSTART
11540; GFX900-NEXT:    ; def s[8:13]
11541; GFX900-NEXT:    ;;#ASMEND
11542; GFX900-NEXT:    ;;#ASMSTART
11543; GFX900-NEXT:    ; def s[4:9]
11544; GFX900-NEXT:    ;;#ASMEND
11545; GFX900-NEXT:    s_mov_b32 s10, s4
11546; GFX900-NEXT:    s_mov_b32 s11, s5
11547; GFX900-NEXT:    s_mov_b32 s14, s12
11548; GFX900-NEXT:    s_mov_b32 s15, s13
11549; GFX900-NEXT:    ;;#ASMSTART
11550; GFX900-NEXT:    ; use s[8:15]
11551; GFX900-NEXT:    ;;#ASMEND
11552; GFX900-NEXT:    s_setpc_b64 s[30:31]
11553;
11554; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
11555; GFX90A:       ; %bb.0:
11556; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11557; GFX90A-NEXT:    ;;#ASMSTART
11558; GFX90A-NEXT:    ; def s[8:13]
11559; GFX90A-NEXT:    ;;#ASMEND
11560; GFX90A-NEXT:    ;;#ASMSTART
11561; GFX90A-NEXT:    ; def s[4:9]
11562; GFX90A-NEXT:    ;;#ASMEND
11563; GFX90A-NEXT:    s_mov_b32 s10, s4
11564; GFX90A-NEXT:    s_mov_b32 s11, s5
11565; GFX90A-NEXT:    s_mov_b32 s14, s12
11566; GFX90A-NEXT:    s_mov_b32 s15, s13
11567; GFX90A-NEXT:    ;;#ASMSTART
11568; GFX90A-NEXT:    ; use s[8:15]
11569; GFX90A-NEXT:    ;;#ASMEND
11570; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11571;
11572; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_2_2:
11573; GFX940:       ; %bb.0:
11574; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11575; GFX940-NEXT:    ;;#ASMSTART
11576; GFX940-NEXT:    ; def s[8:13]
11577; GFX940-NEXT:    ;;#ASMEND
11578; GFX940-NEXT:    ;;#ASMSTART
11579; GFX940-NEXT:    ; def s[0:5]
11580; GFX940-NEXT:    ;;#ASMEND
11581; GFX940-NEXT:    s_mov_b32 s8, s4
11582; GFX940-NEXT:    s_mov_b32 s9, s5
11583; GFX940-NEXT:    s_mov_b32 s10, s0
11584; GFX940-NEXT:    s_mov_b32 s11, s1
11585; GFX940-NEXT:    s_mov_b32 s14, s12
11586; GFX940-NEXT:    s_mov_b32 s15, s13
11587; GFX940-NEXT:    ;;#ASMSTART
11588; GFX940-NEXT:    ; use s[8:15]
11589; GFX940-NEXT:    ;;#ASMEND
11590; GFX940-NEXT:    s_setpc_b64 s[30:31]
11591  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11592  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11593  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 2, i32 2>
11594  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11595  ret void
11596}
11597
; SGPR shuffle with mask <5, 4, 2, 2>. Mask indices 0-2 address %vec0 and
; 3-5 address %vec1, so the result is
; { %vec1[2], %vec1[1], %vec0[2], %vec0[2] }.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
11598define void @s_shuffle_v4i64_v3i64__5_4_2_2() {
11599; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
11600; GFX900:       ; %bb.0:
11601; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11602; GFX900-NEXT:    ;;#ASMSTART
11603; GFX900-NEXT:    ; def s[12:17]
11604; GFX900-NEXT:    ;;#ASMEND
11605; GFX900-NEXT:    ;;#ASMSTART
11606; GFX900-NEXT:    ; def s[8:13]
11607; GFX900-NEXT:    ;;#ASMEND
11608; GFX900-NEXT:    s_mov_b32 s8, s12
11609; GFX900-NEXT:    s_mov_b32 s9, s13
11610; GFX900-NEXT:    s_mov_b32 s12, s16
11611; GFX900-NEXT:    s_mov_b32 s13, s17
11612; GFX900-NEXT:    s_mov_b32 s14, s16
11613; GFX900-NEXT:    s_mov_b32 s15, s17
11614; GFX900-NEXT:    ;;#ASMSTART
11615; GFX900-NEXT:    ; use s[8:15]
11616; GFX900-NEXT:    ;;#ASMEND
11617; GFX900-NEXT:    s_setpc_b64 s[30:31]
11618;
11619; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
11620; GFX90A:       ; %bb.0:
11621; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11622; GFX90A-NEXT:    ;;#ASMSTART
11623; GFX90A-NEXT:    ; def s[12:17]
11624; GFX90A-NEXT:    ;;#ASMEND
11625; GFX90A-NEXT:    ;;#ASMSTART
11626; GFX90A-NEXT:    ; def s[8:13]
11627; GFX90A-NEXT:    ;;#ASMEND
11628; GFX90A-NEXT:    s_mov_b32 s8, s12
11629; GFX90A-NEXT:    s_mov_b32 s9, s13
11630; GFX90A-NEXT:    s_mov_b32 s12, s16
11631; GFX90A-NEXT:    s_mov_b32 s13, s17
11632; GFX90A-NEXT:    s_mov_b32 s14, s16
11633; GFX90A-NEXT:    s_mov_b32 s15, s17
11634; GFX90A-NEXT:    ;;#ASMSTART
11635; GFX90A-NEXT:    ; use s[8:15]
11636; GFX90A-NEXT:    ;;#ASMEND
11637; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11638;
11639; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_2_2:
11640; GFX940:       ; %bb.0:
11641; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11642; GFX940-NEXT:    ;;#ASMSTART
11643; GFX940-NEXT:    ; def s[8:13]
11644; GFX940-NEXT:    ;;#ASMEND
11645; GFX940-NEXT:    ;;#ASMSTART
11646; GFX940-NEXT:    ; def s[0:5]
11647; GFX940-NEXT:    ;;#ASMEND
11648; GFX940-NEXT:    s_mov_b32 s8, s12
11649; GFX940-NEXT:    s_mov_b32 s9, s13
11650; GFX940-NEXT:    s_mov_b32 s12, s4
11651; GFX940-NEXT:    s_mov_b32 s13, s5
11652; GFX940-NEXT:    s_mov_b32 s14, s4
11653; GFX940-NEXT:    s_mov_b32 s15, s5
11654; GFX940-NEXT:    ;;#ASMSTART
11655; GFX940-NEXT:    ; use s[8:15]
11656; GFX940-NEXT:    ;;#ASMEND
11657; GFX940-NEXT:    s_setpc_b64 s[30:31]
11658  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11659  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11660  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 2, i32 2>
11661  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11662  ret void
11663}
11664
; SGPR shuffle with mask <5, 5, 2, 2>. Mask indices 0-2 address %vec0 and
; 3-5 address %vec1, so the result is
; { %vec1[2], %vec1[2], %vec0[2], %vec0[2] }.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
11665define void @s_shuffle_v4i64_v3i64__5_5_2_2() {
11666; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
11667; GFX900:       ; %bb.0:
11668; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11669; GFX900-NEXT:    ;;#ASMSTART
11670; GFX900-NEXT:    ; def s[8:13]
11671; GFX900-NEXT:    ;;#ASMEND
11672; GFX900-NEXT:    ;;#ASMSTART
11673; GFX900-NEXT:    ; def s[16:21]
11674; GFX900-NEXT:    ;;#ASMEND
11675; GFX900-NEXT:    s_mov_b32 s8, s20
11676; GFX900-NEXT:    s_mov_b32 s9, s21
11677; GFX900-NEXT:    s_mov_b32 s10, s20
11678; GFX900-NEXT:    s_mov_b32 s11, s21
11679; GFX900-NEXT:    s_mov_b32 s14, s12
11680; GFX900-NEXT:    s_mov_b32 s15, s13
11681; GFX900-NEXT:    ;;#ASMSTART
11682; GFX900-NEXT:    ; use s[8:15]
11683; GFX900-NEXT:    ;;#ASMEND
11684; GFX900-NEXT:    s_setpc_b64 s[30:31]
11685;
11686; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
11687; GFX90A:       ; %bb.0:
11688; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11689; GFX90A-NEXT:    ;;#ASMSTART
11690; GFX90A-NEXT:    ; def s[8:13]
11691; GFX90A-NEXT:    ;;#ASMEND
11692; GFX90A-NEXT:    ;;#ASMSTART
11693; GFX90A-NEXT:    ; def s[16:21]
11694; GFX90A-NEXT:    ;;#ASMEND
11695; GFX90A-NEXT:    s_mov_b32 s8, s20
11696; GFX90A-NEXT:    s_mov_b32 s9, s21
11697; GFX90A-NEXT:    s_mov_b32 s10, s20
11698; GFX90A-NEXT:    s_mov_b32 s11, s21
11699; GFX90A-NEXT:    s_mov_b32 s14, s12
11700; GFX90A-NEXT:    s_mov_b32 s15, s13
11701; GFX90A-NEXT:    ;;#ASMSTART
11702; GFX90A-NEXT:    ; use s[8:15]
11703; GFX90A-NEXT:    ;;#ASMEND
11704; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11705;
11706; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_2:
11707; GFX940:       ; %bb.0:
11708; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11709; GFX940-NEXT:    ;;#ASMSTART
11710; GFX940-NEXT:    ; def s[8:13]
11711; GFX940-NEXT:    ;;#ASMEND
11712; GFX940-NEXT:    ;;#ASMSTART
11713; GFX940-NEXT:    ; def s[0:5]
11714; GFX940-NEXT:    ;;#ASMEND
11715; GFX940-NEXT:    s_mov_b32 s8, s4
11716; GFX940-NEXT:    s_mov_b32 s9, s5
11717; GFX940-NEXT:    s_mov_b32 s10, s4
11718; GFX940-NEXT:    s_mov_b32 s11, s5
11719; GFX940-NEXT:    s_mov_b32 s14, s12
11720; GFX940-NEXT:    s_mov_b32 s15, s13
11721; GFX940-NEXT:    ;;#ASMSTART
11722; GFX940-NEXT:    ; use s[8:15]
11723; GFX940-NEXT:    ;;#ASMEND
11724; GFX940-NEXT:    s_setpc_b64 s[30:31]
11725  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11726  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11727  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 2>
11728  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11729  ret void
11730}
11731
; SGPR shuffle with mask <5, 5, poison, 2>. Mask indices 0-2 address %vec0
; and 3-5 address %vec1, so the result is
; { %vec1[2], %vec1[2], poison, %vec0[2] }; lane 2 (s[12:13] in the use)
; has no required value.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
11732define void @s_shuffle_v4i64_v3i64__5_5_u_2() {
11733; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
11734; GFX900:       ; %bb.0:
11735; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11736; GFX900-NEXT:    ;;#ASMSTART
11737; GFX900-NEXT:    ; def s[8:13]
11738; GFX900-NEXT:    ;;#ASMEND
11739; GFX900-NEXT:    ;;#ASMSTART
11740; GFX900-NEXT:    ; def s[16:21]
11741; GFX900-NEXT:    ;;#ASMEND
11742; GFX900-NEXT:    s_mov_b32 s8, s20
11743; GFX900-NEXT:    s_mov_b32 s9, s21
11744; GFX900-NEXT:    s_mov_b32 s10, s20
11745; GFX900-NEXT:    s_mov_b32 s11, s21
11746; GFX900-NEXT:    s_mov_b32 s14, s12
11747; GFX900-NEXT:    s_mov_b32 s15, s13
11748; GFX900-NEXT:    ;;#ASMSTART
11749; GFX900-NEXT:    ; use s[8:15]
11750; GFX900-NEXT:    ;;#ASMEND
11751; GFX900-NEXT:    s_setpc_b64 s[30:31]
11752;
11753; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
11754; GFX90A:       ; %bb.0:
11755; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11756; GFX90A-NEXT:    ;;#ASMSTART
11757; GFX90A-NEXT:    ; def s[8:13]
11758; GFX90A-NEXT:    ;;#ASMEND
11759; GFX90A-NEXT:    ;;#ASMSTART
11760; GFX90A-NEXT:    ; def s[16:21]
11761; GFX90A-NEXT:    ;;#ASMEND
11762; GFX90A-NEXT:    s_mov_b32 s8, s20
11763; GFX90A-NEXT:    s_mov_b32 s9, s21
11764; GFX90A-NEXT:    s_mov_b32 s10, s20
11765; GFX90A-NEXT:    s_mov_b32 s11, s21
11766; GFX90A-NEXT:    s_mov_b32 s14, s12
11767; GFX90A-NEXT:    s_mov_b32 s15, s13
11768; GFX90A-NEXT:    ;;#ASMSTART
11769; GFX90A-NEXT:    ; use s[8:15]
11770; GFX90A-NEXT:    ;;#ASMEND
11771; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11772;
11773; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_2:
11774; GFX940:       ; %bb.0:
11775; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11776; GFX940-NEXT:    ;;#ASMSTART
11777; GFX940-NEXT:    ; def s[8:13]
11778; GFX940-NEXT:    ;;#ASMEND
11779; GFX940-NEXT:    ;;#ASMSTART
11780; GFX940-NEXT:    ; def s[0:5]
11781; GFX940-NEXT:    ;;#ASMEND
11782; GFX940-NEXT:    s_mov_b32 s8, s12
11783; GFX940-NEXT:    s_mov_b32 s9, s13
11784; GFX940-NEXT:    s_mov_b32 s10, s12
11785; GFX940-NEXT:    s_mov_b32 s11, s13
11786; GFX940-NEXT:    s_mov_b32 s14, s4
11787; GFX940-NEXT:    s_mov_b32 s15, s5
11788; GFX940-NEXT:    ;;#ASMSTART
11789; GFX940-NEXT:    ; use s[8:15]
11790; GFX940-NEXT:    ;;#ASMEND
11791; GFX940-NEXT:    s_setpc_b64 s[30:31]
11792  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11793  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11794  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 2>
11795  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11796  ret void
11797}
11798
; SGPR shuffle with mask <5, 5, 0, 2>. Mask indices 0-2 address %vec0 and
; 3-5 address %vec1, so the result is
; { %vec1[2], %vec1[2], %vec0[0], %vec0[2] }.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
11799define void @s_shuffle_v4i64_v3i64__5_5_0_2() {
11800; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
11801; GFX900:       ; %bb.0:
11802; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11803; GFX900-NEXT:    ;;#ASMSTART
11804; GFX900-NEXT:    ; def s[12:17]
11805; GFX900-NEXT:    ;;#ASMEND
11806; GFX900-NEXT:    ;;#ASMSTART
11807; GFX900-NEXT:    ; def s[20:25]
11808; GFX900-NEXT:    ;;#ASMEND
11809; GFX900-NEXT:    s_mov_b32 s8, s24
11810; GFX900-NEXT:    s_mov_b32 s9, s25
11811; GFX900-NEXT:    s_mov_b32 s10, s24
11812; GFX900-NEXT:    s_mov_b32 s11, s25
11813; GFX900-NEXT:    s_mov_b32 s14, s16
11814; GFX900-NEXT:    s_mov_b32 s15, s17
11815; GFX900-NEXT:    ;;#ASMSTART
11816; GFX900-NEXT:    ; use s[8:15]
11817; GFX900-NEXT:    ;;#ASMEND
11818; GFX900-NEXT:    s_setpc_b64 s[30:31]
11819;
11820; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
11821; GFX90A:       ; %bb.0:
11822; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11823; GFX90A-NEXT:    ;;#ASMSTART
11824; GFX90A-NEXT:    ; def s[12:17]
11825; GFX90A-NEXT:    ;;#ASMEND
11826; GFX90A-NEXT:    ;;#ASMSTART
11827; GFX90A-NEXT:    ; def s[20:25]
11828; GFX90A-NEXT:    ;;#ASMEND
11829; GFX90A-NEXT:    s_mov_b32 s8, s24
11830; GFX90A-NEXT:    s_mov_b32 s9, s25
11831; GFX90A-NEXT:    s_mov_b32 s10, s24
11832; GFX90A-NEXT:    s_mov_b32 s11, s25
11833; GFX90A-NEXT:    s_mov_b32 s14, s16
11834; GFX90A-NEXT:    s_mov_b32 s15, s17
11835; GFX90A-NEXT:    ;;#ASMSTART
11836; GFX90A-NEXT:    ; use s[8:15]
11837; GFX90A-NEXT:    ;;#ASMEND
11838; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11839;
11840; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_2:
11841; GFX940:       ; %bb.0:
11842; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11843; GFX940-NEXT:    ;;#ASMSTART
11844; GFX940-NEXT:    ; def s[8:13]
11845; GFX940-NEXT:    ;;#ASMEND
11846; GFX940-NEXT:    ;;#ASMSTART
11847; GFX940-NEXT:    ; def s[0:5]
11848; GFX940-NEXT:    ;;#ASMEND
11849; GFX940-NEXT:    s_mov_b32 s8, s12
11850; GFX940-NEXT:    s_mov_b32 s9, s13
11851; GFX940-NEXT:    s_mov_b32 s10, s12
11852; GFX940-NEXT:    s_mov_b32 s11, s13
11853; GFX940-NEXT:    s_mov_b32 s12, s0
11854; GFX940-NEXT:    s_mov_b32 s13, s1
11855; GFX940-NEXT:    s_mov_b32 s14, s4
11856; GFX940-NEXT:    s_mov_b32 s15, s5
11857; GFX940-NEXT:    ;;#ASMSTART
11858; GFX940-NEXT:    ; use s[8:15]
11859; GFX940-NEXT:    ;;#ASMEND
11860; GFX940-NEXT:    s_setpc_b64 s[30:31]
11861  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11862  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11863  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 2>
11864  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11865  ret void
11866}
11867
; SGPR shuffle with mask <5, 5, 1, 2>. Mask indices 0-2 address %vec0 and
; 3-5 address %vec1, so the result is
; { %vec1[2], %vec1[2], %vec0[1], %vec0[2] }.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
11868define void @s_shuffle_v4i64_v3i64__5_5_1_2() {
11869; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
11870; GFX900:       ; %bb.0:
11871; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11872; GFX900-NEXT:    ;;#ASMSTART
11873; GFX900-NEXT:    ; def s[12:17]
11874; GFX900-NEXT:    ;;#ASMEND
11875; GFX900-NEXT:    ;;#ASMSTART
11876; GFX900-NEXT:    ; def s[8:13]
11877; GFX900-NEXT:    ;;#ASMEND
11878; GFX900-NEXT:    s_mov_b32 s8, s12
11879; GFX900-NEXT:    s_mov_b32 s9, s13
11880; GFX900-NEXT:    s_mov_b32 s10, s12
11881; GFX900-NEXT:    s_mov_b32 s11, s13
11882; GFX900-NEXT:    s_mov_b32 s12, s14
11883; GFX900-NEXT:    s_mov_b32 s13, s15
11884; GFX900-NEXT:    s_mov_b32 s14, s16
11885; GFX900-NEXT:    s_mov_b32 s15, s17
11886; GFX900-NEXT:    ;;#ASMSTART
11887; GFX900-NEXT:    ; use s[8:15]
11888; GFX900-NEXT:    ;;#ASMEND
11889; GFX900-NEXT:    s_setpc_b64 s[30:31]
11890;
11891; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
11892; GFX90A:       ; %bb.0:
11893; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11894; GFX90A-NEXT:    ;;#ASMSTART
11895; GFX90A-NEXT:    ; def s[12:17]
11896; GFX90A-NEXT:    ;;#ASMEND
11897; GFX90A-NEXT:    ;;#ASMSTART
11898; GFX90A-NEXT:    ; def s[8:13]
11899; GFX90A-NEXT:    ;;#ASMEND
11900; GFX90A-NEXT:    s_mov_b32 s8, s12
11901; GFX90A-NEXT:    s_mov_b32 s9, s13
11902; GFX90A-NEXT:    s_mov_b32 s10, s12
11903; GFX90A-NEXT:    s_mov_b32 s11, s13
11904; GFX90A-NEXT:    s_mov_b32 s12, s14
11905; GFX90A-NEXT:    s_mov_b32 s13, s15
11906; GFX90A-NEXT:    s_mov_b32 s14, s16
11907; GFX90A-NEXT:    s_mov_b32 s15, s17
11908; GFX90A-NEXT:    ;;#ASMSTART
11909; GFX90A-NEXT:    ; use s[8:15]
11910; GFX90A-NEXT:    ;;#ASMEND
11911; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11912;
11913; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_2:
11914; GFX940:       ; %bb.0:
11915; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11916; GFX940-NEXT:    ;;#ASMSTART
11917; GFX940-NEXT:    ; def s[8:13]
11918; GFX940-NEXT:    ;;#ASMEND
11919; GFX940-NEXT:    ;;#ASMSTART
11920; GFX940-NEXT:    ; def s[0:5]
11921; GFX940-NEXT:    ;;#ASMEND
11922; GFX940-NEXT:    s_mov_b32 s8, s12
11923; GFX940-NEXT:    s_mov_b32 s9, s13
11924; GFX940-NEXT:    s_mov_b32 s10, s12
11925; GFX940-NEXT:    s_mov_b32 s11, s13
11926; GFX940-NEXT:    s_mov_b32 s12, s2
11927; GFX940-NEXT:    s_mov_b32 s13, s3
11928; GFX940-NEXT:    s_mov_b32 s14, s4
11929; GFX940-NEXT:    s_mov_b32 s15, s5
11930; GFX940-NEXT:    ;;#ASMSTART
11931; GFX940-NEXT:    ; use s[8:15]
11932; GFX940-NEXT:    ;;#ASMEND
11933; GFX940-NEXT:    s_setpc_b64 s[30:31]
11934  %vec0 = call <3 x i64> asm "; def $0", "=s"()
11935  %vec1 = call <3 x i64> asm "; def $0", "=s"()
11936  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 2>
11937  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
11938  ret void
11939}
11940
; SGPR shuffle with mask <5, 5, 3, 2>. Mask indices 0-2 address %vec0 and
; 3-5 address %vec1, so the result is
; { %vec1[2], %vec1[2], %vec1[0], %vec0[2] }.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
11941define void @s_shuffle_v4i64_v3i64__5_5_3_2() {
11942; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
11943; GFX900:       ; %bb.0:
11944; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11945; GFX900-NEXT:    ;;#ASMSTART
11946; GFX900-NEXT:    ; def s[12:17]
11947; GFX900-NEXT:    ;;#ASMEND
11948; GFX900-NEXT:    ;;#ASMSTART
11949; GFX900-NEXT:    ; def s[20:25]
11950; GFX900-NEXT:    ;;#ASMEND
11951; GFX900-NEXT:    s_mov_b32 s8, s24
11952; GFX900-NEXT:    s_mov_b32 s9, s25
11953; GFX900-NEXT:    s_mov_b32 s10, s24
11954; GFX900-NEXT:    s_mov_b32 s11, s25
11955; GFX900-NEXT:    s_mov_b32 s12, s20
11956; GFX900-NEXT:    s_mov_b32 s13, s21
11957; GFX900-NEXT:    s_mov_b32 s14, s16
11958; GFX900-NEXT:    s_mov_b32 s15, s17
11959; GFX900-NEXT:    ;;#ASMSTART
11960; GFX900-NEXT:    ; use s[8:15]
11961; GFX900-NEXT:    ;;#ASMEND
11962; GFX900-NEXT:    s_setpc_b64 s[30:31]
11963;
11964; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
11965; GFX90A:       ; %bb.0:
11966; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11967; GFX90A-NEXT:    ;;#ASMSTART
11968; GFX90A-NEXT:    ; def s[12:17]
11969; GFX90A-NEXT:    ;;#ASMEND
11970; GFX90A-NEXT:    ;;#ASMSTART
11971; GFX90A-NEXT:    ; def s[20:25]
11972; GFX90A-NEXT:    ;;#ASMEND
11973; GFX90A-NEXT:    s_mov_b32 s8, s24
11974; GFX90A-NEXT:    s_mov_b32 s9, s25
11975; GFX90A-NEXT:    s_mov_b32 s10, s24
11976; GFX90A-NEXT:    s_mov_b32 s11, s25
11977; GFX90A-NEXT:    s_mov_b32 s12, s20
11978; GFX90A-NEXT:    s_mov_b32 s13, s21
11979; GFX90A-NEXT:    s_mov_b32 s14, s16
11980; GFX90A-NEXT:    s_mov_b32 s15, s17
11981; GFX90A-NEXT:    ;;#ASMSTART
11982; GFX90A-NEXT:    ; use s[8:15]
11983; GFX90A-NEXT:    ;;#ASMEND
11984; GFX90A-NEXT:    s_setpc_b64 s[30:31]
11985;
11986; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_2:
11987; GFX940:       ; %bb.0:
11988; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11989; GFX940-NEXT:    ;;#ASMSTART
11990; GFX940-NEXT:    ; def s[12:17]
11991; GFX940-NEXT:    ;;#ASMEND
11992; GFX940-NEXT:    ;;#ASMSTART
11993; GFX940-NEXT:    ; def s[0:5]
11994; GFX940-NEXT:    ;;#ASMEND
11995; GFX940-NEXT:    s_mov_b32 s8, s16
11996; GFX940-NEXT:    s_mov_b32 s9, s17
11997; GFX940-NEXT:    s_mov_b32 s10, s16
11998; GFX940-NEXT:    s_mov_b32 s11, s17
11999; GFX940-NEXT:    s_mov_b32 s14, s4
12000; GFX940-NEXT:    s_mov_b32 s15, s5
12001; GFX940-NEXT:    ;;#ASMSTART
12002; GFX940-NEXT:    ; use s[8:15]
12003; GFX940-NEXT:    ;;#ASMEND
12004; GFX940-NEXT:    s_setpc_b64 s[30:31]
12005  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12006  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12007  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 2>
12008  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12009  ret void
12010}
12011
; SGPR shuffle with mask <5, 5, 4, 2>. Mask indices 0-2 address %vec0 and
; 3-5 address %vec1, so the result is
; { %vec1[2], %vec1[2], %vec1[1], %vec0[2] }.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
12012define void @s_shuffle_v4i64_v3i64__5_5_4_2() {
12013; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
12014; GFX900:       ; %bb.0:
12015; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12016; GFX900-NEXT:    ;;#ASMSTART
12017; GFX900-NEXT:    ; def s[12:17]
12018; GFX900-NEXT:    ;;#ASMEND
12019; GFX900-NEXT:    ;;#ASMSTART
12020; GFX900-NEXT:    ; def s[20:25]
12021; GFX900-NEXT:    ;;#ASMEND
12022; GFX900-NEXT:    s_mov_b32 s8, s24
12023; GFX900-NEXT:    s_mov_b32 s9, s25
12024; GFX900-NEXT:    s_mov_b32 s10, s24
12025; GFX900-NEXT:    s_mov_b32 s11, s25
12026; GFX900-NEXT:    s_mov_b32 s12, s22
12027; GFX900-NEXT:    s_mov_b32 s13, s23
12028; GFX900-NEXT:    s_mov_b32 s14, s16
12029; GFX900-NEXT:    s_mov_b32 s15, s17
12030; GFX900-NEXT:    ;;#ASMSTART
12031; GFX900-NEXT:    ; use s[8:15]
12032; GFX900-NEXT:    ;;#ASMEND
12033; GFX900-NEXT:    s_setpc_b64 s[30:31]
12034;
12035; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
12036; GFX90A:       ; %bb.0:
12037; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12038; GFX90A-NEXT:    ;;#ASMSTART
12039; GFX90A-NEXT:    ; def s[12:17]
12040; GFX90A-NEXT:    ;;#ASMEND
12041; GFX90A-NEXT:    ;;#ASMSTART
12042; GFX90A-NEXT:    ; def s[20:25]
12043; GFX90A-NEXT:    ;;#ASMEND
12044; GFX90A-NEXT:    s_mov_b32 s8, s24
12045; GFX90A-NEXT:    s_mov_b32 s9, s25
12046; GFX90A-NEXT:    s_mov_b32 s10, s24
12047; GFX90A-NEXT:    s_mov_b32 s11, s25
12048; GFX90A-NEXT:    s_mov_b32 s12, s22
12049; GFX90A-NEXT:    s_mov_b32 s13, s23
12050; GFX90A-NEXT:    s_mov_b32 s14, s16
12051; GFX90A-NEXT:    s_mov_b32 s15, s17
12052; GFX90A-NEXT:    ;;#ASMSTART
12053; GFX90A-NEXT:    ; use s[8:15]
12054; GFX90A-NEXT:    ;;#ASMEND
12055; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12056;
12057; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_2:
12058; GFX940:       ; %bb.0:
12059; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12060; GFX940-NEXT:    ;;#ASMSTART
12061; GFX940-NEXT:    ; def s[12:17]
12062; GFX940-NEXT:    ;;#ASMEND
12063; GFX940-NEXT:    ;;#ASMSTART
12064; GFX940-NEXT:    ; def s[0:5]
12065; GFX940-NEXT:    ;;#ASMEND
12066; GFX940-NEXT:    s_mov_b32 s8, s16
12067; GFX940-NEXT:    s_mov_b32 s9, s17
12068; GFX940-NEXT:    s_mov_b32 s10, s16
12069; GFX940-NEXT:    s_mov_b32 s11, s17
12070; GFX940-NEXT:    s_mov_b32 s12, s14
12071; GFX940-NEXT:    s_mov_b32 s13, s15
12072; GFX940-NEXT:    s_mov_b32 s14, s4
12073; GFX940-NEXT:    s_mov_b32 s15, s5
12074; GFX940-NEXT:    ;;#ASMSTART
12075; GFX940-NEXT:    ; use s[8:15]
12076; GFX940-NEXT:    ;;#ASMEND
12077; GFX940-NEXT:    s_setpc_b64 s[30:31]
12078  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12079  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12080  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 2>
12081  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12082  ret void
12083}
12084
; SGPR shuffle with mask <poison, 3, 3, 3> and a poison second operand.
; Lane 0 is poison and index 3 addresses element 0 of the poison operand,
; so no lane of the result carries a value from %vec0. The expected output
; (shared by all three targets) contains only the use of s[8:15]: no def and
; no copies.
12085define void @s_shuffle_v4i64_v3i64__u_3_3_3() {
12086; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_3_3_3:
12087; GFX9:       ; %bb.0:
12088; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12089; GFX9-NEXT:    ;;#ASMSTART
12090; GFX9-NEXT:    ; use s[8:15]
12091; GFX9-NEXT:    ;;#ASMEND
12092; GFX9-NEXT:    s_setpc_b64 s[30:31]
12093  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12094  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 poison, i32 3, i32 3, i32 3>
12095  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12096  ret void
12097}
12098
; SGPR shuffle with mask <0, 3, 3, 3> and a poison second operand.
; Lane 0 is %vec0[0]; index 3 addresses element 0 of the poison operand, so
; lanes 1-3 have no required value. The expected output defines %vec0 in
; s[8:13] and uses s[8:15] with no s_mov copies in between.
; NOTE(review): the gfx940 expectation differs from gfx900/gfx90a only by an
; extra `s_nop 0` between the def and the use - confirm by regenerating with
; update_llc_test_checks.py if this ever looks stale.
12099define void @s_shuffle_v4i64_v3i64__0_3_3_3() {
12100; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
12101; GFX900:       ; %bb.0:
12102; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12103; GFX900-NEXT:    ;;#ASMSTART
12104; GFX900-NEXT:    ; def s[8:13]
12105; GFX900-NEXT:    ;;#ASMEND
12106; GFX900-NEXT:    ;;#ASMSTART
12107; GFX900-NEXT:    ; use s[8:15]
12108; GFX900-NEXT:    ;;#ASMEND
12109; GFX900-NEXT:    s_setpc_b64 s[30:31]
12110;
12111; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
12112; GFX90A:       ; %bb.0:
12113; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12114; GFX90A-NEXT:    ;;#ASMSTART
12115; GFX90A-NEXT:    ; def s[8:13]
12116; GFX90A-NEXT:    ;;#ASMEND
12117; GFX90A-NEXT:    ;;#ASMSTART
12118; GFX90A-NEXT:    ; use s[8:15]
12119; GFX90A-NEXT:    ;;#ASMEND
12120; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12121;
12122; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_3_3_3:
12123; GFX940:       ; %bb.0:
12124; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12125; GFX940-NEXT:    ;;#ASMSTART
12126; GFX940-NEXT:    ; def s[8:13]
12127; GFX940-NEXT:    ;;#ASMEND
12128; GFX940-NEXT:    s_nop 0
12129; GFX940-NEXT:    ;;#ASMSTART
12130; GFX940-NEXT:    ; use s[8:15]
12131; GFX940-NEXT:    ;;#ASMEND
12132; GFX940-NEXT:    s_setpc_b64 s[30:31]
12133  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12134  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 0, i32 3, i32 3, i32 3>
12135  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12136  ret void
12137}
12138
; SGPR shuffle with mask <1, 3, 3, 3> and a poison second operand.
; Lane 0 is %vec0[1]; index 3 addresses element 0 of the poison operand, so
; lanes 1-3 have no required value. The expected output copies only one
; 64-bit element (two s_mov_b32) into s[8:9] before the use of s[8:15].
12139define void @s_shuffle_v4i64_v3i64__1_3_3_3() {
12140; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
12141; GFX900:       ; %bb.0:
12142; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12143; GFX900-NEXT:    ;;#ASMSTART
12144; GFX900-NEXT:    ; def s[4:9]
12145; GFX900-NEXT:    ;;#ASMEND
12146; GFX900-NEXT:    s_mov_b32 s8, s6
12147; GFX900-NEXT:    s_mov_b32 s9, s7
12148; GFX900-NEXT:    ;;#ASMSTART
12149; GFX900-NEXT:    ; use s[8:15]
12150; GFX900-NEXT:    ;;#ASMEND
12151; GFX900-NEXT:    s_setpc_b64 s[30:31]
12152;
12153; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
12154; GFX90A:       ; %bb.0:
12155; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12156; GFX90A-NEXT:    ;;#ASMSTART
12157; GFX90A-NEXT:    ; def s[4:9]
12158; GFX90A-NEXT:    ;;#ASMEND
12159; GFX90A-NEXT:    s_mov_b32 s8, s6
12160; GFX90A-NEXT:    s_mov_b32 s9, s7
12161; GFX90A-NEXT:    ;;#ASMSTART
12162; GFX90A-NEXT:    ; use s[8:15]
12163; GFX90A-NEXT:    ;;#ASMEND
12164; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12165;
12166; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_3_3_3:
12167; GFX940:       ; %bb.0:
12168; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12169; GFX940-NEXT:    ;;#ASMSTART
12170; GFX940-NEXT:    ; def s[0:5]
12171; GFX940-NEXT:    ;;#ASMEND
12172; GFX940-NEXT:    s_mov_b32 s8, s2
12173; GFX940-NEXT:    s_mov_b32 s9, s3
12174; GFX940-NEXT:    ;;#ASMSTART
12175; GFX940-NEXT:    ; use s[8:15]
12176; GFX940-NEXT:    ;;#ASMEND
12177; GFX940-NEXT:    s_setpc_b64 s[30:31]
12178  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12179  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 1, i32 3, i32 3, i32 3>
12180  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12181  ret void
12182}
12183
; SGPR shuffle with mask <2, 3, 3, 3> and a poison second operand.
; Lane 0 is %vec0[2]; index 3 addresses element 0 of the poison operand, so
; lanes 1-3 have no required value. In the gfx900/gfx90a expectations the
; def lands in s[4:9], which already places element 2 in s[8:9], so no copy
; appears; the gfx940 expectation defines s[0:5] and copies s[4:5] to s[8:9].
12184define void @s_shuffle_v4i64_v3i64__2_3_3_3() {
12185; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
12186; GFX900:       ; %bb.0:
12187; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12188; GFX900-NEXT:    ;;#ASMSTART
12189; GFX900-NEXT:    ; def s[4:9]
12190; GFX900-NEXT:    ;;#ASMEND
12191; GFX900-NEXT:    ;;#ASMSTART
12192; GFX900-NEXT:    ; use s[8:15]
12193; GFX900-NEXT:    ;;#ASMEND
12194; GFX900-NEXT:    s_setpc_b64 s[30:31]
12195;
12196; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
12197; GFX90A:       ; %bb.0:
12198; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12199; GFX90A-NEXT:    ;;#ASMSTART
12200; GFX90A-NEXT:    ; def s[4:9]
12201; GFX90A-NEXT:    ;;#ASMEND
12202; GFX90A-NEXT:    ;;#ASMSTART
12203; GFX90A-NEXT:    ; use s[8:15]
12204; GFX90A-NEXT:    ;;#ASMEND
12205; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12206;
12207; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_3_3_3:
12208; GFX940:       ; %bb.0:
12209; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12210; GFX940-NEXT:    ;;#ASMSTART
12211; GFX940-NEXT:    ; def s[0:5]
12212; GFX940-NEXT:    ;;#ASMEND
12213; GFX940-NEXT:    s_mov_b32 s8, s4
12214; GFX940-NEXT:    s_mov_b32 s9, s5
12215; GFX940-NEXT:    ;;#ASMSTART
12216; GFX940-NEXT:    ; use s[8:15]
12217; GFX940-NEXT:    ;;#ASMEND
12218; GFX940-NEXT:    s_setpc_b64 s[30:31]
12219  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12220  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 2, i32 3, i32 3, i32 3>
12221  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12222  ret void
12223}
12224
; SGPR shuffle with mask <3, 3, 3, 3> and a poison second operand.
; Every lane addresses element 0 of the poison operand, so no lane of the
; result carries a value from %vec0. The expected output (shared by all
; three targets) contains only the use of s[8:15]: no def and no copies.
12225define void @s_shuffle_v4i64_v3i64__3_3_3_3() {
12226; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_3_3_3:
12227; GFX9:       ; %bb.0:
12228; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12229; GFX9-NEXT:    ;;#ASMSTART
12230; GFX9-NEXT:    ; use s[8:15]
12231; GFX9-NEXT:    ;;#ASMEND
12232; GFX9-NEXT:    s_setpc_b64 s[30:31]
12233  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12234  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
12235  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12236  ret void
12237}
12238
; SGPR shuffle with mask <4, 3, 3, 3>. Mask indices 0-2 address %vec0 and
; 3-5 address %vec1, so the result is
; { %vec1[1], %vec1[0], %vec1[0], %vec1[0] }; %vec0 is never selected, and
; the expected output contains a single def.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
12239define void @s_shuffle_v4i64_v3i64__4_3_3_3() {
12240; GFX900-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
12241; GFX900:       ; %bb.0:
12242; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12243; GFX900-NEXT:    ;;#ASMSTART
12244; GFX900-NEXT:    ; def s[4:9]
12245; GFX900-NEXT:    ;;#ASMEND
12246; GFX900-NEXT:    s_mov_b32 s8, s6
12247; GFX900-NEXT:    s_mov_b32 s9, s7
12248; GFX900-NEXT:    s_mov_b32 s10, s4
12249; GFX900-NEXT:    s_mov_b32 s11, s5
12250; GFX900-NEXT:    s_mov_b32 s12, s4
12251; GFX900-NEXT:    s_mov_b32 s13, s5
12252; GFX900-NEXT:    s_mov_b32 s14, s4
12253; GFX900-NEXT:    s_mov_b32 s15, s5
12254; GFX900-NEXT:    ;;#ASMSTART
12255; GFX900-NEXT:    ; use s[8:15]
12256; GFX900-NEXT:    ;;#ASMEND
12257; GFX900-NEXT:    s_setpc_b64 s[30:31]
12258;
12259; GFX90A-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
12260; GFX90A:       ; %bb.0:
12261; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12262; GFX90A-NEXT:    ;;#ASMSTART
12263; GFX90A-NEXT:    ; def s[4:9]
12264; GFX90A-NEXT:    ;;#ASMEND
12265; GFX90A-NEXT:    s_mov_b32 s8, s6
12266; GFX90A-NEXT:    s_mov_b32 s9, s7
12267; GFX90A-NEXT:    s_mov_b32 s10, s4
12268; GFX90A-NEXT:    s_mov_b32 s11, s5
12269; GFX90A-NEXT:    s_mov_b32 s12, s4
12270; GFX90A-NEXT:    s_mov_b32 s13, s5
12271; GFX90A-NEXT:    s_mov_b32 s14, s4
12272; GFX90A-NEXT:    s_mov_b32 s15, s5
12273; GFX90A-NEXT:    ;;#ASMSTART
12274; GFX90A-NEXT:    ; use s[8:15]
12275; GFX90A-NEXT:    ;;#ASMEND
12276; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12277;
12278; GFX940-LABEL: s_shuffle_v4i64_v3i64__4_3_3_3:
12279; GFX940:       ; %bb.0:
12280; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12281; GFX940-NEXT:    ;;#ASMSTART
12282; GFX940-NEXT:    ; def s[0:5]
12283; GFX940-NEXT:    ;;#ASMEND
12284; GFX940-NEXT:    s_mov_b32 s8, s2
12285; GFX940-NEXT:    s_mov_b32 s9, s3
12286; GFX940-NEXT:    s_mov_b32 s10, s0
12287; GFX940-NEXT:    s_mov_b32 s11, s1
12288; GFX940-NEXT:    s_mov_b32 s12, s0
12289; GFX940-NEXT:    s_mov_b32 s13, s1
12290; GFX940-NEXT:    s_mov_b32 s14, s0
12291; GFX940-NEXT:    s_mov_b32 s15, s1
12292; GFX940-NEXT:    ;;#ASMSTART
12293; GFX940-NEXT:    ; use s[8:15]
12294; GFX940-NEXT:    ;;#ASMEND
12295; GFX940-NEXT:    s_setpc_b64 s[30:31]
12296  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12297  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12298  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 3, i32 3, i32 3>
12299  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12300  ret void
12301}
12302
; SGPR shuffle with mask <5, 3, 3, 3>. Mask indices 0-2 address %vec0 and
; 3-5 address %vec1, so the result is
; { %vec1[2], %vec1[0], %vec1[0], %vec1[0] }; %vec0 is never selected, and
; the expected output contains a single def.
; The "{s[8:15]}" constraint pins the shuffled vector to s[8:15] for the use.
12303define void @s_shuffle_v4i64_v3i64__5_3_3_3() {
12304; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
12305; GFX900:       ; %bb.0:
12306; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12307; GFX900-NEXT:    ;;#ASMSTART
12308; GFX900-NEXT:    ; def s[4:9]
12309; GFX900-NEXT:    ;;#ASMEND
12310; GFX900-NEXT:    s_mov_b32 s10, s4
12311; GFX900-NEXT:    s_mov_b32 s11, s5
12312; GFX900-NEXT:    s_mov_b32 s12, s4
12313; GFX900-NEXT:    s_mov_b32 s13, s5
12314; GFX900-NEXT:    s_mov_b32 s14, s4
12315; GFX900-NEXT:    s_mov_b32 s15, s5
12316; GFX900-NEXT:    ;;#ASMSTART
12317; GFX900-NEXT:    ; use s[8:15]
12318; GFX900-NEXT:    ;;#ASMEND
12319; GFX900-NEXT:    s_setpc_b64 s[30:31]
12320;
12321; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
12322; GFX90A:       ; %bb.0:
12323; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12324; GFX90A-NEXT:    ;;#ASMSTART
12325; GFX90A-NEXT:    ; def s[4:9]
12326; GFX90A-NEXT:    ;;#ASMEND
12327; GFX90A-NEXT:    s_mov_b32 s10, s4
12328; GFX90A-NEXT:    s_mov_b32 s11, s5
12329; GFX90A-NEXT:    s_mov_b32 s12, s4
12330; GFX90A-NEXT:    s_mov_b32 s13, s5
12331; GFX90A-NEXT:    s_mov_b32 s14, s4
12332; GFX90A-NEXT:    s_mov_b32 s15, s5
12333; GFX90A-NEXT:    ;;#ASMSTART
12334; GFX90A-NEXT:    ; use s[8:15]
12335; GFX90A-NEXT:    ;;#ASMEND
12336; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12337;
12338; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_3_3:
12339; GFX940:       ; %bb.0:
12340; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12341; GFX940-NEXT:    ;;#ASMSTART
12342; GFX940-NEXT:    ; def s[0:5]
12343; GFX940-NEXT:    ;;#ASMEND
12344; GFX940-NEXT:    s_mov_b32 s8, s4
12345; GFX940-NEXT:    s_mov_b32 s9, s5
12346; GFX940-NEXT:    s_mov_b32 s10, s0
12347; GFX940-NEXT:    s_mov_b32 s11, s1
12348; GFX940-NEXT:    s_mov_b32 s12, s0
12349; GFX940-NEXT:    s_mov_b32 s13, s1
12350; GFX940-NEXT:    s_mov_b32 s14, s0
12351; GFX940-NEXT:    s_mov_b32 s15, s1
12352; GFX940-NEXT:    ;;#ASMSTART
12353; GFX940-NEXT:    ; use s[8:15]
12354; GFX940-NEXT:    ;;#ASMEND
12355; GFX940-NEXT:    s_setpc_b64 s[30:31]
12356  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12357  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12358  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 3, i32 3>
12359  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12360  ret void
12361}
12362
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, poison, 3, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 = %vec1[2],
; lane 1 is poison (no copy is expected for s[10:11]) and lanes 2-3 = %vec1[0].
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12363define void @s_shuffle_v4i64_v3i64__5_u_3_3() {
12364; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
12365; GFX900:       ; %bb.0:
12366; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12367; GFX900-NEXT:    ;;#ASMSTART
12368; GFX900-NEXT:    ; def s[4:9]
12369; GFX900-NEXT:    ;;#ASMEND
12370; GFX900-NEXT:    s_mov_b32 s12, s4
12371; GFX900-NEXT:    s_mov_b32 s13, s5
12372; GFX900-NEXT:    s_mov_b32 s14, s4
12373; GFX900-NEXT:    s_mov_b32 s15, s5
12374; GFX900-NEXT:    ;;#ASMSTART
12375; GFX900-NEXT:    ; use s[8:15]
12376; GFX900-NEXT:    ;;#ASMEND
12377; GFX900-NEXT:    s_setpc_b64 s[30:31]
12378;
12379; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
12380; GFX90A:       ; %bb.0:
12381; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12382; GFX90A-NEXT:    ;;#ASMSTART
12383; GFX90A-NEXT:    ; def s[4:9]
12384; GFX90A-NEXT:    ;;#ASMEND
12385; GFX90A-NEXT:    s_mov_b32 s12, s4
12386; GFX90A-NEXT:    s_mov_b32 s13, s5
12387; GFX90A-NEXT:    s_mov_b32 s14, s4
12388; GFX90A-NEXT:    s_mov_b32 s15, s5
12389; GFX90A-NEXT:    ;;#ASMSTART
12390; GFX90A-NEXT:    ; use s[8:15]
12391; GFX90A-NEXT:    ;;#ASMEND
12392; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12393;
12394; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_3_3:
12395; GFX940:       ; %bb.0:
12396; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12397; GFX940-NEXT:    ;;#ASMSTART
12398; GFX940-NEXT:    ; def s[0:5]
12399; GFX940-NEXT:    ;;#ASMEND
12400; GFX940-NEXT:    s_mov_b32 s8, s4
12401; GFX940-NEXT:    s_mov_b32 s9, s5
12402; GFX940-NEXT:    s_mov_b32 s12, s0
12403; GFX940-NEXT:    s_mov_b32 s13, s1
12404; GFX940-NEXT:    s_mov_b32 s14, s0
12405; GFX940-NEXT:    s_mov_b32 s15, s1
12406; GFX940-NEXT:    ;;#ASMSTART
12407; GFX940-NEXT:    ; use s[8:15]
12408; GFX940-NEXT:    ;;#ASMEND
12409; GFX940-NEXT:    s_setpc_b64 s[30:31]
12410  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12411  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12412  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 3, i32 3>
12413  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12414  ret void
12415}
12416
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 0, 3, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 = %vec1[2],
; lane 1 = %vec0[0] and lanes 2-3 = %vec1[0]; both inputs are live here.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12417define void @s_shuffle_v4i64_v3i64__5_0_3_3() {
12418; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
12419; GFX900:       ; %bb.0:
12420; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12421; GFX900-NEXT:    ;;#ASMSTART
12422; GFX900-NEXT:    ; def s[4:9]
12423; GFX900-NEXT:    ;;#ASMEND
12424; GFX900-NEXT:    ;;#ASMSTART
12425; GFX900-NEXT:    ; def s[16:21]
12426; GFX900-NEXT:    ;;#ASMEND
12427; GFX900-NEXT:    s_mov_b32 s8, s20
12428; GFX900-NEXT:    s_mov_b32 s9, s21
12429; GFX900-NEXT:    s_mov_b32 s10, s4
12430; GFX900-NEXT:    s_mov_b32 s11, s5
12431; GFX900-NEXT:    s_mov_b32 s12, s16
12432; GFX900-NEXT:    s_mov_b32 s13, s17
12433; GFX900-NEXT:    s_mov_b32 s14, s16
12434; GFX900-NEXT:    s_mov_b32 s15, s17
12435; GFX900-NEXT:    ;;#ASMSTART
12436; GFX900-NEXT:    ; use s[8:15]
12437; GFX900-NEXT:    ;;#ASMEND
12438; GFX900-NEXT:    s_setpc_b64 s[30:31]
12439;
12440; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
12441; GFX90A:       ; %bb.0:
12442; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12443; GFX90A-NEXT:    ;;#ASMSTART
12444; GFX90A-NEXT:    ; def s[4:9]
12445; GFX90A-NEXT:    ;;#ASMEND
12446; GFX90A-NEXT:    ;;#ASMSTART
12447; GFX90A-NEXT:    ; def s[16:21]
12448; GFX90A-NEXT:    ;;#ASMEND
12449; GFX90A-NEXT:    s_mov_b32 s8, s20
12450; GFX90A-NEXT:    s_mov_b32 s9, s21
12451; GFX90A-NEXT:    s_mov_b32 s10, s4
12452; GFX90A-NEXT:    s_mov_b32 s11, s5
12453; GFX90A-NEXT:    s_mov_b32 s12, s16
12454; GFX90A-NEXT:    s_mov_b32 s13, s17
12455; GFX90A-NEXT:    s_mov_b32 s14, s16
12456; GFX90A-NEXT:    s_mov_b32 s15, s17
12457; GFX90A-NEXT:    ;;#ASMSTART
12458; GFX90A-NEXT:    ; use s[8:15]
12459; GFX90A-NEXT:    ;;#ASMEND
12460; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12461;
12462; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_3_3:
12463; GFX940:       ; %bb.0:
12464; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12465; GFX940-NEXT:    ;;#ASMSTART
12466; GFX940-NEXT:    ; def s[0:5]
12467; GFX940-NEXT:    ;;#ASMEND
12468; GFX940-NEXT:    s_mov_b32 s10, s0
12469; GFX940-NEXT:    ;;#ASMSTART
12470; GFX940-NEXT:    ; def s[4:9]
12471; GFX940-NEXT:    ;;#ASMEND
12472; GFX940-NEXT:    s_mov_b32 s11, s1
12473; GFX940-NEXT:    s_mov_b32 s12, s4
12474; GFX940-NEXT:    s_mov_b32 s13, s5
12475; GFX940-NEXT:    s_mov_b32 s14, s4
12476; GFX940-NEXT:    s_mov_b32 s15, s5
12477; GFX940-NEXT:    ;;#ASMSTART
12478; GFX940-NEXT:    ; use s[8:15]
12479; GFX940-NEXT:    ;;#ASMEND
12480; GFX940-NEXT:    s_setpc_b64 s[30:31]
12481  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12482  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12483  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 3, i32 3>
12484  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12485  ret void
12486}
12487
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 1, 3, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 = %vec1[2],
; lane 1 = %vec0[1] and lanes 2-3 = %vec1[0]; both inputs are live here.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12488define void @s_shuffle_v4i64_v3i64__5_1_3_3() {
12489; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
12490; GFX900:       ; %bb.0:
12491; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12492; GFX900-NEXT:    ;;#ASMSTART
12493; GFX900-NEXT:    ; def s[8:13]
12494; GFX900-NEXT:    ;;#ASMEND
12495; GFX900-NEXT:    ;;#ASMSTART
12496; GFX900-NEXT:    ; def s[4:9]
12497; GFX900-NEXT:    ;;#ASMEND
12498; GFX900-NEXT:    s_mov_b32 s12, s4
12499; GFX900-NEXT:    s_mov_b32 s13, s5
12500; GFX900-NEXT:    s_mov_b32 s14, s4
12501; GFX900-NEXT:    s_mov_b32 s15, s5
12502; GFX900-NEXT:    ;;#ASMSTART
12503; GFX900-NEXT:    ; use s[8:15]
12504; GFX900-NEXT:    ;;#ASMEND
12505; GFX900-NEXT:    s_setpc_b64 s[30:31]
12506;
12507; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
12508; GFX90A:       ; %bb.0:
12509; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12510; GFX90A-NEXT:    ;;#ASMSTART
12511; GFX90A-NEXT:    ; def s[8:13]
12512; GFX90A-NEXT:    ;;#ASMEND
12513; GFX90A-NEXT:    ;;#ASMSTART
12514; GFX90A-NEXT:    ; def s[4:9]
12515; GFX90A-NEXT:    ;;#ASMEND
12516; GFX90A-NEXT:    s_mov_b32 s12, s4
12517; GFX90A-NEXT:    s_mov_b32 s13, s5
12518; GFX90A-NEXT:    s_mov_b32 s14, s4
12519; GFX90A-NEXT:    s_mov_b32 s15, s5
12520; GFX90A-NEXT:    ;;#ASMSTART
12521; GFX90A-NEXT:    ; use s[8:15]
12522; GFX90A-NEXT:    ;;#ASMEND
12523; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12524;
12525; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_3_3:
12526; GFX940:       ; %bb.0:
12527; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12528; GFX940-NEXT:    ;;#ASMSTART
12529; GFX940-NEXT:    ; def s[8:13]
12530; GFX940-NEXT:    ;;#ASMEND
12531; GFX940-NEXT:    ;;#ASMSTART
12532; GFX940-NEXT:    ; def s[0:5]
12533; GFX940-NEXT:    ;;#ASMEND
12534; GFX940-NEXT:    s_mov_b32 s8, s4
12535; GFX940-NEXT:    s_mov_b32 s9, s5
12536; GFX940-NEXT:    s_mov_b32 s12, s0
12537; GFX940-NEXT:    s_mov_b32 s13, s1
12538; GFX940-NEXT:    s_mov_b32 s14, s0
12539; GFX940-NEXT:    s_mov_b32 s15, s1
12540; GFX940-NEXT:    ;;#ASMSTART
12541; GFX940-NEXT:    ; use s[8:15]
12542; GFX940-NEXT:    ;;#ASMEND
12543; GFX940-NEXT:    s_setpc_b64 s[30:31]
12544  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12545  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12546  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 3, i32 3>
12547  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12548  ret void
12549}
12550
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 2, 3, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 = %vec1[2],
; lane 1 = %vec0[2] and lanes 2-3 = %vec1[0]; both inputs are live here.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12551define void @s_shuffle_v4i64_v3i64__5_2_3_3() {
12552; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
12553; GFX900:       ; %bb.0:
12554; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12555; GFX900-NEXT:    ;;#ASMSTART
12556; GFX900-NEXT:    ; def s[8:13]
12557; GFX900-NEXT:    ;;#ASMEND
12558; GFX900-NEXT:    ;;#ASMSTART
12559; GFX900-NEXT:    ; def s[4:9]
12560; GFX900-NEXT:    ;;#ASMEND
12561; GFX900-NEXT:    s_mov_b32 s10, s12
12562; GFX900-NEXT:    s_mov_b32 s11, s13
12563; GFX900-NEXT:    s_mov_b32 s12, s4
12564; GFX900-NEXT:    s_mov_b32 s13, s5
12565; GFX900-NEXT:    s_mov_b32 s14, s4
12566; GFX900-NEXT:    s_mov_b32 s15, s5
12567; GFX900-NEXT:    ;;#ASMSTART
12568; GFX900-NEXT:    ; use s[8:15]
12569; GFX900-NEXT:    ;;#ASMEND
12570; GFX900-NEXT:    s_setpc_b64 s[30:31]
12571;
12572; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
12573; GFX90A:       ; %bb.0:
12574; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12575; GFX90A-NEXT:    ;;#ASMSTART
12576; GFX90A-NEXT:    ; def s[8:13]
12577; GFX90A-NEXT:    ;;#ASMEND
12578; GFX90A-NEXT:    ;;#ASMSTART
12579; GFX90A-NEXT:    ; def s[4:9]
12580; GFX90A-NEXT:    ;;#ASMEND
12581; GFX90A-NEXT:    s_mov_b32 s10, s12
12582; GFX90A-NEXT:    s_mov_b32 s11, s13
12583; GFX90A-NEXT:    s_mov_b32 s12, s4
12584; GFX90A-NEXT:    s_mov_b32 s13, s5
12585; GFX90A-NEXT:    s_mov_b32 s14, s4
12586; GFX90A-NEXT:    s_mov_b32 s15, s5
12587; GFX90A-NEXT:    ;;#ASMSTART
12588; GFX90A-NEXT:    ; use s[8:15]
12589; GFX90A-NEXT:    ;;#ASMEND
12590; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12591;
12592; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_3_3:
12593; GFX940:       ; %bb.0:
12594; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12595; GFX940-NEXT:    ;;#ASMSTART
12596; GFX940-NEXT:    ; def s[0:5]
12597; GFX940-NEXT:    ;;#ASMEND
12598; GFX940-NEXT:    ;;#ASMSTART
12599; GFX940-NEXT:    ; def s[16:21]
12600; GFX940-NEXT:    ;;#ASMEND
12601; GFX940-NEXT:    s_mov_b32 s8, s20
12602; GFX940-NEXT:    s_mov_b32 s9, s21
12603; GFX940-NEXT:    s_mov_b32 s10, s4
12604; GFX940-NEXT:    s_mov_b32 s11, s5
12605; GFX940-NEXT:    s_mov_b32 s12, s16
12606; GFX940-NEXT:    s_mov_b32 s13, s17
12607; GFX940-NEXT:    s_mov_b32 s14, s16
12608; GFX940-NEXT:    s_mov_b32 s15, s17
12609; GFX940-NEXT:    ;;#ASMSTART
12610; GFX940-NEXT:    ; use s[8:15]
12611; GFX940-NEXT:    ;;#ASMEND
12612; GFX940-NEXT:    s_setpc_b64 s[30:31]
12613  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12614  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12615  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 3, i32 3>
12616  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12617  ret void
12618}
12619
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 4, 3, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 = %vec1[2],
; lane 1 = %vec1[1] and lanes 2-3 = %vec1[0]; %vec0 is not referenced by the mask.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12620define void @s_shuffle_v4i64_v3i64__5_4_3_3() {
12621; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
12622; GFX900:       ; %bb.0:
12623; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12624; GFX900-NEXT:    ;;#ASMSTART
12625; GFX900-NEXT:    ; def s[4:9]
12626; GFX900-NEXT:    ;;#ASMEND
12627; GFX900-NEXT:    s_mov_b32 s10, s6
12628; GFX900-NEXT:    s_mov_b32 s11, s7
12629; GFX900-NEXT:    s_mov_b32 s12, s4
12630; GFX900-NEXT:    s_mov_b32 s13, s5
12631; GFX900-NEXT:    s_mov_b32 s14, s4
12632; GFX900-NEXT:    s_mov_b32 s15, s5
12633; GFX900-NEXT:    ;;#ASMSTART
12634; GFX900-NEXT:    ; use s[8:15]
12635; GFX900-NEXT:    ;;#ASMEND
12636; GFX900-NEXT:    s_setpc_b64 s[30:31]
12637;
12638; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
12639; GFX90A:       ; %bb.0:
12640; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12641; GFX90A-NEXT:    ;;#ASMSTART
12642; GFX90A-NEXT:    ; def s[4:9]
12643; GFX90A-NEXT:    ;;#ASMEND
12644; GFX90A-NEXT:    s_mov_b32 s10, s6
12645; GFX90A-NEXT:    s_mov_b32 s11, s7
12646; GFX90A-NEXT:    s_mov_b32 s12, s4
12647; GFX90A-NEXT:    s_mov_b32 s13, s5
12648; GFX90A-NEXT:    s_mov_b32 s14, s4
12649; GFX90A-NEXT:    s_mov_b32 s15, s5
12650; GFX90A-NEXT:    ;;#ASMSTART
12651; GFX90A-NEXT:    ; use s[8:15]
12652; GFX90A-NEXT:    ;;#ASMEND
12653; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12654;
12655; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_4_3_3:
12656; GFX940:       ; %bb.0:
12657; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12658; GFX940-NEXT:    ;;#ASMSTART
12659; GFX940-NEXT:    ; def s[0:5]
12660; GFX940-NEXT:    ;;#ASMEND
12661; GFX940-NEXT:    s_mov_b32 s8, s4
12662; GFX940-NEXT:    s_mov_b32 s9, s5
12663; GFX940-NEXT:    s_mov_b32 s10, s2
12664; GFX940-NEXT:    s_mov_b32 s11, s3
12665; GFX940-NEXT:    s_mov_b32 s12, s0
12666; GFX940-NEXT:    s_mov_b32 s13, s1
12667; GFX940-NEXT:    s_mov_b32 s14, s0
12668; GFX940-NEXT:    s_mov_b32 s15, s1
12669; GFX940-NEXT:    ;;#ASMSTART
12670; GFX940-NEXT:    ; use s[8:15]
12671; GFX940-NEXT:    ;;#ASMEND
12672; GFX940-NEXT:    s_setpc_b64 s[30:31]
12673  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12674  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12675  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 3, i32 3>
12676  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12677  ret void
12678}
12679
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 5, 3, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 = %vec1[2]
; and lanes 2-3 = %vec1[0]; %vec0 is not referenced by the mask.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12680define void @s_shuffle_v4i64_v3i64__5_5_3_3() {
12681; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
12682; GFX900:       ; %bb.0:
12683; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12684; GFX900-NEXT:    ;;#ASMSTART
12685; GFX900-NEXT:    ; def s[16:21]
12686; GFX900-NEXT:    ;;#ASMEND
12687; GFX900-NEXT:    s_mov_b32 s8, s20
12688; GFX900-NEXT:    s_mov_b32 s9, s21
12689; GFX900-NEXT:    s_mov_b32 s10, s20
12690; GFX900-NEXT:    s_mov_b32 s11, s21
12691; GFX900-NEXT:    s_mov_b32 s12, s16
12692; GFX900-NEXT:    s_mov_b32 s13, s17
12693; GFX900-NEXT:    s_mov_b32 s14, s16
12694; GFX900-NEXT:    s_mov_b32 s15, s17
12695; GFX900-NEXT:    ;;#ASMSTART
12696; GFX900-NEXT:    ; use s[8:15]
12697; GFX900-NEXT:    ;;#ASMEND
12698; GFX900-NEXT:    s_setpc_b64 s[30:31]
12699;
12700; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
12701; GFX90A:       ; %bb.0:
12702; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12703; GFX90A-NEXT:    ;;#ASMSTART
12704; GFX90A-NEXT:    ; def s[16:21]
12705; GFX90A-NEXT:    ;;#ASMEND
12706; GFX90A-NEXT:    s_mov_b32 s8, s20
12707; GFX90A-NEXT:    s_mov_b32 s9, s21
12708; GFX90A-NEXT:    s_mov_b32 s10, s20
12709; GFX90A-NEXT:    s_mov_b32 s11, s21
12710; GFX90A-NEXT:    s_mov_b32 s12, s16
12711; GFX90A-NEXT:    s_mov_b32 s13, s17
12712; GFX90A-NEXT:    s_mov_b32 s14, s16
12713; GFX90A-NEXT:    s_mov_b32 s15, s17
12714; GFX90A-NEXT:    ;;#ASMSTART
12715; GFX90A-NEXT:    ; use s[8:15]
12716; GFX90A-NEXT:    ;;#ASMEND
12717; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12718;
12719; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_3:
12720; GFX940:       ; %bb.0:
12721; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12722; GFX940-NEXT:    ;;#ASMSTART
12723; GFX940-NEXT:    ; def s[0:5]
12724; GFX940-NEXT:    ;;#ASMEND
12725; GFX940-NEXT:    s_mov_b32 s8, s4
12726; GFX940-NEXT:    s_mov_b32 s9, s5
12727; GFX940-NEXT:    s_mov_b32 s10, s4
12728; GFX940-NEXT:    s_mov_b32 s11, s5
12729; GFX940-NEXT:    s_mov_b32 s12, s0
12730; GFX940-NEXT:    s_mov_b32 s13, s1
12731; GFX940-NEXT:    s_mov_b32 s14, s0
12732; GFX940-NEXT:    s_mov_b32 s15, s1
12733; GFX940-NEXT:    ;;#ASMSTART
12734; GFX940-NEXT:    ; use s[8:15]
12735; GFX940-NEXT:    ;;#ASMEND
12736; GFX940-NEXT:    s_setpc_b64 s[30:31]
12737  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12738  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12739  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 3>
12740  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12741  ret void
12742}
12743
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 5, poison, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 = %vec1[2],
; lane 2 is poison (no copy is expected for s[12:13]) and lane 3 = %vec1[0].
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12744define void @s_shuffle_v4i64_v3i64__5_5_u_3() {
12745; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
12746; GFX900:       ; %bb.0:
12747; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12748; GFX900-NEXT:    ;;#ASMSTART
12749; GFX900-NEXT:    ; def s[12:17]
12750; GFX900-NEXT:    ;;#ASMEND
12751; GFX900-NEXT:    s_mov_b32 s8, s16
12752; GFX900-NEXT:    s_mov_b32 s9, s17
12753; GFX900-NEXT:    s_mov_b32 s10, s16
12754; GFX900-NEXT:    s_mov_b32 s11, s17
12755; GFX900-NEXT:    s_mov_b32 s14, s12
12756; GFX900-NEXT:    s_mov_b32 s15, s13
12757; GFX900-NEXT:    ;;#ASMSTART
12758; GFX900-NEXT:    ; use s[8:15]
12759; GFX900-NEXT:    ;;#ASMEND
12760; GFX900-NEXT:    s_setpc_b64 s[30:31]
12761;
12762; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
12763; GFX90A:       ; %bb.0:
12764; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12765; GFX90A-NEXT:    ;;#ASMSTART
12766; GFX90A-NEXT:    ; def s[12:17]
12767; GFX90A-NEXT:    ;;#ASMEND
12768; GFX90A-NEXT:    s_mov_b32 s8, s16
12769; GFX90A-NEXT:    s_mov_b32 s9, s17
12770; GFX90A-NEXT:    s_mov_b32 s10, s16
12771; GFX90A-NEXT:    s_mov_b32 s11, s17
12772; GFX90A-NEXT:    s_mov_b32 s14, s12
12773; GFX90A-NEXT:    s_mov_b32 s15, s13
12774; GFX90A-NEXT:    ;;#ASMSTART
12775; GFX90A-NEXT:    ; use s[8:15]
12776; GFX90A-NEXT:    ;;#ASMEND
12777; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12778;
12779; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_3:
12780; GFX940:       ; %bb.0:
12781; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12782; GFX940-NEXT:    ;;#ASMSTART
12783; GFX940-NEXT:    ; def s[0:5]
12784; GFX940-NEXT:    ;;#ASMEND
12785; GFX940-NEXT:    s_mov_b32 s8, s4
12786; GFX940-NEXT:    s_mov_b32 s9, s5
12787; GFX940-NEXT:    s_mov_b32 s10, s4
12788; GFX940-NEXT:    s_mov_b32 s11, s5
12789; GFX940-NEXT:    s_mov_b32 s14, s0
12790; GFX940-NEXT:    s_mov_b32 s15, s1
12791; GFX940-NEXT:    ;;#ASMSTART
12792; GFX940-NEXT:    ; use s[8:15]
12793; GFX940-NEXT:    ;;#ASMEND
12794; GFX940-NEXT:    s_setpc_b64 s[30:31]
12795  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12796  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12797  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 3>
12798  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12799  ret void
12800}
12801
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 5, 0, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 = %vec1[2],
; lane 2 = %vec0[0] and lane 3 = %vec1[0]; both inputs are live here.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12802define void @s_shuffle_v4i64_v3i64__5_5_0_3() {
12803; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
12804; GFX900:       ; %bb.0:
12805; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12806; GFX900-NEXT:    ;;#ASMSTART
12807; GFX900-NEXT:    ; def s[4:9]
12808; GFX900-NEXT:    ;;#ASMEND
12809; GFX900-NEXT:    ;;#ASMSTART
12810; GFX900-NEXT:    ; def s[16:21]
12811; GFX900-NEXT:    ;;#ASMEND
12812; GFX900-NEXT:    s_mov_b32 s8, s20
12813; GFX900-NEXT:    s_mov_b32 s9, s21
12814; GFX900-NEXT:    s_mov_b32 s10, s20
12815; GFX900-NEXT:    s_mov_b32 s11, s21
12816; GFX900-NEXT:    s_mov_b32 s12, s4
12817; GFX900-NEXT:    s_mov_b32 s13, s5
12818; GFX900-NEXT:    s_mov_b32 s14, s16
12819; GFX900-NEXT:    s_mov_b32 s15, s17
12820; GFX900-NEXT:    ;;#ASMSTART
12821; GFX900-NEXT:    ; use s[8:15]
12822; GFX900-NEXT:    ;;#ASMEND
12823; GFX900-NEXT:    s_setpc_b64 s[30:31]
12824;
12825; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
12826; GFX90A:       ; %bb.0:
12827; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12828; GFX90A-NEXT:    ;;#ASMSTART
12829; GFX90A-NEXT:    ; def s[4:9]
12830; GFX90A-NEXT:    ;;#ASMEND
12831; GFX90A-NEXT:    ;;#ASMSTART
12832; GFX90A-NEXT:    ; def s[16:21]
12833; GFX90A-NEXT:    ;;#ASMEND
12834; GFX90A-NEXT:    s_mov_b32 s8, s20
12835; GFX90A-NEXT:    s_mov_b32 s9, s21
12836; GFX90A-NEXT:    s_mov_b32 s10, s20
12837; GFX90A-NEXT:    s_mov_b32 s11, s21
12838; GFX90A-NEXT:    s_mov_b32 s12, s4
12839; GFX90A-NEXT:    s_mov_b32 s13, s5
12840; GFX90A-NEXT:    s_mov_b32 s14, s16
12841; GFX90A-NEXT:    s_mov_b32 s15, s17
12842; GFX90A-NEXT:    ;;#ASMSTART
12843; GFX90A-NEXT:    ; use s[8:15]
12844; GFX90A-NEXT:    ;;#ASMEND
12845; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12846;
12847; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_3:
12848; GFX940:       ; %bb.0:
12849; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12850; GFX940-NEXT:    ;;#ASMSTART
12851; GFX940-NEXT:    ; def s[0:5]
12852; GFX940-NEXT:    ;;#ASMEND
12853; GFX940-NEXT:    ;;#ASMSTART
12854; GFX940-NEXT:    ; def s[16:21]
12855; GFX940-NEXT:    ;;#ASMEND
12856; GFX940-NEXT:    s_mov_b32 s8, s20
12857; GFX940-NEXT:    s_mov_b32 s9, s21
12858; GFX940-NEXT:    s_mov_b32 s10, s20
12859; GFX940-NEXT:    s_mov_b32 s11, s21
12860; GFX940-NEXT:    s_mov_b32 s12, s0
12861; GFX940-NEXT:    s_mov_b32 s13, s1
12862; GFX940-NEXT:    s_mov_b32 s14, s16
12863; GFX940-NEXT:    s_mov_b32 s15, s17
12864; GFX940-NEXT:    ;;#ASMSTART
12865; GFX940-NEXT:    ; use s[8:15]
12866; GFX940-NEXT:    ;;#ASMEND
12867; GFX940-NEXT:    s_setpc_b64 s[30:31]
12868  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12869  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12870  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 3>
12871  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12872  ret void
12873}
12874
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 5, 1, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 = %vec1[2],
; lane 2 = %vec0[1] and lane 3 = %vec1[0]; both inputs are live here.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12875define void @s_shuffle_v4i64_v3i64__5_5_1_3() {
12876; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
12877; GFX900:       ; %bb.0:
12878; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12879; GFX900-NEXT:    ;;#ASMSTART
12880; GFX900-NEXT:    ; def s[4:9]
12881; GFX900-NEXT:    ;;#ASMEND
12882; GFX900-NEXT:    ;;#ASMSTART
12883; GFX900-NEXT:    ; def s[16:21]
12884; GFX900-NEXT:    ;;#ASMEND
12885; GFX900-NEXT:    s_mov_b32 s8, s20
12886; GFX900-NEXT:    s_mov_b32 s9, s21
12887; GFX900-NEXT:    s_mov_b32 s10, s20
12888; GFX900-NEXT:    s_mov_b32 s11, s21
12889; GFX900-NEXT:    s_mov_b32 s12, s6
12890; GFX900-NEXT:    s_mov_b32 s13, s7
12891; GFX900-NEXT:    s_mov_b32 s14, s16
12892; GFX900-NEXT:    s_mov_b32 s15, s17
12893; GFX900-NEXT:    ;;#ASMSTART
12894; GFX900-NEXT:    ; use s[8:15]
12895; GFX900-NEXT:    ;;#ASMEND
12896; GFX900-NEXT:    s_setpc_b64 s[30:31]
12897;
12898; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
12899; GFX90A:       ; %bb.0:
12900; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12901; GFX90A-NEXT:    ;;#ASMSTART
12902; GFX90A-NEXT:    ; def s[4:9]
12903; GFX90A-NEXT:    ;;#ASMEND
12904; GFX90A-NEXT:    ;;#ASMSTART
12905; GFX90A-NEXT:    ; def s[16:21]
12906; GFX90A-NEXT:    ;;#ASMEND
12907; GFX90A-NEXT:    s_mov_b32 s8, s20
12908; GFX90A-NEXT:    s_mov_b32 s9, s21
12909; GFX90A-NEXT:    s_mov_b32 s10, s20
12910; GFX90A-NEXT:    s_mov_b32 s11, s21
12911; GFX90A-NEXT:    s_mov_b32 s12, s6
12912; GFX90A-NEXT:    s_mov_b32 s13, s7
12913; GFX90A-NEXT:    s_mov_b32 s14, s16
12914; GFX90A-NEXT:    s_mov_b32 s15, s17
12915; GFX90A-NEXT:    ;;#ASMSTART
12916; GFX90A-NEXT:    ; use s[8:15]
12917; GFX90A-NEXT:    ;;#ASMEND
12918; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12919;
12920; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_3:
12921; GFX940:       ; %bb.0:
12922; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12923; GFX940-NEXT:    ;;#ASMSTART
12924; GFX940-NEXT:    ; def s[0:5]
12925; GFX940-NEXT:    ;;#ASMEND
12926; GFX940-NEXT:    ;;#ASMSTART
12927; GFX940-NEXT:    ; def s[16:21]
12928; GFX940-NEXT:    ;;#ASMEND
12929; GFX940-NEXT:    s_mov_b32 s8, s20
12930; GFX940-NEXT:    s_mov_b32 s9, s21
12931; GFX940-NEXT:    s_mov_b32 s10, s20
12932; GFX940-NEXT:    s_mov_b32 s11, s21
12933; GFX940-NEXT:    s_mov_b32 s12, s2
12934; GFX940-NEXT:    s_mov_b32 s13, s3
12935; GFX940-NEXT:    s_mov_b32 s14, s16
12936; GFX940-NEXT:    s_mov_b32 s15, s17
12937; GFX940-NEXT:    ;;#ASMSTART
12938; GFX940-NEXT:    ; use s[8:15]
12939; GFX940-NEXT:    ;;#ASMEND
12940; GFX940-NEXT:    s_setpc_b64 s[30:31]
12941  %vec0 = call <3 x i64> asm "; def $0", "=s"()
12942  %vec1 = call <3 x i64> asm "; def $0", "=s"()
12943  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 3>
12944  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
12945  ret void
12946}
12947
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 5, 2, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 = %vec1[2],
; lane 2 = %vec0[2] and lane 3 = %vec1[0]; both inputs are live here.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
12948define void @s_shuffle_v4i64_v3i64__5_5_2_3() {
12949; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
12950; GFX900:       ; %bb.0:
12951; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12952; GFX900-NEXT:    ;;#ASMSTART
12953; GFX900-NEXT:    ; def s[8:13]
12954; GFX900-NEXT:    ;;#ASMEND
12955; GFX900-NEXT:    ;;#ASMSTART
12956; GFX900-NEXT:    ; def s[16:21]
12957; GFX900-NEXT:    ;;#ASMEND
12958; GFX900-NEXT:    s_mov_b32 s8, s20
12959; GFX900-NEXT:    s_mov_b32 s9, s21
12960; GFX900-NEXT:    s_mov_b32 s10, s20
12961; GFX900-NEXT:    s_mov_b32 s11, s21
12962; GFX900-NEXT:    s_mov_b32 s14, s16
12963; GFX900-NEXT:    s_mov_b32 s15, s17
12964; GFX900-NEXT:    ;;#ASMSTART
12965; GFX900-NEXT:    ; use s[8:15]
12966; GFX900-NEXT:    ;;#ASMEND
12967; GFX900-NEXT:    s_setpc_b64 s[30:31]
12968;
12969; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
12970; GFX90A:       ; %bb.0:
12971; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12972; GFX90A-NEXT:    ;;#ASMSTART
12973; GFX90A-NEXT:    ; def s[8:13]
12974; GFX90A-NEXT:    ;;#ASMEND
12975; GFX90A-NEXT:    ;;#ASMSTART
12976; GFX90A-NEXT:    ; def s[16:21]
12977; GFX90A-NEXT:    ;;#ASMEND
12978; GFX90A-NEXT:    s_mov_b32 s8, s20
12979; GFX90A-NEXT:    s_mov_b32 s9, s21
12980; GFX90A-NEXT:    s_mov_b32 s10, s20
12981; GFX90A-NEXT:    s_mov_b32 s11, s21
12982; GFX90A-NEXT:    s_mov_b32 s14, s16
12983; GFX90A-NEXT:    s_mov_b32 s15, s17
12984; GFX90A-NEXT:    ;;#ASMSTART
12985; GFX90A-NEXT:    ; use s[8:15]
12986; GFX90A-NEXT:    ;;#ASMEND
12987; GFX90A-NEXT:    s_setpc_b64 s[30:31]
12988;
12989; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_3:
12990; GFX940:       ; %bb.0:
12991; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12992; GFX940-NEXT:    ;;#ASMSTART
12993; GFX940-NEXT:    ; def s[8:13]
12994; GFX940-NEXT:    ;;#ASMEND
12995; GFX940-NEXT:    ;;#ASMSTART
12996; GFX940-NEXT:    ; def s[0:5]
12997; GFX940-NEXT:    ;;#ASMEND
12998; GFX940-NEXT:    s_mov_b32 s8, s4
12999; GFX940-NEXT:    s_mov_b32 s9, s5
13000; GFX940-NEXT:    s_mov_b32 s10, s4
13001; GFX940-NEXT:    s_mov_b32 s11, s5
13002; GFX940-NEXT:    s_mov_b32 s14, s0
13003; GFX940-NEXT:    s_mov_b32 s15, s1
13004; GFX940-NEXT:    ;;#ASMSTART
13005; GFX940-NEXT:    ; use s[8:15]
13006; GFX940-NEXT:    ;;#ASMEND
13007; GFX940-NEXT:    s_setpc_b64 s[30:31]
13008  %vec0 = call <3 x i64> asm "; def $0", "=s"()
13009  %vec1 = call <3 x i64> asm "; def $0", "=s"()
13010  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 3>
13011  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
13012  ret void
13013}
13014
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <5, 5, 4, 3>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lanes 0-1 = %vec1[2],
; lane 2 = %vec1[1] and lane 3 = %vec1[0]; %vec0 is not referenced by the mask.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
13015define void @s_shuffle_v4i64_v3i64__5_5_4_3() {
13016; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
13017; GFX900:       ; %bb.0:
13018; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13019; GFX900-NEXT:    ;;#ASMSTART
13020; GFX900-NEXT:    ; def s[16:21]
13021; GFX900-NEXT:    ;;#ASMEND
13022; GFX900-NEXT:    s_mov_b32 s8, s20
13023; GFX900-NEXT:    s_mov_b32 s9, s21
13024; GFX900-NEXT:    s_mov_b32 s10, s20
13025; GFX900-NEXT:    s_mov_b32 s11, s21
13026; GFX900-NEXT:    s_mov_b32 s12, s18
13027; GFX900-NEXT:    s_mov_b32 s13, s19
13028; GFX900-NEXT:    s_mov_b32 s14, s16
13029; GFX900-NEXT:    s_mov_b32 s15, s17
13030; GFX900-NEXT:    ;;#ASMSTART
13031; GFX900-NEXT:    ; use s[8:15]
13032; GFX900-NEXT:    ;;#ASMEND
13033; GFX900-NEXT:    s_setpc_b64 s[30:31]
13034;
13035; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
13036; GFX90A:       ; %bb.0:
13037; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13038; GFX90A-NEXT:    ;;#ASMSTART
13039; GFX90A-NEXT:    ; def s[16:21]
13040; GFX90A-NEXT:    ;;#ASMEND
13041; GFX90A-NEXT:    s_mov_b32 s8, s20
13042; GFX90A-NEXT:    s_mov_b32 s9, s21
13043; GFX90A-NEXT:    s_mov_b32 s10, s20
13044; GFX90A-NEXT:    s_mov_b32 s11, s21
13045; GFX90A-NEXT:    s_mov_b32 s12, s18
13046; GFX90A-NEXT:    s_mov_b32 s13, s19
13047; GFX90A-NEXT:    s_mov_b32 s14, s16
13048; GFX90A-NEXT:    s_mov_b32 s15, s17
13049; GFX90A-NEXT:    ;;#ASMSTART
13050; GFX90A-NEXT:    ; use s[8:15]
13051; GFX90A-NEXT:    ;;#ASMEND
13052; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13053;
13054; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_3:
13055; GFX940:       ; %bb.0:
13056; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13057; GFX940-NEXT:    ;;#ASMSTART
13058; GFX940-NEXT:    ; def s[0:5]
13059; GFX940-NEXT:    ;;#ASMEND
13060; GFX940-NEXT:    s_mov_b32 s8, s4
13061; GFX940-NEXT:    s_mov_b32 s9, s5
13062; GFX940-NEXT:    s_mov_b32 s10, s4
13063; GFX940-NEXT:    s_mov_b32 s11, s5
13064; GFX940-NEXT:    s_mov_b32 s12, s2
13065; GFX940-NEXT:    s_mov_b32 s13, s3
13066; GFX940-NEXT:    s_mov_b32 s14, s0
13067; GFX940-NEXT:    s_mov_b32 s15, s1
13068; GFX940-NEXT:    ;;#ASMSTART
13069; GFX940-NEXT:    ; use s[8:15]
13070; GFX940-NEXT:    ;;#ASMEND
13071; GFX940-NEXT:    s_setpc_b64 s[30:31]
13072  %vec0 = call <3 x i64> asm "; def $0", "=s"()
13073  %vec1 = call <3 x i64> asm "; def $0", "=s"()
13074  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 3>
13075  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
13076  ret void
13077}
13078
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <poison, 4, 4, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 is poison and
; lanes 1-3 = %vec1[1]; %vec0 is not referenced by the mask.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
; A single set of checks is shared by all three RUN configurations here.
13079define void @s_shuffle_v4i64_v3i64__u_4_4_4() {
13080; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_4_4_4:
13081; GFX9:       ; %bb.0:
13082; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13083; GFX9-NEXT:    ;;#ASMSTART
13084; GFX9-NEXT:    ; def s[8:13]
13085; GFX9-NEXT:    ;;#ASMEND
13086; GFX9-NEXT:    s_mov_b32 s12, s10
13087; GFX9-NEXT:    s_mov_b32 s13, s11
13088; GFX9-NEXT:    s_mov_b32 s14, s10
13089; GFX9-NEXT:    s_mov_b32 s15, s11
13090; GFX9-NEXT:    ;;#ASMSTART
13091; GFX9-NEXT:    ; use s[8:15]
13092; GFX9-NEXT:    ;;#ASMEND
13093; GFX9-NEXT:    s_setpc_b64 s[30:31]
13094  %vec0 = call <3 x i64> asm "; def $0", "=s"()
13095  %vec1 = call <3 x i64> asm "; def $0", "=s"()
13096  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 4, i32 4, i32 4>
13097  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
13098  ret void
13099}
13100
; SGPR shuffle of two <3 x i64> inputs into a <4 x i64>, mask <0, 4, 4, 4>.
; Mask indices 0-2 select from %vec0 and 3-5 from %vec1, so lane 0 = %vec0[0]
; and lanes 1-3 = %vec1[1]; both inputs are live here.
; The "{s[8:15]}" constraint on the use asm places the result in s[8:15].
13101define void @s_shuffle_v4i64_v3i64__0_4_4_4() {
13102; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
13103; GFX900:       ; %bb.0:
13104; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13105; GFX900-NEXT:    ;;#ASMSTART
13106; GFX900-NEXT:    ; def s[8:13]
13107; GFX900-NEXT:    ;;#ASMEND
13108; GFX900-NEXT:    ;;#ASMSTART
13109; GFX900-NEXT:    ; def s[12:17]
13110; GFX900-NEXT:    ;;#ASMEND
13111; GFX900-NEXT:    s_mov_b32 s10, s14
13112; GFX900-NEXT:    s_mov_b32 s11, s15
13113; GFX900-NEXT:    s_mov_b32 s12, s14
13114; GFX900-NEXT:    s_mov_b32 s13, s15
13115; GFX900-NEXT:    ;;#ASMSTART
13116; GFX900-NEXT:    ; use s[8:15]
13117; GFX900-NEXT:    ;;#ASMEND
13118; GFX900-NEXT:    s_setpc_b64 s[30:31]
13119;
13120; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
13121; GFX90A:       ; %bb.0:
13122; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13123; GFX90A-NEXT:    ;;#ASMSTART
13124; GFX90A-NEXT:    ; def s[8:13]
13125; GFX90A-NEXT:    ;;#ASMEND
13126; GFX90A-NEXT:    ;;#ASMSTART
13127; GFX90A-NEXT:    ; def s[12:17]
13128; GFX90A-NEXT:    ;;#ASMEND
13129; GFX90A-NEXT:    s_mov_b32 s10, s14
13130; GFX90A-NEXT:    s_mov_b32 s11, s15
13131; GFX90A-NEXT:    s_mov_b32 s12, s14
13132; GFX90A-NEXT:    s_mov_b32 s13, s15
13133; GFX90A-NEXT:    ;;#ASMSTART
13134; GFX90A-NEXT:    ; use s[8:15]
13135; GFX90A-NEXT:    ;;#ASMEND
13136; GFX90A-NEXT:    s_setpc_b64 s[30:31]
13137;
13138; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_4_4_4:
13139; GFX940:       ; %bb.0:
13140; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13141; GFX940-NEXT:    ;;#ASMSTART
13142; GFX940-NEXT:    ; def s[8:13]
13143; GFX940-NEXT:    ;;#ASMEND
13144; GFX940-NEXT:    ;;#ASMSTART
13145; GFX940-NEXT:    ; def s[0:5]
13146; GFX940-NEXT:    ;;#ASMEND
13147; GFX940-NEXT:    s_mov_b32 s10, s2
13148; GFX940-NEXT:    s_mov_b32 s11, s3
13149; GFX940-NEXT:    s_mov_b32 s12, s2
13150; GFX940-NEXT:    s_mov_b32 s13, s3
13151; GFX940-NEXT:    s_mov_b32 s14, s2
13152; GFX940-NEXT:    s_mov_b32 s15, s3
13153; GFX940-NEXT:    ;;#ASMSTART
13154; GFX940-NEXT:    ; use s[8:15]
13155; GFX940-NEXT:    ;;#ASMEND
13156; GFX940-NEXT:    s_setpc_b64 s[30:31]
13157  %vec0 = call <3 x i64> asm "; def $0", "=s"()
13158  %vec1 = call <3 x i64> asm "; def $0", "=s"()
13159  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
13160  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
13161  ret void
13162}
13163
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <1, 4, 4, 4>. Mask indices 0-2 select from the first operand and
; 3-5 from the second, so lane 0 is %lhs[1] and lanes 1-3 all repeat %rhs[1].
; The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__1_4_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[8:13]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s6
; GFX900-NEXT:    s_mov_b32 s9, s7
; GFX900-NEXT:    s_mov_b32 s12, s10
; GFX900-NEXT:    s_mov_b32 s13, s11
; GFX900-NEXT:    s_mov_b32 s14, s10
; GFX900-NEXT:    s_mov_b32 s15, s11
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[8:13]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s6
; GFX90A-NEXT:    s_mov_b32 s9, s7
; GFX90A-NEXT:    s_mov_b32 s12, s10
; GFX90A-NEXT:    s_mov_b32 s13, s11
; GFX90A-NEXT:    s_mov_b32 s14, s10
; GFX90A-NEXT:    s_mov_b32 s15, s11
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_4_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[8:13]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s2
; GFX940-NEXT:    s_mov_b32 s9, s3
; GFX940-NEXT:    s_mov_b32 s12, s10
; GFX940-NEXT:    s_mov_b32 s13, s11
; GFX940-NEXT:    s_mov_b32 s14, s10
; GFX940-NEXT:    s_mov_b32 s15, s11
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 1, i32 4, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13230
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <2, 4, 4, 4>. Mask indices 0-2 select from the first operand and
; 3-5 from the second, so lane 0 is %lhs[2] and lanes 1-3 all repeat %rhs[1].
; The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__2_4_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[12:17]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[8:13]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s16
; GFX900-NEXT:    s_mov_b32 s9, s17
; GFX900-NEXT:    s_mov_b32 s12, s10
; GFX900-NEXT:    s_mov_b32 s13, s11
; GFX900-NEXT:    s_mov_b32 s14, s10
; GFX900-NEXT:    s_mov_b32 s15, s11
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[12:17]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[8:13]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s16
; GFX90A-NEXT:    s_mov_b32 s9, s17
; GFX90A-NEXT:    s_mov_b32 s12, s10
; GFX90A-NEXT:    s_mov_b32 s13, s11
; GFX90A-NEXT:    s_mov_b32 s14, s10
; GFX90A-NEXT:    s_mov_b32 s15, s11
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_4_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[8:13]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s4
; GFX940-NEXT:    s_mov_b32 s9, s5
; GFX940-NEXT:    s_mov_b32 s12, s10
; GFX940-NEXT:    s_mov_b32 s13, s11
; GFX940-NEXT:    s_mov_b32 s14, s10
; GFX940-NEXT:    s_mov_b32 s15, s11
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 2, i32 4, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13297
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <3, 4, 4, 4>. Every index is >= 3, so only the second operand is
; read: lane 0 is %rhs[0] and lanes 1-3 all repeat %rhs[1]. %lhs is never
; selected, and the expected output contains a single def. The result is
; handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__3_4_4_4() {
; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_4_4_4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s[8:13]
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s12, s10
; GFX9-NEXT:    s_mov_b32 s13, s11
; GFX9-NEXT:    s_mov_b32 s14, s10
; GFX9-NEXT:    s_mov_b32 s15, s11
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s[8:15]
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 3, i32 4, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13319
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <4, 4, 4, 4>: all four lanes repeat %rhs[1] (index 4 = 3 + 1).
; %lhs is never selected, and the expected output contains a single def.
; The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__4_4_4_4() {
; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_4_4_4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s[8:13]
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s8, s10
; GFX9-NEXT:    s_mov_b32 s9, s11
; GFX9-NEXT:    s_mov_b32 s12, s10
; GFX9-NEXT:    s_mov_b32 s13, s11
; GFX9-NEXT:    s_mov_b32 s14, s10
; GFX9-NEXT:    s_mov_b32 s15, s11
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s[8:15]
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13343
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 4, 4, 4>. Every index is >= 3, so only the second operand is
; read: lane 0 is %rhs[2] and lanes 1-3 all repeat %rhs[1]. %lhs is never
; selected, and the expected output contains a single def. The result is
; handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_4_4_4() {
; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_4_4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; def s[8:13]
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_mov_b32 s8, s12
; GFX9-NEXT:    s_mov_b32 s9, s13
; GFX9-NEXT:    s_mov_b32 s12, s10
; GFX9-NEXT:    s_mov_b32 s13, s11
; GFX9-NEXT:    s_mov_b32 s14, s10
; GFX9-NEXT:    s_mov_b32 s15, s11
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s[8:15]
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 4, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13367
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, poison, 4, 4>: lane 0 is %rhs[2], lane 1 is left unspecified,
; and lanes 2-3 repeat %rhs[1]. %lhs is never selected, and the expected
; output contains a single def. The result is handed to inline asm pinned to
; s[8:15].
define void @s_shuffle_v4i64_v3i64__5_u_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s12, s6
; GFX900-NEXT:    s_mov_b32 s13, s7
; GFX900-NEXT:    s_mov_b32 s14, s6
; GFX900-NEXT:    s_mov_b32 s15, s7
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s12, s6
; GFX90A-NEXT:    s_mov_b32 s13, s7
; GFX90A-NEXT:    s_mov_b32 s14, s6
; GFX90A-NEXT:    s_mov_b32 s15, s7
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_u_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s4
; GFX940-NEXT:    s_mov_b32 s9, s5
; GFX940-NEXT:    s_mov_b32 s12, s2
; GFX940-NEXT:    s_mov_b32 s13, s3
; GFX940-NEXT:    s_mov_b32 s14, s2
; GFX940-NEXT:    s_mov_b32 s15, s3
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 poison, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13421
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 0, 4, 4>. Mask indices 0-2 select from the first operand and
; 3-5 from the second, so lane 0 is %rhs[2], lane 1 is %lhs[0], and lanes 2-3
; repeat %rhs[1]. The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_0_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[12:17]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s16
; GFX900-NEXT:    s_mov_b32 s9, s17
; GFX900-NEXT:    s_mov_b32 s10, s4
; GFX900-NEXT:    s_mov_b32 s11, s5
; GFX900-NEXT:    s_mov_b32 s12, s14
; GFX900-NEXT:    s_mov_b32 s13, s15
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[12:17]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s16
; GFX90A-NEXT:    s_mov_b32 s9, s17
; GFX90A-NEXT:    s_mov_b32 s10, s4
; GFX90A-NEXT:    s_mov_b32 s11, s5
; GFX90A-NEXT:    s_mov_b32 s12, s14
; GFX90A-NEXT:    s_mov_b32 s13, s15
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s10, s0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[4:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s11, s1
; GFX940-NEXT:    s_mov_b32 s12, s6
; GFX940-NEXT:    s_mov_b32 s13, s7
; GFX940-NEXT:    s_mov_b32 s14, s6
; GFX940-NEXT:    s_mov_b32 s15, s7
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 0, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13488
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 1, 4, 4>. Mask indices 0-2 select from the first operand and
; 3-5 from the second, so lane 0 is %rhs[2], lane 1 is %lhs[1], and lanes 2-3
; repeat %rhs[1]. The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_1_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[8:13]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s12, s6
; GFX900-NEXT:    s_mov_b32 s13, s7
; GFX900-NEXT:    s_mov_b32 s14, s6
; GFX900-NEXT:    s_mov_b32 s15, s7
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[8:13]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s12, s6
; GFX90A-NEXT:    s_mov_b32 s13, s7
; GFX90A-NEXT:    s_mov_b32 s14, s6
; GFX90A-NEXT:    s_mov_b32 s15, s7
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[8:13]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s4
; GFX940-NEXT:    s_mov_b32 s9, s5
; GFX940-NEXT:    s_mov_b32 s12, s2
; GFX940-NEXT:    s_mov_b32 s13, s3
; GFX940-NEXT:    s_mov_b32 s14, s2
; GFX940-NEXT:    s_mov_b32 s15, s3
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 1, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13551
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 2, 4, 4>. Mask indices 0-2 select from the first operand and
; 3-5 from the second, so lane 0 is %rhs[2], lane 1 is %lhs[2], and lanes 2-3
; repeat %rhs[1]. The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_2_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[8:13]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s10, s12
; GFX900-NEXT:    s_mov_b32 s11, s13
; GFX900-NEXT:    s_mov_b32 s12, s6
; GFX900-NEXT:    s_mov_b32 s13, s7
; GFX900-NEXT:    s_mov_b32 s14, s6
; GFX900-NEXT:    s_mov_b32 s15, s7
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[8:13]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s10, s12
; GFX90A-NEXT:    s_mov_b32 s11, s13
; GFX90A-NEXT:    s_mov_b32 s12, s6
; GFX90A-NEXT:    s_mov_b32 s13, s7
; GFX90A-NEXT:    s_mov_b32 s14, s6
; GFX90A-NEXT:    s_mov_b32 s15, s7
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[12:17]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s16
; GFX940-NEXT:    s_mov_b32 s9, s17
; GFX940-NEXT:    s_mov_b32 s10, s4
; GFX940-NEXT:    s_mov_b32 s11, s5
; GFX940-NEXT:    s_mov_b32 s12, s14
; GFX940-NEXT:    s_mov_b32 s13, s15
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 2, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13618
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 3, 4, 4>. Every index is >= 3, so only the second operand is
; read: lane 0 is %rhs[2], lane 1 is %rhs[0], and lanes 2-3 repeat %rhs[1].
; %lhs is never selected, and the expected output contains a single def.
; The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_3_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s10, s4
; GFX900-NEXT:    s_mov_b32 s11, s5
; GFX900-NEXT:    s_mov_b32 s12, s6
; GFX900-NEXT:    s_mov_b32 s13, s7
; GFX900-NEXT:    s_mov_b32 s14, s6
; GFX900-NEXT:    s_mov_b32 s15, s7
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s10, s4
; GFX90A-NEXT:    s_mov_b32 s11, s5
; GFX90A-NEXT:    s_mov_b32 s12, s6
; GFX90A-NEXT:    s_mov_b32 s13, s7
; GFX90A-NEXT:    s_mov_b32 s14, s6
; GFX90A-NEXT:    s_mov_b32 s15, s7
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s4
; GFX940-NEXT:    s_mov_b32 s9, s5
; GFX940-NEXT:    s_mov_b32 s10, s0
; GFX940-NEXT:    s_mov_b32 s11, s1
; GFX940-NEXT:    s_mov_b32 s12, s2
; GFX940-NEXT:    s_mov_b32 s13, s3
; GFX940-NEXT:    s_mov_b32 s14, s2
; GFX940-NEXT:    s_mov_b32 s15, s3
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 3, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13678
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 5, 4, 4>. Every index is >= 3, so only the second operand is
; read: lanes 0-1 repeat %rhs[2] and lanes 2-3 repeat %rhs[1]. %lhs is never
; selected, and the expected output contains a single def. The result is
; handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_5_4_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[12:17]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s16
; GFX900-NEXT:    s_mov_b32 s9, s17
; GFX900-NEXT:    s_mov_b32 s10, s16
; GFX900-NEXT:    s_mov_b32 s11, s17
; GFX900-NEXT:    s_mov_b32 s12, s14
; GFX900-NEXT:    s_mov_b32 s13, s15
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[12:17]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s16
; GFX90A-NEXT:    s_mov_b32 s9, s17
; GFX90A-NEXT:    s_mov_b32 s10, s16
; GFX90A-NEXT:    s_mov_b32 s11, s17
; GFX90A-NEXT:    s_mov_b32 s12, s14
; GFX90A-NEXT:    s_mov_b32 s13, s15
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s4
; GFX940-NEXT:    s_mov_b32 s9, s5
; GFX940-NEXT:    s_mov_b32 s10, s4
; GFX940-NEXT:    s_mov_b32 s11, s5
; GFX940-NEXT:    s_mov_b32 s12, s2
; GFX940-NEXT:    s_mov_b32 s13, s3
; GFX940-NEXT:    s_mov_b32 s14, s2
; GFX940-NEXT:    s_mov_b32 s15, s3
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 5, i32 4, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13738
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 5, poison, 4>: lanes 0-1 repeat %rhs[2], lane 2 is left
; unspecified, and lane 3 is %rhs[1]. %lhs is never selected, and the
; expected output contains a single def. The result is handed to inline asm
; pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_5_u_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[12:17]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s16
; GFX900-NEXT:    s_mov_b32 s9, s17
; GFX900-NEXT:    s_mov_b32 s10, s16
; GFX900-NEXT:    s_mov_b32 s11, s17
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[12:17]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s16
; GFX90A-NEXT:    s_mov_b32 s9, s17
; GFX90A-NEXT:    s_mov_b32 s10, s16
; GFX90A-NEXT:    s_mov_b32 s11, s17
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s4
; GFX940-NEXT:    s_mov_b32 s9, s5
; GFX940-NEXT:    s_mov_b32 s10, s4
; GFX940-NEXT:    s_mov_b32 s11, s5
; GFX940-NEXT:    s_mov_b32 s14, s2
; GFX940-NEXT:    s_mov_b32 s15, s3
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 5, i32 poison, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13792
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 5, 0, 4>. Mask indices 0-2 select from the first operand and
; 3-5 from the second, so lanes 0-1 repeat %rhs[2], lane 2 is %lhs[0], and
; lane 3 is %rhs[1]. The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_5_0_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[12:17]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s16
; GFX900-NEXT:    s_mov_b32 s9, s17
; GFX900-NEXT:    s_mov_b32 s10, s16
; GFX900-NEXT:    s_mov_b32 s11, s17
; GFX900-NEXT:    s_mov_b32 s12, s4
; GFX900-NEXT:    s_mov_b32 s13, s5
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[12:17]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s16
; GFX90A-NEXT:    s_mov_b32 s9, s17
; GFX90A-NEXT:    s_mov_b32 s10, s16
; GFX90A-NEXT:    s_mov_b32 s11, s17
; GFX90A-NEXT:    s_mov_b32 s12, s4
; GFX90A-NEXT:    s_mov_b32 s13, s5
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[12:17]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s16
; GFX940-NEXT:    s_mov_b32 s9, s17
; GFX940-NEXT:    s_mov_b32 s10, s16
; GFX940-NEXT:    s_mov_b32 s11, s17
; GFX940-NEXT:    s_mov_b32 s12, s0
; GFX940-NEXT:    s_mov_b32 s13, s1
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 5, i32 0, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13859
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 5, 1, 4>. Mask indices 0-2 select from the first operand and
; 3-5 from the second, so lanes 0-1 repeat %rhs[2], lane 2 is %lhs[1], and
; lane 3 is %rhs[1]. The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_5_1_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[12:17]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s16
; GFX900-NEXT:    s_mov_b32 s9, s17
; GFX900-NEXT:    s_mov_b32 s10, s16
; GFX900-NEXT:    s_mov_b32 s11, s17
; GFX900-NEXT:    s_mov_b32 s12, s6
; GFX900-NEXT:    s_mov_b32 s13, s7
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[12:17]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s16
; GFX90A-NEXT:    s_mov_b32 s9, s17
; GFX90A-NEXT:    s_mov_b32 s10, s16
; GFX90A-NEXT:    s_mov_b32 s11, s17
; GFX90A-NEXT:    s_mov_b32 s12, s6
; GFX90A-NEXT:    s_mov_b32 s13, s7
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[12:17]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s16
; GFX940-NEXT:    s_mov_b32 s9, s17
; GFX940-NEXT:    s_mov_b32 s10, s16
; GFX940-NEXT:    s_mov_b32 s11, s17
; GFX940-NEXT:    s_mov_b32 s12, s2
; GFX940-NEXT:    s_mov_b32 s13, s3
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 5, i32 1, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13926
; Scalar-register shuffle of two <3 x i64> inline-asm values into <4 x i64>
; with mask <5, 5, 2, 4>. Mask indices 0-2 select from the first operand and
; 3-5 from the second, so lanes 0-1 repeat %rhs[2], lane 2 is %lhs[2], and
; lane 3 is %rhs[1]. The result is handed to inline asm pinned to s[8:15].
define void @s_shuffle_v4i64_v3i64__5_5_2_4() {
; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[8:13]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[16:21]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s20
; GFX900-NEXT:    s_mov_b32 s9, s21
; GFX900-NEXT:    s_mov_b32 s10, s20
; GFX900-NEXT:    s_mov_b32 s11, s21
; GFX900-NEXT:    s_mov_b32 s14, s18
; GFX900-NEXT:    s_mov_b32 s15, s19
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s[8:15]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[8:13]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[16:21]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s20
; GFX90A-NEXT:    s_mov_b32 s9, s21
; GFX90A-NEXT:    s_mov_b32 s10, s20
; GFX90A-NEXT:    s_mov_b32 s11, s21
; GFX90A-NEXT:    s_mov_b32 s14, s18
; GFX90A-NEXT:    s_mov_b32 s15, s19
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s[8:15]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[8:13]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:5]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s4
; GFX940-NEXT:    s_mov_b32 s9, s5
; GFX940-NEXT:    s_mov_b32 s10, s4
; GFX940-NEXT:    s_mov_b32 s11, s5
; GFX940-NEXT:    s_mov_b32 s14, s2
; GFX940-NEXT:    s_mov_b32 s15, s3
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s[8:15]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  ; Opaque producers: each inline asm yields one <3 x i64> via the "=s" constraint.
  %lhs = call <3 x i64> asm "; def $0", "=s"()
  %rhs = call <3 x i64> asm "; def $0", "=s"()
  %perm = shufflevector <3 x i64> %lhs, <3 x i64> %rhs, <4 x i32> <i32 5, i32 5, i32 2, i32 4>
  ; Side-effecting consumer keeps the shuffled value alive in s[8:15].
  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %perm)
  ret void
}
13993
; SGPR shuffle test ("=s" inline-asm defs). Mask <5, 5, 3, 4> selects
; %vec1[2], %vec1[2], %vec1[0], %vec1[1]; every index is >= 3, so %vec0 is
; not referenced by the mask. The "{s[8:15]}" use constraint pins the result
; to s8..s15.
13994define void @s_shuffle_v4i64_v3i64__5_5_3_4() {
13995; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
13996; GFX900:       ; %bb.0:
13997; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13998; GFX900-NEXT:    ;;#ASMSTART
13999; GFX900-NEXT:    ; def s[12:17]
14000; GFX900-NEXT:    ;;#ASMEND
14001; GFX900-NEXT:    s_mov_b32 s8, s16
14002; GFX900-NEXT:    s_mov_b32 s9, s17
14003; GFX900-NEXT:    s_mov_b32 s10, s16
14004; GFX900-NEXT:    s_mov_b32 s11, s17
14005; GFX900-NEXT:    ;;#ASMSTART
14006; GFX900-NEXT:    ; use s[8:15]
14007; GFX900-NEXT:    ;;#ASMEND
14008; GFX900-NEXT:    s_setpc_b64 s[30:31]
14009;
14010; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
14011; GFX90A:       ; %bb.0:
14012; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14013; GFX90A-NEXT:    ;;#ASMSTART
14014; GFX90A-NEXT:    ; def s[12:17]
14015; GFX90A-NEXT:    ;;#ASMEND
14016; GFX90A-NEXT:    s_mov_b32 s8, s16
14017; GFX90A-NEXT:    s_mov_b32 s9, s17
14018; GFX90A-NEXT:    s_mov_b32 s10, s16
14019; GFX90A-NEXT:    s_mov_b32 s11, s17
14020; GFX90A-NEXT:    ;;#ASMSTART
14021; GFX90A-NEXT:    ; use s[8:15]
14022; GFX90A-NEXT:    ;;#ASMEND
14023; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14024;
14025; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_4:
14026; GFX940:       ; %bb.0:
14027; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14028; GFX940-NEXT:    ;;#ASMSTART
14029; GFX940-NEXT:    ; def s[0:5]
14030; GFX940-NEXT:    ;;#ASMEND
14031; GFX940-NEXT:    s_mov_b32 s8, s4
14032; GFX940-NEXT:    s_mov_b32 s9, s5
14033; GFX940-NEXT:    s_mov_b32 s10, s4
14034; GFX940-NEXT:    s_mov_b32 s11, s5
14035; GFX940-NEXT:    s_mov_b32 s12, s0
14036; GFX940-NEXT:    s_mov_b32 s13, s1
14037; GFX940-NEXT:    s_mov_b32 s14, s2
14038; GFX940-NEXT:    s_mov_b32 s15, s3
14039; GFX940-NEXT:    ;;#ASMSTART
14040; GFX940-NEXT:    ; use s[8:15]
14041; GFX940-NEXT:    ;;#ASMEND
14042; GFX940-NEXT:    s_setpc_b64 s[30:31]
14043  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14044  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14045  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 4>
14046  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14047  ret void
14048}
14049
; SGPR shuffle test ("=s" inline-asm defs). Mask <poison, 5, 5, 5>: lane 0 is
; poison (no particular value required), lanes 1-3 all select %vec1[2].
; %vec0 is not referenced by the mask. The "{s[8:15]}" use constraint pins
; the result to s8..s15.
14050define void @s_shuffle_v4i64_v3i64__u_5_5_5() {
14051; GFX9-LABEL: s_shuffle_v4i64_v3i64__u_5_5_5:
14052; GFX9:       ; %bb.0:
14053; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14054; GFX9-NEXT:    ;;#ASMSTART
14055; GFX9-NEXT:    ; def s[8:13]
14056; GFX9-NEXT:    ;;#ASMEND
14057; GFX9-NEXT:    s_mov_b32 s10, s12
14058; GFX9-NEXT:    s_mov_b32 s11, s13
14059; GFX9-NEXT:    s_mov_b32 s14, s12
14060; GFX9-NEXT:    s_mov_b32 s15, s13
14061; GFX9-NEXT:    ;;#ASMSTART
14062; GFX9-NEXT:    ; use s[8:15]
14063; GFX9-NEXT:    ;;#ASMEND
14064; GFX9-NEXT:    s_setpc_b64 s[30:31]
14065  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14066  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14067  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 poison, i32 5, i32 5, i32 5>
14068  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14069  ret void
14070}
14071
; SGPR shuffle test ("=s" inline-asm defs). Mask <0, 5, 5, 5> selects
; %vec0[0], %vec1[2], %vec1[2], %vec1[2]; mask indices 0-2 address %vec0 and
; 3-5 address %vec1. The "{s[8:15]}" use constraint pins the result to s8..s15.
14072define void @s_shuffle_v4i64_v3i64__0_5_5_5() {
14073; GFX900-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
14074; GFX900:       ; %bb.0:
14075; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14076; GFX900-NEXT:    ;;#ASMSTART
14077; GFX900-NEXT:    ; def s[8:13]
14078; GFX900-NEXT:    ;;#ASMEND
14079; GFX900-NEXT:    ;;#ASMSTART
14080; GFX900-NEXT:    ; def s[12:17]
14081; GFX900-NEXT:    ;;#ASMEND
14082; GFX900-NEXT:    s_mov_b32 s10, s16
14083; GFX900-NEXT:    s_mov_b32 s11, s17
14084; GFX900-NEXT:    s_mov_b32 s12, s16
14085; GFX900-NEXT:    s_mov_b32 s13, s17
14086; GFX900-NEXT:    s_mov_b32 s14, s16
14087; GFX900-NEXT:    s_mov_b32 s15, s17
14088; GFX900-NEXT:    ;;#ASMSTART
14089; GFX900-NEXT:    ; use s[8:15]
14090; GFX900-NEXT:    ;;#ASMEND
14091; GFX900-NEXT:    s_setpc_b64 s[30:31]
14092;
14093; GFX90A-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
14094; GFX90A:       ; %bb.0:
14095; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14096; GFX90A-NEXT:    ;;#ASMSTART
14097; GFX90A-NEXT:    ; def s[8:13]
14098; GFX90A-NEXT:    ;;#ASMEND
14099; GFX90A-NEXT:    ;;#ASMSTART
14100; GFX90A-NEXT:    ; def s[12:17]
14101; GFX90A-NEXT:    ;;#ASMEND
14102; GFX90A-NEXT:    s_mov_b32 s10, s16
14103; GFX90A-NEXT:    s_mov_b32 s11, s17
14104; GFX90A-NEXT:    s_mov_b32 s12, s16
14105; GFX90A-NEXT:    s_mov_b32 s13, s17
14106; GFX90A-NEXT:    s_mov_b32 s14, s16
14107; GFX90A-NEXT:    s_mov_b32 s15, s17
14108; GFX90A-NEXT:    ;;#ASMSTART
14109; GFX90A-NEXT:    ; use s[8:15]
14110; GFX90A-NEXT:    ;;#ASMEND
14111; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14112;
14113; GFX940-LABEL: s_shuffle_v4i64_v3i64__0_5_5_5:
14114; GFX940:       ; %bb.0:
14115; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14116; GFX940-NEXT:    ;;#ASMSTART
14117; GFX940-NEXT:    ; def s[8:13]
14118; GFX940-NEXT:    ;;#ASMEND
14119; GFX940-NEXT:    ;;#ASMSTART
14120; GFX940-NEXT:    ; def s[0:5]
14121; GFX940-NEXT:    ;;#ASMEND
14122; GFX940-NEXT:    s_mov_b32 s10, s4
14123; GFX940-NEXT:    s_mov_b32 s11, s5
14124; GFX940-NEXT:    s_mov_b32 s12, s4
14125; GFX940-NEXT:    s_mov_b32 s13, s5
14126; GFX940-NEXT:    s_mov_b32 s14, s4
14127; GFX940-NEXT:    s_mov_b32 s15, s5
14128; GFX940-NEXT:    ;;#ASMSTART
14129; GFX940-NEXT:    ; use s[8:15]
14130; GFX940-NEXT:    ;;#ASMEND
14131; GFX940-NEXT:    s_setpc_b64 s[30:31]
14132  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14133  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14134  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 0, i32 5, i32 5, i32 5>
14135  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14136  ret void
14137}
14138
; SGPR shuffle test ("=s" inline-asm defs). Mask <1, 5, 5, 5> selects
; %vec0[1], %vec1[2], %vec1[2], %vec1[2]; mask indices 0-2 address %vec0 and
; 3-5 address %vec1. The "{s[8:15]}" use constraint pins the result to s8..s15.
14139define void @s_shuffle_v4i64_v3i64__1_5_5_5() {
14140; GFX900-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
14141; GFX900:       ; %bb.0:
14142; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14143; GFX900-NEXT:    ;;#ASMSTART
14144; GFX900-NEXT:    ; def s[4:9]
14145; GFX900-NEXT:    ;;#ASMEND
14146; GFX900-NEXT:    ;;#ASMSTART
14147; GFX900-NEXT:    ; def s[8:13]
14148; GFX900-NEXT:    ;;#ASMEND
14149; GFX900-NEXT:    s_mov_b32 s8, s6
14150; GFX900-NEXT:    s_mov_b32 s9, s7
14151; GFX900-NEXT:    s_mov_b32 s10, s12
14152; GFX900-NEXT:    s_mov_b32 s11, s13
14153; GFX900-NEXT:    s_mov_b32 s14, s12
14154; GFX900-NEXT:    s_mov_b32 s15, s13
14155; GFX900-NEXT:    ;;#ASMSTART
14156; GFX900-NEXT:    ; use s[8:15]
14157; GFX900-NEXT:    ;;#ASMEND
14158; GFX900-NEXT:    s_setpc_b64 s[30:31]
14159;
14160; GFX90A-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
14161; GFX90A:       ; %bb.0:
14162; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14163; GFX90A-NEXT:    ;;#ASMSTART
14164; GFX90A-NEXT:    ; def s[4:9]
14165; GFX90A-NEXT:    ;;#ASMEND
14166; GFX90A-NEXT:    ;;#ASMSTART
14167; GFX90A-NEXT:    ; def s[8:13]
14168; GFX90A-NEXT:    ;;#ASMEND
14169; GFX90A-NEXT:    s_mov_b32 s8, s6
14170; GFX90A-NEXT:    s_mov_b32 s9, s7
14171; GFX90A-NEXT:    s_mov_b32 s10, s12
14172; GFX90A-NEXT:    s_mov_b32 s11, s13
14173; GFX90A-NEXT:    s_mov_b32 s14, s12
14174; GFX90A-NEXT:    s_mov_b32 s15, s13
14175; GFX90A-NEXT:    ;;#ASMSTART
14176; GFX90A-NEXT:    ; use s[8:15]
14177; GFX90A-NEXT:    ;;#ASMEND
14178; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14179;
14180; GFX940-LABEL: s_shuffle_v4i64_v3i64__1_5_5_5:
14181; GFX940:       ; %bb.0:
14182; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14183; GFX940-NEXT:    ;;#ASMSTART
14184; GFX940-NEXT:    ; def s[8:13]
14185; GFX940-NEXT:    ;;#ASMEND
14186; GFX940-NEXT:    ;;#ASMSTART
14187; GFX940-NEXT:    ; def s[0:5]
14188; GFX940-NEXT:    ;;#ASMEND
14189; GFX940-NEXT:    s_mov_b32 s8, s2
14190; GFX940-NEXT:    s_mov_b32 s9, s3
14191; GFX940-NEXT:    s_mov_b32 s10, s12
14192; GFX940-NEXT:    s_mov_b32 s11, s13
14193; GFX940-NEXT:    s_mov_b32 s14, s12
14194; GFX940-NEXT:    s_mov_b32 s15, s13
14195; GFX940-NEXT:    ;;#ASMSTART
14196; GFX940-NEXT:    ; use s[8:15]
14197; GFX940-NEXT:    ;;#ASMEND
14198; GFX940-NEXT:    s_setpc_b64 s[30:31]
14199  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14200  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14201  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 1, i32 5, i32 5, i32 5>
14202  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14203  ret void
14204}
14205
; SGPR shuffle test ("=s" inline-asm defs). Mask <2, 5, 5, 5> selects
; %vec0[2], %vec1[2], %vec1[2], %vec1[2]; mask indices 0-2 address %vec0 and
; 3-5 address %vec1. The "{s[8:15]}" use constraint pins the result to s8..s15.
14206define void @s_shuffle_v4i64_v3i64__2_5_5_5() {
14207; GFX900-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
14208; GFX900:       ; %bb.0:
14209; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14210; GFX900-NEXT:    ;;#ASMSTART
14211; GFX900-NEXT:    ; def s[12:17]
14212; GFX900-NEXT:    ;;#ASMEND
14213; GFX900-NEXT:    ;;#ASMSTART
14214; GFX900-NEXT:    ; def s[8:13]
14215; GFX900-NEXT:    ;;#ASMEND
14216; GFX900-NEXT:    s_mov_b32 s8, s16
14217; GFX900-NEXT:    s_mov_b32 s9, s17
14218; GFX900-NEXT:    s_mov_b32 s10, s12
14219; GFX900-NEXT:    s_mov_b32 s11, s13
14220; GFX900-NEXT:    s_mov_b32 s14, s12
14221; GFX900-NEXT:    s_mov_b32 s15, s13
14222; GFX900-NEXT:    ;;#ASMSTART
14223; GFX900-NEXT:    ; use s[8:15]
14224; GFX900-NEXT:    ;;#ASMEND
14225; GFX900-NEXT:    s_setpc_b64 s[30:31]
14226;
14227; GFX90A-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
14228; GFX90A:       ; %bb.0:
14229; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14230; GFX90A-NEXT:    ;;#ASMSTART
14231; GFX90A-NEXT:    ; def s[12:17]
14232; GFX90A-NEXT:    ;;#ASMEND
14233; GFX90A-NEXT:    ;;#ASMSTART
14234; GFX90A-NEXT:    ; def s[8:13]
14235; GFX90A-NEXT:    ;;#ASMEND
14236; GFX90A-NEXT:    s_mov_b32 s8, s16
14237; GFX90A-NEXT:    s_mov_b32 s9, s17
14238; GFX90A-NEXT:    s_mov_b32 s10, s12
14239; GFX90A-NEXT:    s_mov_b32 s11, s13
14240; GFX90A-NEXT:    s_mov_b32 s14, s12
14241; GFX90A-NEXT:    s_mov_b32 s15, s13
14242; GFX90A-NEXT:    ;;#ASMSTART
14243; GFX90A-NEXT:    ; use s[8:15]
14244; GFX90A-NEXT:    ;;#ASMEND
14245; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14246;
14247; GFX940-LABEL: s_shuffle_v4i64_v3i64__2_5_5_5:
14248; GFX940:       ; %bb.0:
14249; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14250; GFX940-NEXT:    ;;#ASMSTART
14251; GFX940-NEXT:    ; def s[8:13]
14252; GFX940-NEXT:    ;;#ASMEND
14253; GFX940-NEXT:    ;;#ASMSTART
14254; GFX940-NEXT:    ; def s[0:5]
14255; GFX940-NEXT:    ;;#ASMEND
14256; GFX940-NEXT:    s_mov_b32 s8, s4
14257; GFX940-NEXT:    s_mov_b32 s9, s5
14258; GFX940-NEXT:    s_mov_b32 s10, s12
14259; GFX940-NEXT:    s_mov_b32 s11, s13
14260; GFX940-NEXT:    s_mov_b32 s14, s12
14261; GFX940-NEXT:    s_mov_b32 s15, s13
14262; GFX940-NEXT:    ;;#ASMSTART
14263; GFX940-NEXT:    ; use s[8:15]
14264; GFX940-NEXT:    ;;#ASMEND
14265; GFX940-NEXT:    s_setpc_b64 s[30:31]
14266  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14267  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14268  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 2, i32 5, i32 5, i32 5>
14269  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14270  ret void
14271}
14272
; SGPR shuffle test ("=s" inline-asm defs). Mask <3, 5, 5, 5> selects
; %vec1[0], %vec1[2], %vec1[2], %vec1[2]; every index is >= 3, so %vec0 is
; not referenced by the mask. The "{s[8:15]}" use constraint pins the result
; to s8..s15.
14273define void @s_shuffle_v4i64_v3i64__3_5_5_5() {
14274; GFX9-LABEL: s_shuffle_v4i64_v3i64__3_5_5_5:
14275; GFX9:       ; %bb.0:
14276; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14277; GFX9-NEXT:    ;;#ASMSTART
14278; GFX9-NEXT:    ; def s[8:13]
14279; GFX9-NEXT:    ;;#ASMEND
14280; GFX9-NEXT:    s_mov_b32 s10, s12
14281; GFX9-NEXT:    s_mov_b32 s11, s13
14282; GFX9-NEXT:    s_mov_b32 s14, s12
14283; GFX9-NEXT:    s_mov_b32 s15, s13
14284; GFX9-NEXT:    ;;#ASMSTART
14285; GFX9-NEXT:    ; use s[8:15]
14286; GFX9-NEXT:    ;;#ASMEND
14287; GFX9-NEXT:    s_setpc_b64 s[30:31]
14288  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14289  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14290  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 3, i32 5, i32 5, i32 5>
14291  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14292  ret void
14293}
14294
; SGPR shuffle test ("=s" inline-asm defs). Mask <4, 5, 5, 5> selects
; %vec1[1], %vec1[2], %vec1[2], %vec1[2]; every index is >= 3, so %vec0 is
; not referenced by the mask. The "{s[8:15]}" use constraint pins the result
; to s8..s15.
14295define void @s_shuffle_v4i64_v3i64__4_5_5_5() {
14296; GFX9-LABEL: s_shuffle_v4i64_v3i64__4_5_5_5:
14297; GFX9:       ; %bb.0:
14298; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14299; GFX9-NEXT:    ;;#ASMSTART
14300; GFX9-NEXT:    ; def s[8:13]
14301; GFX9-NEXT:    ;;#ASMEND
14302; GFX9-NEXT:    s_mov_b32 s8, s10
14303; GFX9-NEXT:    s_mov_b32 s9, s11
14304; GFX9-NEXT:    s_mov_b32 s10, s12
14305; GFX9-NEXT:    s_mov_b32 s11, s13
14306; GFX9-NEXT:    s_mov_b32 s14, s12
14307; GFX9-NEXT:    s_mov_b32 s15, s13
14308; GFX9-NEXT:    ;;#ASMSTART
14309; GFX9-NEXT:    ; use s[8:15]
14310; GFX9-NEXT:    ;;#ASMEND
14311; GFX9-NEXT:    s_setpc_b64 s[30:31]
14312  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14313  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14314  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 4, i32 5, i32 5, i32 5>
14315  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14316  ret void
14317}
14318
; SGPR shuffle test ("=s" inline-asm defs). Mask <5, poison, 5, 5>: lane 1 is
; poison (no particular value required), lanes 0, 2 and 3 select %vec1[2].
; %vec0 is not referenced by the mask. The "{s[8:15]}" use constraint pins
; the result to s8..s15.
14319define void @s_shuffle_v4i64_v3i64__5_u_5_5() {
14320; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_u_5_5:
14321; GFX9:       ; %bb.0:
14322; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14323; GFX9-NEXT:    ;;#ASMSTART
14324; GFX9-NEXT:    ; def s[8:13]
14325; GFX9-NEXT:    ;;#ASMEND
14326; GFX9-NEXT:    s_mov_b32 s8, s12
14327; GFX9-NEXT:    s_mov_b32 s9, s13
14328; GFX9-NEXT:    s_mov_b32 s14, s12
14329; GFX9-NEXT:    s_mov_b32 s15, s13
14330; GFX9-NEXT:    ;;#ASMSTART
14331; GFX9-NEXT:    ; use s[8:15]
14332; GFX9-NEXT:    ;;#ASMEND
14333; GFX9-NEXT:    s_setpc_b64 s[30:31]
14334  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14335  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14336  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 poison, i32 5, i32 5>
14337  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14338  ret void
14339}
14340
; SGPR shuffle test ("=s" inline-asm defs). Mask <5, 0, 5, 5> selects
; %vec1[2], %vec0[0], %vec1[2], %vec1[2]; mask indices 0-2 address %vec0 and
; 3-5 address %vec1. The "{s[8:15]}" use constraint pins the result to s8..s15.
14341define void @s_shuffle_v4i64_v3i64__5_0_5_5() {
14342; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
14343; GFX900:       ; %bb.0:
14344; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14345; GFX900-NEXT:    ;;#ASMSTART
14346; GFX900-NEXT:    ; def s[4:9]
14347; GFX900-NEXT:    ;;#ASMEND
14348; GFX900-NEXT:    ;;#ASMSTART
14349; GFX900-NEXT:    ; def s[8:13]
14350; GFX900-NEXT:    ;;#ASMEND
14351; GFX900-NEXT:    s_mov_b32 s8, s12
14352; GFX900-NEXT:    s_mov_b32 s9, s13
14353; GFX900-NEXT:    s_mov_b32 s10, s4
14354; GFX900-NEXT:    s_mov_b32 s11, s5
14355; GFX900-NEXT:    s_mov_b32 s14, s12
14356; GFX900-NEXT:    s_mov_b32 s15, s13
14357; GFX900-NEXT:    ;;#ASMSTART
14358; GFX900-NEXT:    ; use s[8:15]
14359; GFX900-NEXT:    ;;#ASMEND
14360; GFX900-NEXT:    s_setpc_b64 s[30:31]
14361;
14362; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
14363; GFX90A:       ; %bb.0:
14364; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14365; GFX90A-NEXT:    ;;#ASMSTART
14366; GFX90A-NEXT:    ; def s[4:9]
14367; GFX90A-NEXT:    ;;#ASMEND
14368; GFX90A-NEXT:    ;;#ASMSTART
14369; GFX90A-NEXT:    ; def s[8:13]
14370; GFX90A-NEXT:    ;;#ASMEND
14371; GFX90A-NEXT:    s_mov_b32 s8, s12
14372; GFX90A-NEXT:    s_mov_b32 s9, s13
14373; GFX90A-NEXT:    s_mov_b32 s10, s4
14374; GFX90A-NEXT:    s_mov_b32 s11, s5
14375; GFX90A-NEXT:    s_mov_b32 s14, s12
14376; GFX90A-NEXT:    s_mov_b32 s15, s13
14377; GFX90A-NEXT:    ;;#ASMSTART
14378; GFX90A-NEXT:    ; use s[8:15]
14379; GFX90A-NEXT:    ;;#ASMEND
14380; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14381;
14382; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_0_5_5:
14383; GFX940:       ; %bb.0:
14384; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14385; GFX940-NEXT:    ;;#ASMSTART
14386; GFX940-NEXT:    ; def s[8:13]
14387; GFX940-NEXT:    ;;#ASMEND
14388; GFX940-NEXT:    ;;#ASMSTART
14389; GFX940-NEXT:    ; def s[0:5]
14390; GFX940-NEXT:    ;;#ASMEND
14391; GFX940-NEXT:    s_mov_b32 s8, s12
14392; GFX940-NEXT:    s_mov_b32 s9, s13
14393; GFX940-NEXT:    s_mov_b32 s10, s0
14394; GFX940-NEXT:    s_mov_b32 s11, s1
14395; GFX940-NEXT:    s_mov_b32 s14, s12
14396; GFX940-NEXT:    s_mov_b32 s15, s13
14397; GFX940-NEXT:    ;;#ASMSTART
14398; GFX940-NEXT:    ; use s[8:15]
14399; GFX940-NEXT:    ;;#ASMEND
14400; GFX940-NEXT:    s_setpc_b64 s[30:31]
14401  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14402  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14403  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 0, i32 5, i32 5>
14404  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14405  ret void
14406}
14407
; SGPR shuffle test ("=s" inline-asm defs). Mask <5, 1, 5, 5> selects
; %vec1[2], %vec0[1], %vec1[2], %vec1[2]; mask indices 0-2 address %vec0 and
; 3-5 address %vec1. The "{s[8:15]}" use constraint pins the result to s8..s15.
14408define void @s_shuffle_v4i64_v3i64__5_1_5_5() {
14409; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
14410; GFX900:       ; %bb.0:
14411; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14412; GFX900-NEXT:    ;;#ASMSTART
14413; GFX900-NEXT:    ; def s[8:13]
14414; GFX900-NEXT:    ;;#ASMEND
14415; GFX900-NEXT:    ;;#ASMSTART
14416; GFX900-NEXT:    ; def s[12:17]
14417; GFX900-NEXT:    ;;#ASMEND
14418; GFX900-NEXT:    s_mov_b32 s8, s16
14419; GFX900-NEXT:    s_mov_b32 s9, s17
14420; GFX900-NEXT:    s_mov_b32 s12, s16
14421; GFX900-NEXT:    s_mov_b32 s13, s17
14422; GFX900-NEXT:    s_mov_b32 s14, s16
14423; GFX900-NEXT:    s_mov_b32 s15, s17
14424; GFX900-NEXT:    ;;#ASMSTART
14425; GFX900-NEXT:    ; use s[8:15]
14426; GFX900-NEXT:    ;;#ASMEND
14427; GFX900-NEXT:    s_setpc_b64 s[30:31]
14428;
14429; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
14430; GFX90A:       ; %bb.0:
14431; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14432; GFX90A-NEXT:    ;;#ASMSTART
14433; GFX90A-NEXT:    ; def s[8:13]
14434; GFX90A-NEXT:    ;;#ASMEND
14435; GFX90A-NEXT:    ;;#ASMSTART
14436; GFX90A-NEXT:    ; def s[12:17]
14437; GFX90A-NEXT:    ;;#ASMEND
14438; GFX90A-NEXT:    s_mov_b32 s8, s16
14439; GFX90A-NEXT:    s_mov_b32 s9, s17
14440; GFX90A-NEXT:    s_mov_b32 s12, s16
14441; GFX90A-NEXT:    s_mov_b32 s13, s17
14442; GFX90A-NEXT:    s_mov_b32 s14, s16
14443; GFX90A-NEXT:    s_mov_b32 s15, s17
14444; GFX90A-NEXT:    ;;#ASMSTART
14445; GFX90A-NEXT:    ; use s[8:15]
14446; GFX90A-NEXT:    ;;#ASMEND
14447; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14448;
14449; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_1_5_5:
14450; GFX940:       ; %bb.0:
14451; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14452; GFX940-NEXT:    ;;#ASMSTART
14453; GFX940-NEXT:    ; def s[8:13]
14454; GFX940-NEXT:    ;;#ASMEND
14455; GFX940-NEXT:    ;;#ASMSTART
14456; GFX940-NEXT:    ; def s[0:5]
14457; GFX940-NEXT:    ;;#ASMEND
14458; GFX940-NEXT:    s_mov_b32 s8, s4
14459; GFX940-NEXT:    s_mov_b32 s9, s5
14460; GFX940-NEXT:    s_mov_b32 s12, s4
14461; GFX940-NEXT:    s_mov_b32 s13, s5
14462; GFX940-NEXT:    s_mov_b32 s14, s4
14463; GFX940-NEXT:    s_mov_b32 s15, s5
14464; GFX940-NEXT:    ;;#ASMSTART
14465; GFX940-NEXT:    ; use s[8:15]
14466; GFX940-NEXT:    ;;#ASMEND
14467; GFX940-NEXT:    s_setpc_b64 s[30:31]
14468  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14469  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14470  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 1, i32 5, i32 5>
14471  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14472  ret void
14473}
14474
; SGPR shuffle test ("=s" inline-asm defs). Mask <5, 2, 5, 5> selects
; %vec1[2], %vec0[2], %vec1[2], %vec1[2]; mask indices 0-2 address %vec0 and
; 3-5 address %vec1. The "{s[8:15]}" use constraint pins the result to s8..s15.
14475define void @s_shuffle_v4i64_v3i64__5_2_5_5() {
14476; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
14477; GFX900:       ; %bb.0:
14478; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14479; GFX900-NEXT:    ;;#ASMSTART
14480; GFX900-NEXT:    ; def s[12:17]
14481; GFX900-NEXT:    ;;#ASMEND
14482; GFX900-NEXT:    ;;#ASMSTART
14483; GFX900-NEXT:    ; def s[8:13]
14484; GFX900-NEXT:    ;;#ASMEND
14485; GFX900-NEXT:    s_mov_b32 s8, s12
14486; GFX900-NEXT:    s_mov_b32 s9, s13
14487; GFX900-NEXT:    s_mov_b32 s10, s16
14488; GFX900-NEXT:    s_mov_b32 s11, s17
14489; GFX900-NEXT:    s_mov_b32 s14, s12
14490; GFX900-NEXT:    s_mov_b32 s15, s13
14491; GFX900-NEXT:    ;;#ASMSTART
14492; GFX900-NEXT:    ; use s[8:15]
14493; GFX900-NEXT:    ;;#ASMEND
14494; GFX900-NEXT:    s_setpc_b64 s[30:31]
14495;
14496; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
14497; GFX90A:       ; %bb.0:
14498; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14499; GFX90A-NEXT:    ;;#ASMSTART
14500; GFX90A-NEXT:    ; def s[12:17]
14501; GFX90A-NEXT:    ;;#ASMEND
14502; GFX90A-NEXT:    ;;#ASMSTART
14503; GFX90A-NEXT:    ; def s[8:13]
14504; GFX90A-NEXT:    ;;#ASMEND
14505; GFX90A-NEXT:    s_mov_b32 s8, s12
14506; GFX90A-NEXT:    s_mov_b32 s9, s13
14507; GFX90A-NEXT:    s_mov_b32 s10, s16
14508; GFX90A-NEXT:    s_mov_b32 s11, s17
14509; GFX90A-NEXT:    s_mov_b32 s14, s12
14510; GFX90A-NEXT:    s_mov_b32 s15, s13
14511; GFX90A-NEXT:    ;;#ASMSTART
14512; GFX90A-NEXT:    ; use s[8:15]
14513; GFX90A-NEXT:    ;;#ASMEND
14514; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14515;
14516; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_2_5_5:
14517; GFX940:       ; %bb.0:
14518; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14519; GFX940-NEXT:    ;;#ASMSTART
14520; GFX940-NEXT:    ; def s[8:13]
14521; GFX940-NEXT:    ;;#ASMEND
14522; GFX940-NEXT:    ;;#ASMSTART
14523; GFX940-NEXT:    ; def s[0:5]
14524; GFX940-NEXT:    ;;#ASMEND
14525; GFX940-NEXT:    s_mov_b32 s8, s12
14526; GFX940-NEXT:    s_mov_b32 s9, s13
14527; GFX940-NEXT:    s_mov_b32 s10, s4
14528; GFX940-NEXT:    s_mov_b32 s11, s5
14529; GFX940-NEXT:    s_mov_b32 s14, s12
14530; GFX940-NEXT:    s_mov_b32 s15, s13
14531; GFX940-NEXT:    ;;#ASMSTART
14532; GFX940-NEXT:    ; use s[8:15]
14533; GFX940-NEXT:    ;;#ASMEND
14534; GFX940-NEXT:    s_setpc_b64 s[30:31]
14535  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14536  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14537  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 2, i32 5, i32 5>
14538  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14539  ret void
14540}
14541
; SGPR shuffle test ("=s" inline-asm defs). Mask <5, 3, 5, 5> selects
; %vec1[2], %vec1[0], %vec1[2], %vec1[2]; every index is >= 3, so %vec0 is
; not referenced by the mask. The "{s[8:15]}" use constraint pins the result
; to s8..s15.
14542define void @s_shuffle_v4i64_v3i64__5_3_5_5() {
14543; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
14544; GFX900:       ; %bb.0:
14545; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14546; GFX900-NEXT:    ;;#ASMSTART
14547; GFX900-NEXT:    ; def s[12:17]
14548; GFX900-NEXT:    ;;#ASMEND
14549; GFX900-NEXT:    s_mov_b32 s8, s16
14550; GFX900-NEXT:    s_mov_b32 s9, s17
14551; GFX900-NEXT:    s_mov_b32 s10, s12
14552; GFX900-NEXT:    s_mov_b32 s11, s13
14553; GFX900-NEXT:    s_mov_b32 s12, s16
14554; GFX900-NEXT:    s_mov_b32 s13, s17
14555; GFX900-NEXT:    s_mov_b32 s14, s16
14556; GFX900-NEXT:    s_mov_b32 s15, s17
14557; GFX900-NEXT:    ;;#ASMSTART
14558; GFX900-NEXT:    ; use s[8:15]
14559; GFX900-NEXT:    ;;#ASMEND
14560; GFX900-NEXT:    s_setpc_b64 s[30:31]
14561;
14562; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
14563; GFX90A:       ; %bb.0:
14564; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14565; GFX90A-NEXT:    ;;#ASMSTART
14566; GFX90A-NEXT:    ; def s[12:17]
14567; GFX90A-NEXT:    ;;#ASMEND
14568; GFX90A-NEXT:    s_mov_b32 s8, s16
14569; GFX90A-NEXT:    s_mov_b32 s9, s17
14570; GFX90A-NEXT:    s_mov_b32 s10, s12
14571; GFX90A-NEXT:    s_mov_b32 s11, s13
14572; GFX90A-NEXT:    s_mov_b32 s12, s16
14573; GFX90A-NEXT:    s_mov_b32 s13, s17
14574; GFX90A-NEXT:    s_mov_b32 s14, s16
14575; GFX90A-NEXT:    s_mov_b32 s15, s17
14576; GFX90A-NEXT:    ;;#ASMSTART
14577; GFX90A-NEXT:    ; use s[8:15]
14578; GFX90A-NEXT:    ;;#ASMEND
14579; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14580;
14581; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_3_5_5:
14582; GFX940:       ; %bb.0:
14583; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14584; GFX940-NEXT:    ;;#ASMSTART
14585; GFX940-NEXT:    ; def s[0:5]
14586; GFX940-NEXT:    ;;#ASMEND
14587; GFX940-NEXT:    s_mov_b32 s8, s4
14588; GFX940-NEXT:    s_mov_b32 s9, s5
14589; GFX940-NEXT:    s_mov_b32 s10, s0
14590; GFX940-NEXT:    s_mov_b32 s11, s1
14591; GFX940-NEXT:    s_mov_b32 s12, s4
14592; GFX940-NEXT:    s_mov_b32 s13, s5
14593; GFX940-NEXT:    s_mov_b32 s14, s4
14594; GFX940-NEXT:    s_mov_b32 s15, s5
14595; GFX940-NEXT:    ;;#ASMSTART
14596; GFX940-NEXT:    ; use s[8:15]
14597; GFX940-NEXT:    ;;#ASMEND
14598; GFX940-NEXT:    s_setpc_b64 s[30:31]
14599  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14600  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14601  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 3, i32 5, i32 5>
14602  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14603  ret void
14604}
14605
; SGPR shuffle test ("=s" inline-asm defs). Mask <5, 4, 5, 5> selects
; %vec1[2], %vec1[1], %vec1[2], %vec1[2]; every index is >= 3, so %vec0 is
; not referenced by the mask. The "{s[8:15]}" use constraint pins the result
; to s8..s15.
14606define void @s_shuffle_v4i64_v3i64__5_4_5_5() {
14607; GFX9-LABEL: s_shuffle_v4i64_v3i64__5_4_5_5:
14608; GFX9:       ; %bb.0:
14609; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14610; GFX9-NEXT:    ;;#ASMSTART
14611; GFX9-NEXT:    ; def s[8:13]
14612; GFX9-NEXT:    ;;#ASMEND
14613; GFX9-NEXT:    s_mov_b32 s8, s12
14614; GFX9-NEXT:    s_mov_b32 s9, s13
14615; GFX9-NEXT:    s_mov_b32 s14, s12
14616; GFX9-NEXT:    s_mov_b32 s15, s13
14617; GFX9-NEXT:    ;;#ASMSTART
14618; GFX9-NEXT:    ; use s[8:15]
14619; GFX9-NEXT:    ;;#ASMEND
14620; GFX9-NEXT:    s_setpc_b64 s[30:31]
14621  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14622  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14623  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 4, i32 5, i32 5>
14624  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14625  ret void
14626}
14627
; SGPR shuffle test ("=s" inline-asm defs). Mask <5, 5, poison, 5>: lane 2 is
; poison (no particular value required), lanes 0, 1 and 3 select %vec1[2].
; %vec0 is not referenced by the mask. The "{s[8:15]}" use constraint pins
; the result to s8..s15.
14628define void @s_shuffle_v4i64_v3i64__5_5_u_5() {
14629; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
14630; GFX900:       ; %bb.0:
14631; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14632; GFX900-NEXT:    ;;#ASMSTART
14633; GFX900-NEXT:    ; def s[8:13]
14634; GFX900-NEXT:    ;;#ASMEND
14635; GFX900-NEXT:    s_mov_b32 s8, s12
14636; GFX900-NEXT:    s_mov_b32 s9, s13
14637; GFX900-NEXT:    s_mov_b32 s10, s12
14638; GFX900-NEXT:    s_mov_b32 s11, s13
14639; GFX900-NEXT:    s_mov_b32 s14, s12
14640; GFX900-NEXT:    s_mov_b32 s15, s13
14641; GFX900-NEXT:    ;;#ASMSTART
14642; GFX900-NEXT:    ; use s[8:15]
14643; GFX900-NEXT:    ;;#ASMEND
14644; GFX900-NEXT:    s_setpc_b64 s[30:31]
14645;
14646; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
14647; GFX90A:       ; %bb.0:
14648; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14649; GFX90A-NEXT:    ;;#ASMSTART
14650; GFX90A-NEXT:    ; def s[8:13]
14651; GFX90A-NEXT:    ;;#ASMEND
14652; GFX90A-NEXT:    s_mov_b32 s8, s12
14653; GFX90A-NEXT:    s_mov_b32 s9, s13
14654; GFX90A-NEXT:    s_mov_b32 s10, s12
14655; GFX90A-NEXT:    s_mov_b32 s11, s13
14656; GFX90A-NEXT:    s_mov_b32 s14, s12
14657; GFX90A-NEXT:    s_mov_b32 s15, s13
14658; GFX90A-NEXT:    ;;#ASMSTART
14659; GFX90A-NEXT:    ; use s[8:15]
14660; GFX90A-NEXT:    ;;#ASMEND
14661; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14662;
14663; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_u_5:
14664; GFX940:       ; %bb.0:
14665; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14666; GFX940-NEXT:    ;;#ASMSTART
14667; GFX940-NEXT:    ; def s[0:5]
14668; GFX940-NEXT:    ;;#ASMEND
14669; GFX940-NEXT:    s_mov_b32 s8, s4
14670; GFX940-NEXT:    s_mov_b32 s9, s5
14671; GFX940-NEXT:    s_mov_b32 s10, s4
14672; GFX940-NEXT:    s_mov_b32 s11, s5
14673; GFX940-NEXT:    s_mov_b32 s14, s4
14674; GFX940-NEXT:    s_mov_b32 s15, s5
14675; GFX940-NEXT:    ;;#ASMSTART
14676; GFX940-NEXT:    ; use s[8:15]
14677; GFX940-NEXT:    ;;#ASMEND
14678; GFX940-NEXT:    s_setpc_b64 s[30:31]
14679  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14680  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14681  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 poison, i32 5>
14682  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14683  ret void
14684}
14685
; SGPR shuffle test ("=s" inline-asm defs). Mask <5, 5, 0, 5> selects
; %vec1[2], %vec1[2], %vec0[0], %vec1[2]; mask indices 0-2 address %vec0 and
; 3-5 address %vec1. The "{s[8:15]}" use constraint pins the result to s8..s15.
14686define void @s_shuffle_v4i64_v3i64__5_5_0_5() {
14687; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
14688; GFX900:       ; %bb.0:
14689; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14690; GFX900-NEXT:    ;;#ASMSTART
14691; GFX900-NEXT:    ; def s[4:9]
14692; GFX900-NEXT:    ;;#ASMEND
14693; GFX900-NEXT:    ;;#ASMSTART
14694; GFX900-NEXT:    ; def s[12:17]
14695; GFX900-NEXT:    ;;#ASMEND
14696; GFX900-NEXT:    s_mov_b32 s8, s16
14697; GFX900-NEXT:    s_mov_b32 s9, s17
14698; GFX900-NEXT:    s_mov_b32 s10, s16
14699; GFX900-NEXT:    s_mov_b32 s11, s17
14700; GFX900-NEXT:    s_mov_b32 s12, s4
14701; GFX900-NEXT:    s_mov_b32 s13, s5
14702; GFX900-NEXT:    s_mov_b32 s14, s16
14703; GFX900-NEXT:    s_mov_b32 s15, s17
14704; GFX900-NEXT:    ;;#ASMSTART
14705; GFX900-NEXT:    ; use s[8:15]
14706; GFX900-NEXT:    ;;#ASMEND
14707; GFX900-NEXT:    s_setpc_b64 s[30:31]
14708;
14709; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
14710; GFX90A:       ; %bb.0:
14711; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14712; GFX90A-NEXT:    ;;#ASMSTART
14713; GFX90A-NEXT:    ; def s[4:9]
14714; GFX90A-NEXT:    ;;#ASMEND
14715; GFX90A-NEXT:    ;;#ASMSTART
14716; GFX90A-NEXT:    ; def s[12:17]
14717; GFX90A-NEXT:    ;;#ASMEND
14718; GFX90A-NEXT:    s_mov_b32 s8, s16
14719; GFX90A-NEXT:    s_mov_b32 s9, s17
14720; GFX90A-NEXT:    s_mov_b32 s10, s16
14721; GFX90A-NEXT:    s_mov_b32 s11, s17
14722; GFX90A-NEXT:    s_mov_b32 s12, s4
14723; GFX90A-NEXT:    s_mov_b32 s13, s5
14724; GFX90A-NEXT:    s_mov_b32 s14, s16
14725; GFX90A-NEXT:    s_mov_b32 s15, s17
14726; GFX90A-NEXT:    ;;#ASMSTART
14727; GFX90A-NEXT:    ; use s[8:15]
14728; GFX90A-NEXT:    ;;#ASMEND
14729; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14730;
14731; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_0_5:
14732; GFX940:       ; %bb.0:
14733; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14734; GFX940-NEXT:    ;;#ASMSTART
14735; GFX940-NEXT:    ; def s[12:17]
14736; GFX940-NEXT:    ;;#ASMEND
14737; GFX940-NEXT:    ;;#ASMSTART
14738; GFX940-NEXT:    ; def s[0:5]
14739; GFX940-NEXT:    ;;#ASMEND
14740; GFX940-NEXT:    s_mov_b32 s8, s16
14741; GFX940-NEXT:    s_mov_b32 s9, s17
14742; GFX940-NEXT:    s_mov_b32 s10, s16
14743; GFX940-NEXT:    s_mov_b32 s11, s17
14744; GFX940-NEXT:    s_mov_b32 s12, s0
14745; GFX940-NEXT:    s_mov_b32 s13, s1
14746; GFX940-NEXT:    s_mov_b32 s14, s16
14747; GFX940-NEXT:    s_mov_b32 s15, s17
14748; GFX940-NEXT:    ;;#ASMSTART
14749; GFX940-NEXT:    ; use s[8:15]
14750; GFX940-NEXT:    ;;#ASMEND
14751; GFX940-NEXT:    s_setpc_b64 s[30:31]
14752  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14753  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14754  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 0, i32 5>
14755  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14756  ret void
14757}
14758
; Scalar-register case for shuffle mask <5, 5, 1, 5>: result lanes 0, 1 and 3
; take element 2 of %vec1, and lane 2 takes element 1 of %vec0. Both inputs are
; produced by "=s" inline-asm defs, and the shuffled value is consumed by an
; inline-asm use constrained to s[8:15].
14759define void @s_shuffle_v4i64_v3i64__5_5_1_5() {
14760; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
14761; GFX900:       ; %bb.0:
14762; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14763; GFX900-NEXT:    ;;#ASMSTART
14764; GFX900-NEXT:    ; def s[4:9]
14765; GFX900-NEXT:    ;;#ASMEND
14766; GFX900-NEXT:    ;;#ASMSTART
14767; GFX900-NEXT:    ; def s[12:17]
14768; GFX900-NEXT:    ;;#ASMEND
14769; GFX900-NEXT:    s_mov_b32 s8, s16
14770; GFX900-NEXT:    s_mov_b32 s9, s17
14771; GFX900-NEXT:    s_mov_b32 s10, s16
14772; GFX900-NEXT:    s_mov_b32 s11, s17
14773; GFX900-NEXT:    s_mov_b32 s12, s6
14774; GFX900-NEXT:    s_mov_b32 s13, s7
14775; GFX900-NEXT:    s_mov_b32 s14, s16
14776; GFX900-NEXT:    s_mov_b32 s15, s17
14777; GFX900-NEXT:    ;;#ASMSTART
14778; GFX900-NEXT:    ; use s[8:15]
14779; GFX900-NEXT:    ;;#ASMEND
14780; GFX900-NEXT:    s_setpc_b64 s[30:31]
14781;
14782; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
14783; GFX90A:       ; %bb.0:
14784; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14785; GFX90A-NEXT:    ;;#ASMSTART
14786; GFX90A-NEXT:    ; def s[4:9]
14787; GFX90A-NEXT:    ;;#ASMEND
14788; GFX90A-NEXT:    ;;#ASMSTART
14789; GFX90A-NEXT:    ; def s[12:17]
14790; GFX90A-NEXT:    ;;#ASMEND
14791; GFX90A-NEXT:    s_mov_b32 s8, s16
14792; GFX90A-NEXT:    s_mov_b32 s9, s17
14793; GFX90A-NEXT:    s_mov_b32 s10, s16
14794; GFX90A-NEXT:    s_mov_b32 s11, s17
14795; GFX90A-NEXT:    s_mov_b32 s12, s6
14796; GFX90A-NEXT:    s_mov_b32 s13, s7
14797; GFX90A-NEXT:    s_mov_b32 s14, s16
14798; GFX90A-NEXT:    s_mov_b32 s15, s17
14799; GFX90A-NEXT:    ;;#ASMSTART
14800; GFX90A-NEXT:    ; use s[8:15]
14801; GFX90A-NEXT:    ;;#ASMEND
14802; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14803;
14804; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_1_5:
14805; GFX940:       ; %bb.0:
14806; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14807; GFX940-NEXT:    ;;#ASMSTART
14808; GFX940-NEXT:    ; def s[12:17]
14809; GFX940-NEXT:    ;;#ASMEND
14810; GFX940-NEXT:    ;;#ASMSTART
14811; GFX940-NEXT:    ; def s[0:5]
14812; GFX940-NEXT:    ;;#ASMEND
14813; GFX940-NEXT:    s_mov_b32 s8, s16
14814; GFX940-NEXT:    s_mov_b32 s9, s17
14815; GFX940-NEXT:    s_mov_b32 s10, s16
14816; GFX940-NEXT:    s_mov_b32 s11, s17
14817; GFX940-NEXT:    s_mov_b32 s12, s2
14818; GFX940-NEXT:    s_mov_b32 s13, s3
14819; GFX940-NEXT:    s_mov_b32 s14, s16
14820; GFX940-NEXT:    s_mov_b32 s15, s17
14821; GFX940-NEXT:    ;;#ASMSTART
14822; GFX940-NEXT:    ; use s[8:15]
14823; GFX940-NEXT:    ;;#ASMEND
14824; GFX940-NEXT:    s_setpc_b64 s[30:31]
14825  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14826  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14827  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 1, i32 5>
14828  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14829  ret void
14830}
14831
; Scalar-register case for shuffle mask <5, 5, 2, 5>: result lanes 0, 1 and 3
; take element 2 of %vec1, and lane 2 takes element 2 of %vec0. In the expected
; output one asm def is placed in s[8:13] and no copy into s12/s13 is emitted;
; only the three lanes sourced from the other def are moved before the
; s[8:15] use.
14832define void @s_shuffle_v4i64_v3i64__5_5_2_5() {
14833; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
14834; GFX900:       ; %bb.0:
14835; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14836; GFX900-NEXT:    ;;#ASMSTART
14837; GFX900-NEXT:    ; def s[8:13]
14838; GFX900-NEXT:    ;;#ASMEND
14839; GFX900-NEXT:    ;;#ASMSTART
14840; GFX900-NEXT:    ; def s[16:21]
14841; GFX900-NEXT:    ;;#ASMEND
14842; GFX900-NEXT:    s_mov_b32 s8, s20
14843; GFX900-NEXT:    s_mov_b32 s9, s21
14844; GFX900-NEXT:    s_mov_b32 s10, s20
14845; GFX900-NEXT:    s_mov_b32 s11, s21
14846; GFX900-NEXT:    s_mov_b32 s14, s20
14847; GFX900-NEXT:    s_mov_b32 s15, s21
14848; GFX900-NEXT:    ;;#ASMSTART
14849; GFX900-NEXT:    ; use s[8:15]
14850; GFX900-NEXT:    ;;#ASMEND
14851; GFX900-NEXT:    s_setpc_b64 s[30:31]
14852;
14853; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
14854; GFX90A:       ; %bb.0:
14855; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14856; GFX90A-NEXT:    ;;#ASMSTART
14857; GFX90A-NEXT:    ; def s[8:13]
14858; GFX90A-NEXT:    ;;#ASMEND
14859; GFX90A-NEXT:    ;;#ASMSTART
14860; GFX90A-NEXT:    ; def s[16:21]
14861; GFX90A-NEXT:    ;;#ASMEND
14862; GFX90A-NEXT:    s_mov_b32 s8, s20
14863; GFX90A-NEXT:    s_mov_b32 s9, s21
14864; GFX90A-NEXT:    s_mov_b32 s10, s20
14865; GFX90A-NEXT:    s_mov_b32 s11, s21
14866; GFX90A-NEXT:    s_mov_b32 s14, s20
14867; GFX90A-NEXT:    s_mov_b32 s15, s21
14868; GFX90A-NEXT:    ;;#ASMSTART
14869; GFX90A-NEXT:    ; use s[8:15]
14870; GFX90A-NEXT:    ;;#ASMEND
14871; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14872;
14873; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_2_5:
14874; GFX940:       ; %bb.0:
14875; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14876; GFX940-NEXT:    ;;#ASMSTART
14877; GFX940-NEXT:    ; def s[8:13]
14878; GFX940-NEXT:    ;;#ASMEND
14879; GFX940-NEXT:    ;;#ASMSTART
14880; GFX940-NEXT:    ; def s[0:5]
14881; GFX940-NEXT:    ;;#ASMEND
14882; GFX940-NEXT:    s_mov_b32 s8, s4
14883; GFX940-NEXT:    s_mov_b32 s9, s5
14884; GFX940-NEXT:    s_mov_b32 s10, s4
14885; GFX940-NEXT:    s_mov_b32 s11, s5
14886; GFX940-NEXT:    s_mov_b32 s14, s4
14887; GFX940-NEXT:    s_mov_b32 s15, s5
14888; GFX940-NEXT:    ;;#ASMSTART
14889; GFX940-NEXT:    ; use s[8:15]
14890; GFX940-NEXT:    ;;#ASMEND
14891; GFX940-NEXT:    s_setpc_b64 s[30:31]
14892  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14893  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14894  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 2, i32 5>
14895  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14896  ret void
14897}
14898
; Scalar-register case for shuffle mask <5, 5, 3, 5>: every result lane comes
; from %vec1 (lanes 0, 1 and 3 take its element 2, lane 2 takes its element 0).
; %vec0 is not referenced by the mask, and the expected output contains only a
; single asm def before the s[8:15] use.
14899define void @s_shuffle_v4i64_v3i64__5_5_3_5() {
14900; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
14901; GFX900:       ; %bb.0:
14902; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14903; GFX900-NEXT:    ;;#ASMSTART
14904; GFX900-NEXT:    ; def s[12:17]
14905; GFX900-NEXT:    ;;#ASMEND
14906; GFX900-NEXT:    s_mov_b32 s8, s16
14907; GFX900-NEXT:    s_mov_b32 s9, s17
14908; GFX900-NEXT:    s_mov_b32 s10, s16
14909; GFX900-NEXT:    s_mov_b32 s11, s17
14910; GFX900-NEXT:    s_mov_b32 s14, s16
14911; GFX900-NEXT:    s_mov_b32 s15, s17
14912; GFX900-NEXT:    ;;#ASMSTART
14913; GFX900-NEXT:    ; use s[8:15]
14914; GFX900-NEXT:    ;;#ASMEND
14915; GFX900-NEXT:    s_setpc_b64 s[30:31]
14916;
14917; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
14918; GFX90A:       ; %bb.0:
14919; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14920; GFX90A-NEXT:    ;;#ASMSTART
14921; GFX90A-NEXT:    ; def s[12:17]
14922; GFX90A-NEXT:    ;;#ASMEND
14923; GFX90A-NEXT:    s_mov_b32 s8, s16
14924; GFX90A-NEXT:    s_mov_b32 s9, s17
14925; GFX90A-NEXT:    s_mov_b32 s10, s16
14926; GFX90A-NEXT:    s_mov_b32 s11, s17
14927; GFX90A-NEXT:    s_mov_b32 s14, s16
14928; GFX90A-NEXT:    s_mov_b32 s15, s17
14929; GFX90A-NEXT:    ;;#ASMSTART
14930; GFX90A-NEXT:    ; use s[8:15]
14931; GFX90A-NEXT:    ;;#ASMEND
14932; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14933;
14934; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_3_5:
14935; GFX940:       ; %bb.0:
14936; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14937; GFX940-NEXT:    ;;#ASMSTART
14938; GFX940-NEXT:    ; def s[0:5]
14939; GFX940-NEXT:    ;;#ASMEND
14940; GFX940-NEXT:    s_mov_b32 s8, s4
14941; GFX940-NEXT:    s_mov_b32 s9, s5
14942; GFX940-NEXT:    s_mov_b32 s10, s4
14943; GFX940-NEXT:    s_mov_b32 s11, s5
14944; GFX940-NEXT:    s_mov_b32 s12, s0
14945; GFX940-NEXT:    s_mov_b32 s13, s1
14946; GFX940-NEXT:    s_mov_b32 s14, s4
14947; GFX940-NEXT:    s_mov_b32 s15, s5
14948; GFX940-NEXT:    ;;#ASMSTART
14949; GFX940-NEXT:    ; use s[8:15]
14950; GFX940-NEXT:    ;;#ASMEND
14951; GFX940-NEXT:    s_setpc_b64 s[30:31]
14952  %vec0 = call <3 x i64> asm "; def $0", "=s"()
14953  %vec1 = call <3 x i64> asm "; def $0", "=s"()
14954  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 3, i32 5>
14955  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
14956  ret void
14957}
14958
; Scalar-register case for shuffle mask <5, 5, 4, 5>: every result lane comes
; from %vec1 (lanes 0, 1 and 3 take its element 2, lane 2 takes its element 1).
; %vec0 is not referenced by the mask, and the expected output contains only a
; single asm def before the s[8:15] use.
14959define void @s_shuffle_v4i64_v3i64__5_5_4_5() {
14960; GFX900-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
14961; GFX900:       ; %bb.0:
14962; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14963; GFX900-NEXT:    ;;#ASMSTART
14964; GFX900-NEXT:    ; def s[12:17]
14965; GFX900-NEXT:    ;;#ASMEND
14966; GFX900-NEXT:    s_mov_b32 s8, s16
14967; GFX900-NEXT:    s_mov_b32 s9, s17
14968; GFX900-NEXT:    s_mov_b32 s10, s16
14969; GFX900-NEXT:    s_mov_b32 s11, s17
14970; GFX900-NEXT:    s_mov_b32 s12, s14
14971; GFX900-NEXT:    s_mov_b32 s13, s15
14972; GFX900-NEXT:    s_mov_b32 s14, s16
14973; GFX900-NEXT:    s_mov_b32 s15, s17
14974; GFX900-NEXT:    ;;#ASMSTART
14975; GFX900-NEXT:    ; use s[8:15]
14976; GFX900-NEXT:    ;;#ASMEND
14977; GFX900-NEXT:    s_setpc_b64 s[30:31]
14978;
14979; GFX90A-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
14980; GFX90A:       ; %bb.0:
14981; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14982; GFX90A-NEXT:    ;;#ASMSTART
14983; GFX90A-NEXT:    ; def s[12:17]
14984; GFX90A-NEXT:    ;;#ASMEND
14985; GFX90A-NEXT:    s_mov_b32 s8, s16
14986; GFX90A-NEXT:    s_mov_b32 s9, s17
14987; GFX90A-NEXT:    s_mov_b32 s10, s16
14988; GFX90A-NEXT:    s_mov_b32 s11, s17
14989; GFX90A-NEXT:    s_mov_b32 s12, s14
14990; GFX90A-NEXT:    s_mov_b32 s13, s15
14991; GFX90A-NEXT:    s_mov_b32 s14, s16
14992; GFX90A-NEXT:    s_mov_b32 s15, s17
14993; GFX90A-NEXT:    ;;#ASMSTART
14994; GFX90A-NEXT:    ; use s[8:15]
14995; GFX90A-NEXT:    ;;#ASMEND
14996; GFX90A-NEXT:    s_setpc_b64 s[30:31]
14997;
14998; GFX940-LABEL: s_shuffle_v4i64_v3i64__5_5_4_5:
14999; GFX940:       ; %bb.0:
15000; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15001; GFX940-NEXT:    ;;#ASMSTART
15002; GFX940-NEXT:    ; def s[0:5]
15003; GFX940-NEXT:    ;;#ASMEND
15004; GFX940-NEXT:    s_mov_b32 s8, s4
15005; GFX940-NEXT:    s_mov_b32 s9, s5
15006; GFX940-NEXT:    s_mov_b32 s10, s4
15007; GFX940-NEXT:    s_mov_b32 s11, s5
15008; GFX940-NEXT:    s_mov_b32 s12, s2
15009; GFX940-NEXT:    s_mov_b32 s13, s3
15010; GFX940-NEXT:    s_mov_b32 s14, s4
15011; GFX940-NEXT:    s_mov_b32 s15, s5
15012; GFX940-NEXT:    ;;#ASMSTART
15013; GFX940-NEXT:    ; use s[8:15]
15014; GFX940-NEXT:    ;;#ASMEND
15015; GFX940-NEXT:    s_setpc_b64 s[30:31]
15016  %vec0 = call <3 x i64> asm "; def $0", "=s"()
15017  %vec1 = call <3 x i64> asm "; def $0", "=s"()
15018  %shuf = shufflevector <3 x i64> %vec0, <3 x i64> %vec1, <4 x i32> <i32 5, i32 5, i32 4, i32 5>
15019  call void asm sideeffect "; use $0", "{s[8:15]}"(<4 x i64> %shuf)
15020  ret void
15021}
15022;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
15023; GFX90APLUS: {{.*}}
15024