xref: /llvm-project/llvm/test/CodeGen/X86/vector-shuffle-concatenation.ll (revision a70d5e25f32ebd5f1d1c394312036a37591e998b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2,FALLBACK0
3; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42,FALLBACK1
4; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx  | FileCheck %s --check-prefixes=AVX,AVX1-ONLY,FALLBACK2
5; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2,AVX2-SLOW,FALLBACK3
6; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2,AVX2-FAST-PERLANE,FALLBACK4
7; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2,AVX2-FAST,FALLBACK5
8; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512F,AVX512F-SLOW,FALLBACK6
9; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512F,AVX512F-FAST,FALLBACK7
10; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512BW,AVX512BW-SLOW,FALLBACK8
11; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512BW,AVX512BW-FAST,FALLBACK9
12
; Loads <2 x i64> %a from %a.ptr, swaps its two elements (%shuffle, mask
; <1, 0>) and stores the <4 x i64> concatenation { %shuffle, %a } to %dst,
; i.e. the element order a[1], a[0], a[0], a[1].
13define void @concat_a_to_shuf_of_a(ptr %a.ptr, ptr %dst) {
14; SSE-LABEL: concat_a_to_shuf_of_a:
15; SSE:       # %bb.0:
16; SSE-NEXT:    movdqa (%rdi), %xmm0
17; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
18; SSE-NEXT:    movdqa %xmm0, 16(%rsi)
19; SSE-NEXT:    movdqa %xmm1, (%rsi)
20; SSE-NEXT:    retq
21;
22; AVX-LABEL: concat_a_to_shuf_of_a:
23; AVX:       # %bb.0:
24; AVX-NEXT:    vmovaps (%rdi), %xmm0
25; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,0,1]
26; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
27; AVX-NEXT:    vmovaps %ymm0, (%rsi)
28; AVX-NEXT:    vzeroupper
29; AVX-NEXT:    retq
30;
31; AVX2-LABEL: concat_a_to_shuf_of_a:
32; AVX2:       # %bb.0:
33; AVX2-NEXT:    vmovaps (%rdi), %xmm0
34; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,1]
35; AVX2-NEXT:    vmovaps %ymm0, (%rsi)
36; AVX2-NEXT:    vzeroupper
37; AVX2-NEXT:    retq
38;
39; AVX512F-LABEL: concat_a_to_shuf_of_a:
40; AVX512F:       # %bb.0:
41; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
42; AVX512F-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,1]
43; AVX512F-NEXT:    vmovaps %ymm0, (%rsi)
44; AVX512F-NEXT:    vzeroupper
45; AVX512F-NEXT:    retq
46;
47; AVX512BW-LABEL: concat_a_to_shuf_of_a:
48; AVX512BW:       # %bb.0:
49; AVX512BW-NEXT:    vmovaps (%rdi), %xmm0
50; AVX512BW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,1]
51; AVX512BW-NEXT:    vmovaps %ymm0, (%rsi)
52; AVX512BW-NEXT:    vzeroupper
53; AVX512BW-NEXT:    retq
54  %a = load <2 x i64>, ptr %a.ptr, align 64
55  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
56  %concat = shufflevector <2 x i64> %shuffle, <2 x i64> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
57  store <4 x i64> %concat, ptr %dst, align 64
58  ret void
59}
; Mirror of the previous case with the halves exchanged: stores the
; <4 x i64> concatenation { %a, %shuffle } to %dst, where %shuffle is %a with
; its two elements swapped (element order a[0], a[1], a[1], a[0]).
; NOTE(review): %b is loaded from %b.ptr but never used by the shuffles; the
; checked assembly contains no load from %rsi.
60define void @concat_shuf_of_a_to_a(ptr %a.ptr, ptr %b.ptr, ptr %dst) {
61; SSE-LABEL: concat_shuf_of_a_to_a:
62; SSE:       # %bb.0:
63; SSE-NEXT:    movdqa (%rdi), %xmm0
64; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
65; SSE-NEXT:    movdqa %xmm0, (%rdx)
66; SSE-NEXT:    movdqa %xmm1, 16(%rdx)
67; SSE-NEXT:    retq
68;
69; AVX-LABEL: concat_shuf_of_a_to_a:
70; AVX:       # %bb.0:
71; AVX-NEXT:    vmovaps (%rdi), %xmm0
72; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,0,1]
73; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
74; AVX-NEXT:    vmovaps %ymm0, (%rdx)
75; AVX-NEXT:    vzeroupper
76; AVX-NEXT:    retq
77;
78; AVX2-LABEL: concat_shuf_of_a_to_a:
79; AVX2:       # %bb.0:
80; AVX2-NEXT:    vmovaps (%rdi), %xmm0
81; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,0]
82; AVX2-NEXT:    vmovaps %ymm0, (%rdx)
83; AVX2-NEXT:    vzeroupper
84; AVX2-NEXT:    retq
85;
86; AVX512F-LABEL: concat_shuf_of_a_to_a:
87; AVX512F:       # %bb.0:
88; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
89; AVX512F-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,0]
90; AVX512F-NEXT:    vmovaps %ymm0, (%rdx)
91; AVX512F-NEXT:    vzeroupper
92; AVX512F-NEXT:    retq
93;
94; AVX512BW-LABEL: concat_shuf_of_a_to_a:
95; AVX512BW:       # %bb.0:
96; AVX512BW-NEXT:    vmovaps (%rdi), %xmm0
97; AVX512BW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,0]
98; AVX512BW-NEXT:    vmovaps %ymm0, (%rdx)
99; AVX512BW-NEXT:    vzeroupper
100; AVX512BW-NEXT:    retq
101  %a = load <2 x i64>, ptr %a.ptr, align 64
102  %b = load <2 x i64>, ptr %b.ptr, align 64
103  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
104  %concat = shufflevector <2 x i64> %a, <2 x i64> %shuffle, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
105  store <4 x i64> %concat, ptr %dst, align 64
106  ret void
107}
108
; Same concatenation { %shuffle, %a } as @concat_a_to_shuf_of_a, but the
; swapped vector %shuffle is additionally stored to %shuf.escape.ptr, so it
; has a second use besides the concatenation. Every checked configuration
; keeps the shuffle as a separate 128-bit value and emits 128-bit stores.
109define void @concat_a_to_shuf_of_a_extrause_of_shuf(ptr %a.ptr, ptr %dst, ptr %shuf.escape.ptr) {
110; SSE-LABEL: concat_a_to_shuf_of_a_extrause_of_shuf:
111; SSE:       # %bb.0:
112; SSE-NEXT:    movdqa (%rdi), %xmm0
113; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
114; SSE-NEXT:    movdqa %xmm1, (%rdx)
115; SSE-NEXT:    movdqa %xmm0, 16(%rsi)
116; SSE-NEXT:    movdqa %xmm1, (%rsi)
117; SSE-NEXT:    retq
118;
119; AVX-LABEL: concat_a_to_shuf_of_a_extrause_of_shuf:
120; AVX:       # %bb.0:
121; AVX-NEXT:    vmovaps (%rdi), %xmm0
122; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,0,1]
123; AVX-NEXT:    vmovaps %xmm1, (%rdx)
124; AVX-NEXT:    vmovaps %xmm0, 16(%rsi)
125; AVX-NEXT:    vmovaps %xmm1, (%rsi)
126; AVX-NEXT:    retq
127;
128; AVX2-LABEL: concat_a_to_shuf_of_a_extrause_of_shuf:
129; AVX2:       # %bb.0:
130; AVX2-NEXT:    vmovaps (%rdi), %xmm0
131; AVX2-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,0,1]
132; AVX2-NEXT:    vmovaps %xmm1, (%rdx)
133; AVX2-NEXT:    vmovaps %xmm0, 16(%rsi)
134; AVX2-NEXT:    vmovaps %xmm1, (%rsi)
135; AVX2-NEXT:    retq
136;
137; AVX512F-LABEL: concat_a_to_shuf_of_a_extrause_of_shuf:
138; AVX512F:       # %bb.0:
139; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
140; AVX512F-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,0,1]
141; AVX512F-NEXT:    vmovaps %xmm1, (%rdx)
142; AVX512F-NEXT:    vmovaps %xmm0, 16(%rsi)
143; AVX512F-NEXT:    vmovaps %xmm1, (%rsi)
144; AVX512F-NEXT:    retq
145;
146; AVX512BW-LABEL: concat_a_to_shuf_of_a_extrause_of_shuf:
147; AVX512BW:       # %bb.0:
148; AVX512BW-NEXT:    vmovaps (%rdi), %xmm0
149; AVX512BW-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,0,1]
150; AVX512BW-NEXT:    vmovaps %xmm1, (%rdx)
151; AVX512BW-NEXT:    vmovaps %xmm0, 16(%rsi)
152; AVX512BW-NEXT:    vmovaps %xmm1, (%rsi)
153; AVX512BW-NEXT:    retq
154  %a = load <2 x i64>, ptr %a.ptr, align 64
155  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
156  store <2 x i64> %shuffle, ptr %shuf.escape.ptr, align 64
157  %concat = shufflevector <2 x i64> %shuffle, <2 x i64> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
158  store <4 x i64> %concat, ptr %dst, align 64
159  ret void
160}
161
; Two-input shuffle case: %shuffle takes element 0 of %a and element 1 of %b
; (mask <0, 3>). The <4 x i64> concatenation { %shuffle, %a } is stored to
; %dst, i.e. the element order a[0], b[1], a[0], a[1].
162define void @concat_a_to_shuf_of_ab(ptr %a.ptr, ptr %b.ptr, ptr %dst) {
163; SSE2-LABEL: concat_a_to_shuf_of_ab:
164; SSE2:       # %bb.0:
165; SSE2-NEXT:    movapd (%rdi), %xmm0
166; SSE2-NEXT:    movapd (%rsi), %xmm1
167; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
168; SSE2-NEXT:    movapd %xmm0, 16(%rdx)
169; SSE2-NEXT:    movapd %xmm1, (%rdx)
170; SSE2-NEXT:    retq
171;
172; SSE42-LABEL: concat_a_to_shuf_of_ab:
173; SSE42:       # %bb.0:
174; SSE42-NEXT:    movaps (%rdi), %xmm0
175; SSE42-NEXT:    movaps (%rsi), %xmm1
176; SSE42-NEXT:    blendps {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
177; SSE42-NEXT:    movaps %xmm0, 16(%rdx)
178; SSE42-NEXT:    movaps %xmm1, (%rdx)
179; SSE42-NEXT:    retq
180;
181; AVX-LABEL: concat_a_to_shuf_of_ab:
182; AVX:       # %bb.0:
183; AVX-NEXT:    vmovaps (%rdi), %xmm0
184; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
185; AVX-NEXT:    vmovaps %xmm0, 16(%rdx)
186; AVX-NEXT:    vmovaps %xmm1, (%rdx)
187; AVX-NEXT:    retq
188;
189; AVX2-LABEL: concat_a_to_shuf_of_ab:
190; AVX2:       # %bb.0:
191; AVX2-NEXT:    vmovaps (%rdi), %xmm0
192; AVX2-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
193; AVX2-NEXT:    vmovaps %xmm0, 16(%rdx)
194; AVX2-NEXT:    vmovaps %xmm1, (%rdx)
195; AVX2-NEXT:    retq
196;
197; AVX512F-LABEL: concat_a_to_shuf_of_ab:
198; AVX512F:       # %bb.0:
199; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
200; AVX512F-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
201; AVX512F-NEXT:    vmovaps %xmm0, 16(%rdx)
202; AVX512F-NEXT:    vmovaps %xmm1, (%rdx)
203; AVX512F-NEXT:    retq
204;
205; AVX512BW-LABEL: concat_a_to_shuf_of_ab:
206; AVX512BW:       # %bb.0:
207; AVX512BW-NEXT:    vmovaps (%rdi), %xmm0
208; AVX512BW-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
209; AVX512BW-NEXT:    vmovaps %xmm0, 16(%rdx)
210; AVX512BW-NEXT:    vmovaps %xmm1, (%rdx)
211; AVX512BW-NEXT:    retq
212  %a = load <2 x i64>, ptr %a.ptr, align 64
213  %b = load <2 x i64>, ptr %b.ptr, align 64
214  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
215  %concat = shufflevector <2 x i64> %shuffle, <2 x i64> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
216  store <4 x i64> %concat, ptr %dst, align 64
217  ret void
218}
; Two-input shuffle case: %shuffle takes element 0 of %a and element 1 of %b
; (mask <0, 3>). The <4 x i64> concatenation { %shuffle, %b } is stored to
; %dst, i.e. the element order a[0], b[1], b[0], b[1].
219define void @concat_b_to_shuf_of_ab(ptr %a.ptr, ptr %b.ptr, ptr %dst) {
220; SSE2-LABEL: concat_b_to_shuf_of_ab:
221; SSE2:       # %bb.0:
222; SSE2-NEXT:    movaps (%rsi), %xmm0
223; SSE2-NEXT:    movaps %xmm0, %xmm1
224; SSE2-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
225; SSE2-NEXT:    movaps %xmm0, 16(%rdx)
226; SSE2-NEXT:    movaps %xmm1, (%rdx)
227; SSE2-NEXT:    retq
228;
229; SSE42-LABEL: concat_b_to_shuf_of_ab:
230; SSE42:       # %bb.0:
231; SSE42-NEXT:    movaps (%rsi), %xmm0
232; SSE42-NEXT:    movaps (%rdi), %xmm1
233; SSE42-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
234; SSE42-NEXT:    movaps %xmm0, 16(%rdx)
235; SSE42-NEXT:    movaps %xmm1, (%rdx)
236; SSE42-NEXT:    retq
237;
238; AVX-LABEL: concat_b_to_shuf_of_ab:
239; AVX:       # %bb.0:
240; AVX-NEXT:    vmovaps (%rsi), %xmm0
241; AVX-NEXT:    vblendps {{.*#+}} xmm1 = mem[0,1],xmm0[2,3]
242; AVX-NEXT:    vmovaps %xmm0, 16(%rdx)
243; AVX-NEXT:    vmovaps %xmm1, (%rdx)
244; AVX-NEXT:    retq
245;
246; AVX2-LABEL: concat_b_to_shuf_of_ab:
247; AVX2:       # %bb.0:
248; AVX2-NEXT:    vmovaps (%rsi), %xmm0
249; AVX2-NEXT:    vblendps {{.*#+}} xmm1 = mem[0,1],xmm0[2,3]
250; AVX2-NEXT:    vmovaps %xmm0, 16(%rdx)
251; AVX2-NEXT:    vmovaps %xmm1, (%rdx)
252; AVX2-NEXT:    retq
253;
254; AVX512F-LABEL: concat_b_to_shuf_of_ab:
255; AVX512F:       # %bb.0:
256; AVX512F-NEXT:    vmovaps (%rsi), %xmm0
257; AVX512F-NEXT:    vblendps {{.*#+}} xmm1 = mem[0,1],xmm0[2,3]
258; AVX512F-NEXT:    vmovaps %xmm0, 16(%rdx)
259; AVX512F-NEXT:    vmovaps %xmm1, (%rdx)
260; AVX512F-NEXT:    retq
261;
262; AVX512BW-LABEL: concat_b_to_shuf_of_ab:
263; AVX512BW:       # %bb.0:
264; AVX512BW-NEXT:    vmovaps (%rsi), %xmm0
265; AVX512BW-NEXT:    vblendps {{.*#+}} xmm1 = mem[0,1],xmm0[2,3]
266; AVX512BW-NEXT:    vmovaps %xmm0, 16(%rdx)
267; AVX512BW-NEXT:    vmovaps %xmm1, (%rdx)
268; AVX512BW-NEXT:    retq
269  %a = load <2 x i64>, ptr %a.ptr, align 64
270  %b = load <2 x i64>, ptr %b.ptr, align 64
271  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
272  %concat = shufflevector <2 x i64> %shuffle, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
273  store <4 x i64> %concat, ptr %dst, align 64
274  ret void
275}
276
; Two-input shuffle case with the halves exchanged: %shuffle takes element 0
; of %a and element 1 of %b (mask <0, 3>). The <4 x i64> concatenation
; { %a, %shuffle } is stored to %dst, i.e. a[0], a[1], a[0], b[1].
277define void @concat_shuf_of_ab_to_a(ptr %a.ptr, ptr %b.ptr, ptr %dst) {
278; SSE2-LABEL: concat_shuf_of_ab_to_a:
279; SSE2:       # %bb.0:
280; SSE2-NEXT:    movapd (%rdi), %xmm0
281; SSE2-NEXT:    movapd (%rsi), %xmm1
282; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
283; SSE2-NEXT:    movapd %xmm0, (%rdx)
284; SSE2-NEXT:    movapd %xmm1, 16(%rdx)
285; SSE2-NEXT:    retq
286;
287; SSE42-LABEL: concat_shuf_of_ab_to_a:
288; SSE42:       # %bb.0:
289; SSE42-NEXT:    movaps (%rdi), %xmm0
290; SSE42-NEXT:    movaps (%rsi), %xmm1
291; SSE42-NEXT:    blendps {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
292; SSE42-NEXT:    movaps %xmm1, 16(%rdx)
293; SSE42-NEXT:    movaps %xmm0, (%rdx)
294; SSE42-NEXT:    retq
295;
296; AVX-LABEL: concat_shuf_of_ab_to_a:
297; AVX:       # %bb.0:
298; AVX-NEXT:    vmovaps (%rdi), %xmm0
299; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
300; AVX-NEXT:    vmovaps %xmm1, 16(%rdx)
301; AVX-NEXT:    vmovaps %xmm0, (%rdx)
302; AVX-NEXT:    retq
303;
304; AVX2-LABEL: concat_shuf_of_ab_to_a:
305; AVX2:       # %bb.0:
306; AVX2-NEXT:    vmovaps (%rdi), %xmm0
307; AVX2-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
308; AVX2-NEXT:    vmovaps %xmm1, 16(%rdx)
309; AVX2-NEXT:    vmovaps %xmm0, (%rdx)
310; AVX2-NEXT:    retq
311;
312; AVX512F-LABEL: concat_shuf_of_ab_to_a:
313; AVX512F:       # %bb.0:
314; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
315; AVX512F-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
316; AVX512F-NEXT:    vmovaps %xmm1, 16(%rdx)
317; AVX512F-NEXT:    vmovaps %xmm0, (%rdx)
318; AVX512F-NEXT:    retq
319;
320; AVX512BW-LABEL: concat_shuf_of_ab_to_a:
321; AVX512BW:       # %bb.0:
322; AVX512BW-NEXT:    vmovaps (%rdi), %xmm0
323; AVX512BW-NEXT:    vblendps {{.*#+}} xmm1 = xmm0[0,1],mem[2,3]
324; AVX512BW-NEXT:    vmovaps %xmm1, 16(%rdx)
325; AVX512BW-NEXT:    vmovaps %xmm0, (%rdx)
326; AVX512BW-NEXT:    retq
327  %a = load <2 x i64>, ptr %a.ptr, align 64
328  %b = load <2 x i64>, ptr %b.ptr, align 64
329  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
330  %concat = shufflevector <2 x i64> %a, <2 x i64> %shuffle, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
331  store <4 x i64> %concat, ptr %dst, align 64
332  ret void
333}
; Two-input shuffle case with the halves exchanged: %shuffle takes element 0
; of %a and element 1 of %b (mask <0, 3>). The <4 x i64> concatenation
; { %b, %shuffle } is stored to %dst, i.e. b[0], b[1], a[0], b[1].
334define void @concat_shuf_of_ab_to_b(ptr %a.ptr, ptr %b.ptr, ptr %dst) {
335; SSE2-LABEL: concat_shuf_of_ab_to_b:
336; SSE2:       # %bb.0:
337; SSE2-NEXT:    movaps (%rsi), %xmm0
338; SSE2-NEXT:    movaps %xmm0, %xmm1
339; SSE2-NEXT:    movlps {{.*#+}} xmm1 = mem[0,1],xmm1[2,3]
340; SSE2-NEXT:    movaps %xmm1, 16(%rdx)
341; SSE2-NEXT:    movaps %xmm0, (%rdx)
342; SSE2-NEXT:    retq
343;
344; SSE42-LABEL: concat_shuf_of_ab_to_b:
345; SSE42:       # %bb.0:
346; SSE42-NEXT:    movaps (%rsi), %xmm0
347; SSE42-NEXT:    movaps (%rdi), %xmm1
348; SSE42-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
349; SSE42-NEXT:    movaps %xmm1, 16(%rdx)
350; SSE42-NEXT:    movaps %xmm0, (%rdx)
351; SSE42-NEXT:    retq
352;
353; AVX-LABEL: concat_shuf_of_ab_to_b:
354; AVX:       # %bb.0:
355; AVX-NEXT:    vmovaps (%rsi), %xmm0
356; AVX-NEXT:    vblendps {{.*#+}} xmm1 = mem[0,1],xmm0[2,3]
357; AVX-NEXT:    vmovaps %xmm1, 16(%rdx)
358; AVX-NEXT:    vmovaps %xmm0, (%rdx)
359; AVX-NEXT:    retq
360;
361; AVX2-LABEL: concat_shuf_of_ab_to_b:
362; AVX2:       # %bb.0:
363; AVX2-NEXT:    vmovaps (%rsi), %xmm0
364; AVX2-NEXT:    vblendps {{.*#+}} xmm1 = mem[0,1],xmm0[2,3]
365; AVX2-NEXT:    vmovaps %xmm1, 16(%rdx)
366; AVX2-NEXT:    vmovaps %xmm0, (%rdx)
367; AVX2-NEXT:    retq
368;
369; AVX512F-LABEL: concat_shuf_of_ab_to_b:
370; AVX512F:       # %bb.0:
371; AVX512F-NEXT:    vmovaps (%rsi), %xmm0
372; AVX512F-NEXT:    vblendps {{.*#+}} xmm1 = mem[0,1],xmm0[2,3]
373; AVX512F-NEXT:    vmovaps %xmm1, 16(%rdx)
374; AVX512F-NEXT:    vmovaps %xmm0, (%rdx)
375; AVX512F-NEXT:    retq
376;
377; AVX512BW-LABEL: concat_shuf_of_ab_to_b:
378; AVX512BW:       # %bb.0:
379; AVX512BW-NEXT:    vmovaps (%rsi), %xmm0
380; AVX512BW-NEXT:    vblendps {{.*#+}} xmm1 = mem[0,1],xmm0[2,3]
381; AVX512BW-NEXT:    vmovaps %xmm1, 16(%rdx)
382; AVX512BW-NEXT:    vmovaps %xmm0, (%rdx)
383; AVX512BW-NEXT:    retq
384  %a = load <2 x i64>, ptr %a.ptr, align 64
385  %b = load <2 x i64>, ptr %b.ptr, align 64
386  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 3>
387  %concat = shufflevector <2 x i64> %b, <2 x i64> %shuffle, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
388  store <4 x i64> %concat, ptr %dst, align 64
389  ret void
390}
391
; The concatenated operand is unrelated to the shuffle source: %shuffle is
; %a with its two elements swapped, and the <4 x i64> concatenation
; { %shuffle, %b } is stored to %dst, i.e. a[1], a[0], b[0], b[1].
392define void @concat_b_to_shuf_of_a(ptr %a.ptr, ptr %b.ptr, ptr %dst) {
393; SSE-LABEL: concat_b_to_shuf_of_a:
394; SSE:       # %bb.0:
395; SSE-NEXT:    movaps (%rsi), %xmm0
396; SSE-NEXT:    pshufd {{.*#+}} xmm1 = mem[2,3,0,1]
397; SSE-NEXT:    movaps %xmm0, 16(%rdx)
398; SSE-NEXT:    movdqa %xmm1, (%rdx)
399; SSE-NEXT:    retq
400;
401; AVX-LABEL: concat_b_to_shuf_of_a:
402; AVX:       # %bb.0:
403; AVX-NEXT:    vmovaps (%rsi), %xmm0
404; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = mem[2,3,0,1]
405; AVX-NEXT:    vmovaps %xmm0, 16(%rdx)
406; AVX-NEXT:    vmovaps %xmm1, (%rdx)
407; AVX-NEXT:    retq
408;
409; AVX2-LABEL: concat_b_to_shuf_of_a:
410; AVX2:       # %bb.0:
411; AVX2-NEXT:    vmovaps (%rsi), %xmm0
412; AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = mem[2,3,0,1]
413; AVX2-NEXT:    vmovaps %xmm0, 16(%rdx)
414; AVX2-NEXT:    vmovaps %xmm1, (%rdx)
415; AVX2-NEXT:    retq
416;
417; AVX512F-LABEL: concat_b_to_shuf_of_a:
418; AVX512F:       # %bb.0:
419; AVX512F-NEXT:    vmovaps (%rsi), %xmm0
420; AVX512F-NEXT:    vpermilps {{.*#+}} xmm1 = mem[2,3,0,1]
421; AVX512F-NEXT:    vmovaps %xmm0, 16(%rdx)
422; AVX512F-NEXT:    vmovaps %xmm1, (%rdx)
423; AVX512F-NEXT:    retq
424;
425; AVX512BW-LABEL: concat_b_to_shuf_of_a:
426; AVX512BW:       # %bb.0:
427; AVX512BW-NEXT:    vmovaps (%rsi), %xmm0
428; AVX512BW-NEXT:    vpermilps {{.*#+}} xmm1 = mem[2,3,0,1]
429; AVX512BW-NEXT:    vmovaps %xmm0, 16(%rdx)
430; AVX512BW-NEXT:    vmovaps %xmm1, (%rdx)
431; AVX512BW-NEXT:    retq
432  %a = load <2 x i64>, ptr %a.ptr, align 64
433  %b = load <2 x i64>, ptr %b.ptr, align 64
434  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
435  %concat = shufflevector <2 x i64> %shuffle, <2 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
436  store <4 x i64> %concat, ptr %dst, align 64
437  ret void
438}
; The concatenated operand is unrelated to the shuffle source, with the
; halves exchanged: %shuffle is %a with its two elements swapped, and the
; <4 x i64> concatenation { %b, %shuffle } is stored to %dst, i.e.
; b[0], b[1], a[1], a[0].
439define void @concat_shuf_of_a_to_b(ptr %a.ptr, ptr %b.ptr, ptr %dst) {
440; SSE-LABEL: concat_shuf_of_a_to_b:
441; SSE:       # %bb.0:
442; SSE-NEXT:    movaps (%rsi), %xmm0
443; SSE-NEXT:    pshufd {{.*#+}} xmm1 = mem[2,3,0,1]
444; SSE-NEXT:    movdqa %xmm1, 16(%rdx)
445; SSE-NEXT:    movaps %xmm0, (%rdx)
446; SSE-NEXT:    retq
447;
448; AVX-LABEL: concat_shuf_of_a_to_b:
449; AVX:       # %bb.0:
450; AVX-NEXT:    vmovaps (%rsi), %xmm0
451; AVX-NEXT:    vpermilps {{.*#+}} xmm1 = mem[2,3,0,1]
452; AVX-NEXT:    vmovaps %xmm1, 16(%rdx)
453; AVX-NEXT:    vmovaps %xmm0, (%rdx)
454; AVX-NEXT:    retq
455;
456; AVX2-LABEL: concat_shuf_of_a_to_b:
457; AVX2:       # %bb.0:
458; AVX2-NEXT:    vmovaps (%rsi), %xmm0
459; AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = mem[2,3,0,1]
460; AVX2-NEXT:    vmovaps %xmm1, 16(%rdx)
461; AVX2-NEXT:    vmovaps %xmm0, (%rdx)
462; AVX2-NEXT:    retq
463;
464; AVX512F-LABEL: concat_shuf_of_a_to_b:
465; AVX512F:       # %bb.0:
466; AVX512F-NEXT:    vmovaps (%rsi), %xmm0
467; AVX512F-NEXT:    vpermilps {{.*#+}} xmm1 = mem[2,3,0,1]
468; AVX512F-NEXT:    vmovaps %xmm1, 16(%rdx)
469; AVX512F-NEXT:    vmovaps %xmm0, (%rdx)
470; AVX512F-NEXT:    retq
471;
472; AVX512BW-LABEL: concat_shuf_of_a_to_b:
473; AVX512BW:       # %bb.0:
474; AVX512BW-NEXT:    vmovaps (%rsi), %xmm0
475; AVX512BW-NEXT:    vpermilps {{.*#+}} xmm1 = mem[2,3,0,1]
476; AVX512BW-NEXT:    vmovaps %xmm1, 16(%rdx)
477; AVX512BW-NEXT:    vmovaps %xmm0, (%rdx)
478; AVX512BW-NEXT:    retq
479  %a = load <2 x i64>, ptr %a.ptr, align 64
480  %b = load <2 x i64>, ptr %b.ptr, align 64
481  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
482  %concat = shufflevector <2 x i64> %b, <2 x i64> %shuffle, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
483  store <4 x i64> %concat, ptr %dst, align 64
484  ret void
485}
486
; Poison in the upper half: %shuffle is %a with its two elements swapped, and
; the <4 x i64> concatenation { %shuffle, poison } is stored to %dst. Every
; checked configuration emits only a 128-bit store of the shuffle at offset 0.
487define void @concat_poison_to_shuf_of_a(ptr %a.ptr, ptr %dst) {
488; SSE-LABEL: concat_poison_to_shuf_of_a:
489; SSE:       # %bb.0:
490; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
491; SSE-NEXT:    movdqa %xmm0, (%rsi)
492; SSE-NEXT:    retq
493;
494; AVX-LABEL: concat_poison_to_shuf_of_a:
495; AVX:       # %bb.0:
496; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,0,1]
497; AVX-NEXT:    vmovaps %xmm0, (%rsi)
498; AVX-NEXT:    retq
499;
500; AVX2-LABEL: concat_poison_to_shuf_of_a:
501; AVX2:       # %bb.0:
502; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,0,1]
503; AVX2-NEXT:    vmovaps %xmm0, (%rsi)
504; AVX2-NEXT:    retq
505;
506; AVX512F-LABEL: concat_poison_to_shuf_of_a:
507; AVX512F:       # %bb.0:
508; AVX512F-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,0,1]
509; AVX512F-NEXT:    vmovaps %xmm0, (%rsi)
510; AVX512F-NEXT:    retq
511;
512; AVX512BW-LABEL: concat_poison_to_shuf_of_a:
513; AVX512BW:       # %bb.0:
514; AVX512BW-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,0,1]
515; AVX512BW-NEXT:    vmovaps %xmm0, (%rsi)
516; AVX512BW-NEXT:    retq
517  %a = load <2 x i64>, ptr %a.ptr, align 64
518  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
519  %concat = shufflevector <2 x i64> %shuffle, <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
520  store <4 x i64> %concat, ptr %dst, align 64
521  ret void
522}
; Poison in the lower half: %shuffle is %a with its two elements swapped, and
; the <4 x i64> concatenation { poison, %shuffle } is stored to %dst. Every
; checked configuration emits only a 128-bit store of the shuffle at offset 16.
; NOTE(review): %b is loaded from %b.ptr but never used by the shuffles; the
; checked assembly contains no load from %rsi.
523define void @concat_shuf_of_a_to_poison(ptr %a.ptr, ptr %b.ptr, ptr %dst) {
524; SSE-LABEL: concat_shuf_of_a_to_poison:
525; SSE:       # %bb.0:
526; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
527; SSE-NEXT:    movdqa %xmm0, 16(%rdx)
528; SSE-NEXT:    retq
529;
530; AVX-LABEL: concat_shuf_of_a_to_poison:
531; AVX:       # %bb.0:
532; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,0,1]
533; AVX-NEXT:    vmovaps %xmm0, 16(%rdx)
534; AVX-NEXT:    retq
535;
536; AVX2-LABEL: concat_shuf_of_a_to_poison:
537; AVX2:       # %bb.0:
538; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,0,1]
539; AVX2-NEXT:    vmovaps %xmm0, 16(%rdx)
540; AVX2-NEXT:    retq
541;
542; AVX512F-LABEL: concat_shuf_of_a_to_poison:
543; AVX512F:       # %bb.0:
544; AVX512F-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,0,1]
545; AVX512F-NEXT:    vmovaps %xmm0, 16(%rdx)
546; AVX512F-NEXT:    retq
547;
548; AVX512BW-LABEL: concat_shuf_of_a_to_poison:
549; AVX512BW:       # %bb.0:
550; AVX512BW-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,0,1]
551; AVX512BW-NEXT:    vmovaps %xmm0, 16(%rdx)
552; AVX512BW-NEXT:    retq
553  %a = load <2 x i64>, ptr %a.ptr, align 64
554  %b = load <2 x i64>, ptr %b.ptr, align 64
555  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
556  %concat = shufflevector <2 x i64> poison, <2 x i64> %shuffle, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
557  store <4 x i64> %concat, ptr %dst, align 64
558  ret void
559}
560
; The shuffle is concatenated with itself: %shuffle is %a with its two
; elements swapped, and the <4 x i64> concatenation { %shuffle, %shuffle } is
; stored to %dst, i.e. the element order a[1], a[0], a[1], a[0].
561define void @concat_shuf_of_a_to_itself(ptr %a.ptr, ptr %dst) {
562; SSE-LABEL: concat_shuf_of_a_to_itself:
563; SSE:       # %bb.0:
564; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[2,3,0,1]
565; SSE-NEXT:    movdqa %xmm0, 16(%rsi)
566; SSE-NEXT:    movdqa %xmm0, (%rsi)
567; SSE-NEXT:    retq
568;
569; AVX-LABEL: concat_shuf_of_a_to_itself:
570; AVX:       # %bb.0:
571; AVX-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
572; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
573; AVX-NEXT:    vmovaps %ymm0, (%rsi)
574; AVX-NEXT:    vzeroupper
575; AVX-NEXT:    retq
576;
577; AVX2-LABEL: concat_shuf_of_a_to_itself:
578; AVX2:       # %bb.0:
579; AVX2-NEXT:    vmovaps (%rdi), %xmm0
580; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,1,0]
581; AVX2-NEXT:    vmovaps %ymm0, (%rsi)
582; AVX2-NEXT:    vzeroupper
583; AVX2-NEXT:    retq
584;
585; AVX512F-LABEL: concat_shuf_of_a_to_itself:
586; AVX512F:       # %bb.0:
587; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
588; AVX512F-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,1,0]
589; AVX512F-NEXT:    vmovaps %ymm0, (%rsi)
590; AVX512F-NEXT:    vzeroupper
591; AVX512F-NEXT:    retq
592;
593; AVX512BW-LABEL: concat_shuf_of_a_to_itself:
594; AVX512BW:       # %bb.0:
595; AVX512BW-NEXT:    vmovaps (%rdi), %xmm0
596; AVX512BW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,1,0]
597; AVX512BW-NEXT:    vmovaps %ymm0, (%rsi)
598; AVX512BW-NEXT:    vzeroupper
599; AVX512BW-NEXT:    retq
600  %a = load <2 x i64>, ptr %a.ptr, align 64
601  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
602  %concat = shufflevector <2 x i64> %shuffle, <2 x i64> %shuffle, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
603  store <4 x i64> %concat, ptr %dst, align 64
604  ret void
605}
606
; Four-way, 512-bit concatenation built in two levels: %concat01 is
; { %shuffle, %a } and %concat23 is { %a, %a }, where %shuffle is %a with its
; two elements swapped. The <8 x i64> result { %shuffle, %a, %a, %a } is
; stored to %dst.
607define void @concat_aaa_to_shuf_of_a(ptr %a.ptr, ptr %dst) {
608; SSE-LABEL: concat_aaa_to_shuf_of_a:
609; SSE:       # %bb.0:
610; SSE-NEXT:    movdqa (%rdi), %xmm0
611; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
612; SSE-NEXT:    movdqa %xmm0, 32(%rsi)
613; SSE-NEXT:    movdqa %xmm0, 48(%rsi)
614; SSE-NEXT:    movdqa %xmm0, 16(%rsi)
615; SSE-NEXT:    movdqa %xmm1, (%rsi)
616; SSE-NEXT:    retq
617;
618; AVX-LABEL: concat_aaa_to_shuf_of_a:
619; AVX:       # %bb.0:
620; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
621; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,0,1]
622; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm1
623; AVX-NEXT:    vmovaps %ymm0, 32(%rsi)
624; AVX-NEXT:    vmovaps %ymm1, (%rsi)
625; AVX-NEXT:    vzeroupper
626; AVX-NEXT:    retq
627;
628; AVX2-LABEL: concat_aaa_to_shuf_of_a:
629; AVX2:       # %bb.0:
630; AVX2-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
631; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[1,0,0,1]
632; AVX2-NEXT:    vmovaps %ymm0, 32(%rsi)
633; AVX2-NEXT:    vmovaps %ymm1, (%rsi)
634; AVX2-NEXT:    vzeroupper
635; AVX2-NEXT:    retq
636;
637; AVX512F-LABEL: concat_aaa_to_shuf_of_a:
638; AVX512F:       # %bb.0:
639; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
640; AVX512F-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
641; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
642; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
643; AVX512F-NEXT:    vmovdqa64 %zmm0, (%rsi)
644; AVX512F-NEXT:    vzeroupper
645; AVX512F-NEXT:    retq
646;
647; AVX512BW-LABEL: concat_aaa_to_shuf_of_a:
648; AVX512BW:       # %bb.0:
649; AVX512BW-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
650; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
651; AVX512BW-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm1
652; AVX512BW-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
653; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rsi)
654; AVX512BW-NEXT:    vzeroupper
655; AVX512BW-NEXT:    retq
656  %a = load <2 x i64>, ptr %a.ptr, align 64
657  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
658  %concat01 = shufflevector <2 x i64> %shuffle, <2 x i64> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
659  %concat23 = shufflevector <2 x i64> %a, <2 x i64> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
660  %concat = shufflevector <4 x i64> %concat01, <4 x i64> %concat23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
661  store <8 x i64> %concat, ptr %dst, align 64
662  ret void
663}
; Four-way, 512-bit concatenation with the shuffle in the last position:
; %concat01 is { %a, %a } and %concat23 is { %a, %shuffle }, where %shuffle is
; %a with its two elements swapped. The <8 x i64> result
; { %a, %a, %a, %shuffle } is stored to %dst.
664define void @concat_shuf_of_a_to_aaa(ptr %a.ptr, ptr %dst) {
665; SSE-LABEL: concat_shuf_of_a_to_aaa:
666; SSE:       # %bb.0:
667; SSE-NEXT:    movdqa (%rdi), %xmm0
668; SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
669; SSE-NEXT:    movdqa %xmm0, 32(%rsi)
670; SSE-NEXT:    movdqa %xmm0, 16(%rsi)
671; SSE-NEXT:    movdqa %xmm0, (%rsi)
672; SSE-NEXT:    movdqa %xmm1, 48(%rsi)
673; SSE-NEXT:    retq
674;
675; AVX-LABEL: concat_shuf_of_a_to_aaa:
676; AVX:       # %bb.0:
677; AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
678; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[2,3,0,1]
679; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
680; AVX-NEXT:    vmovaps %ymm0, (%rsi)
681; AVX-NEXT:    vmovaps %ymm1, 32(%rsi)
682; AVX-NEXT:    vzeroupper
683; AVX-NEXT:    retq
684;
685; AVX2-LABEL: concat_shuf_of_a_to_aaa:
686; AVX2:       # %bb.0:
687; AVX2-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
688; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm0[0,1,1,0]
689; AVX2-NEXT:    vmovaps %ymm0, (%rsi)
690; AVX2-NEXT:    vmovaps %ymm1, 32(%rsi)
691; AVX2-NEXT:    vzeroupper
692; AVX2-NEXT:    retq
693;
694; AVX512F-LABEL: concat_shuf_of_a_to_aaa:
695; AVX512F:       # %bb.0:
696; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
697; AVX512F-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
698; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
699; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
700; AVX512F-NEXT:    vmovdqa64 %zmm0, (%rsi)
701; AVX512F-NEXT:    vzeroupper
702; AVX512F-NEXT:    retq
703;
704; AVX512BW-LABEL: concat_shuf_of_a_to_aaa:
705; AVX512BW:       # %bb.0:
706; AVX512BW-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
707; AVX512BW-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
708; AVX512BW-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm1
709; AVX512BW-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
710; AVX512BW-NEXT:    vmovdqa64 %zmm0, (%rsi)
711; AVX512BW-NEXT:    vzeroupper
712; AVX512BW-NEXT:    retq
713  %a = load <2 x i64>, ptr %a.ptr, align 64
714  %shuffle = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 0>
715  %concat01 = shufflevector <2 x i64> %a, <2 x i64> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
716  %concat23 = shufflevector <2 x i64> %a, <2 x i64> %shuffle, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
717  %concat = shufflevector <4 x i64> %concat01, <4 x i64> %concat23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
718  store <8 x i64> %concat, ptr %dst, align 64
719  ret void
720}
721;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
722; AVX1-ONLY: {{.*}}
723; AVX2-FAST: {{.*}}
724; AVX2-FAST-PERLANE: {{.*}}
725; AVX2-SLOW: {{.*}}
726; AVX512BW-FAST: {{.*}}
727; AVX512BW-SLOW: {{.*}}
728; AVX512F-FAST: {{.*}}
729; AVX512F-SLOW: {{.*}}
730; FALLBACK0: {{.*}}
731; FALLBACK1: {{.*}}
732; FALLBACK2: {{.*}}
733; FALLBACK3: {{.*}}
734; FALLBACK4: {{.*}}
735; FALLBACK5: {{.*}}
736; FALLBACK6: {{.*}}
737; FALLBACK7: {{.*}}
738; FALLBACK8: {{.*}}
739; FALLBACK9: {{.*}}
740