xref: /llvm-project/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd.ll (revision c3f01f13b10d708b9b7ff45a6ccc2f0c3462b3af)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake  | FileCheck %s --check-prefixes=CHECK,CHECK-AVX2,CHECK-SKL
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3  | FileCheck %s --check-prefixes=CHECK,CHECK-AVX2,CHECK-V3
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server  | FileCheck %s --check-prefixes=CHECK,CHECK-ICX,CHECK-ICX-NO-BYPASS-DELAY
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server -mattr=-no-bypass-delay-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-ICX,CHECK-ICX-BYPASS-DELAY
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -mattr=+no-bypass-delay-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-SNB,CHECK-SNB-NO-BYPASS-DELAY
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -mattr=-no-bypass-delay-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-SNB,CHECK-SNB-BYPASS-DELAY
8
; 256-bit register/register case. The mask <0,1,8,9,4,5,12,13> picks floats 0-1
; and 4-5 from %a and from %b (mask indices 8 and up select from %b), i.e. it
; interleaves the low 64-bit half of each 128-bit lane of the two inputs.
; Instruction pinned by the assertions below, per llc invocation:
;   skylake, x86-64-v3, sandybridge (both settings) -> vunpcklpd
;   icelake-server (default)                        -> vpunpcklqdq
;   icelake-server with -no-bypass-delay-shuffle    -> vshufpd
; NOTE(review): the tuning attribute presumably states that shuffles crossing
; the int/fp domain carry no bypass delay - confirm against the X86 target
; feature definitions.
9define <8 x float> @transform_VUNPCKLPDYrr(<8 x float> %a, <8 x float> %b) nounwind {
10; CHECK-AVX2-LABEL: transform_VUNPCKLPDYrr:
11; CHECK-AVX2:       # %bb.0:
12; CHECK-AVX2-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
13; CHECK-AVX2-NEXT:    retq
14;
15; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDYrr:
16; CHECK-ICX-NO-BYPASS-DELAY:       # %bb.0:
17; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
18; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    retq
19;
20; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKLPDYrr:
21; CHECK-ICX-BYPASS-DELAY:       # %bb.0:
22; CHECK-ICX-BYPASS-DELAY-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
23; CHECK-ICX-BYPASS-DELAY-NEXT:    retq
24;
25; CHECK-SNB-LABEL: transform_VUNPCKLPDYrr:
26; CHECK-SNB:       # %bb.0:
27; CHECK-SNB-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
28; CHECK-SNB-NEXT:    retq
29  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
30  ret <8 x float> %shufp
31}
32
; 256-bit register/register case. The mask <2,3,10,11,6,7,14,15> picks floats
; 2-3 and 6-7 from %a and from %b (mask indices 8 and up select from %b), i.e.
; it interleaves the high 64-bit half of each 128-bit lane of the two inputs.
; Instruction pinned by the assertions below, per llc invocation:
;   skylake, x86-64-v3, sandybridge (both settings) -> vunpckhpd
;   icelake-server (default)                        -> vpunpckhqdq
;   icelake-server with -no-bypass-delay-shuffle    -> vshufpd
33define <8 x float> @transform_VUNPCKHPDYrr(<8 x float> %a, <8 x float> %b) nounwind {
34; CHECK-AVX2-LABEL: transform_VUNPCKHPDYrr:
35; CHECK-AVX2:       # %bb.0:
36; CHECK-AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
37; CHECK-AVX2-NEXT:    retq
38;
39; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDYrr:
40; CHECK-ICX-NO-BYPASS-DELAY:       # %bb.0:
41; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
42; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    retq
43;
44; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKHPDYrr:
45; CHECK-ICX-BYPASS-DELAY:       # %bb.0:
46; CHECK-ICX-BYPASS-DELAY-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
47; CHECK-ICX-BYPASS-DELAY-NEXT:    retq
48;
49; CHECK-SNB-LABEL: transform_VUNPCKHPDYrr:
50; CHECK-SNB:       # %bb.0:
51; CHECK-SNB-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
52; CHECK-SNB-NEXT:    retq
53  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
54  ret <8 x float> %shufp
55}
56
; 128-bit register/register case. The mask <0,1,4,5> concatenates floats 0-1 of
; %a with floats 0-1 of %b (mask indices 4 and up select from %b), i.e. the low
; 64-bit halves of the two inputs.
; Instruction pinned by the assertions below, per llc invocation:
;   skylake, x86-64-v3                            -> vmovlhps
;   icelake-server (default)                      -> vpunpcklqdq
;   icelake-server with -no-bypass-delay-shuffle  -> vshufpd
;   sandybridge with +no-bypass-delay-shuffle     -> vpunpcklqdq
;   sandybridge with -no-bypass-delay-shuffle     -> vmovlhps
; Unlike the 256-bit variants, the two sandybridge settings differ here, so
; they are asserted separately.
57define <4 x float> @transform_VUNPCKLPDrr(<4 x float> %a, <4 x float> %b) nounwind {
58; CHECK-AVX2-LABEL: transform_VUNPCKLPDrr:
59; CHECK-AVX2:       # %bb.0:
60; CHECK-AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
61; CHECK-AVX2-NEXT:    retq
62;
63; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrr:
64; CHECK-ICX-NO-BYPASS-DELAY:       # %bb.0:
65; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
66; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    retq
67;
68; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrr:
69; CHECK-ICX-BYPASS-DELAY:       # %bb.0:
70; CHECK-ICX-BYPASS-DELAY-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
71; CHECK-ICX-BYPASS-DELAY-NEXT:    retq
72;
73; CHECK-SNB-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrr:
74; CHECK-SNB-NO-BYPASS-DELAY:       # %bb.0:
75; CHECK-SNB-NO-BYPASS-DELAY-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
76; CHECK-SNB-NO-BYPASS-DELAY-NEXT:    retq
77;
78; CHECK-SNB-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrr:
79; CHECK-SNB-BYPASS-DELAY:       # %bb.0:
80; CHECK-SNB-BYPASS-DELAY-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
81; CHECK-SNB-BYPASS-DELAY-NEXT:    retq
82  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
83  ret <4 x float> %shufp
84}
85
; 128-bit register/register case. The mask <2,3,6,7> concatenates floats 2-3 of
; %a with floats 2-3 of %b (mask indices 4 and up select from %b), i.e. the
; high 64-bit halves of the two inputs.
; Instruction pinned by the assertions below, per llc invocation:
;   skylake, x86-64-v3                            -> vunpckhpd
;   icelake-server (default)                      -> vpunpckhqdq
;   icelake-server with -no-bypass-delay-shuffle  -> vshufpd
;   sandybridge with +no-bypass-delay-shuffle     -> vpunpckhqdq
;   sandybridge with -no-bypass-delay-shuffle     -> vunpckhpd
86define <4 x float> @transform_VUNPCKHPDrr(<4 x float> %a, <4 x float> %b) nounwind {
87; CHECK-AVX2-LABEL: transform_VUNPCKHPDrr:
88; CHECK-AVX2:       # %bb.0:
89; CHECK-AVX2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
90; CHECK-AVX2-NEXT:    retq
91;
92; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrr:
93; CHECK-ICX-NO-BYPASS-DELAY:       # %bb.0:
94; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
95; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    retq
96;
97; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrr:
98; CHECK-ICX-BYPASS-DELAY:       # %bb.0:
99; CHECK-ICX-BYPASS-DELAY-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
100; CHECK-ICX-BYPASS-DELAY-NEXT:    retq
101;
102; CHECK-SNB-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrr:
103; CHECK-SNB-NO-BYPASS-DELAY:       # %bb.0:
104; CHECK-SNB-NO-BYPASS-DELAY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
105; CHECK-SNB-NO-BYPASS-DELAY-NEXT:    retq
106;
107; CHECK-SNB-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrr:
108; CHECK-SNB-BYPASS-DELAY:       # %bb.0:
109; CHECK-SNB-BYPASS-DELAY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
110; CHECK-SNB-BYPASS-DELAY-NEXT:    retq
111  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
112  ret <4 x float> %shufp
113}
114
; 256-bit register/memory case: the second shuffle input is loaded from %pb.
; The mask <0,1,8,9,4,5,12,13> interleaves the low 64-bit half of each 128-bit
; lane of %a and of the loaded vector (mask indices 8 and up select from %b).
; Instruction pinned by the assertions below (memory operand printed as "mem"),
; per llc invocation:
;   skylake, x86-64-v3, sandybridge (both settings) -> vunpcklpd
;   icelake-server (default)                        -> vpunpcklqdq
;   icelake-server with -no-bypass-delay-shuffle    -> vunpcklpd
115define <8 x float> @transform_VUNPCKLPDYrm(<8 x float> %a, ptr %pb) nounwind {
116; CHECK-AVX2-LABEL: transform_VUNPCKLPDYrm:
117; CHECK-AVX2:       # %bb.0:
118; CHECK-AVX2-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
119; CHECK-AVX2-NEXT:    retq
120;
121; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDYrm:
122; CHECK-ICX-NO-BYPASS-DELAY:       # %bb.0:
123; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
124; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    retq
125;
126; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKLPDYrm:
127; CHECK-ICX-BYPASS-DELAY:       # %bb.0:
128; CHECK-ICX-BYPASS-DELAY-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
129; CHECK-ICX-BYPASS-DELAY-NEXT:    retq
130;
131; CHECK-SNB-LABEL: transform_VUNPCKLPDYrm:
132; CHECK-SNB:       # %bb.0:
133; CHECK-SNB-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
134; CHECK-SNB-NEXT:    retq
135  %b = load <8 x float>, ptr %pb
136  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
137  ret <8 x float> %shufp
138}
139
; 256-bit register/memory case: the second shuffle input is loaded from %pb.
; The mask <2,3,10,11,6,7,14,15> interleaves the high 64-bit half of each
; 128-bit lane of %a and of the loaded vector (mask indices 8 and up select
; from %b).
; Instruction pinned by the assertions below (memory operand printed as "mem"),
; per llc invocation:
;   skylake, x86-64-v3, sandybridge (both settings) -> vunpckhpd
;   icelake-server (default)                        -> vpunpckhqdq
;   icelake-server with -no-bypass-delay-shuffle    -> vunpckhpd
140define <8 x float> @transform_VUNPCKHPDYrm(<8 x float> %a, ptr %pb) nounwind {
141; CHECK-AVX2-LABEL: transform_VUNPCKHPDYrm:
142; CHECK-AVX2:       # %bb.0:
143; CHECK-AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
144; CHECK-AVX2-NEXT:    retq
145;
146; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDYrm:
147; CHECK-ICX-NO-BYPASS-DELAY:       # %bb.0:
148; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
149; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    retq
150;
151; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKHPDYrm:
152; CHECK-ICX-BYPASS-DELAY:       # %bb.0:
153; CHECK-ICX-BYPASS-DELAY-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
154; CHECK-ICX-BYPASS-DELAY-NEXT:    retq
155;
156; CHECK-SNB-LABEL: transform_VUNPCKHPDYrm:
157; CHECK-SNB:       # %bb.0:
158; CHECK-SNB-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
159; CHECK-SNB-NEXT:    retq
160  %b = load <8 x float>, ptr %pb
161  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
162  ret <8 x float> %shufp
163}
164
; 128-bit register/memory case: the second shuffle input is loaded from %pb.
; The mask <0,1,4,5> concatenates floats 0-1 of %a with floats 0-1 of the
; loaded vector (mask indices 4 and up select from %b).
; Instruction pinned by the assertions below (memory operand printed as "mem"),
; per llc invocation:
;   skylake, x86-64-v3                            -> vunpcklpd
;   icelake-server (default)                      -> vpunpcklqdq
;   icelake-server with -no-bypass-delay-shuffle  -> vunpcklpd
;   sandybridge with +no-bypass-delay-shuffle     -> vpunpcklqdq
;   sandybridge with -no-bypass-delay-shuffle     -> vunpcklpd
165define <4 x float> @transform_VUNPCKLPDrm(<4 x float> %a, ptr %pb) nounwind {
166; CHECK-AVX2-LABEL: transform_VUNPCKLPDrm:
167; CHECK-AVX2:       # %bb.0:
168; CHECK-AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
169; CHECK-AVX2-NEXT:    retq
170;
171; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrm:
172; CHECK-ICX-NO-BYPASS-DELAY:       # %bb.0:
173; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],mem[0]
174; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    retq
175;
176; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrm:
177; CHECK-ICX-BYPASS-DELAY:       # %bb.0:
178; CHECK-ICX-BYPASS-DELAY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
179; CHECK-ICX-BYPASS-DELAY-NEXT:    retq
180;
181; CHECK-SNB-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrm:
182; CHECK-SNB-NO-BYPASS-DELAY:       # %bb.0:
183; CHECK-SNB-NO-BYPASS-DELAY-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],mem[0]
184; CHECK-SNB-NO-BYPASS-DELAY-NEXT:    retq
185;
186; CHECK-SNB-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrm:
187; CHECK-SNB-BYPASS-DELAY:       # %bb.0:
188; CHECK-SNB-BYPASS-DELAY-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
189; CHECK-SNB-BYPASS-DELAY-NEXT:    retq
190  %b = load <4 x float>, ptr %pb
191  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
192  ret <4 x float> %shufp
193}
194
; 128-bit register/memory case: the second shuffle input is loaded from %pb.
; The mask <2,3,6,7> concatenates floats 2-3 of %a with floats 2-3 of the
; loaded vector (mask indices 4 and up select from %b).
; Instruction pinned by the assertions below (memory operand printed as "mem"),
; per llc invocation:
;   skylake, x86-64-v3                            -> vunpckhpd
;   icelake-server (default)                      -> vpunpckhqdq
;   icelake-server with -no-bypass-delay-shuffle  -> vunpckhpd
;   sandybridge with +no-bypass-delay-shuffle     -> vpunpckhqdq
;   sandybridge with -no-bypass-delay-shuffle     -> vunpckhpd
195define <4 x float> @transform_VUNPCKHPDrm(<4 x float> %a, ptr %pb) nounwind {
196; CHECK-AVX2-LABEL: transform_VUNPCKHPDrm:
197; CHECK-AVX2:       # %bb.0:
198; CHECK-AVX2-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
199; CHECK-AVX2-NEXT:    retq
200;
201; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrm:
202; CHECK-ICX-NO-BYPASS-DELAY:       # %bb.0:
203; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],mem[1]
204; CHECK-ICX-NO-BYPASS-DELAY-NEXT:    retq
205;
206; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrm:
207; CHECK-ICX-BYPASS-DELAY:       # %bb.0:
208; CHECK-ICX-BYPASS-DELAY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
209; CHECK-ICX-BYPASS-DELAY-NEXT:    retq
210;
211; CHECK-SNB-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrm:
212; CHECK-SNB-NO-BYPASS-DELAY:       # %bb.0:
213; CHECK-SNB-NO-BYPASS-DELAY-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],mem[1]
214; CHECK-SNB-NO-BYPASS-DELAY-NEXT:    retq
215;
216; CHECK-SNB-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrm:
217; CHECK-SNB-BYPASS-DELAY:       # %bb.0:
218; CHECK-SNB-BYPASS-DELAY-NEXT:    vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
219; CHECK-SNB-BYPASS-DELAY-NEXT:    retq
220  %b = load <4 x float>, ptr %pb
221  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
222  ret <4 x float> %shufp
223}
224;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
225; CHECK: {{.*}}
226; CHECK-ICX: {{.*}}
227; CHECK-SKL: {{.*}}
228; CHECK-V3: {{.*}}
229