xref: /llvm-project/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll (revision e9f9467da063875bd684e46660e2ff36ba4f55e2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL
3;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX
4
; hadd_16: integer reduction over the first four lanes of a <16 x i32>.
; The first shuffle moves lanes 2 and 3 into lanes 0 and 1 (all other lanes
; undef) and adds them to the input; the second shuffle moves lane 1 of that
; sum into lane 0 and adds again. Lane 0 of the result, which is returned,
; is therefore (%x225[0] + %x225[2]) + (%x225[1] + %x225[3]).
; Both runs expect the work to be done on xmm registers with vpshufd/vpaddd.
; The second step is expected as vpshufd [1,1,1,1] for -mcpu=knl and as
; vpsrlq $32 for -mcpu=skx; the skx expectations also include vzeroupper
; before the return.
5define i32 @hadd_16(<16 x i32> %x225) {
6; KNL-LABEL: hadd_16:
7; KNL:       # %bb.0:
8; KNL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
9; KNL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
10; KNL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
11; KNL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
12; KNL-NEXT:    vmovd %xmm0, %eax
13; KNL-NEXT:    retq
14;
15; SKX-LABEL: hadd_16:
16; SKX:       # %bb.0:
17; SKX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
18; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
19; SKX-NEXT:    vpsrlq $32, %xmm0, %xmm1
20; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
21; SKX-NEXT:    vmovd %xmm0, %eax
22; SKX-NEXT:    vzeroupper
23; SKX-NEXT:    retq
24  %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
25  %x227 = add <16 x i32> %x225, %x226
26  %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
27  %x229 = add <16 x i32> %x227, %x228
28  %x230 = extractelement <16 x i32> %x229, i32 0
29  ret i32 %x230
30}
31
; hsub_16: same shape as the integer add reduction, but the final step is a
; subtraction. Note that the first step is still an `add` (lanes 2,3 folded
; onto lanes 0,1); only the second step uses `sub`. The returned lane 0 is
; therefore (%x225[0] + %x225[2]) - (%x225[1] + %x225[3]).
; The expectations mirror that: vpaddd for the first step and vpsubd for the
; second, on xmm registers. The second shuffle is expected as vpshufd
; [1,1,1,1] for -mcpu=knl and as vpsrlq $32 for -mcpu=skx; the skx
; expectations also include vzeroupper before the return.
32define i32 @hsub_16(<16 x i32> %x225) {
33; KNL-LABEL: hsub_16:
34; KNL:       # %bb.0:
35; KNL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
36; KNL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
37; KNL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
38; KNL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
39; KNL-NEXT:    vmovd %xmm0, %eax
40; KNL-NEXT:    retq
41;
42; SKX-LABEL: hsub_16:
43; SKX:       # %bb.0:
44; SKX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
45; SKX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
46; SKX-NEXT:    vpsrlq $32, %xmm0, %xmm1
47; SKX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
48; SKX-NEXT:    vmovd %xmm0, %eax
49; SKX-NEXT:    vzeroupper
50; SKX-NEXT:    retq
51  %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
52  %x227 = add <16 x i32> %x225, %x226
53  %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
54  %x229 = sub <16 x i32> %x227, %x228
55  %x230 = extractelement <16 x i32> %x229, i32 0
56  ret i32 %x230
57}
58
; fhadd_16: floating-point counterpart of the integer add reduction.
; Lanes 2 and 3 of the <16 x float> input are shuffled onto lanes 0 and 1 and
; added with `fadd`; lane 1 of that sum is then shuffled onto lane 0 and added
; again. The returned lane 0 is (%x225[0] + %x225[2]) + (%x225[1] + %x225[3]).
; Both runs expect vshufpd + vaddps for the first step and vmovshdup + vaddss
; (a scalar add) for the second, all on xmm registers. The skx expectations
; additionally include vzeroupper before the return.
59define float @fhadd_16(<16 x float> %x225) {
60; KNL-LABEL: fhadd_16:
61; KNL:       # %bb.0:
62; KNL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
63; KNL-NEXT:    vaddps %xmm1, %xmm0, %xmm0
64; KNL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
65; KNL-NEXT:    vaddss %xmm1, %xmm0, %xmm0
66; KNL-NEXT:    retq
67;
68; SKX-LABEL: fhadd_16:
69; SKX:       # %bb.0:
70; SKX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
71; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
72; SKX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
73; SKX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
74; SKX-NEXT:    vzeroupper
75; SKX-NEXT:    retq
76  %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
77  %x227 = fadd <16 x float> %x225, %x226
78  %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
79  %x229 = fadd <16 x float> %x227, %x228
80  %x230 = extractelement <16 x float> %x229, i32 0
81  ret float %x230
82}
83
; fhsub_16: floating-point variant whose final step is a subtraction.
; The first step is still an `fadd` (lanes 2,3 folded onto lanes 0,1); the
; second step is an `fsub` of lane 1 of that sum from lane 0. The returned
; lane 0 is (%x225[0] + %x225[2]) - (%x225[1] + %x225[3]).
; Both runs expect vshufpd + vaddps followed by vmovshdup + vsubss on xmm
; registers. The skx expectations additionally include vzeroupper before the
; return.
84define float @fhsub_16(<16 x float> %x225) {
85; KNL-LABEL: fhsub_16:
86; KNL:       # %bb.0:
87; KNL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
88; KNL-NEXT:    vaddps %xmm1, %xmm0, %xmm0
89; KNL-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
90; KNL-NEXT:    vsubss %xmm1, %xmm0, %xmm0
91; KNL-NEXT:    retq
92;
93; SKX-LABEL: fhsub_16:
94; SKX:       # %bb.0:
95; SKX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
96; SKX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
97; SKX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
98; SKX-NEXT:    vsubss %xmm1, %xmm0, %xmm0
99; SKX-NEXT:    vzeroupper
100; SKX-NEXT:    retq
101  %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
102  %x227 = fadd <16 x float> %x225, %x226
103  %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
104  %x229 = fsub <16 x float> %x227, %x228
105  %x230 = extractelement <16 x float> %x229, i32 0
106  ret float %x230
107}
108
; hadd_16_3: pairwise integer add of two <16 x i32> inputs where only the low
; eight result lanes are defined.
; %x226 gathers the even-indexed elements and %x228 the odd-indexed elements,
; alternating between the two inputs in groups of two (mask indices 16 and up
; select from %x227). The upper eight lanes of both masks are undef.
; Both runs share one set of expectations: a single 256-bit vphaddd on ymm
; registers, even though the function's return type is <16 x i32>.
109define <16 x i32> @hadd_16_3(<16 x i32> %x225, <16 x i32> %x227) {
110; CHECK-LABEL: hadd_16_3:
111; CHECK:       # %bb.0:
112; CHECK-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
113; CHECK-NEXT:    retq
114  %x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
115, i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
116  %x228 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
117, i32 5 , i32 7, i32 21,   i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef,
118 i32 undef, i32 undef>
119  %x229 = add <16 x i32> %x226, %x228
120  ret <16 x i32> %x229
121}
122
; fhadd_16_3: single-precision counterpart of the pairwise integer add.
; The two shuffles gather even-indexed and odd-indexed elements from both
; <16 x float> inputs (mask indices 16 and up select from %x227), leaving the
; upper eight lanes undef, and the results are combined with `fadd`.
; Both runs share one set of expectations: a single 256-bit vhaddps on ymm
; registers.
123define <16 x float> @fhadd_16_3(<16 x float> %x225, <16 x float> %x227) {
124; CHECK-LABEL: fhadd_16_3:
125; CHECK:       # %bb.0:
126; CHECK-NEXT:    vhaddps %ymm1, %ymm0, %ymm0
127; CHECK-NEXT:    retq
128  %x226 = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
129, i32 4, i32 6, i32 20, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
130  %x228 = shufflevector <16 x float> %x225, <16 x float> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
131, i32 5 , i32 7, i32 21,   i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
132  %x229 = fadd <16 x float> %x226, %x228
133  ret <16 x float> %x229
134}
135
; fhadd_16_4: double-precision pairwise add of two <8 x double> inputs where
; only the low four result lanes are defined.
; %x226 interleaves elements 0 and 2 of each input (mask indices 8 and up
; select from %x227) and %x228 interleaves elements 1 and 3; the upper four
; lanes of both masks are undef. The two are combined with `fadd`.
; Both runs share one set of expectations: a single 256-bit vhaddpd on ymm
; registers.
136define <8 x double> @fhadd_16_4(<8 x double> %x225, <8 x double> %x227) {
137; CHECK-LABEL: fhadd_16_4:
138; CHECK:       # %bb.0:
139; CHECK-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
140; CHECK-NEXT:    retq
141  %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 undef, i32 undef, i32 undef, i32 undef>
142  %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 undef ,i32 undef, i32 undef, i32 undef>
143  %x229 = fadd <8 x double> %x226, %x228
144  ret <8 x double> %x229
145}
146
; fadd_noundef_low: pairwise double add with fully specified masks (no undef
; lanes), of which only the low half is used.
; The shuffles interleave the even-indexed and the odd-indexed elements of
; the two <8 x double> inputs across all eight lanes; after the `fadd`, a
; final shuffle keeps lanes 0..3 as the <4 x double> result.
; Both runs share one set of expectations: a single 256-bit vhaddpd on ymm
; registers.
147define <4 x double> @fadd_noundef_low(<8 x double> %x225, <8 x double> %x227) {
148; CHECK-LABEL: fadd_noundef_low:
149; CHECK:       # %bb.0:
150; CHECK-NEXT:    vhaddpd %ymm1, %ymm0, %ymm0
151; CHECK-NEXT:    retq
152  %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
153  %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
154  %x229 = fadd <8 x double> %x226, %x228
155  %x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
156  ret <4 x double> %x230
157}
158
; fadd_noundef_high: same IR as the low-half variant except that the final
; shuffle keeps lanes 4..7 of the sum instead of lanes 0..3.
; The expectations for this case contain no horizontal-add instruction:
; they are two 512-bit unpacks (vunpcklpd / vunpckhpd), an extraction of the
; upper 256 bits of each (vextractf64x4 $1), and a plain vaddpd on ymm
; registers. Both runs share this one set of expectations.
159define <4 x double> @fadd_noundef_high(<8 x double> %x225, <8 x double> %x227) {
160; CHECK-LABEL: fadd_noundef_high:
161; CHECK:       # %bb.0:
162; CHECK-NEXT:    vunpcklpd {{.*#+}} zmm2 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
163; CHECK-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
164; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
165; CHECK-NEXT:    vextractf64x4 $1, %zmm2, %ymm1
166; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
167; CHECK-NEXT:    retq
168  %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
169  %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
170  %x229 = fadd <8 x double> %x226, %x228
171  %x230 = shufflevector <8 x double> %x229, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
172  ret <4 x double> %x230
173}
174
175
; hadd_16_3_sv: pairwise integer add with fully specified 16-lane masks,
; followed by a shuffle that keeps only lanes 0..7 as the <8 x i32> result.
; %x226 gathers even-indexed and %x228 odd-indexed elements, alternating
; between the two inputs in groups of two (mask indices 16 and up select from
; %x227); no mask lane is undef.
; Both runs share one set of expectations: a single 256-bit vphaddd on ymm
; registers.
176define <8 x i32> @hadd_16_3_sv(<16 x i32> %x225, <16 x i32> %x227) {
177; CHECK-LABEL: hadd_16_3_sv:
178; CHECK:       # %bb.0:
179; CHECK-NEXT:    vphaddd %ymm1, %ymm0, %ymm0
180; CHECK-NEXT:    retq
181  %x226 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 0, i32 2, i32 16, i32 18
182, i32 4, i32 6, i32 20, i32 22, i32 8, i32 10, i32 24, i32 26, i32 12, i32 14, i32 28, i32 30>
183  %x228 = shufflevector <16 x i32> %x225, <16 x i32> %x227, <16 x i32> <i32 1, i32 3, i32 17, i32 19
184, i32 5 , i32 7, i32 21,   i32 23, i32 9, i32 11, i32 25, i32 27, i32 13, i32 15,
185 i32 29, i32 31>
186  %x229 = add <16 x i32> %x226, %x228
187  %x230 = shufflevector <16 x i32> %x229, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4 ,i32 5, i32 6, i32 7>
188  ret <8 x i32> %x230
189}
190
191
; fadd_noundef_eel: pairwise double add with fully specified masks where only
; element 0 of the sum is extracted and returned.
; Lane 0 of %x226 is %x225[0] and lane 0 of %x228 is %x225[1], so the
; returned value is %x225[0] + %x225[1]; the second argument does not
; contribute to that lane.
; Both runs expect a vshufpd that swaps the two doubles of xmm0 followed by a
; scalar vaddsd. The skx expectations additionally include vzeroupper before
; the return.
192define double @fadd_noundef_eel(<8 x double> %x225, <8 x double> %x227) {
193; KNL-LABEL: fadd_noundef_eel:
194; KNL:       # %bb.0:
195; KNL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
196; KNL-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
197; KNL-NEXT:    retq
198;
199; SKX-LABEL: fadd_noundef_eel:
200; SKX:       # %bb.0:
201; SKX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
202; SKX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
203; SKX-NEXT:    vzeroupper
204; SKX-NEXT:    retq
205  %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
206  %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
207  %x229 = fadd <8 x double> %x226, %x228
208  %x230 = extractelement <8 x double> %x229, i32 0
209  ret double %x230
210}
211
212
213
; fsub_noundef_ee: pairwise double subtract with fully specified masks where
; only element 5 of the difference is extracted and returned.
; Lane 5 of %x226 is mask index 12 (%x227[4]) and lane 5 of %x228 is mask
; index 13 (%x227[5]), so the returned value is %x227[4] - %x227[5]; the
; first argument does not contribute to that lane.
; Both runs expect a vextractf32x4 $2 from zmm1 (the second argument),
; then a vshufpd swap and a scalar vsubsd. The skx expectations additionally
; include vzeroupper before the return.
214define double @fsub_noundef_ee (<8 x double> %x225, <8 x double> %x227) {
215; KNL-LABEL: fsub_noundef_ee:
216; KNL:       # %bb.0:
217; KNL-NEXT:    vextractf32x4 $2, %zmm1, %xmm0
218; KNL-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
219; KNL-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
220; KNL-NEXT:    retq
221;
222; SKX-LABEL: fsub_noundef_ee:
223; SKX:       # %bb.0:
224; SKX-NEXT:    vextractf32x4 $2, %zmm1, %xmm0
225; SKX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
226; SKX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
227; SKX-NEXT:    vzeroupper
228; SKX-NEXT:    retq
229  %x226 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
230  %x228 = shufflevector <8 x double> %x225, <8 x double> %x227, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5 ,i32 13, i32 7, i32 15>
231  %x229 = fsub <8 x double> %x226, %x228
232  %x230 = extractelement <8 x double> %x229, i32 5
233  ret double %x230
234}
235
236