xref: /llvm-project/llvm/test/CodeGen/X86/avx-arith.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
3
; 256-bit double-precision add, register-register form.
; Computes %x + %y on <4 x double> (note the parameters are declared as %y, %x).
; Expected output: a single ymm-wide vaddpd followed by retq.
4define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
5; CHECK-LABEL: addpd256:
6; CHECK:       ## %bb.0: ## %entry
7; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0
8; CHECK-NEXT:    retq
9entry:
10  %add.i = fadd <4 x double> %x, %y
11  ret <4 x double> %add.i
12}
13
; 256-bit double-precision add with a constant vector operand.
; Expected output: the constant is used directly as a RIP-relative memory
; operand (an LCPI constant-pool label) of vaddpd; no separate load appears.
14define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
15; CHECK-LABEL: addpd256fold:
16; CHECK:       ## %bb.0: ## %entry
17; CHECK-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
18; CHECK-NEXT:    retq
19entry:
20  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
21  ret <4 x double> %add.i
22}
23
; 256-bit single-precision add, register-register form.
; Computes %x + %y on <8 x float> (parameters are declared as %y, %x).
; Expected output: a single ymm-wide vaddps followed by retq.
24define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
25; CHECK-LABEL: addps256:
26; CHECK:       ## %bb.0: ## %entry
27; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0
28; CHECK-NEXT:    retq
29entry:
30  %add.i = fadd <8 x float> %x, %y
31  ret <8 x float> %add.i
32}
33
; 256-bit single-precision add with a constant vector operand (the same four
; float values repeated in both halves of the vector).
; Expected output: the constant is used directly as a RIP-relative memory
; operand (an LCPI constant-pool label) of vaddps; no separate load appears.
34define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
35; CHECK-LABEL: addps256fold:
36; CHECK:       ## %bb.0: ## %entry
37; CHECK-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
38; CHECK-NEXT:    retq
39entry:
40  %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
41  ret <8 x float> %add.i
42}
43
; 256-bit double-precision subtract, register-register form.
; Computes %x - %y on <4 x double>; operand order matters here, and the
; parameters are declared as %y, %x.
; Expected output: a single ymm-wide vsubpd followed by retq.
44define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
45; CHECK-LABEL: subpd256:
46; CHECK:       ## %bb.0: ## %entry
47; CHECK-NEXT:    vsubpd %ymm0, %ymm1, %ymm0
48; CHECK-NEXT:    retq
49entry:
50  %sub.i = fsub <4 x double> %x, %y
51  ret <4 x double> %sub.i
52}
53
; 256-bit double-precision subtract whose subtrahend is loaded from memory.
; The load from %x is 32-byte aligned and is the right-hand operand of fsub.
; Expected output: the load is folded into vsubpd as the (%rdi) memory
; operand; no separate vector load appears.
54define <4 x double> @subpd256fold(<4 x double> %y, ptr nocapture %x) nounwind uwtable readonly ssp {
55; CHECK-LABEL: subpd256fold:
56; CHECK:       ## %bb.0: ## %entry
57; CHECK-NEXT:    vsubpd (%rdi), %ymm0, %ymm0
58; CHECK-NEXT:    retq
59entry:
60  %tmp2 = load <4 x double>, ptr %x, align 32
61  %sub.i = fsub <4 x double> %y, %tmp2
62  ret <4 x double> %sub.i
63}
64
; 256-bit single-precision subtract, register-register form.
; Computes %x - %y on <8 x float> (parameters are declared as %y, %x).
; Expected output: a single ymm-wide vsubps followed by retq.
65define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
66; CHECK-LABEL: subps256:
67; CHECK:       ## %bb.0: ## %entry
68; CHECK-NEXT:    vsubps %ymm0, %ymm1, %ymm0
69; CHECK-NEXT:    retq
70entry:
71  %sub.i = fsub <8 x float> %x, %y
72  ret <8 x float> %sub.i
73}
74
; 256-bit single-precision subtract whose subtrahend is loaded from memory.
; The load from %x is 32-byte aligned and is the right-hand operand of fsub.
; Expected output: the load is folded into vsubps as the (%rdi) memory
; operand; no separate vector load appears.
75define <8 x float> @subps256fold(<8 x float> %y, ptr nocapture %x) nounwind uwtable readonly ssp {
76; CHECK-LABEL: subps256fold:
77; CHECK:       ## %bb.0: ## %entry
78; CHECK-NEXT:    vsubps (%rdi), %ymm0, %ymm0
79; CHECK-NEXT:    retq
80entry:
81  %tmp2 = load <8 x float>, ptr %x, align 32
82  %sub.i = fsub <8 x float> %y, %tmp2
83  ret <8 x float> %sub.i
84}
85
; 256-bit double-precision multiply, register-register form.
; Computes %x * %y on <4 x double> (parameters are declared as %y, %x).
; Expected output: a single ymm-wide vmulpd followed by retq.
86define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
87; CHECK-LABEL: mulpd256:
88; CHECK:       ## %bb.0: ## %entry
89; CHECK-NEXT:    vmulpd %ymm0, %ymm1, %ymm0
90; CHECK-NEXT:    retq
91entry:
92  %mul.i = fmul <4 x double> %x, %y
93  ret <4 x double> %mul.i
94}
95
; 256-bit double-precision multiply by a constant vector.
; Expected output: the constant is used directly as a RIP-relative memory
; operand (an LCPI constant-pool label) of vmulpd; no separate load appears.
96define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
97; CHECK-LABEL: mulpd256fold:
98; CHECK:       ## %bb.0: ## %entry
99; CHECK-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
100; CHECK-NEXT:    retq
101entry:
102  %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
103  ret <4 x double> %mul.i
104}
105
; 256-bit single-precision multiply, register-register form.
; Computes %x * %y on <8 x float> (parameters are declared as %y, %x).
; Expected output: a single ymm-wide vmulps followed by retq.
106define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
107; CHECK-LABEL: mulps256:
108; CHECK:       ## %bb.0: ## %entry
109; CHECK-NEXT:    vmulps %ymm0, %ymm1, %ymm0
110; CHECK-NEXT:    retq
111entry:
112  %mul.i = fmul <8 x float> %x, %y
113  ret <8 x float> %mul.i
114}
115
; 256-bit single-precision multiply by a constant vector (the same four float
; values repeated in both halves of the vector).
; Expected output: the constant is used directly as a RIP-relative memory
; operand (an LCPI constant-pool label) of vmulps; no separate load appears.
116define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
117; CHECK-LABEL: mulps256fold:
118; CHECK:       ## %bb.0: ## %entry
119; CHECK-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
120; CHECK-NEXT:    retq
121entry:
122  %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
123  ret <8 x float> %mul.i
124}
125
; 256-bit double-precision divide, register-register form.
; Computes %x / %y on <4 x double>; operand order matters here, and the
; parameters are declared as %y, %x.
; Expected output: a single ymm-wide vdivpd followed by retq.
126define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
127; CHECK-LABEL: divpd256:
128; CHECK:       ## %bb.0: ## %entry
129; CHECK-NEXT:    vdivpd %ymm0, %ymm1, %ymm0
130; CHECK-NEXT:    retq
131entry:
132  %div.i = fdiv <4 x double> %x, %y
133  ret <4 x double> %div.i
134}
135
; 256-bit double-precision divide of %y by a constant vector.
; Expected output: the constant divisor is used directly as a RIP-relative
; memory operand (an LCPI constant-pool label) of vdivpd. The expected output
; keeps a real divide; it is not turned into a multiply by a reciprocal.
136define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
137; CHECK-LABEL: divpd256fold:
138; CHECK:       ## %bb.0: ## %entry
139; CHECK-NEXT:    vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
140; CHECK-NEXT:    retq
141entry:
142  %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
143  ret <4 x double> %div.i
144}
145
; 256-bit single-precision divide, register-register form.
; Computes %x / %y on <8 x float> (parameters are declared as %y, %x).
; Expected output: a single ymm-wide vdivps followed by retq.
146define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
147; CHECK-LABEL: divps256:
148; CHECK:       ## %bb.0: ## %entry
149; CHECK-NEXT:    vdivps %ymm0, %ymm1, %ymm0
150; CHECK-NEXT:    retq
151entry:
152  %div.i = fdiv <8 x float> %x, %y
153  ret <8 x float> %div.i
154}
155
; 256-bit single-precision divide of %y by a constant vector (the same four
; float values repeated in both halves of the vector).
; Expected output: the constant divisor is used directly as a RIP-relative
; memory operand (an LCPI constant-pool label) of vdivps. The expected output
; keeps a real divide; it is not turned into a multiply by a reciprocal.
156define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
157; CHECK-LABEL: divps256fold:
158; CHECK:       ## %bb.0: ## %entry
159; CHECK-NEXT:    vdivps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
160; CHECK-NEXT:    retq
161entry:
162  %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
163  ret <8 x float> %div.i
164}
165
; Scalar float square root written as a tail call to @sqrtf, which is declared
; further down in this file and marked readnone both there and at the call site.
; Expected output: no call is emitted; the body is a single vsqrtss on xmm0
; followed by retq.
166define float @sqrtA(float %a) nounwind uwtable readnone ssp {
167; CHECK-LABEL: sqrtA:
168; CHECK:       ## %bb.0: ## %entry
169; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
170; CHECK-NEXT:    retq
171entry:
172  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
173  ret float %conv1
174}
175
; External double-precision sqrt used by @sqrtB; declared readnone.
176declare double @sqrt(double) readnone
177
; Scalar double square root written as a tail call to @sqrt (declared readnone
; in this file and at the call site).
; Expected output: no call is emitted; the body is a single vsqrtsd on xmm0
; followed by retq.
178define double @sqrtB(double %a) nounwind uwtable readnone ssp {
179; CHECK-LABEL: sqrtB:
180; CHECK:       ## %bb.0: ## %entry
181; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
182; CHECK-NEXT:    retq
183entry:
184  %call = tail call double @sqrt(double %a) nounwind readnone
185  ret double %call
186}
187
; External single-precision sqrt used by @sqrtA; declared readnone.
188declare float @sqrtf(float) readnone
189
190
; 256-bit integer add on <4 x i64>.
; Expected output: the operation is split into two 128-bit halves. The upper
; halves of both inputs are extracted with vextractf128, vpaddq is applied to
; each xmm half, and the result is reassembled with vinsertf128.
; NOTE(review): presumably split because the command line at the top of the
; file enables only +avx (no 256-bit integer arithmetic) - confirm.
191define <4 x i64> @vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
192; CHECK-LABEL: vpaddq:
193; CHECK:       ## %bb.0:
194; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
195; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
196; CHECK-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
197; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
198; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
199; CHECK-NEXT:    retq
200  %x = add <4 x i64> %i, %j
201  ret <4 x i64> %x
202}
203
; 256-bit integer add on <8 x i32>.
; Expected output: upper halves extracted with vextractf128, one xmm-wide
; vpaddd per 128-bit half, then reassembled with vinsertf128.
204define <8 x i32> @vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
205; CHECK-LABEL: vpaddd:
206; CHECK:       ## %bb.0:
207; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
208; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
209; CHECK-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
210; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
211; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
212; CHECK-NEXT:    retq
213  %x = add <8 x i32> %i, %j
214  ret <8 x i32> %x
215}
216
; 256-bit integer add on <16 x i16>.
; Expected output: upper halves extracted with vextractf128, one xmm-wide
; vpaddw per 128-bit half, then reassembled with vinsertf128.
217define <16 x i16> @vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
218; CHECK-LABEL: vpaddw:
219; CHECK:       ## %bb.0:
220; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
221; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
222; CHECK-NEXT:    vpaddw %xmm2, %xmm3, %xmm2
223; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
224; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
225; CHECK-NEXT:    retq
226  %x = add <16 x i16> %i, %j
227  ret <16 x i16> %x
228}
229
; 256-bit integer add on <32 x i8>.
; Expected output: upper halves extracted with vextractf128, one xmm-wide
; vpaddb per 128-bit half, then reassembled with vinsertf128.
230define <32 x i8> @vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
231; CHECK-LABEL: vpaddb:
232; CHECK:       ## %bb.0:
233; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
234; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
235; CHECK-NEXT:    vpaddb %xmm2, %xmm3, %xmm2
236; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
237; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
238; CHECK-NEXT:    retq
239  %x = add <32 x i8> %i, %j
240  ret <32 x i8> %x
241}
242
; 256-bit integer subtract (%i - %j) on <4 x i64>.
; Expected output: upper halves extracted with vextractf128, one xmm-wide
; vpsubq per 128-bit half (with the half of %j as the subtrahend), then
; reassembled with vinsertf128.
243define <4 x i64> @vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
244; CHECK-LABEL: vpsubq:
245; CHECK:       ## %bb.0:
246; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
247; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
248; CHECK-NEXT:    vpsubq %xmm2, %xmm3, %xmm2
249; CHECK-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
250; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
251; CHECK-NEXT:    retq
252  %x = sub <4 x i64> %i, %j
253  ret <4 x i64> %x
254}
255
; 256-bit integer subtract (%i - %j) on <8 x i32>.
; Expected output: upper halves extracted with vextractf128, one xmm-wide
; vpsubd per 128-bit half, then reassembled with vinsertf128.
256define <8 x i32> @vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
257; CHECK-LABEL: vpsubd:
258; CHECK:       ## %bb.0:
259; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
260; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
261; CHECK-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
262; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
263; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
264; CHECK-NEXT:    retq
265  %x = sub <8 x i32> %i, %j
266  ret <8 x i32> %x
267}
268
; 256-bit integer subtract (%i - %j) on <16 x i16>.
; Expected output: upper halves extracted with vextractf128, one xmm-wide
; vpsubw per 128-bit half, then reassembled with vinsertf128.
269define <16 x i16> @vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
270; CHECK-LABEL: vpsubw:
271; CHECK:       ## %bb.0:
272; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
273; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
274; CHECK-NEXT:    vpsubw %xmm2, %xmm3, %xmm2
275; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0
276; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
277; CHECK-NEXT:    retq
278  %x = sub <16 x i16> %i, %j
279  ret <16 x i16> %x
280}
281
; 256-bit integer subtract (%i - %j) on <32 x i8>.
; Expected output: upper halves extracted with vextractf128, one xmm-wide
; vpsubb per 128-bit half, then reassembled with vinsertf128.
282define <32 x i8> @vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
283; CHECK-LABEL: vpsubb:
284; CHECK:       ## %bb.0:
285; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
286; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
287; CHECK-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
288; CHECK-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
289; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
290; CHECK-NEXT:    retq
291  %x = sub <32 x i8> %i, %j
292  ret <32 x i8> %x
293}
294
; 256-bit integer multiply on <8 x i32>.
; Expected output: upper halves extracted with vextractf128, one xmm-wide
; vpmulld per 128-bit half, then reassembled with vinsertf128.
295define <8 x i32> @vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
296; CHECK-LABEL: vpmulld:
297; CHECK:       ## %bb.0:
298; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
299; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
300; CHECK-NEXT:    vpmulld %xmm2, %xmm3, %xmm2
301; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
302; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
303; CHECK-NEXT:    retq
304  %x = mul <8 x i32> %i, %j
305  ret <8 x i32> %x
306}
307
; 256-bit integer multiply on <16 x i16>.
; Expected output: upper halves extracted with vextractf128, one xmm-wide
; vpmullw per 128-bit half, then reassembled with vinsertf128.
308define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
309; CHECK-LABEL: vpmullw:
310; CHECK:       ## %bb.0:
311; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
312; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
313; CHECK-NEXT:    vpmullw %xmm2, %xmm3, %xmm2
314; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0
315; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
316; CHECK-NEXT:    retq
317  %x = mul <16 x i16> %i, %j
318  ret <16 x i16> %x
319}
320
; 256-bit integer multiply on <4 x i64>.
; Expected output: no single 64-bit element multiply is used. The vector is
; split into 128-bit halves (vextractf128), and each half is computed with the
; same eight-instruction pattern:
;   - two cross products from vpmuludq with one operand shifted right by 32
;     (vpsrlq $32), summed with vpaddq and shifted left by 32 (vpsllq $32);
;   - one vpmuludq of the unshifted operands;
;   - a final vpaddq combining the two.
; The upper half is computed first (result in xmm2), then the lower half
; (result in xmm0), and the two are joined with vinsertf128.
321define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
322; CHECK-LABEL: mul_v4i64:
323; CHECK:       ## %bb.0:
324; CHECK-NEXT:    vextractf128 $1, %ymm1, %xmm2
325; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm3
326; CHECK-NEXT:    vpsrlq $32, %xmm3, %xmm4
327; CHECK-NEXT:    vpmuludq %xmm2, %xmm4, %xmm4
328; CHECK-NEXT:    vpsrlq $32, %xmm2, %xmm5
329; CHECK-NEXT:    vpmuludq %xmm5, %xmm3, %xmm5
330; CHECK-NEXT:    vpaddq %xmm4, %xmm5, %xmm4
331; CHECK-NEXT:    vpsllq $32, %xmm4, %xmm4
332; CHECK-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
333; CHECK-NEXT:    vpaddq %xmm4, %xmm2, %xmm2
334; CHECK-NEXT:    vpsrlq $32, %xmm0, %xmm3
335; CHECK-NEXT:    vpmuludq %xmm1, %xmm3, %xmm3
336; CHECK-NEXT:    vpsrlq $32, %xmm1, %xmm4
337; CHECK-NEXT:    vpmuludq %xmm4, %xmm0, %xmm4
338; CHECK-NEXT:    vpaddq %xmm3, %xmm4, %xmm3
339; CHECK-NEXT:    vpsllq $32, %xmm3, %xmm3
340; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
341; CHECK-NEXT:    vpaddq %xmm3, %xmm0, %xmm0
342; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
343; CHECK-NEXT:    retq
344  %x = mul <4 x i64> %i, %j
345  ret <4 x i64> %x
346}
347
; X86 SSE scalar square-root intrinsic (takes and returns <4 x float>);
; called by @int_sqrt_ss.
348declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
349
; Scalar sqrt intrinsic applied to a float loaded from memory.
; The float is loaded from an undef pointer in address space 1, inserted into
; lane 0 of an undef <4 x float>, and passed to @llvm.x86.sse.sqrt.ss.
; Expected output: the load stays a separate vmovss (the annotation shows the
; upper three lanes zeroed), followed by register-form vsqrtss; the load is
; not folded into the sqrt instruction.
350define <4 x float> @int_sqrt_ss() {
351; CHECK-LABEL: int_sqrt_ss:
352; CHECK:       ## %bb.0:
353; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
354; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
355; CHECK-NEXT:    retq
356 %x0 = load float, ptr addrspace(1) undef, align 8
357 %x1 = insertelement <4 x float> undef, float %x0, i32 0
358 %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
359 ret <4 x float> %x2
360}
361
; Packed <2 x double> sqrt whose input comes from a scalar double load.
; The function is marked optsize. The loaded double is inserted into lane 0 of
; an undef <2 x double> before calling @llvm.sqrt.v2f64.
; Expected output: the scalar load stays a separate vmovsd (upper lane zeroed
; per the annotation), followed by register-form vsqrtpd; the 8-byte load is
; not folded into the 16-byte packed sqrt.
362define <2 x double> @vector_sqrt_scalar_load(ptr %a0) optsize {
363; CHECK-LABEL: vector_sqrt_scalar_load:
364; CHECK:       ## %bb.0:
365; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
366; CHECK-NEXT:    vsqrtpd %xmm0, %xmm0
367; CHECK-NEXT:    retq
368  %a1 = load double, ptr %a0
369  %a2 = insertelement <2 x double> undef, double %a1, i32 0
370  %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a2) ; <<2 x double>> [#uses=1]
371  ret <2 x double> %res
372}
; Generic vector sqrt intrinsic on <2 x double>; called by
; @vector_sqrt_scalar_load.
373declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone
374