xref: /llvm-project/llvm/test/CodeGen/AArch64/faddp.ll (revision db158c7c830807caeeb0691739c41f1d522029e9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc --mtriple aarch64 -mattr=+fullfp16 < %s | FileCheck %s
3
; Pairwise reduction of a <2 x float>: lane 1 is shuffled down to lane 0, added
; to the source vector, and lane 0 of the sum is returned. The assertions expect
; this to select a single scalar FADDP.
; Lanes that are never read use poison rather than the deprecated undef
; placeholder. If llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define float @faddp_2xfloat(<2 x float> %a) {
; CHECK-LABEL: faddp_2xfloat:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %sum = fadd <2 x float> %a, %shift
  %lane0 = extractelement <2 x float> %sum, i32 0
  ret float %lane0
}
16
; Same lane-0 + lane-1 reduction as the <2 x float> case, but on a <4 x float>
; source; only the low two lanes contribute to the returned value. The
; assertions expect a single scalar FADDP on the low half.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define float @faddp_4xfloat(<4 x float> %a) {
; CHECK-LABEL: faddp_4xfloat:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %sum = fadd <4 x float> %a, %shift
  %lane0 = extractelement <4 x float> %sum, i32 0
  ret float %lane0
}
28
; Commuted form of the <4 x float> reduction: the shuffled vector is the first
; fadd operand. The assertions expect the same scalar FADDP as the non-commuted
; test.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define float @faddp_4xfloat_commute(<4 x float> %a) {
; CHECK-LABEL: faddp_4xfloat_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %sum = fadd <4 x float> %shift, %a
  %lane0 = extractelement <4 x float> %sum, i32 0
  ret float %lane0
}
40
; Commuted form of the <2 x float> reduction: the shuffled vector is the first
; fadd operand. The assertions expect the same scalar FADDP as the non-commuted
; test.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define float @faddp_2xfloat_commute(<2 x float> %a) {
; CHECK-LABEL: faddp_2xfloat_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %sum = fadd <2 x float> %shift, %a
  %lane0 = extractelement <2 x float> %sum, i32 0
  ret float %lane0
}
53
; Pairwise reduction of a <2 x double>: lane 1 is shuffled down to lane 0, added
; to the source vector, and lane 0 of the sum is returned. The assertions expect
; a single scalar FADDP on the .2d arrangement.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define double @faddp_2xdouble(<2 x double> %a) {
; CHECK-LABEL: faddp_2xdouble:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 poison>
  %sum = fadd <2 x double> %a, %shift
  %lane0 = extractelement <2 x double> %sum, i32 0
  ret double %lane0
}
65
; Commuted form of the <2 x double> reduction: the shuffled vector is the first
; fadd operand. The assertions expect the same scalar FADDP as the non-commuted
; test.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define double @faddp_2xdouble_commute(<2 x double> %a) {
; CHECK-LABEL: faddp_2xdouble_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 poison>
  %sum = fadd <2 x double> %shift, %a
  %lane0 = extractelement <2 x double> %sum, i32 0
  ret double %lane0
}
77
; Integer counterpart of the pairwise reduction: lane 1 of a <2 x i64> is
; shuffled down to lane 0, added to the source vector, and lane 0 is returned.
; The assertions expect a scalar ADDP followed by an FMOV into x0.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define i64 @addp_2xi64(<2 x i64> %a) {
; CHECK-LABEL: addp_2xi64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    addp d0, v0.2d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
  %sum = add <2 x i64> %a, %shift
  %lane0 = extractelement <2 x i64> %sum, i32 0
  ret i64 %lane0
}
90
; Commuted form of the <2 x i64> reduction: the shuffled vector is the first
; add operand. The assertions expect the same ADDP + FMOV sequence as the
; non-commuted test.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define i64 @addp_2xi64_commute(<2 x i64> %a) {
; CHECK-LABEL: addp_2xi64_commute:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    addp d0, v0.2d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
  %sum = add <2 x i64> %shift, %a
  %lane0 = extractelement <2 x i64> %sum, i32 0
  ret i64 %lane0
}
103
; Strict-FP variant of the <2 x float> pairwise reduction: the add is the
; constrained fadd intrinsic (round.tonearest, fpexcept.strict) inside a
; strictfp function. The assertions expect the same scalar FADDP as the
; non-strict test.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define float @faddp_2xfloat_strict(<2 x float> %a) #0 {
; CHECK-LABEL: faddp_2xfloat_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %sum = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float> %shift, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %lane0 = extractelement <2 x float> %sum, i32 0
  ret float %lane0
}
116
; Strict-FP variant of the <4 x float> pairwise reduction, using the
; constrained fadd intrinsic (round.tonearest, fpexcept.strict). The assertions
; expect a single scalar FADDP on the low half.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define float @faddp_4xfloat_strict(<4 x float> %a) #0 {
; CHECK-LABEL: faddp_4xfloat_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %sum = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %a, <4 x float> %shift, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %lane0 = extractelement <4 x float> %sum, i32 0
  ret float %lane0
}
128
; Strict-FP, commuted variant of the <4 x float> pairwise reduction: the
; shuffled vector is the first operand of the constrained fadd. The assertions
; expect a single scalar FADDP on the low half.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define float @faddp_4xfloat_commute_strict(<4 x float> %a) #0 {
; CHECK-LABEL: faddp_4xfloat_commute_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %sum = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %shift, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %lane0 = extractelement <4 x float> %sum, i32 0
  ret float %lane0
}
140
; Strict-FP, commuted variant of the <2 x float> pairwise reduction: the
; shuffled vector is the first operand of the constrained fadd. The assertions
; expect a single scalar FADDP.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define float @faddp_2xfloat_commute_strict(<2 x float> %a) #0 {
; CHECK-LABEL: faddp_2xfloat_commute_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %sum = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %shift, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %lane0 = extractelement <2 x float> %sum, i32 0
  ret float %lane0
}
153
; Strict-FP variant of the <2 x double> pairwise reduction, using the
; constrained fadd intrinsic (round.tonearest, fpexcept.strict). The assertions
; expect a single scalar FADDP on the .2d arrangement.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define double @faddp_2xdouble_strict(<2 x double> %a) #0 {
; CHECK-LABEL: faddp_2xdouble_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 poison>
  %sum = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %a, <2 x double> %shift, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %lane0 = extractelement <2 x double> %sum, i32 0
  ret double %lane0
}
165
; Strict-FP, commuted variant of the <2 x double> pairwise reduction: the
; shuffled vector is the first operand of the constrained fadd. The assertions
; expect a single scalar FADDP on the .2d arrangement.
; Don't-care lanes use poison rather than the deprecated undef placeholder. If
; llc output changes, regenerate the assertions with
; utils/update_llc_test_checks.py.
define double @faddp_2xdouble_commute_strict(<2 x double> %a) #0 {
; CHECK-LABEL: faddp_2xdouble_commute_strict:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp d0, v0.2d
; CHECK-NEXT:    ret
entry:
  %shift = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 poison>
  %sum = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %shift, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %lane0 = extractelement <2 x double> %sum, i32 0
  ret double %lane0
}
177
178
; Whole-vector case: both lanes of the <2 x double> result are returned. The
; input has its two lanes swapped and is then added to itself with a
; reassociable fadd. The assertions expect an EXT followed by a vector FADD.
define <2 x double> @addp_v2f64(<2 x double> %a) {
; CHECK-LABEL: addp_v2f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    fadd v0.2d, v1.2d, v0.2d
; CHECK-NEXT:    ret
entry:
  %swapped = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 0>
  %sum = fadd reassoc <2 x double> %swapped, %a
  ret <2 x double> %sum
}
190
; Whole-vector case on <4 x double>: adjacent lane pairs are swapped and the
; result is added to the input with a reassociable fadd. The assertions expect
; one vector FADDP across the two input registers, then two DUPs that broadcast
; each pair sum into its result register.
define <4 x double> @addp_v4f64(<4 x double> %a) {
; CHECK-LABEL: addp_v4f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp v1.2d, v0.2d, v1.2d
; CHECK-NEXT:    dup v0.2d, v1.d[0]
; CHECK-NEXT:    dup v1.2d, v1.d[1]
; CHECK-NEXT:    ret
entry:
  %swapped = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %sum = fadd reassoc <4 x double> %swapped, %a
  ret <4 x double> %sum
}
203
; Whole-vector case on <4 x float>: adjacent lane pairs are swapped and the
; result is added to the input with a reassociable fadd. The assertions expect
; a REV64 followed by a vector FADD.
define <4 x float> @addp_v4f32(<4 x float> %a) {
; CHECK-LABEL: addp_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev64 v1.4s, v0.4s
; CHECK-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
entry:
  %swapped = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %sum = fadd reassoc <4 x float> %swapped, %a
  ret <4 x float> %sum
}
215
; Whole-vector case on <8 x float> with a plain fadd (no fast-math flags):
; adjacent lane pairs are swapped and the result is added to the input. The
; assertions expect a REV64 + vector FADD per 128-bit half.
define <8 x float> @addp_v8f32(<8 x float> %a) {
; CHECK-LABEL: addp_v8f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev64 v2.4s, v1.4s
; CHECK-NEXT:    rev64 v3.4s, v0.4s
; CHECK-NEXT:    fadd v0.4s, v3.4s, v0.4s
; CHECK-NEXT:    fadd v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %swapped = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %sum = fadd <8 x float> %swapped, %a
  ret <8 x float> %sum
}
229
; Same shuffle-and-add as addp_v8f32, but the fadd carries the reassoc flag.
; The assertions expect one vector FADDP across the two input registers, then
; ZIP1/ZIP2 to duplicate each pair sum into both lanes of its pair.
define <8 x float> @addp_v8f32_slow(<8 x float> %a) {
; CHECK-LABEL: addp_v8f32_slow:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    zip1 v0.4s, v1.4s, v1.4s
; CHECK-NEXT:    zip2 v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %swapped = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %sum = fadd reassoc <8 x float> %swapped, %a
  ret <8 x float> %sum
}
242
; Whole-vector case on <16 x float>: adjacent lane pairs are swapped and the
; result is added to the input with a reassociable fadd. The assertions expect
; two vector FADDPs (one per pair of input registers), each followed by
; ZIP1/ZIP2 to duplicate every pair sum into both lanes of its pair.
define <16 x float> @addp_v16f32(<16 x float> %a) {
; CHECK-LABEL: addp_v16f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    faddp v3.4s, v2.4s, v3.4s
; CHECK-NEXT:    faddp v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    zip1 v2.4s, v3.4s, v3.4s
; CHECK-NEXT:    zip1 v0.4s, v1.4s, v1.4s
; CHECK-NEXT:    zip2 v1.4s, v1.4s, v1.4s
; CHECK-NEXT:    zip2 v3.4s, v3.4s, v3.4s
; CHECK-NEXT:    ret
entry:
  %swapped = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %sum = fadd reassoc <16 x float> %swapped, %a
  ret <16 x float> %sum
}
258
; Two-stage reduction of (%a + %b): the low and high halves of the sum are fed
; to the NEON faddp intrinsic, then lane 1 of that result is added to lane 0
; and lane 0 is returned. The assertions expect a vector FADD, a vector FADDP
; and a final scalar FADDP.
define float @faddp_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: faddp_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    faddp v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    faddp s0, v0.2s
; CHECK-NEXT:    ret
  %sum = fadd <4 x float> %a, %b
  %lo = shufflevector <4 x float> %sum, <4 x float> poison, <2 x i32> <i32 0, i32 1>
  %hi = shufflevector <4 x float> %sum, <4 x float> poison, <2 x i32> <i32 2, i32 3>
  %pairs = tail call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %lo, <2 x float> %hi)
  %shift = shufflevector <2 x float> %pairs, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
  %total = fadd <2 x float> %pairs, %shift
  %lane0 = extractelement <2 x float> %total, i64 0
  ret float %lane0
}
275
; Half-precision case (the RUN line enables +fullfp16): the low and high halves
; of (%a + %b) are fed to the NEON faddp intrinsic and its <4 x half> result is
; returned. The assertions expect a vector FADD followed by a vector FADDP on
; the .8h arrangement.
define <4 x half> @faddp_v8f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: faddp_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    faddp v0.8h, v0.8h, v0.8h
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sum = fadd <8 x half> %a, %b
  %lo = shufflevector <8 x half> %sum, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %hi = shufflevector <8 x half> %sum, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %pairs = tail call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %lo, <4 x half> %hi)
  ret <4 x half> %pairs
}
289
; AArch64 NEON pairwise floating-point add intrinsics used by faddp_v4f32 and
; faddp_v8f16.
declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>)
declare <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half>, <4 x half>)

; Constrained floating-point add intrinsics used by the *_strict tests.
declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)

; Attribute group #0, attached to the *_strict functions and their constrained
; intrinsic calls.
attributes #0 = { strictfp }
298