; xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/floating-point.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN:     -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 \
; RUN:     | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=riscv64 -mattr=+v,+f \
; RUN:     | FileCheck %s --check-prefix=DEFAULT

; fp_add: loads four adjacent floats from %p and four from %q, adds them lane
; by lane, and stores the four sums to adjacent floats at %dst. Both FileCheck
; prefixes expect the scalar chain to become two <4 x float> loads, one vector
; fadd, and one <4 x float> store.
define void @fp_add(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_add
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_add
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0:[0-9]+]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]]
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Lanes 0..3 of the first operand, read from %p.
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Lanes 0..3 of the second operand, read from %q.
  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  ; One scalar fadd per lane.
  %a0 = fadd float %e0, %f0
  %a1 = fadd float %e1, %f1
  %a2 = fadd float %e2, %f2
  %a3 = fadd float %e3, %f3

  ; Results written to adjacent floats starting at %dst.
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

; fp_sub: loads four adjacent floats from %p, subtracts the constant 3.0 from
; each, and stores the results to adjacent floats at %dst. Both FileCheck
; prefixes expect one <4 x float> load, one vector fsub against a splat of
; 3.0, and one <4 x float> store.
define void @fp_sub(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_sub
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00)
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_sub
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[TMP0]], splat (float 3.000000e+00)
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Lanes 0..3 of the input, read from %p.
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Same constant right-hand operand in every lane.
  %a0 = fsub float %e0, 3.0
  %a1 = fsub float %e1, 3.0
  %a2 = fsub float %e2, 3.0
  %a3 = fsub float %e3, 3.0

  ; Results written to adjacent floats starting at %dst.
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

; fp_mul: loads four adjacent floats from %p and four from %q, multiplies them
; lane by lane, and stores the products to adjacent floats at %dst. Both
; FileCheck prefixes expect two <4 x float> loads, one vector fmul, and one
; <4 x float> store.
define void @fp_mul(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_mul
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_mul
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = fmul <4 x float> [[TMP0]], [[TMP1]]
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Lanes 0..3 of the first operand, read from %p.
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Lanes 0..3 of the second operand, read from %q.
  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  ; One scalar fmul per lane.
  %a0 = fmul float %e0, %f0
  %a1 = fmul float %e1, %f1
  %a2 = fmul float %e2, %f2
  %a3 = fmul float %e3, %f3

  ; Results written to adjacent floats starting at %dst.
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

; fp_div: loads four adjacent floats from %p, divides each by the constant
; 10.5, and stores the quotients to adjacent floats at %dst. Both FileCheck
; prefixes expect one <4 x float> load, one vector fdiv by a splat of 10.5,
; and one <4 x float> store.
define void @fp_div(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_div
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01)
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_div
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = fdiv <4 x float> [[TMP0]], splat (float 1.050000e+01)
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Lanes 0..3 of the dividend, read from %p.
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Same constant divisor in every lane.
  %a0 = fdiv float %e0, 10.5
  %a1 = fdiv float %e1, 10.5
  %a2 = fdiv float %e2, 10.5
  %a3 = fdiv float %e3, 10.5

  ; Results written to adjacent floats starting at %dst.
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

; Scalar maxnum intrinsic called once per lane by @fp_max.
declare float @llvm.maxnum.f32(float, float)

; fp_max: loads four adjacent floats from %p and four from %q, applies
; llvm.maxnum.f32 to each lane pair, and stores the results to adjacent floats
; at %dst. Both FileCheck prefixes expect two <4 x float> loads, a single call
; to @llvm.maxnum.v4f32, and one <4 x float> store.
define void @fp_max(ptr %dst, ptr %p, ptr %q) {
; CHECK-LABEL: define void @fp_max
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
; CHECK-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_max
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]], ptr [[Q:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[Q]], align 4
; DEFAULT-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
; DEFAULT-NEXT:    store <4 x float> [[TMP2]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Lanes 0..3 of the first operand, read from %p.
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; Lanes 0..3 of the second operand, read from %q.
  %f0 = load float, ptr %q, align 4
  %pf1 = getelementptr inbounds float, ptr %q, i64 1
  %f1 = load float, ptr %pf1, align 4
  %pf2 = getelementptr inbounds float, ptr %q, i64 2
  %f2 = load float, ptr %pf2, align 4
  %pf3 = getelementptr inbounds float, ptr %q, i64 3
  %f3 = load float, ptr %pf3, align 4

  ; One scalar intrinsic call per lane.
  %a0 = tail call float @llvm.maxnum.f32(float %e0, float %f0)
  %a1 = tail call float @llvm.maxnum.f32(float %e1, float %f1)
  %a2 = tail call float @llvm.maxnum.f32(float %e2, float %f2)
  %a3 = tail call float @llvm.maxnum.f32(float %e3, float %f3)

  ; Results written to adjacent floats starting at %dst.
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

; Scalar minnum intrinsic called once per lane by @fp_min.
declare float @llvm.minnum.f32(float, float)

; fp_min: loads four adjacent floats from %p, applies llvm.minnum.f32 against
; the constant 1.25 to each, and stores the results to adjacent floats at
; %dst. Both FileCheck prefixes expect one <4 x float> load, a single call to
; @llvm.minnum.v4f32 with a splat of 1.25, and one <4 x float> store.
define void @fp_min(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_min
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00))
; CHECK-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_min
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> splat (float 1.250000e+00))
; DEFAULT-NEXT:    store <4 x float> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Lanes 0..3 of the input, read from %p.
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; One scalar intrinsic call per lane, same constant second operand each time.
  %a0 = tail call float @llvm.minnum.f32(float %e0, float 1.25)
  %a1 = tail call float @llvm.minnum.f32(float %e1, float 1.25)
  %a2 = tail call float @llvm.minnum.f32(float %e2, float 1.25)
  %a3 = tail call float @llvm.minnum.f32(float %e3, float 1.25)

  ; Results written to adjacent floats starting at %dst.
  store float %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds float, ptr %dst, i64 1
  store float %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds float, ptr %dst, i64 2
  store float %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds float, ptr %dst, i64 3
  store float %a3, ptr %pa3, align 4

  ret void
}

; Scalar saturating float-to-signed-int intrinsic called once per lane by
; @fp_convert.
declare i32 @llvm.fptosi.sat.i32.f32(float)

; fp_convert: loads four adjacent floats from %p, converts each to i32 with
; llvm.fptosi.sat.i32.f32, and stores the results to adjacent i32 slots at
; %dst. Both FileCheck prefixes expect one <4 x float> load, a single call to
; @llvm.fptosi.sat.v4i32.v4f32, and one <4 x i32> store.
define void @fp_convert(ptr %dst, ptr %p) {
; CHECK-LABEL: define void @fp_convert
; CHECK-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST]], align 4
; CHECK-NEXT:    ret void
;
; DEFAULT-LABEL: define void @fp_convert
; DEFAULT-SAME: (ptr [[DST:%.*]], ptr [[P:%.*]]) #[[ATTR0]] {
; DEFAULT-NEXT:  entry:
; DEFAULT-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[P]], align 4
; DEFAULT-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]])
; DEFAULT-NEXT:    store <4 x i32> [[TMP1]], ptr [[DST]], align 4
; DEFAULT-NEXT:    ret void
;
entry:
  ; Lanes 0..3 of the float input, read from %p.
  %e0 = load float, ptr %p, align 4
  %pe1 = getelementptr inbounds float, ptr %p, i64 1
  %e1 = load float, ptr %pe1, align 4
  %pe2 = getelementptr inbounds float, ptr %p, i64 2
  %e2 = load float, ptr %pe2, align 4
  %pe3 = getelementptr inbounds float, ptr %p, i64 3
  %e3 = load float, ptr %pe3, align 4

  ; One scalar conversion call per lane.
  %a0 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e0)
  %a1 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e1)
  %a2 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e2)
  %a3 = tail call i32 @llvm.fptosi.sat.i32.f32(float %e3)

  ; Results are i32, so the destination is indexed with i32-typed GEPs.
  store i32 %a0, ptr %dst, align 4
  %pa1 = getelementptr inbounds i32, ptr %dst, i64 1
  store i32 %a1, ptr %pa1, align 4
  %pa2 = getelementptr inbounds i32, ptr %dst, i64 2
  store i32 %a2, ptr %pa2, align 4
  %pa3 = getelementptr inbounds i32, ptr %dst, i64 3
  store i32 %a3, ptr %pa3, align 4

  ret void
}
