; xref: /llvm-project/llvm/test/CodeGen/PowerPC/powf_massv_075_025exp.ll (revision 5403c59c608c08c8ecd4303763f08eb046eb5e4d)
; Checks how llc lowers calls to the MASSV routine __powf4 for several -mcpu
; values on the powerpc64le-unknown-unknown and powerpc-ibm-aix-xcoff triples.
;
; Each RUN line below enables exactly one FileCheck prefix of the PWR<n>
; family, so only directives carrying that prefix are evaluated for that run.
; NOTE(review): the un-suffixed label / NOT / blr / xvrsqrtesp directives in
; this file use a prefix that no RUN line enables, so they look inactive;
; confirm whether that is deliberate.
; NOTE(review): the little-endian pwr10 run selects the PWR9 prefix (it expects
; the _P9 entry) while the AIX pwr10 run selects PWR10; presumably the _P10
; entries are only used on AIX -- confirm against the MASSV lowering pass.
;
; RUN: llc -vector-library=MASSV -mtriple=powerpc64le-unknown-unknown -mcpu=pwr10 < %s \
; RUN:   | FileCheck --check-prefix=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 < %s \
; RUN:   | FileCheck --check-prefix=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV -mtriple=powerpc64le-unknown-unknown -mcpu=pwr8 < %s \
; RUN:   | FileCheck --check-prefix=CHECK-PWR8 %s
; RUN: llc -vector-library=MASSV -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr10 < %s \
; RUN:   | FileCheck --check-prefix=CHECK-PWR10 %s
; RUN: llc -vector-library=MASSV -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr9 < %s \
; RUN:   | FileCheck --check-prefix=CHECK-PWR9 %s
; RUN: llc -vector-library=MASSV -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr8 < %s \
; RUN:   | FileCheck --check-prefix=CHECK-PWR8 %s
; RUN: llc -vector-library=MASSV -mtriple=powerpc-ibm-aix-xcoff -mcpu=pwr7 < %s \
; RUN:   | FileCheck --check-prefix=CHECK-PWR7 %s

; vspow_var: base and exponent are both vectors loaded from memory, so the
; exponent is not a compile-time constant. The per-CPU directives expect the
; call to be emitted as the matching __powf4_P<n> entry.
define void @vspow_var(ptr nocapture %z, ptr nocapture readonly %y, ptr nocapture readonly %x) {
; CHECK-LABEL:       @vspow_var
; CHECK-PWR10:       __powf4_P10
; CHECK-PWR9:        __powf4_P9
; CHECK-PWR8:        __powf4_P8
; CHECK-PWR7:        __powf4_P7
; CHECK:             blr
entry:
  br label %loop

; Each iteration processes four consecutive floats; %iv takes 0, 4, ..., 1020.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %dst.addr = getelementptr float, ptr %z, i64 %iv
  %exp.addr = getelementptr float, ptr %y, i64 %iv
  %base.addr = getelementptr float, ptr %x, i64 %iv
  %base = load <4 x float>, ptr %base.addr, align 4
  %exp = load <4 x float>, ptr %exp.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %base, <4 x float> %exp)
  store <4 x float> %pow, ptr %dst.addr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; vspow_const: the exponent is a splat constant (0x3FE851EB80000000, roughly
; 0.76) that is neither 0.75 nor 0.25. The per-CPU directives expect the call
; to be emitted as the matching __powf4_P<n> entry.
define void @vspow_const(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL:       @vspow_const
; CHECK-PWR10:       __powf4_P10
; CHECK-PWR9:        __powf4_P9
; CHECK-PWR8:        __powf4_P8
; CHECK-PWR7:        __powf4_P7
; CHECK:             blr
entry:
  br label %loop

; Each iteration processes four consecutive floats; %iv takes 0, 4, ..., 1020.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %dst.addr = getelementptr float, ptr %y, i64 %iv
  %base.addr = getelementptr float, ptr %x, i64 %iv
  %base = load <4 x float>, ptr %base.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %base, <4 x float> <float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  store <4 x float> %pow, ptr %dst.addr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; vspow_neq_const: the exponent is a constant vector that is not a splat (its
; lanes hold different values) and no lane equals 0.75 or 0.25. The per-CPU
; directives expect the call to be emitted as the matching __powf4_P<n> entry.
define void @vspow_neq_const(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL:       @vspow_neq_const
; CHECK-PWR10:       __powf4_P10
; CHECK-PWR9:        __powf4_P9
; CHECK-PWR8:        __powf4_P8
; CHECK-PWR7:        __powf4_P7
; CHECK:             blr
entry:
  br label %loop

; Each iteration processes four consecutive floats; %iv takes 0, 4, ..., 1020.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %dst.addr = getelementptr float, ptr %y, i64 %iv
  %base.addr = getelementptr float, ptr %x, i64 %iv
  %base = load <4 x float>, ptr %base.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %base, <4 x float> <float 0x3FE861EB80000000, float 0x3FE871EB80000000, float 0x3FE851EB80000000, float 0x3FE851EB80000000>)
  store <4 x float> %pow, ptr %dst.addr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; vspow_neq075_const: the exponent vector is NOT a splat of 0.75. Lanes 0-2
; are 0.75 but lane 3 holds a different constant (0x3FE851EB80000000), so the
; per-CPU directives still expect the matching __powf4_P<n> entry.
define void @vspow_neq075_const(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL:       @vspow_neq075_const
; CHECK-PWR10:       __powf4_P10
; CHECK-PWR9:        __powf4_P9
; CHECK-PWR8:        __powf4_P8
; CHECK-PWR7:        __powf4_P7
; CHECK:             blr
entry:
  br label %loop

; Each iteration processes four consecutive floats; %iv takes 0, 4, ..., 1020.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %dst.addr = getelementptr float, ptr %y, i64 %iv
  %base.addr = getelementptr float, ptr %x, i64 %iv
  %base = load <4 x float>, ptr %base.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %base, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 0x3FE851EB80000000>)
  store <4 x float> %pow, ptr %dst.addr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; vspow_neq025_const: the exponent vector is NOT a splat of 0.25. Lanes 1 and
; 3 are 0.25 but lanes 0 and 2 hold a different constant
; (0x3FE851EB80000000), so the per-CPU directives still expect the matching
; __powf4_P<n> entry.
define void @vspow_neq025_const(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL:       @vspow_neq025_const
; CHECK-PWR10:       __powf4_P10
; CHECK-PWR9:        __powf4_P9
; CHECK-PWR8:        __powf4_P8
; CHECK-PWR7:        __powf4_P7
; CHECK:             blr
entry:
  br label %loop

; Each iteration processes four consecutive floats; %iv takes 0, 4, ..., 1020.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %dst.addr = getelementptr float, ptr %y, i64 %iv
  %base.addr = getelementptr float, ptr %x, i64 %iv
  %base = load <4 x float>, ptr %base.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %base, <4 x float> <float 0x3FE851EB80000000, float 2.500000e-01, float 0x3FE851EB80000000, float 2.500000e-01>)
  store <4 x float> %pow, ptr %dst.addr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; vspow_075: the exponent is a splat of 0.75 and the call carries the ninf and
; afn fast-math flags. The directives describe the intended outcome: no
; __powf4_P<n> library call remains and an xvrsqrtesp instruction is emitted.
; The NOT pattern uses an alternation so that it names exactly the P7, P8, P9
; and P10 suffixes (a bracket expression would match single characters only).
; NOTE(review): these directives use the un-suffixed prefix, which none of the
; RUN lines at the top of the file enables, so they appear to be inactive.
define void @vspow_075(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL:       @vspow_075
; CHECK-NOT:         __powf4_P{{(7|8|9|10)}}
; CHECK:             xvrsqrtesp
; CHECK:             blr
entry:
  br label %loop

; Each iteration processes four consecutive floats; %iv takes 0, 4, ..., 1020.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %dst.addr = getelementptr float, ptr %y, i64 %iv
  %base.addr = getelementptr float, ptr %x, i64 %iv
  %base = load <4 x float>, ptr %base.addr, align 4
  %pow = call ninf afn <4 x float> @__powf4(<4 x float> %base, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  store <4 x float> %pow, ptr %dst.addr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; vspow_025: the exponent is a splat of 0.25 and the call carries the ninf,
; afn and nsz fast-math flags. The directives describe the intended outcome:
; no __powf4_P<n> library call remains and an xvrsqrtesp instruction is
; emitted. The NOT pattern uses an alternation so that it names exactly the
; P7, P8, P9 and P10 suffixes (a bracket expression would match single
; characters only).
; NOTE(review): these directives use the un-suffixed prefix, which none of the
; RUN lines at the top of the file enables, so they appear to be inactive.
define void @vspow_025(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL:       @vspow_025
; CHECK-NOT:         __powf4_P{{(7|8|9|10)}}
; CHECK:             xvrsqrtesp
; CHECK:             blr
entry:
  br label %loop

; Each iteration processes four consecutive floats; %iv takes 0, 4, ..., 1020.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %dst.addr = getelementptr float, ptr %y, i64 %iv
  %base.addr = getelementptr float, ptr %x, i64 %iv
  %base = load <4 x float>, ptr %base.addr, align 4
  %pow = call ninf afn nsz <4 x float> @__powf4(<4 x float> %base, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  store <4 x float> %pow, ptr %dst.addr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; vspow_075_nofast: the exponent is a splat of 0.75, but the call carries no
; fast-math flags at all. The per-CPU directives therefore expect the matching
; __powf4_P<n> library entry, and the un-suffixed NOT directive states that no
; xvrsqrtesp should follow it.
define void @vspow_075_nofast(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL:       @vspow_075_nofast
; CHECK-PWR10:       __powf4_P10
; CHECK-PWR9:        __powf4_P9
; CHECK-PWR8:        __powf4_P8
; CHECK-PWR7:        __powf4_P7
; CHECK-NOT:         xvrsqrtesp
; CHECK:             blr
entry:
  br label %loop

; Each iteration processes four consecutive floats; %iv takes 0, 4, ..., 1020.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %dst.addr = getelementptr float, ptr %y, i64 %iv
  %base.addr = getelementptr float, ptr %x, i64 %iv
  %base = load <4 x float>, ptr %base.addr, align 4
  %pow = call <4 x float> @__powf4(<4 x float> %base, <4 x float> <float 7.500000e-01, float 7.500000e-01, float 7.500000e-01, float 7.500000e-01>)
  store <4 x float> %pow, ptr %dst.addr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; vspow_025_nofast: the exponent is a splat of 0.25, but the call carries no
; fast-math flags at all. The per-CPU directives therefore expect the matching
; __powf4_P<n> library entry, and the un-suffixed NOT directive states that no
; xvrsqrtesp should follow it.
define void @vspow_025_nofast(ptr nocapture %y, ptr nocapture readonly %x) {
; CHECK-LABEL:       @vspow_025_nofast
; CHECK-PWR10:       __powf4_P10
; CHECK-PWR9:        __powf4_P9
; CHECK-PWR8:        __powf4_P8
; CHECK-PWR7:        __powf4_P7
; CHECK-NOT:         xvrsqrtesp
; CHECK:             blr
entry:
  br label %loop

; Each iteration processes four consecutive floats; %iv takes 0, 4, ..., 1020.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %dst.addr = getelementptr float, ptr %y, i64 %iv
  %base.addr = getelementptr float, ptr %x, i64 %iv
  %base = load <4 x float>, ptr %base.addr, align 4
  %pow = call <4 x float> @__powf4(<4 x float> %base, <4 x float> <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>)
  store <4 x float> %pow, ptr %dst.addr, align 4
  %iv.next = add i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %loop

exit:
  ret void
}

; Vector pow routine called by every function in this file: the first operand
; is the <4 x float> base, the second the <4 x float> exponent.
; NOTE(review): the previous comment here read "Function Attrs: nounwind
; readnone speculatable willreturn", but this declaration carries no
; attributes; confirm whether they were dropped on purpose.
declare <4 x float> @__powf4(<4 x float>, <4 x float>)
