; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --filter "call.*(acos|asin|atan|atan2|cos|cosh|exp|log|sin|sinh|pow|ceil|copysign|fabs|floor|fma|m..num|nearbyint|rint|round|sqrt|tan|tanh|trunc)" --version 2

; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=SLEEF-NEON
; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=SLEEF-SVE
; RUN: opt -mattr=+neon -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefix=ARMPL-NEON
; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize,simplifycfg -force-vector-interleave=1 -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefix=ARMPL-SVE

target triple = "aarch64-unknown-linux-gnu"

; We are checking whether loops containing intrinsic calls can be vectorized
; when the compiler provides TLI mappings to their vector variants. The tests
; check fixed-width vectorization with NEON and scalable vectorization with
; SVE; each is run once with -vector-library=sleefgnuabi and once with
; -vector-library=ArmPL (see the four RUN lines above).
;
; Provenance: llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
; (revision c03d09ce3eed336fea4d9283232383f6d4d4057d)

declare double @llvm.acos.f64(double)
declare float @llvm.acos.f32(float)

; Scalar loop computing out[i] = llvm.acos.f64(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library acos
; variant chosen by each RUN line: an unmasked <2 x double> call with +neon,
; a masked <vscale x 2 x double> call with +sve.
define void @acos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @acos_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_acos(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @acos_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @acos_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vacosq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @acos_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svacos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  %in = load double, ptr %in.gep, align 8
  %call = tail call double @llvm.acos.f64(double %in)
  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  store double %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Scalar loop computing out[i] = llvm.acos.f32(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library acosf
; variant chosen by each RUN line: an unmasked <4 x float> call with +neon,
; a masked <vscale x 4 x float> call with +sve.
define void @acos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @acos_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_acosf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @acos_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @acos_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vacosq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @acos_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svacos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; align 4 matches the 4-byte element stride and the store below; a larger
  ; alignment cannot hold for consecutive float elements.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.acos.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare double @llvm.asin.f64(double)
declare float @llvm.asin.f32(float)

; Scalar loop computing out[i] = llvm.asin.f64(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library asin
; variant chosen by each RUN line: an unmasked <2 x double> call with +neon,
; a masked <vscale x 2 x double> call with +sve.
define void @asin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @asin_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_asin(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @asin_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @asin_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vasinq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @asin_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svasin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  %in = load double, ptr %in.gep, align 8
  %call = tail call double @llvm.asin.f64(double %in)
  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  store double %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Scalar loop computing out[i] = llvm.asin.f32(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library asinf
; variant chosen by each RUN line: an unmasked <4 x float> call with +neon,
; a masked <vscale x 4 x float> call with +sve.
define void @asin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @asin_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_asinf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @asin_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @asin_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vasinq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @asin_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svasin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; align 4 matches the 4-byte element stride and the store below; a larger
  ; alignment cannot hold for consecutive float elements.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.asin.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare double @llvm.atan.f64(double)
declare float @llvm.atan.f32(float)

; Scalar loop computing out[i] = llvm.atan.f64(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library atan
; variant chosen by each RUN line: an unmasked <2 x double> call with +neon,
; a masked <vscale x 2 x double> call with +sve.
define void @atan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @atan_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_atan(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @atan_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @atan_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vatanq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @atan_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svatan_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  %in = load double, ptr %in.gep, align 8
  %call = tail call double @llvm.atan.f64(double %in)
  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  store double %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Scalar loop computing out[i] = llvm.atan.f32(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library atanf
; variant chosen by each RUN line: an unmasked <4 x float> call with +neon,
; a masked <vscale x 4 x float> call with +sve.
define void @atan_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @atan_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_atanf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @atan_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @atan_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vatanq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @atan_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svatan_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; align 4 matches the 4-byte element stride and the store below; a larger
  ; alignment cannot hold for consecutive float elements.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.atan.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare double @llvm.atan2.f64(double, double)
declare float @llvm.atan2.f32(float, float)

; Scalar loop computing out[i] = llvm.atan2.f64(in[i], in[i]) for i in
; [0, 1000); the same loaded value is passed as both operands.
; The assertions expect the call to be replaced by the two-operand
; vector-library atan2 variant chosen by each RUN line: unmasked
; <2 x double> with +neon, masked <vscale x 2 x double> with +sve.
define void @atan2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @atan2_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_atan2(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @atan2_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @atan2_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vatan2q_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @atan2_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  %in = load double, ptr %in.gep, align 8
  %call = tail call double @llvm.atan2.f64(double %in, double %in)
  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  store double %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Scalar loop computing out[i] = llvm.atan2.f32(in[i], in[i]) for i in
; [0, 1000); the same loaded value is passed as both operands.
; The assertions expect the call to be replaced by the two-operand
; vector-library atan2f variant chosen by each RUN line: unmasked
; <4 x float> with +neon, masked <vscale x 4 x float> with +sve.
define void @atan2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @atan2_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_atan2f(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @atan2_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @atan2_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vatan2q_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @atan2_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; align 4 matches the 4-byte element stride and the store below; a larger
  ; alignment cannot hold for consecutive float elements.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.atan2.f32(float %in, float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare double @llvm.ceil.f64(double)
declare float @llvm.ceil.f32(float)

; Scalar loop computing out[i] = llvm.ceil.f64(in[i]) for i in [0, 1000).
; In all four configurations the assertions expect a vector llvm.ceil
; intrinsic call (v2f64 with +neon, nxv2f64 with +sve); no vector-library
; function appears in the expected output.
define void @ceil_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @ceil_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @ceil_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @ceil_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @ceil_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  %in = load double, ptr %in.gep, align 8
  %call = tail call double @llvm.ceil.f64(double %in)
  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  store double %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Scalar loop computing out[i] = llvm.ceil.f32(in[i]) for i in [0, 1000).
; In all four configurations the assertions expect a vector llvm.ceil
; intrinsic call (v4f32 with +neon, nxv4f32 with +sve); no vector-library
; function appears in the expected output.
define void @ceil_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @ceil_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @ceil_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @ceil_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @ceil_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; align 4 matches the 4-byte element stride and the store below; a larger
  ; alignment cannot hold for consecutive float elements.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.ceil.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare double @llvm.copysign.f64(double, double)
declare float @llvm.copysign.f32(float, float)

; Scalar loop computing out[i] = llvm.copysign.f64(in[i], in[i]) for i in
; [0, 1000); the same loaded value is passed as both operands.
; In all four configurations the assertions expect a vector llvm.copysign
; intrinsic call (v2f64 with +neon, nxv2f64 with +sve); no vector-library
; function appears in the expected output.
define void @copysign_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @copysign_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @copysign_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
;
; ARMPL-NEON-LABEL: define void @copysign_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.copysign.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @copysign_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  %in = load double, ptr %in.gep, align 8
  %call = tail call double @llvm.copysign.f64(double %in, double %in)
  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  store double %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Scalar loop computing out[i] = llvm.copysign.f32(in[i], in[i]) for i in
; [0, 1000); the same loaded value is passed as both operands.
; In all four configurations the assertions expect a vector llvm.copysign
; intrinsic call (v4f32 with +neon, nxv4f32 with +sve); no vector-library
; function appears in the expected output.
define void @copysign_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @copysign_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @copysign_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
;
; ARMPL-NEON-LABEL: define void @copysign_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.copysign.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @copysign_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.copysign.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; align 4 matches the 4-byte element stride and the store below; a larger
  ; alignment cannot hold for consecutive float elements.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.copysign.f32(float %in, float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare double @llvm.cos.f64(double)
declare float @llvm.cos.f32(float)

; Scalar loop computing out[i] = llvm.cos.f64(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library cos
; variant chosen by each RUN line: an unmasked <2 x double> call with +neon,
; a masked <vscale x 2 x double> call with +sve.
define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @cos_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cos(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @cos_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @cos_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @cos_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  %in = load double, ptr %in.gep, align 8
  %call = tail call double @llvm.cos.f64(double %in)
  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  store double %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Scalar loop computing out[i] = llvm.cos.f32(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library cosf
; variant chosen by each RUN line: an unmasked <4 x float> call with +neon,
; a masked <vscale x 4 x float> call with +sve.
define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @cos_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_cosf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @cos_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @cos_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @cos_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; align 4 matches the 4-byte element stride and the store below; a larger
  ; alignment cannot hold for consecutive float elements.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.cos.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare double @llvm.cosh.f64(double)
declare float @llvm.cosh.f32(float)

; Scalar loop computing out[i] = llvm.cosh.f64(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library cosh
; variant chosen by each RUN line: an unmasked <2 x double> call with +neon,
; a masked <vscale x 2 x double> call with +sve.
define void @cosh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @cosh_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_cosh(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @cosh_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @cosh_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vcoshq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @cosh_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svcosh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  %in = load double, ptr %in.gep, align 8
  %call = tail call double @llvm.cosh.f64(double %in)
  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  store double %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Scalar loop computing out[i] = llvm.cosh.f32(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library coshf
; variant chosen by each RUN line: an unmasked <4 x float> call with +neon,
; a masked <vscale x 4 x float> call with +sve.
define void @cosh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @cosh_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_coshf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @cosh_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @cosh_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vcoshq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @cosh_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svcosh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; align 4 matches the 4-byte element stride and the store below; a larger
  ; alignment cannot hold for consecutive float elements.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.cosh.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare double @llvm.exp.f64(double)
declare float @llvm.exp.f32(float)

; Scalar loop computing out[i] = llvm.exp.f64(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library exp
; variant chosen by each RUN line: an unmasked <2 x double> call with +neon,
; a masked <vscale x 2 x double> call with +sve.
define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @exp_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @exp_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @exp_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @exp_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
  %in = load double, ptr %in.gep, align 8
  %call = tail call double @llvm.exp.f64(double %in)
  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
  store double %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Scalar loop computing out[i] = llvm.exp.f32(in[i]) for i in [0, 1000).
; The assertions expect the call to be replaced by the vector-library expf
; variant chosen by each RUN line: an unmasked <4 x float> call with +neon,
; a masked <vscale x 4 x float> call with +sve.
define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @exp_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_expf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @exp_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @exp_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @exp_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; align 4 matches the 4-byte element stride and the store below; a larger
  ; alignment cannot hold for consecutive float elements.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.exp.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

672declare double @llvm.exp10.f64(double)
673declare float @llvm.exp10.f32(float)
674
; Scalar input loop for llvm.exp10.f64: out[i] = exp10(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <2 x double> call on NEON and a
; masked <vscale x 2 x double> call on SVE.
define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @exp10_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @exp10_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @exp10_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @exp10_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Source and destination element addresses for this iteration.
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.exp10.f64(double %val)
  store double %res, ptr %dst, align 8
  ; Constant trip count of 1000.
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
709
; Scalar input loop for llvm.exp10.f32: out[i] = exp10(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <4 x float> call on NEON and a
; masked <vscale x 4 x float> call on SVE.
define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @exp10_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @exp10_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @exp10_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @exp10_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; float elements are 4 bytes apart, so consecutive element addresses can only
  ; be assumed 4-byte aligned (same alignment as the store below).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.exp10.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  ; Constant trip count of 1000.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
744
745declare double @llvm.exp2.f64(double)
746declare float @llvm.exp2.f32(float)
747
; Scalar input loop for llvm.exp2.f64: out[i] = exp2(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <2 x double> call on NEON and a
; masked <vscale x 2 x double> call on SVE.
define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @exp2_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_exp2(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @exp2_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @exp2_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @exp2_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Source and destination element addresses for this iteration.
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.exp2.f64(double %val)
  store double %res, ptr %dst, align 8
  ; Constant trip count of 1000.
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
782
; Scalar input loop for llvm.exp2.f32: out[i] = exp2(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <4 x float> call on NEON and a
; masked <vscale x 4 x float> call on SVE.
define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @exp2_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_exp2f(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @exp2_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @exp2_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @exp2_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; float elements are 4 bytes apart, so consecutive element addresses can only
  ; be assumed 4-byte aligned (same alignment as the store below).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.exp2.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  ; Constant trip count of 1000.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
817
818declare double @llvm.fabs.f64(double)
819declare float @llvm.fabs.f32(float)
820
; Scalar input loop for llvm.fabs.f64: out[i] = fabs(in[i]) for i in [0, 1000).
; The assertions below expect every configuration to keep the operation as a
; vector llvm.fabs intrinsic call (<2 x double> on NEON, <vscale x 2 x double>
; on SVE) instead of a vector-library routine.
define void @fabs_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @fabs_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @fabs_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @fabs_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fabs.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @fabs_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Source and destination element addresses for this iteration.
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.fabs.f64(double %val)
  store double %res, ptr %dst, align 8
  ; Constant trip count of 1000.
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
855
; Scalar input loop for llvm.fabs.f32: out[i] = fabs(in[i]) for i in [0, 1000).
; The assertions below expect every configuration to keep the operation as a
; vector llvm.fabs intrinsic call (<4 x float> on NEON, <vscale x 4 x float>
; on SVE) instead of a vector-library routine.
define void @fabs_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @fabs_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @fabs_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @fabs_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @fabs_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; float elements are 4 bytes apart, so consecutive element addresses can only
  ; be assumed 4-byte aligned (same alignment as the store below).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.fabs.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  ; Constant trip count of 1000.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
890
891declare double @llvm.floor.f64(double)
892declare float @llvm.floor.f32(float)
893
; Scalar input loop for llvm.floor.f64: out[i] = floor(in[i]) for i in [0, 1000).
; The assertions below expect every configuration to keep the operation as a
; vector llvm.floor intrinsic call (<2 x double> on NEON, <vscale x 2 x double>
; on SVE) instead of a vector-library routine.
define void @floor_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @floor_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @floor_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @floor_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @floor_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Source and destination element addresses for this iteration.
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.floor.f64(double %val)
  store double %res, ptr %dst, align 8
  ; Constant trip count of 1000.
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
928
; Scalar input loop for llvm.floor.f32: out[i] = floor(in[i]) for i in [0, 1000).
; The assertions below expect every configuration to keep the operation as a
; vector llvm.floor intrinsic call (<4 x float> on NEON, <vscale x 4 x float>
; on SVE) instead of a vector-library routine.
define void @floor_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @floor_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @floor_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @floor_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @floor_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.floor.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; float elements are 4 bytes apart, so consecutive element addresses can only
  ; be assumed 4-byte aligned (same alignment as the store below).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.floor.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  ; Constant trip count of 1000.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
963
964declare double @llvm.fma.f64(double, double, double)
965declare float @llvm.fma.f32(float, float, float)
966
; Scalar input loop for llvm.fma.f64 with the loaded value used for all three
; operands: out[i] = fma(in[i], in[i], in[i]) for i in [0, 1000).
; The assertions below expect every configuration to keep the operation as a
; vector llvm.fma intrinsic call (<2 x double> on NEON, <vscale x 2 x double>
; on SVE) instead of a vector-library routine.
define void @fma_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @fma_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @fma_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
;
; ARMPL-NEON-LABEL: define void @fma_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]], <2 x double> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @fma_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
;
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Source and destination element addresses for this iteration.
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.fma.f64(double %val, double %val, double %val)
  store double %res, ptr %dst, align 8
  ; Constant trip count of 1000.
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1001
; Scalar input loop for llvm.fma.f32 with the loaded value used for all three
; operands: out[i] = fma(in[i], in[i], in[i]) for i in [0, 1000).
; The assertions below expect every configuration to keep the operation as a
; vector llvm.fma intrinsic call (<4 x float> on NEON, <vscale x 4 x float>
; on SVE) instead of a vector-library routine.
define void @fma_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @fma_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @fma_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
;
; ARMPL-NEON-LABEL: define void @fma_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]], <4 x float> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @fma_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; float elements are 4 bytes apart, so consecutive element addresses can only
  ; be assumed 4-byte aligned (same alignment as the store below).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.fma.f32(float %in, float %in, float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  ; Constant trip count of 1000.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1036
1037declare double @llvm.log.f64(double)
1038declare float @llvm.log.f32(float)
1039
; Scalar input loop for llvm.log.f64: out[i] = log(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <2 x double> call on NEON and a
; masked <vscale x 2 x double> call on SVE.
define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @log_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @log_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @log_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @log_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Source and destination element addresses for this iteration.
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.log.f64(double %val)
  store double %res, ptr %dst, align 8
  ; Constant trip count of 1000.
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1074
; Scalar input loop for llvm.log.f32: out[i] = log(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <4 x float> call on NEON and a
; masked <vscale x 4 x float> call on SVE.
define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @log_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @log_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @log_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @log_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; float elements are 4 bytes apart, so consecutive element addresses can only
  ; be assumed 4-byte aligned (same alignment as the store below).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.log.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  ; Constant trip count of 1000.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1109
1110declare double @llvm.log10.f64(double)
1111declare float @llvm.log10.f32(float)
1112
; Scalar input loop for llvm.log10.f64: out[i] = log10(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <2 x double> call on NEON and a
; masked <vscale x 2 x double> call on SVE.
define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @log10_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @log10_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @log10_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @log10_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Source and destination element addresses for this iteration.
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.log10.f64(double %val)
  store double %res, ptr %dst, align 8
  ; Constant trip count of 1000.
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1147
; Scalar input loop for llvm.log10.f32: out[i] = log10(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <4 x float> call on NEON and a
; masked <vscale x 4 x float> call on SVE.
define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @log10_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log10f(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @log10_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @log10_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @log10_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; float elements are 4 bytes apart, so consecutive element addresses can only
  ; be assumed 4-byte aligned (same alignment as the store below).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.log10.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  ; Constant trip count of 1000.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1182
1183declare double @llvm.log2.f64(double)
1184declare float @llvm.log2.f32(float)
1185
; Scalar input loop for llvm.log2.f64: out[i] = log2(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <2 x double> call on NEON and a
; masked <vscale x 2 x double> call on SVE.
define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @log2_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_log2(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @log2_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @log2_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @log2_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Source and destination element addresses for this iteration.
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.log2.f64(double %val)
  store double %res, ptr %dst, align 8
  ; Constant trip count of 1000.
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1220
; Scalar input loop for llvm.log2.f32: out[i] = log2(in[i]) for i in [0, 1000).
; The assertions below expect loop-vectorize to turn the call into the vector
; library routine of the selected library: a <4 x float> call on NEON and a
; masked <vscale x 4 x float> call on SVE.
define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @log2_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_log2f(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @log2_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @log2_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @log2_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; float elements are 4 bytes apart, so consecutive element addresses can only
  ; be assumed 4-byte aligned (same alignment as the store below).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.log2.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  ; Constant trip count of 1000.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1255
1256declare double @llvm.maxnum.f64(double, double)
1257declare float @llvm.maxnum.f32(float, float)
1258
; Scalar input loop for llvm.maxnum.f64 with the loaded value used for both
; operands: out[i] = maxnum(in[i], in[i]) for i in [0, 1000).
; The assertions below expect every configuration to keep the operation as a
; vector llvm.maxnum intrinsic call (<2 x double> on NEON,
; <vscale x 2 x double> on SVE) instead of a vector-library routine.
define void @maxnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @maxnum_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @maxnum_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
;
; ARMPL-NEON-LABEL: define void @maxnum_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.maxnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @maxnum_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
;
entry:
  br label %loop

loop:
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  ; Source and destination element addresses for this iteration.
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.maxnum.f64(double %val, double %val)
  store double %res, ptr %dst, align 8
  ; Constant trip count of 1000.
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1293
; Scalar input loop for llvm.maxnum.f32 with the loaded value used for both
; operands: out[i] = maxnum(in[i], in[i]) for i in [0, 1000).
; The assertions below expect every configuration to keep the operation as a
; vector llvm.maxnum intrinsic call (<4 x float> on NEON,
; <vscale x 4 x float> on SVE) instead of a vector-library routine.
define void @maxnum_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @maxnum_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @maxnum_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
;
; ARMPL-NEON-LABEL: define void @maxnum_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @maxnum_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; float elements are 4 bytes apart, so consecutive element addresses can only
  ; be assumed 4-byte aligned (same alignment as the store below).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.maxnum.f32(float %in, float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  ; Constant trip count of 1000.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1328
1329declare double @llvm.minnum.f64(double, double)
1330declare float @llvm.minnum.f32(float, float)
1331
; Scalar loop of 1000 iterations computing out[i] = llvm.minnum.f64(in[i], in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<2 x double> with NEON,
; <vscale x 2 x double> with SVE) rather than to a vector-library routine.
define void @minnum_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @minnum_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @minnum_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
;
; ARMPL-NEON-LABEL: define void @minnum_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.minnum.v2f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @minnum_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]])
;
entry:
  br label %loop

loop:
  ; %idx is the element index, counting 0..999.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %elt = load double, ptr %src, align 8
  ; The loaded element is passed as both operands of the intrinsic.
  %res = tail call double @llvm.minnum.f64(double %elt, double %elt)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1366
; Scalar loop of 1000 iterations computing out[i] = llvm.minnum.f32(in[i], in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<4 x float> with NEON,
; <vscale x 4 x float> with SVE) rather than to a vector-library routine.
define void @minnum_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @minnum_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @minnum_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
;
; ARMPL-NEON-LABEL: define void @minnum_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @minnum_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so the load may only claim
  ; 4-byte alignment; an 8-byte claim would be false on every other iteration.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.minnum.f32(float %in, float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1401
1402declare double @llvm.nearbyint.f64(double)
1403declare float @llvm.nearbyint.f32(float)
1404
; Scalar loop of 1000 iterations computing out[i] = llvm.nearbyint.f64(in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<2 x double> with NEON,
; <vscale x 2 x double> with SVE) rather than to a vector-library routine.
define void @nearbyint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @nearbyint_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @nearbyint_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @nearbyint_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @nearbyint_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.nearbyint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
entry:
  br label %loop

loop:
  ; %idx is the element index, counting 0..999.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %elt = load double, ptr %src, align 8
  %res = tail call double @llvm.nearbyint.f64(double %elt)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1439
; Scalar loop of 1000 iterations computing out[i] = llvm.nearbyint.f32(in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<4 x float> with NEON,
; <vscale x 4 x float> with SVE) rather than to a vector-library routine.
define void @nearbyint_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @nearbyint_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @nearbyint_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @nearbyint_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @nearbyint_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.nearbyint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so the load may only claim
  ; 4-byte alignment; an 8-byte claim would be false on every other iteration.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.nearbyint.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1474
1475declare double @llvm.pow.f64(double, double)
1476declare float @llvm.pow.f32(float, float)
1477
; Scalar loop of 1000 iterations computing out[i] = llvm.pow.f64(in[i], in[i]).
; The assertions below expect the call to be replaced by a routine of the
; selected vector library: _ZGVnN2vv_pow or armpl_vpowq_f64 with NEON, and the
; masked _ZGVsMxvv_pow or armpl_svpow_f64_x (which take the active lane mask as
; an additional operand) with SVE.
define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @pow_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2vv_pow(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @pow_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @pow_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vpowq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x double> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @pow_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %loop

loop:
  ; %idx is the element index, counting 0..999.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %elt = load double, ptr %src, align 8
  ; The loaded element is passed as both operands of the intrinsic.
  %res = tail call double @llvm.pow.f64(double %elt, double %elt)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1512
; Scalar loop of 1000 iterations computing out[i] = llvm.pow.f32(in[i], in[i]).
; The assertions below expect the call to be replaced by a routine of the
; selected vector library: _ZGVnN4vv_powf or armpl_vpowq_f32 with NEON, and the
; masked _ZGVsMxvv_powf or armpl_svpow_f32_x (which take the active lane mask
; as an additional operand) with SVE.
define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @pow_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4vv_powf(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; SLEEF-SVE-LABEL: define void @pow_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @pow_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vpowq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x float> [[WIDE_LOAD]])
;
; ARMPL-SVE-LABEL: define void @pow_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so the load may only claim
  ; 4-byte alignment; an 8-byte claim would be false on every other iteration.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.pow.f32(float %in, float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1547
1548declare double @llvm.rint.f64(double)
1549declare float @llvm.rint.f32(float)
1550
; Scalar loop of 1000 iterations computing out[i] = llvm.rint.f64(in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<2 x double> with NEON,
; <vscale x 2 x double> with SVE) rather than to a vector-library routine.
define void @rint_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @rint_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @rint_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @rint_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @rint_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.rint.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
entry:
  br label %loop

loop:
  ; %idx is the element index, counting 0..999.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %elt = load double, ptr %src, align 8
  %res = tail call double @llvm.rint.f64(double %elt)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1585
; Scalar loop of 1000 iterations computing out[i] = llvm.rint.f32(in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<4 x float> with NEON,
; <vscale x 4 x float> with SVE) rather than to a vector-library routine.
define void @rint_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @rint_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @rint_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @rint_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @rint_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.rint.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so the load may only claim
  ; 4-byte alignment; an 8-byte claim would be false on every other iteration.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.rint.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1620
1621declare double @llvm.round.f64(double)
1622declare float @llvm.round.f32(float)
1623
; Scalar loop of 1000 iterations computing out[i] = llvm.round.f64(in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<2 x double> with NEON,
; <vscale x 2 x double> with SVE) rather than to a vector-library routine.
define void @round_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @round_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @round_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @round_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.round.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @round_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.round.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
entry:
  br label %loop

loop:
  ; %idx is the element index, counting 0..999.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %elt = load double, ptr %src, align 8
  %res = tail call double @llvm.round.f64(double %elt)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1658
; Scalar loop of 1000 iterations computing out[i] = llvm.round.f32(in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<4 x float> with NEON,
; <vscale x 4 x float> with SVE) rather than to a vector-library routine.
define void @round_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @round_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @round_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @round_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @round_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.round.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so the load may only claim
  ; 4-byte alignment; an 8-byte claim would be false on every other iteration.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.round.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1693
1694declare double @llvm.sin.f64(double)
1695declare float @llvm.sin.f32(float)
1696
; Scalar loop of 1000 iterations computing out[i] = llvm.sin.f64(in[i]).
; The assertions below expect the call to be replaced by a routine of the
; selected vector library: _ZGVnN2v_sin or armpl_vsinq_f64 with NEON, and the
; masked _ZGVsMxv_sin or armpl_svsin_f64_x (which take the active lane mask as
; an additional operand) with SVE.
define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @sin_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sin(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @sin_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @sin_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @sin_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %loop

loop:
  ; %idx is the element index, counting 0..999.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %elt = load double, ptr %src, align 8
  %res = tail call double @llvm.sin.f64(double %elt)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1731
; Scalar loop of 1000 iterations computing out[i] = llvm.sin.f32(in[i]).
; The assertions below expect the call to be replaced by a routine of the
; selected vector library: _ZGVnN4v_sinf or armpl_vsinq_f32 with NEON, and the
; masked _ZGVsMxv_sinf or armpl_svsin_f32_x (which take the active lane mask as
; an additional operand) with SVE.
define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @sin_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @sin_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @sin_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @sin_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so the load may only claim
  ; 4-byte alignment; an 8-byte claim would be false on every other iteration.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.sin.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1766
1767declare double @llvm.sinh.f64(double)
1768declare float @llvm.sinh.f32(float)
1769
; Scalar loop of 1000 iterations computing out[i] = llvm.sinh.f64(in[i]).
; The assertions below expect the call to be replaced by a routine of the
; selected vector library: _ZGVnN2v_sinh or armpl_vsinhq_f64 with NEON, and the
; masked _ZGVsMxv_sinh or armpl_svsinh_f64_x (which take the active lane mask
; as an additional operand) with SVE.
define void @sinh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @sinh_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_sinh(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @sinh_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @sinh_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vsinhq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @sinh_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %loop

loop:
  ; %idx is the element index, counting 0..999.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %elt = load double, ptr %src, align 8
  %res = tail call double @llvm.sinh.f64(double %elt)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1804
; Scalar loop of 1000 iterations computing out[i] = llvm.sinh.f32(in[i]).
; The assertions below expect the call to be replaced by a routine of the
; selected vector library: _ZGVnN4v_sinhf or armpl_vsinhq_f32 with NEON, and
; the masked _ZGVsMxv_sinhf or armpl_svsinh_f32_x (which take the active lane
; mask as an additional operand) with SVE.
define void @sinh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @sinh_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_sinhf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @sinh_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @sinh_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vsinhq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @sinh_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so the load may only claim
  ; 4-byte alignment; an 8-byte claim would be false on every other iteration.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.sinh.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1839
1840declare double @llvm.sqrt.f64(double)
1841declare float @llvm.sqrt.f32(float)
1842
; Scalar loop of 1000 iterations computing out[i] = llvm.sqrt.f64(in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<2 x double> with NEON,
; <vscale x 2 x double> with SVE) rather than to a vector-library routine.
define void @sqrt_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @sqrt_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @sqrt_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @sqrt_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @sqrt_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
entry:
  br label %loop

loop:
  ; %idx is the element index, counting 0..999.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %elt = load double, ptr %src, align 8
  %res = tail call double @llvm.sqrt.f64(double %elt)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1877
; Scalar loop of 1000 iterations computing out[i] = llvm.sqrt.f32(in[i]).
; The assertions below expect all four configurations to widen the call to the
; vector form of the same intrinsic (<4 x float> with NEON,
; <vscale x 4 x float> with SVE) rather than to a vector-library routine.
define void @sqrt_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @sqrt_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @sqrt_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @sqrt_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @sqrt_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
  entry:
  br label %for.body

  for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so the load may only claim
  ; 4-byte alignment; an 8-byte claim would be false on every other iteration.
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.sqrt.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1912
1913declare double @llvm.tan.f64(double)
1914declare float @llvm.tan.f32(float)
1915
; Scalar loop of 1000 iterations computing out[i] = llvm.tan.f64(in[i]).
; The assertions below expect the call to be replaced by a routine of the
; selected vector library: _ZGVnN2v_tan or armpl_vtanq_f64 with NEON, and the
; masked _ZGVsMxv_tan or armpl_svtan_f64_x (which take the active lane mask as
; an additional operand) with SVE.
define void @tan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @tan_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tan(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @tan_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @tan_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vtanq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @tan_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
entry:
  br label %loop

loop:
  ; %idx is the element index, counting 0..999.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %elt = load double, ptr %src, align 8
  %res = tail call double @llvm.tan.f64(double %elt)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

exit:
  ret void
}
1950
; Scalar loop that applies llvm.tan.f32 to 1000 consecutive floats read
; through %in.ptr and writes each result to the same index of %out.ptr.
; The assertions below pin the vector tanf call expected from each opt
; invocation listed at the top of the file.
define void @tan_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @tan_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @tan_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @tan_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vtanq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @tan_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  ; %iv counts 0..999; one float is processed per iteration.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so 4 is the largest
  ; alignment that can hold on every iteration (and it matches the store).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.tan.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
1985
1986declare double @llvm.tanh.f64(double)
1987declare float @llvm.tanh.f32(float)
1988
; Scalar loop that applies llvm.tanh.f64 to 1000 consecutive doubles read
; through %in.ptr and writes each result to the same index of %out.ptr.
; The assertions below pin the vector tanh call expected from each opt
; invocation listed at the top of the file.
define void @tanh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @tanh_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @_ZGVnN2v_tanh(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @tanh_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @tanh_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @armpl_vtanhq_f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @tanh_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %loop

  loop:
  ; %idx counts 0..999; one element is processed per iteration.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.tanh.f64(double %val)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

  exit:
  ret void
}
2023
; Scalar loop that applies llvm.tanh.f32 to 1000 consecutive floats read
; through %in.ptr and writes each result to the same index of %out.ptr.
; The assertions below pin the vector tanhf call expected from each opt
; invocation listed at the top of the file.
define void @tanh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @tanh_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @_ZGVnN4v_tanhf(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @tanh_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
; ARMPL-NEON-LABEL: define void @tanh_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @armpl_vtanhq_f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @tanh_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
;
  entry:
  br label %for.body

  for.body:
  ; %iv counts 0..999; one float is processed per iteration.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so 4 is the largest
  ; alignment that can hold on every iteration (and it matches the store).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.tanh.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
2058
2059declare double @llvm.trunc.f64(double)
2060declare float @llvm.trunc.f32(float)
2061
; Scalar loop that applies llvm.trunc.f64 to 1000 consecutive doubles read
; through %in.ptr and writes each result to the same index of %out.ptr.
; In all four configurations the assertions below expect the widened
; llvm.trunc intrinsic itself rather than a vector-library routine.
define void @trunc_f64(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @trunc_f64
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @trunc_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @trunc_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @trunc_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 2 x double> @llvm.trunc.nxv2f64(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]])
;
  entry:
  br label %loop

  loop:
  ; %idx counts 0..999; one element is processed per iteration.
  %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
  %src = getelementptr inbounds double, ptr %in.ptr, i64 %idx
  %val = load double, ptr %src, align 8
  %res = tail call double @llvm.trunc.f64(double %val)
  %dst = getelementptr inbounds double, ptr %out.ptr, i64 %idx
  store double %res, ptr %dst, align 8
  %idx.next = add nuw nsw i64 %idx, 1
  %done = icmp eq i64 %idx.next, 1000
  br i1 %done, label %exit, label %loop

  exit:
  ret void
}
2096
; Scalar loop that applies llvm.trunc.f32 to 1000 consecutive floats read
; through %in.ptr and writes each result to the same index of %out.ptr.
; In all four configurations the assertions below expect the widened
; llvm.trunc intrinsic itself rather than a vector-library routine.
define void @trunc_f32(ptr noalias %in.ptr, ptr %out.ptr) {
; SLEEF-NEON-LABEL: define void @trunc_f32
; SLEEF-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; SLEEF-SVE-LABEL: define void @trunc_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; SLEEF-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
; ARMPL-NEON-LABEL: define void @trunc_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-NEON:    [[TMP3:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[WIDE_LOAD:%.*]])
;
; ARMPL-SVE-LABEL: define void @trunc_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
; ARMPL-SVE:    [[TMP13:%.*]] = call <vscale x 4 x float> @llvm.trunc.nxv4f32(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]])
;
  entry:
  br label %for.body

  for.body:
  ; %iv counts 0..999; one float is processed per iteration.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
  ; Consecutive float elements are 4 bytes apart, so 4 is the largest
  ; alignment that can hold on every iteration (and it matches the store).
  %in = load float, ptr %in.gep, align 4
  %call = tail call float @llvm.trunc.f32(float %in)
  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
  store float %call, ptr %out.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

  for.end:
  ret void
}
2131