; xref: /llvm-project/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll (revision dfb60bb9193d78d0980193e1ade715cffbb55af8)
; Verifies that inject-tli-mappings + loop-vectorize replace scalar libm calls
; and llvm.* math intrinsics with AMDLIBM vector routines at the VFs the
; library provides (VF2/VF4 under AVX, VF8/VF16 under AVX-512).
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -mattr=avx -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF2
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF8
; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=16 -force-vector-interleave=1 -mattr=+avx512f -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF16

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Scalar declarations exercised by the test bodies below; both the libm
; symbols and the corresponding llvm.* intrinsics are covered.
declare double @sin(double) #0
declare float @sinf(float) #0
declare double @llvm.sin.f64(double) #0
declare float @llvm.sin.f32(float) #0

declare double @cos(double) #0
declare float @cosf(float) #0
declare double @llvm.cos.f64(double) #0
declare float @llvm.cos.f32(float) #0

declare double @tan(double) #0
declare float @tanf(float) #0
declare double @llvm.tan.f64(double) #0
declare float @llvm.tan.f32(float) #0

declare double @acos(double) #0
declare float @acosf(float) #0
declare double @llvm.acos.f64(double) #0
declare float @llvm.acos.f32(float) #0

declare double @asin(double) #0
declare float @asinf(float) #0
declare double @llvm.asin.f64(double) #0
declare float @llvm.asin.f32(float) #0

declare double @atan(double) #0
declare float @atanf(float) #0
declare double @llvm.atan.f64(double) #0
declare float @llvm.atan.f32(float) #0

declare double @sinh(double) #0
declare float @sinhf(float) #0
declare double @llvm.sinh.f64(double) #0
declare float @llvm.sinh.f32(float) #0

declare double @cosh(double) #0
declare float @coshf(float) #0
declare double @llvm.cosh.f64(double) #0
declare float @llvm.cosh.f32(float) #0

declare double @tanh(double) #0
declare float @tanhf(float) #0
declare double @llvm.tanh.f64(double) #0
declare float @llvm.tanh.f32(float) #0

declare double @pow(double, double) #0
declare float @powf(float, float) #0
declare double @llvm.pow.f64(double, double) #0
declare float @llvm.pow.f32(float, float) #0

declare double @exp(double) #0
declare float @expf(float) #0
declare double @llvm.exp.f64(double) #0
declare float @llvm.exp.f32(float) #0

declare double @log(double) #0
declare float @logf(float) #0
declare double @llvm.log.f64(double) #0
declare float @llvm.log.f32(float) #0

declare double @log2(double) #0
declare float @log2f(float) #0
declare double @llvm.log2.f64(double) #0
declare float @llvm.log2.f32(float) #0

declare double @log10(double) #0
declare float @log10f(float) #0
declare double @llvm.log10.f64(double) #0
declare float @llvm.log10.f32(float) #0

declare double @sqrt(double) #0
declare float @sqrtf(float) #0

declare double @exp2(double) #0
declare float @exp2f(float) #0
declare double @llvm.exp2.f64(double) #0
declare float @llvm.exp2.f32(float) #0
; sin(double) -> amd_vrd{2,4,8}_sin; no AMDLIBM double mapping at VF16, so the widened llvm.sin intrinsic remains.
define void @sin_f64(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @sin(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
112
; sinf -> amd_vrs{4,8,16}_sinf; no AMDLIBM float mapping at VF2, so the widened llvm.sin intrinsic remains.
define void @sin_f32(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @sinf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
138
; The llvm.sin.f64 intrinsic gets the same AMDLIBM mappings as the libm sin call.
define void @sin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.sin.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
164
; The llvm.sin.f32 intrinsic gets the same AMDLIBM mappings as the libm sinf call.
define void @sin_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sin_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.sin.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
190
; cos(double) -> amd_vrd{2,4,8}_cos; no AMDLIBM double mapping at VF16, so the widened llvm.cos intrinsic remains.
define void @cos_f64(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @cos(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
216
; cosf -> amd_vrs{4,8,16}_cosf; no AMDLIBM float mapping at VF2, so the widened llvm.cos intrinsic remains.
define void @cos_f32(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @cosf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
242
; The llvm.cos.f64 intrinsic gets the same AMDLIBM mappings as the libm cos call.
define void @cos_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.cos.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
268
; The llvm.cos.f32 intrinsic gets the same AMDLIBM mappings as the libm cosf call.
define void @cos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cos_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.cos.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
294
; tan(double) -> amd_vrd{2,4,8}_tan; no AMDLIBM double mapping at VF16, so the widened llvm.tan intrinsic remains.
define void @tan_f64(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @tan(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
320
; tanf -> amd_vrs{4,8,16}_tanf; no AMDLIBM float mapping at VF2, so the widened llvm.tan intrinsic remains.
define void @tan_f32(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @tanf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
346
; The llvm.tan.f64 intrinsic gets the same AMDLIBM mappings as the libm tan call.
define void @tan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_tan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_tan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_tan(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.tan.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
372
; The llvm.tan.f32 intrinsic gets the same AMDLIBM mappings as the libm tanf call.
define void @tan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tan_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.tan.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
398
; acos(double) has no AMDLIBM vector mapping at any VF; the widened llvm.acos intrinsic is used throughout.
define void @acos_f64(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @acos(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
424
; acosf -> amd_vrs{4,8,16}_acosf; no AMDLIBM float mapping at VF2, so the widened llvm.acos intrinsic remains.
define void @acos_f32(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @acosf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
450
; llvm.acos.f64 likewise has no AMDLIBM vector mapping; the widened intrinsic is kept at every VF.
define void @acos_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.acos.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
476
; The llvm.acos.f32 intrinsic gets the same AMDLIBM mappings as the libm acosf call.
define void @acos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.acos.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
502
; asin(double) maps to AMDLIBM only at VF8 (amd_vrd8_asin); other VFs keep the widened llvm.asin intrinsic.
define void @asin_f64(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @asin(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
528
; asinf -> amd_vrs{4,8,16}_asinf; no AMDLIBM float mapping at VF2, so the widened llvm.asin intrinsic remains.
define void @asin_f32(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @asinf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
554
; The llvm.asin.f64 intrinsic gets the same VF8-only AMDLIBM mapping as the libm asin call.
define void @asin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.asin.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
580
; The llvm.asin.f32 intrinsic gets the same AMDLIBM mappings as the libm asinf call.
define void @asin_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.asin.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
606
; atan(double) -> amd_vrd{2,4,8}_atan; no AMDLIBM double mapping at VF16, so the widened llvm.atan intrinsic remains.
define void @atan_f64(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @atan(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
632
; atanf -> amd_vrs{4,8,16}_atanf; no AMDLIBM float mapping at VF2, so the widened llvm.atan intrinsic remains.
define void @atan_f32(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @atanf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
658
; The llvm.atan.f64 intrinsic gets the same AMDLIBM mappings as the libm atan call.
define void @atan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.atan.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
684
; The llvm.atan.f32 intrinsic gets the same AMDLIBM mappings as the libm atanf call.
define void @atan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.atan.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
710
; sinh(double) has no AMDLIBM vector mapping at any VF; the widened llvm.sinh intrinsic is used throughout.
define void @sinh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @sinh(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
736
; sinhf has no AMDLIBM vector mapping at any VF; the widened llvm.sinh intrinsic is used throughout.
define void @sinh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @sinhf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
762
; llvm.sinh.f64 likewise has no AMDLIBM vector mapping; the widened intrinsic is kept at every VF.
define void @sinh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.sinh.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
788
; llvm.sinh.f32 likewise has no AMDLIBM vector mapping; the widened intrinsic is kept at every VF.
define void @sinh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @sinh_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.sinh.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
814
; Scalar @cosh calls: AMDLIBM provides amd_vrd2_cosh for VF=2 only; wider
; double VFs fall back to the llvm.cosh vector intrinsic (see CHECK lines).
define void @cosh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @cosh(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
840
; Scalar @coshf calls: AMDLIBM covers VF=4 and VF=8 (amd_vrs4_coshf /
; amd_vrs8_coshf); VF=2 and VF=16 fall back to the llvm.cosh intrinsic.
define void @cosh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @coshf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
866
; The llvm.cosh.f64 intrinsic gets the same mapping as the @cosh libcall:
; amd_vrd2_cosh at VF=2, intrinsic widening elsewhere.
define void @cosh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.cosh.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
892
; The llvm.cosh.f32 intrinsic gets the same mapping as @coshf:
; amd_vrs4_coshf / amd_vrs8_coshf at VF=4/8, intrinsic at VF=2/16.
define void @cosh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.cosh.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
918
; @tanh (double) has no AMDLIBM vector variant at any tested VF: every run
; widens to the llvm.tanh.* vector intrinsic.
define void @tanh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @tanh(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
944
; @tanhf maps to amd_vrs{4,8,16}_tanhf at VF=4/8/16; only VF=2 falls back to
; the llvm.tanh intrinsic.
define void @tanh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @tanhf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
970
; llvm.tanh.f64 mirrors @tanh: no AMDLIBM double variant, so all tested VFs
; widen to the llvm.tanh.* vector intrinsic.
define void @tanh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.tanh.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
996
; llvm.tanh.f32 mirrors @tanhf: amd_vrs{4,8,16}_tanhf at VF=4/8/16,
; intrinsic fallback only at VF=2.
define void @tanh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.tanh.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1022
; Two-operand case: @pow(base, exp) with the exponent loaded per iteration.
; amd_vrd{2,4,8}_pow cover VF=2/4/8; VF=16 falls back to llvm.pow.
define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64(
; CHECK-VF2:    [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF4:    [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF8:    [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF16:   [[TMP8:%.*]] = call <16 x double> @llvm.pow.v16f64(<16 x double> [[TMP4:%.*]], <16 x double> [[WIDE_LOAD:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
  %tmp1 = load double, ptr %arrayidx, align 4
  %tmp2 = tail call double @pow(double %conv, double %tmp1)
  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1050
; llvm.pow.f64 mirrors @pow: amd_vrd{2,4,8}_pow at VF=2/4/8, intrinsic
; widening at VF=16.
define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f64_intrinsic(
; CHECK-VF2:    [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF4:    [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF8:    [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
; CHECK-VF16:   [[TMP8:%.*]] = call <16 x double> @llvm.pow.v16f64(<16 x double> [[TMP4:%.*]], <16 x double> [[WIDE_LOAD:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %arrayidx = getelementptr inbounds double, ptr %exp, i64 %iv
  %tmp1 = load double, ptr %arrayidx, align 4
  %tmp2 = tail call double @llvm.pow.f64(double %conv, double %tmp1)
  %arrayidx2 = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1078
; @powf maps to amd_vrs{4,8,16}_powf at VF=4/8/16; VF=2 falls back to the
; llvm.pow intrinsic.
define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32(
; CHECK-VF2:    [[TMP8:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[TMP4:%.*]], <2 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF4:    [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF8:    [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF16:   [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
  %tmp1 = load float, ptr %arrayidx, align 4
  %tmp2 = tail call float @powf(float %conv, float %tmp1)
  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1106
; llvm.pow.f32 mirrors @powf: amd_vrs{4,8,16}_powf at VF=4/8/16, intrinsic
; fallback only at VF=2.
define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
; CHECK-LABEL: @pow_f32_intrinsic(
; CHECK-VF2:    [[TMP8:%.*]] = call <2 x float> @llvm.pow.v2f32(<2 x float> [[TMP4:%.*]], <2 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF4:    [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF8:    [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
; CHECK-VF16:   [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %arrayidx = getelementptr inbounds float, ptr %exp, i64 %iv
  %tmp1 = load float, ptr %arrayidx, align 4
  %tmp2 = tail call float @llvm.pow.f32(float %conv, float %tmp1)
  %arrayidx2 = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %tmp2, ptr %arrayidx2, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1134
; @exp maps to amd_vrd{2,4,8}_exp at VF=2/4/8; VF=16 falls back to the
; llvm.exp intrinsic.
define void @exp_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @exp(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1160
; @expf maps to amd_vrs{4,8,16}_expf at VF=4/8/16; VF=2 falls back to the
; llvm.exp intrinsic.
define void @exp_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @expf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1186
; llvm.exp.f64 mirrors @exp: amd_vrd{2,4,8}_exp at VF=2/4/8, intrinsic
; widening at VF=16.
define void @exp_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1212
; llvm.exp.f32 mirrors @expf: amd_vrs{4,8,16}_expf at VF=4/8/16, intrinsic
; fallback only at VF=2.
define void @exp_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1238
; @log maps to amd_vrd{2,4,8}_log at VF=2/4/8; VF=16 falls back to the
; llvm.log intrinsic.
define void @log_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1264
; @logf maps to amd_vrs{4,8,16}_logf at VF=4/8/16; VF=2 falls back to the
; llvm.log intrinsic.
define void @log_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @logf(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1290
; llvm.log.f64 mirrors @log: amd_vrd{2,4,8}_log at VF=2/4/8, intrinsic
; widening at VF=16.
define void @log_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1316
; llvm.log.f32 mirrors @logf: amd_vrs{4,8,16}_logf at VF=4/8/16, intrinsic
; fallback only at VF=2.
define void @log_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1342
; @log2 maps to amd_vrd{2,4,8}_log2 at VF=2/4/8; VF=16 falls back to the
; llvm.log2 intrinsic.
define void @log2_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log2.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log2(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1368
; @log2f maps to amd_vrs{4,8,16}_log2f at VF=4/8/16; VF=2 falls back to the
; llvm.log2 intrinsic.
define void @log2_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log2.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @log2f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1394
; llvm.log2.f64 mirrors @log2: amd_vrd{2,4,8}_log2 at VF=2/4/8, intrinsic
; widening at VF=16.
define void @log2_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log2.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log2.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1420
; llvm.log2.f32 mirrors @log2f: amd_vrs{4,8,16}_log2f at VF=4/8/16,
; intrinsic fallback only at VF=2.
define void @log2_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log2_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log2.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log2.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1446
; @log10 has only a VF=2 AMDLIBM variant (amd_vrd2_log10); VF=4/8/16 fall
; back to the llvm.log10 intrinsic.
define void @log10_f64(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @log10(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1472
; @log10f maps to amd_vrs{4,8,16}_log10f at VF=4/8/16; VF=2 falls back to
; the llvm.log10 intrinsic.
define void @log10_f32(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @log10f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1498
; llvm.log10.f64 mirrors @log10: amd_vrd2_log10 at VF=2 only, intrinsic
; widening at wider VFs.
define void @log10_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log10(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.log10.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.log10.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.log10.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.log10.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1524
; llvm.log10.f32 mirrors @log10f: amd_vrs{4,8,16}_log10f at VF=4/8/16,
; intrinsic fallback only at VF=2.
define void @log10_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @log10_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.log10.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.log10.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1550
; @exp2 maps to amd_vrd{2,4,8}_exp2 at VF=2/4/8; VF=16 falls back to the
; llvm.exp2 intrinsic.
define void @exp2_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp2.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @exp2(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1576
; @exp2f maps to amd_vrs{4,8,16}_exp2f at VF=4/8/16; VF=2 falls back to the
; llvm.exp2 intrinsic.
define void @exp2_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @exp2f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1602
; Same as exp2_f64, but the scalar call is the llvm.exp2.f64 intrinsic
; rather than the libcall; the expected vector mappings are identical
; (AMDLIBM routines for VF 2/4/8, generic intrinsic for VF16).
define void @exp2_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x double> @llvm.exp2.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp2.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1628
; Same as exp2_f32, but using the llvm.exp2.f32 intrinsic instead of the
; exp2f libcall; expected mappings are identical (AMDLIBM routines for
; VF 4/8/16, generic intrinsic for VF2).
define void @exp2_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp2_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp2.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp2.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1654
; exp10 (double libcall): only a 2-wide AMDLIBM mapping (amd_vrd2_exp10) is
; expected; VF 4/8/16 must stay on the generic llvm.exp10 vector intrinsic.
define void @exp10_f64(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_f64(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:    call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @exp10(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1680
; exp10f (float libcall): only a 4-wide AMDLIBM mapping (amd_vrs4_exp10f)
; is expected; VF 2/8/16 must stay on the generic llvm.exp10 vector
; intrinsic.
define void @exp10_f32(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_f32(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @exp10f(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1706
; Same as exp10_f64, but the scalar call is the llvm.exp10.f64 intrinsic;
; expected mappings are identical (AMDLIBM only at VF2, generic intrinsic
; for VF 4/8/16).
define void @exp10_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_f64_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @llvm.exp10.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @llvm.exp10.v8f64(<8 x double> [[TMP4:%.*]])
; CHECK-VF16:    [[TMP5:%.*]] = call <16 x double> @llvm.exp10.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to double
  %call = tail call double @llvm.exp10.f64(double %conv)
  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
  store double %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1732
; Same as exp10_f32, but using the llvm.exp10.f32 intrinsic instead of the
; exp10f libcall; expected mappings are identical (AMDLIBM only at VF4,
; generic intrinsic for VF 2/8/16).
define void @exp10_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @exp10_f32_intrinsic(
; CHECK-VF2:    [[TMP5:%.*]] = call <2 x float> @llvm.exp10.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @llvm.exp10.v8f32(<8 x float> [[TMP4:%.*]])
; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @llvm.exp10.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %tmp = trunc i64 %iv to i32
  %conv = sitofp i32 %tmp to float
  %call = tail call float @llvm.exp10.f32(float %conv)
  %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
  store float %call, ptr %arrayidx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 1000
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}
1758
1759
; sincos (double, results returned through out-pointers): the -NOT lines
; assert that the AMDLIBM amd_vrd{2,4,8}_sincos variants are NOT emitted,
; i.e. this call must not be vectorized to the library routines.
define void @sincos_f64(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; CHECK-LABEL: define void @sincos_f64
; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]])
; CHECK-VF2-NOT:    call void @amd_vrd2_sincos(<2 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK-VF4-NOT:    call void @amd_vrd4_sincos(<4 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK-VF8-NOT:    call void @amd_vrd8_sincos(<8 x double> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

; Loads from %a, writes sin/cos results via pointers into %b and %c.
for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
  %num = load double, ptr %gepa, align 8
  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
  %gepc = getelementptr double, ptr %c, i64 %indvars.iv
  call void @sincos(double %num, ptr %gepb, ptr %gepc)
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}
1785
; sincosf (float, results returned through out-pointers): the -NOT lines
; assert that the AMDLIBM amd_vrs{4,8,16}_sincosf variants are NOT emitted,
; i.e. this call must not be vectorized to the library routines.
define void @sincos_f32(ptr noalias %a, ptr noalias %b, ptr noalias %c) {
; CHECK-LABEL: define void @sincos_f32
; CHECK-SAME: (ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]])
; CHECK-VF4-NOT:    call void @amd_vrs4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK-VF8-NOT:    call void @amd_vrs8_sincosf(<8 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK-VF16-NOT:    call void @amd_vrs16_sincosf(<16 x float> [[WIDE_LOAD:%.*]], ptr [[TMP5:%.*]], ptr [[TMP6:%.*]])
; CHECK:        ret void
;
entry:
  br label %for.body

; Float counterpart of sincos_f64; same structure, calling sincosf.
for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
  %num = load float, ptr %gepa, align 8
  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
  %gepc = getelementptr float, ptr %c, i64 %indvars.iv
  call void @sincosf(float %num, ptr %gepb, ptr %gepc)
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1000
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}
1811
1812attributes #0 = { nounwind readnone }
1813
1814declare double @exp10(double) #0
1815declare float @exp10f(float) #0
1816declare double @llvm.exp10.f64(double) #0
1817declare float @llvm.exp10.f32(float) #0
1818declare void @sincos(double, ptr, ptr)
1819declare void @sincosf(float, ptr, ptr)
1820