; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,FULLFP16
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,MVEFP
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,FULLFP16
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,MVEFP

define arm_aapcs_vfpcc <4 x float> @sqrt_float32_t(<4 x float> %src) {
; CHECK-LABEL: sqrt_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsqrt.f32 s3, s3
; CHECK-NEXT:    vsqrt.f32 s2, s2
; CHECK-NEXT:    vsqrt.f32 s1, s1
; CHECK-NEXT:    vsqrt.f32 s0, s0
; CHECK-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @sqrt_float16_t(<8 x half> %src) {
; CHECK-LABEL: sqrt_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s0
; CHECK-NEXT:    vsqrt.f16 s0, s0
; CHECK-NEXT:    vsqrt.f16 s4, s4
; CHECK-NEXT:    vins.f16 s0, s4
; CHECK-NEXT:    vmovx.f16 s4, s1
; CHECK-NEXT:    vsqrt.f16 s4, s4
; CHECK-NEXT:    vsqrt.f16 s1, s1
; CHECK-NEXT:    vins.f16 s1, s4
; CHECK-NEXT:    vmovx.f16 s4, s2
; CHECK-NEXT:    vsqrt.f16 s4, s4
; CHECK-NEXT:    vsqrt.f16 s2, s2
; CHECK-NEXT:    vins.f16 s2, s4
; CHECK-NEXT:    vmovx.f16 s4, s3
; CHECK-NEXT:    vsqrt.f16 s4, s4
; CHECK-NEXT:    vsqrt.f16 s3, s3
; CHECK-NEXT:    vins.f16 s3, s4
; CHECK-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) {
; CHECK-LABEL: sqrt_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl sqrt
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl sqrt
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {
; CHECK-LABEL: cos_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @cos_float16_t(<8 x half> %src) {
; CHECK-LABEL: cos_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.cos.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) {
; CHECK-LABEL: cos_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl cos
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl cos
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {
; CHECK-LABEL: sin_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @sin_float16_t(<8 x half> %src) {
; CHECK-LABEL: sin_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.sin.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @sin_float64_t(<2 x double> %src) {
; CHECK-LABEL: sin_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl sin
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl sin
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @tan_float32_t(<4 x float> %src) {
; CHECK-LABEL: tan_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @tan_float16_t(<8 x half> %src) {
; CHECK-LABEL: tan_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.tan.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @tan_float64_t(<2 x double> %src) {
; CHECK-LABEL: tan_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl tan
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl tan
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @exp_float16_t(<8 x half> %src) {
; CHECK-LABEL: exp_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.exp.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) {
; CHECK-LABEL: exp_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl exp
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl exp
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp2_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @exp2_float16_t(<8 x half> %src) {
; CHECK-LABEL: exp2_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.exp2.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) {
; CHECK-LABEL: exp2_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl exp2
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl exp2
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {
; CHECK-LABEL: log_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @log_float16_t(<8 x half> %src) {
; CHECK-LABEL: log_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.log.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) {
; CHECK-LABEL: log_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl log
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl log
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) {
; CHECK-LABEL: log2_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @log2_float16_t(<8 x half> %src) {
; CHECK-LABEL: log2_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.log2.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) {
; CHECK-LABEL: log2_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl log2
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl log2
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {
; CHECK-LABEL: log10_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @log10_float16_t(<8 x half> %src) {
; CHECK-LABEL: log10_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.log10.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) {
; CHECK-LABEL: log10_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl log10
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl log10
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-LABEL: pow_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r4, d11
; CHECK-NEXT:    vmov r1, r5, d9
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    mov r6, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov r4, r2, d10
; CHECK-NEXT:    vmov r5, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r6
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %0 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %src1, <4 x float> %src2)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @pow_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-LABEL: pow_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vcvtb.f32.f16 s0, s20
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s20
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s24, s16
; CHECK-NEXT:    vcvtt.f16.f32 s24, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s21
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s25, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s21
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s25, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s22
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s26, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s22
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s26, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s23
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s27, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s23
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s27, s0
; CHECK-NEXT:    vmov q0, q6
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.pow.v8f16(<8 x half> %src1, <8 x half> %src2)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: pow_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d11
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    bl pow
; CHECK-NEXT:    vmov lr, r12, d10
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl pow
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {
; FULLFP16-LABEL: copysign_float32_t:
; FULLFP16:       @ %bb.0: @ %entry
; FULLFP16-NEXT:    .save {r4, r5, r7, lr}
; FULLFP16-NEXT:    push {r4, r5, r7, lr}
; FULLFP16-NEXT:    vmov r12, r1, d2
; FULLFP16-NEXT:    vmov r2, lr, d3
; FULLFP16-NEXT:    vmov r3, r0, d0
; FULLFP16-NEXT:    vmov r4, r5, d1
; FULLFP16-NEXT:    lsrs r1, r1, #31
; FULLFP16-NEXT:    bfi r0, r1, #31, #1
; FULLFP16-NEXT:    lsrs r1, r2, #31
; FULLFP16-NEXT:    bfi r4, r1, #31, #1
; FULLFP16-NEXT:    lsr.w r1, lr, #31
; FULLFP16-NEXT:    bfi r5, r1, #31, #1
; FULLFP16-NEXT:    lsr.w r1, r12, #31
; FULLFP16-NEXT:    bfi r3, r1, #31, #1
; FULLFP16-NEXT:    vmov s2, r4
; FULLFP16-NEXT:    vmov s3, r5
; FULLFP16-NEXT:    vmov s1, r0
; FULLFP16-NEXT:    vmov s0, r3
; FULLFP16-NEXT:    pop {r4, r5, r7, pc}
;
; MVEFP-LABEL: copysign_float32_t:
; MVEFP:       @ %bb.0: @ %entry
; MVEFP-NEXT:    vmov.i32 q2, #0x80000000
; MVEFP-NEXT:    vbic.i32 q0, #0x80000000
; MVEFP-NEXT:    vand q1, q1, q2
; MVEFP-NEXT:    vorr q0, q0, q1
; MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %src1, <4 x float> %src2)
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @copysign_float16_t(<8 x half> %src1, <8 x half> %src2) {
; FULLFP16-LABEL: copysign_float16_t:
; FULLFP16:       @ %bb.0: @ %entry
; FULLFP16-NEXT:    .pad #32
; FULLFP16-NEXT:    sub sp, #32
; FULLFP16-NEXT:    vmovx.f16 s8, s4
; FULLFP16-NEXT:    vstr.16 s8, [sp, #24]
; FULLFP16-NEXT:    vstr.16 s4, [sp, #28]
; FULLFP16-NEXT:    vmovx.f16 s4, s5
; FULLFP16-NEXT:    vstr.16 s4, [sp, #16]
; FULLFP16-NEXT:    vmovx.f16 s4, s6
; FULLFP16-NEXT:    vstr.16 s5, [sp, #20]
; FULLFP16-NEXT:    vstr.16 s4, [sp, #8]
; FULLFP16-NEXT:    vmovx.f16 s4, s7
; FULLFP16-NEXT:    vstr.16 s6, [sp, #12]
; FULLFP16-NEXT:    vstr.16 s4, [sp]
; FULLFP16-NEXT:    vstr.16 s7, [sp, #4]
; FULLFP16-NEXT:    ldrb.w r0, [sp, #25]
; FULLFP16-NEXT:    vmovx.f16 s4, s0
; FULLFP16-NEXT:    vabs.f16 s4, s4
; FULLFP16-NEXT:    vneg.f16 s6, s4
; FULLFP16-NEXT:    lsls r0, r0, #24
; FULLFP16-NEXT:    it pl
; FULLFP16-NEXT:    vmovpl.f32 s6, s4
; FULLFP16-NEXT:    ldrb.w r0, [sp, #29]
; FULLFP16-NEXT:    vabs.f16 s4, s0
; FULLFP16-NEXT:    vneg.f16 s0, s4
; FULLFP16-NEXT:    lsls r0, r0, #24
; FULLFP16-NEXT:    it pl
; FULLFP16-NEXT:    vmovpl.f32 s0, s4
; FULLFP16-NEXT:    ldrb.w r0, [sp, #17]
; FULLFP16-NEXT:    vmovx.f16 s4, s1
; FULLFP16-NEXT:    vabs.f16 s4, s4
; FULLFP16-NEXT:    vins.f16 s0, s6
; FULLFP16-NEXT:    vneg.f16 s6, s4
; FULLFP16-NEXT:    lsls r0, r0, #24
; FULLFP16-NEXT:    it pl
; FULLFP16-NEXT:    vmovpl.f32 s6, s4
; FULLFP16-NEXT:    ldrb.w r0, [sp, #21]
; FULLFP16-NEXT:    vabs.f16 s4, s1
; FULLFP16-NEXT:    vneg.f16 s1, s4
; FULLFP16-NEXT:    lsls r0, r0, #24
; FULLFP16-NEXT:    it pl
; FULLFP16-NEXT:    vmovpl.f32 s1, s4
; FULLFP16-NEXT:    ldrb.w r0, [sp, #9]
; FULLFP16-NEXT:    vmovx.f16 s4, s2
; FULLFP16-NEXT:    vabs.f16 s4, s4
; FULLFP16-NEXT:    vins.f16 s1, s6
; FULLFP16-NEXT:    vneg.f16 s6, s4
; FULLFP16-NEXT:    lsls r0, r0, #24
; FULLFP16-NEXT:    it pl
; FULLFP16-NEXT:    vmovpl.f32 s6, s4
; FULLFP16-NEXT:    ldrb.w r0, [sp, #13]
; FULLFP16-NEXT:    vabs.f16 s4, s2
; FULLFP16-NEXT:    vneg.f16 s2, s4
; FULLFP16-NEXT:    lsls r0, r0, #24
; FULLFP16-NEXT:    it pl
; FULLFP16-NEXT:    vmovpl.f32 s2, s4
; FULLFP16-NEXT:    ldrb.w r0, [sp, #1]
; FULLFP16-NEXT:    vmovx.f16 s4, s3
; FULLFP16-NEXT:    vabs.f16 s4, s4
; FULLFP16-NEXT:    vins.f16 s2, s6
; FULLFP16-NEXT:    vneg.f16 s6, s4
; FULLFP16-NEXT:    lsls r0, r0, #24
; FULLFP16-NEXT:    it pl
; FULLFP16-NEXT:    vmovpl.f32 s6, s4
; FULLFP16-NEXT:    ldrb.w r0, [sp, #5]
; FULLFP16-NEXT:    vabs.f16 s4, s3
; FULLFP16-NEXT:    vneg.f16 s3, s4
; FULLFP16-NEXT:    lsls r0, r0, #24
; FULLFP16-NEXT:    it pl
; FULLFP16-NEXT:    vmovpl.f32 s3, s4
; FULLFP16-NEXT:    vins.f16 s3, s6
; FULLFP16-NEXT:    add sp, #32
; FULLFP16-NEXT:    bx lr
;
; MVEFP-LABEL: copysign_float16_t:
; MVEFP:       @ %bb.0: @ %entry
; MVEFP-NEXT:    vmov.i16 q2, #0x8000
; MVEFP-NEXT:    vbic.i16 q0, #0x8000
; MVEFP-NEXT:    vand q1, q1, q2
; MVEFP-NEXT:    vorr q0, q0, q1
; MVEFP-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.copysign.v8f16(<8 x half> %src1, <8 x half> %src2)
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: copysign_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    vmov r0, r1, d3
; CHECK-NEXT:    vmov r0, lr, d2
; CHECK-NEXT:    vmov r0, r3, d1
; CHECK-NEXT:    vmov r12, r2, d0
; CHECK-NEXT:    lsrs r1, r1, #31
; CHECK-NEXT:    bfi r3, r1, #31, #1
; CHECK-NEXT:    lsr.w r1, lr, #31
; CHECK-NEXT:    bfi r2, r1, #31, #1
; CHECK-NEXT:    vmov d1, r0, r3
; CHECK-NEXT:    vmov d0, r12, r2
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2)
  ret <2 x double> %0
}

declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
declare <4 x float> @llvm.cos.v4f32(<4 x float>)
declare <4 x float> @llvm.sin.v4f32(<4 x float>)
declare <4 x float> @llvm.exp.v4f32(<4 x float>)
declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
declare <4 x float> @llvm.log.v4f32(<4 x float>)
declare <4 x float> @llvm.log2.v4f32(<4 x float>)
declare <4 x float> @llvm.log10.v4f32(<4 x float>)
declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
declare <8 x half> @llvm.cos.v8f16(<8 x half>)
declare <8 x half> @llvm.sin.v8f16(<8 x half>)
declare <8 x half> @llvm.exp.v8f16(<8 x half>)
declare <8 x half> @llvm.exp2.v8f16(<8 x half>)
declare <8 x half> @llvm.log.v8f16(<8 x half>)
declare <8 x half> @llvm.log2.v8f16(<8 x half>)
declare <8 x half> @llvm.log10.v8f16(<8 x half>)
declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
declare <2 x double> @llvm.cos.v2f64(<2 x double>)
declare <2 x double> @llvm.sin.v2f64(<2 x double>)
declare <2 x double> @llvm.exp.v2f64(<2 x double>)
declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
declare <2 x double> @llvm.log.v2f64(<2 x double>)
declare <2 x double> @llvm.log2.v2f64(<2 x double>)
declare <2 x double> @llvm.log10.v2f64(<2 x double>)
declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)