xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-nofloat.ll (revision 52e0cf9d61618353d2745a51a16ae408edf0f49b)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve,-vfp2 -o - %s | FileCheck %s --check-prefix=CHECK-NOFP
3; RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -o - %s | FileCheck --check-prefix=CHECK-FP %s
4
5; This file tests that we expand floating point operations correctly,
6; even if we do not have an fpu.
7
; vector_add_f16: fadd of two <8 x half> vectors.
;
; CHECK-NOFP (RUN line with -mattr=+mve,-vfp2): the add is expected to be
; expanded lane by lane. For each of the 8 lanes the checks expect both
; operands to be extracted with vmov.u16, widened via __aeabi_h2f, summed via
; __aeabi_fadd, narrowed via __aeabi_f2h and inserted into the result with
; vmov.16.
;
; CHECK-FP (RUN line with -mattr=+mve.fp): a single vadd.f16 is expected.
;
; The CHECK lines below are autogenerated by utils/update_llc_test_checks.py
; (per the NOTE at the top of the file); regenerate them rather than editing
; them by hand.
8define arm_aapcs_vfpcc <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) {
9; CHECK-NOFP-LABEL: vector_add_f16:
10; CHECK-NOFP:       @ %bb.0: @ %entry
11; CHECK-NOFP-NEXT:    .save {r4, lr}
12; CHECK-NOFP-NEXT:    push {r4, lr}
13; CHECK-NOFP-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
14; CHECK-NOFP-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
15; CHECK-NOFP-NEXT:    vmov.u16 r0, q1[0]
16; CHECK-NOFP-NEXT:    vmov q5, q1
17; CHECK-NOFP-NEXT:    vmov q4, q0
18; CHECK-NOFP-NEXT:    bl __aeabi_h2f
19; CHECK-NOFP-NEXT:    mov r4, r0
20; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[0]
21; CHECK-NOFP-NEXT:    bl __aeabi_h2f
22; CHECK-NOFP-NEXT:    mov r1, r4
23; CHECK-NOFP-NEXT:    bl __aeabi_fadd
24; CHECK-NOFP-NEXT:    bl __aeabi_f2h
25; CHECK-NOFP-NEXT:    vmov.16 q6[0], r0
26; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[1]
27; CHECK-NOFP-NEXT:    bl __aeabi_h2f
28; CHECK-NOFP-NEXT:    mov r4, r0
29; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[1]
30; CHECK-NOFP-NEXT:    bl __aeabi_h2f
31; CHECK-NOFP-NEXT:    mov r1, r4
32; CHECK-NOFP-NEXT:    bl __aeabi_fadd
33; CHECK-NOFP-NEXT:    bl __aeabi_f2h
34; CHECK-NOFP-NEXT:    vmov.16 q6[1], r0
35; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[2]
36; CHECK-NOFP-NEXT:    bl __aeabi_h2f
37; CHECK-NOFP-NEXT:    mov r4, r0
38; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[2]
39; CHECK-NOFP-NEXT:    bl __aeabi_h2f
40; CHECK-NOFP-NEXT:    mov r1, r4
41; CHECK-NOFP-NEXT:    bl __aeabi_fadd
42; CHECK-NOFP-NEXT:    bl __aeabi_f2h
43; CHECK-NOFP-NEXT:    vmov.16 q6[2], r0
44; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[3]
45; CHECK-NOFP-NEXT:    bl __aeabi_h2f
46; CHECK-NOFP-NEXT:    mov r4, r0
47; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[3]
48; CHECK-NOFP-NEXT:    bl __aeabi_h2f
49; CHECK-NOFP-NEXT:    mov r1, r4
50; CHECK-NOFP-NEXT:    bl __aeabi_fadd
51; CHECK-NOFP-NEXT:    bl __aeabi_f2h
52; CHECK-NOFP-NEXT:    vmov.16 q6[3], r0
53; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[4]
54; CHECK-NOFP-NEXT:    bl __aeabi_h2f
55; CHECK-NOFP-NEXT:    mov r4, r0
56; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[4]
57; CHECK-NOFP-NEXT:    bl __aeabi_h2f
58; CHECK-NOFP-NEXT:    mov r1, r4
59; CHECK-NOFP-NEXT:    bl __aeabi_fadd
60; CHECK-NOFP-NEXT:    bl __aeabi_f2h
61; CHECK-NOFP-NEXT:    vmov.16 q6[4], r0
62; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[5]
63; CHECK-NOFP-NEXT:    bl __aeabi_h2f
64; CHECK-NOFP-NEXT:    mov r4, r0
65; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[5]
66; CHECK-NOFP-NEXT:    bl __aeabi_h2f
67; CHECK-NOFP-NEXT:    mov r1, r4
68; CHECK-NOFP-NEXT:    bl __aeabi_fadd
69; CHECK-NOFP-NEXT:    bl __aeabi_f2h
70; CHECK-NOFP-NEXT:    vmov.16 q6[5], r0
71; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[6]
72; CHECK-NOFP-NEXT:    bl __aeabi_h2f
73; CHECK-NOFP-NEXT:    mov r4, r0
74; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[6]
75; CHECK-NOFP-NEXT:    bl __aeabi_h2f
76; CHECK-NOFP-NEXT:    mov r1, r4
77; CHECK-NOFP-NEXT:    bl __aeabi_fadd
78; CHECK-NOFP-NEXT:    bl __aeabi_f2h
79; CHECK-NOFP-NEXT:    vmov.16 q6[6], r0
80; CHECK-NOFP-NEXT:    vmov.u16 r0, q5[7]
81; CHECK-NOFP-NEXT:    bl __aeabi_h2f
82; CHECK-NOFP-NEXT:    mov r4, r0
83; CHECK-NOFP-NEXT:    vmov.u16 r0, q4[7]
84; CHECK-NOFP-NEXT:    bl __aeabi_h2f
85; CHECK-NOFP-NEXT:    mov r1, r4
86; CHECK-NOFP-NEXT:    bl __aeabi_fadd
87; CHECK-NOFP-NEXT:    bl __aeabi_f2h
88; CHECK-NOFP-NEXT:    vmov.16 q6[7], r0
89; CHECK-NOFP-NEXT:    vmov q0, q6
90; CHECK-NOFP-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
91; CHECK-NOFP-NEXT:    pop {r4, pc}
92;
93; CHECK-FP-LABEL: vector_add_f16:
94; CHECK-FP:       @ %bb.0: @ %entry
95; CHECK-FP-NEXT:    vadd.f16 q0, q0, q1
96; CHECK-FP-NEXT:    bx lr
97entry:
98  %sum = fadd <8 x half> %lhs, %rhs
99  ret <8 x half> %sum
100}
101
; vector_add_f32: fadd of two <4 x float> vectors.
;
; CHECK-NOFP (RUN line with -mattr=+mve,-vfp2): the add is expected to be
; expanded into four __aeabi_fadd calls, one per lane. Lane values are moved
; from the d-registers into core registers with vmov, and each result is
; written back into an s-register of q4 before q4 is copied to q0.
;
; CHECK-FP (RUN line with -mattr=+mve.fp): a single vadd.f32 is expected.
;
; The CHECK lines below are autogenerated by utils/update_llc_test_checks.py
; (per the NOTE at the top of the file); regenerate them rather than editing
; them by hand.
102define arm_aapcs_vfpcc <4 x float> @vector_add_f32(<4 x float> %lhs, <4 x float> %rhs) {
103; CHECK-NOFP-LABEL: vector_add_f32:
104; CHECK-NOFP:       @ %bb.0: @ %entry
105; CHECK-NOFP-NEXT:    .save {r4, r5, r7, lr}
106; CHECK-NOFP-NEXT:    push {r4, r5, r7, lr}
107; CHECK-NOFP-NEXT:    .vsave {d8, d9, d10, d11}
108; CHECK-NOFP-NEXT:    vpush {d8, d9, d10, d11}
109; CHECK-NOFP-NEXT:    vmov q4, q1
110; CHECK-NOFP-NEXT:    vmov q5, q0
111; CHECK-NOFP-NEXT:    vmov r4, r0, d11
112; CHECK-NOFP-NEXT:    vmov r5, r1, d9
113; CHECK-NOFP-NEXT:    bl __aeabi_fadd
114; CHECK-NOFP-NEXT:    vmov s19, r0
115; CHECK-NOFP-NEXT:    mov r0, r4
116; CHECK-NOFP-NEXT:    mov r1, r5
117; CHECK-NOFP-NEXT:    bl __aeabi_fadd
118; CHECK-NOFP-NEXT:    vmov s18, r0
119; CHECK-NOFP-NEXT:    vmov r4, r0, d10
120; CHECK-NOFP-NEXT:    vmov r5, r1, d8
121; CHECK-NOFP-NEXT:    bl __aeabi_fadd
122; CHECK-NOFP-NEXT:    vmov s17, r0
123; CHECK-NOFP-NEXT:    mov r0, r4
124; CHECK-NOFP-NEXT:    mov r1, r5
125; CHECK-NOFP-NEXT:    bl __aeabi_fadd
126; CHECK-NOFP-NEXT:    vmov s16, r0
127; CHECK-NOFP-NEXT:    vmov q0, q4
128; CHECK-NOFP-NEXT:    vpop {d8, d9, d10, d11}
129; CHECK-NOFP-NEXT:    pop {r4, r5, r7, pc}
130;
131; CHECK-FP-LABEL: vector_add_f32:
132; CHECK-FP:       @ %bb.0: @ %entry
133; CHECK-FP-NEXT:    vadd.f32 q0, q0, q1
134; CHECK-FP-NEXT:    bx lr
135entry:
136  %sum = fadd <4 x float> %lhs, %rhs
137  ret <4 x float> %sum
138}
139
140