xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-vabd.ll (revision 52864d9c7bd49ca41191bd34fcee47f61cfea743)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve %s -o - | FileCheck %s --check-prefix=CHECK-MVE
3; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s --check-prefix=CHECK-MVEFP
4
; vabd_v4f32: absolute difference of two <4 x float> vectors, stored to %z.
; The IR computes fabs(fsub(%x, %y)) and writes the result with 4-byte alignment.
; Expected code for the two RUN configurations:
;   * -mattr=+mve (CHECK-MVE prefix): the expected output contains four
;     __aeabi_fsub calls, one per lane; each result has bit 31 (the float sign
;     bit) cleared with "bic ..., #-2147483648" and is moved into s0..s3 before
;     a single vstrw.32 store.
;   * -mattr=+mve.fp (CHECK-MVEFP prefix): the fsub+fabs pair is expected to
;     become one vabd.f32 followed by the store.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
5define arm_aapcs_vfpcc void @vabd_v4f32(<4 x float> %x, <4 x float> %y, ptr %z) {
6; CHECK-MVE-LABEL: vabd_v4f32:
7; CHECK-MVE:       @ %bb.0: @ %entry
8; CHECK-MVE-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
9; CHECK-MVE-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
10; CHECK-MVE-NEXT:    .pad #4
11; CHECK-MVE-NEXT:    sub sp, #4
12; CHECK-MVE-NEXT:    .vsave {d8, d9, d10, d11}
13; CHECK-MVE-NEXT:    vpush {d8, d9, d10, d11}
14; CHECK-MVE-NEXT:    vmov q4, q1
15; CHECK-MVE-NEXT:    vmov q5, q0
16; CHECK-MVE-NEXT:    mov r8, r0
17; CHECK-MVE-NEXT:    vmov r0, r6, d10
18; CHECK-MVE-NEXT:    vmov r1, r7, d8
19; CHECK-MVE-NEXT:    bl __aeabi_fsub
20; CHECK-MVE-NEXT:    mov r9, r0
21; CHECK-MVE-NEXT:    mov r0, r6
22; CHECK-MVE-NEXT:    mov r1, r7
23; CHECK-MVE-NEXT:    bl __aeabi_fsub
24; CHECK-MVE-NEXT:    mov r6, r0
25; CHECK-MVE-NEXT:    vmov r0, r7, d11
26; CHECK-MVE-NEXT:    vmov r1, r4, d9
27; CHECK-MVE-NEXT:    bl __aeabi_fsub
28; CHECK-MVE-NEXT:    mov r5, r0
29; CHECK-MVE-NEXT:    mov r0, r7
30; CHECK-MVE-NEXT:    mov r1, r4
31; CHECK-MVE-NEXT:    bl __aeabi_fsub
32; CHECK-MVE-NEXT:    bic r0, r0, #-2147483648
33; CHECK-MVE-NEXT:    vmov s3, r0
34; CHECK-MVE-NEXT:    bic r0, r5, #-2147483648
35; CHECK-MVE-NEXT:    vmov s2, r0
36; CHECK-MVE-NEXT:    bic r0, r6, #-2147483648
37; CHECK-MVE-NEXT:    vmov s1, r0
38; CHECK-MVE-NEXT:    bic r0, r9, #-2147483648
39; CHECK-MVE-NEXT:    vmov s0, r0
40; CHECK-MVE-NEXT:    vstrw.32 q0, [r8]
41; CHECK-MVE-NEXT:    vpop {d8, d9, d10, d11}
42; CHECK-MVE-NEXT:    add sp, #4
43; CHECK-MVE-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
44;
45; CHECK-MVEFP-LABEL: vabd_v4f32:
46; CHECK-MVEFP:       @ %bb.0: @ %entry
47; CHECK-MVEFP-NEXT:    vabd.f32 q0, q0, q1
48; CHECK-MVEFP-NEXT:    vstrw.32 q0, [r0]
49; CHECK-MVEFP-NEXT:    bx lr
50
; IR under test: lane-wise difference, absolute value of it, then the store.
51entry:
52  %0 = fsub <4 x float> %x, %y
53  %1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %0)
54  store <4 x float> %1, ptr %z, align 4
55  ret void
56}
57
; vabd_v8f16: absolute difference of two <8 x half> vectors, stored to %z.
; The IR computes fabs(fsub(%x, %y)) on eight half-precision lanes.
; Expected code for the two RUN configurations:
;   * -mattr=+mve (CHECK-MVE prefix): each of the eight lanes is handled
;     separately in the expected output. Both inputs are widened with
;     __aeabi_h2f, subtracted with __aeabi_fsub, narrowed with __aeabi_f2h,
;     widened again, have bit 31 cleared with "bic r0, r0, #-2147483648",
;     narrowed once more, and inserted into q6 with vmov.16.
;   * -mattr=+mve.fp (CHECK-MVEFP prefix): the fsub+fabs pair is expected to
;     become one vabd.f16 followed by the store.
; NOTE(review): the store below carries no explicit "align", so it uses the
; type's default alignment; the expected output stores with vstrw.32 in both
; configurations. Confirm this is intentional before adding an alignment.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
58define arm_aapcs_vfpcc void @vabd_v8f16(<8 x half> %x, <8 x half> %y, ptr %z) {
59; CHECK-MVE-LABEL: vabd_v8f16:
60; CHECK-MVE:       @ %bb.0: @ %entry
61; CHECK-MVE-NEXT:    .save {r4, r5, r6, lr}
62; CHECK-MVE-NEXT:    push {r4, r5, r6, lr}
63; CHECK-MVE-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
64; CHECK-MVE-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
65; CHECK-MVE-NEXT:    mov r4, r0
66; CHECK-MVE-NEXT:    vmov.u16 r0, q1[0]
67; CHECK-MVE-NEXT:    vmov q5, q1
68; CHECK-MVE-NEXT:    vmov q4, q0
69; CHECK-MVE-NEXT:    bl __aeabi_h2f
70; CHECK-MVE-NEXT:    mov r5, r0
71; CHECK-MVE-NEXT:    vmov.u16 r0, q4[0]
72; CHECK-MVE-NEXT:    bl __aeabi_h2f
73; CHECK-MVE-NEXT:    mov r1, r5
74; CHECK-MVE-NEXT:    bl __aeabi_fsub
75; CHECK-MVE-NEXT:    bl __aeabi_f2h
76; CHECK-MVE-NEXT:    bl __aeabi_h2f
77; CHECK-MVE-NEXT:    bic r0, r0, #-2147483648
78; CHECK-MVE-NEXT:    bl __aeabi_f2h
79; CHECK-MVE-NEXT:    mov r5, r0
80; CHECK-MVE-NEXT:    vmov.u16 r0, q5[1]
81; CHECK-MVE-NEXT:    bl __aeabi_h2f
82; CHECK-MVE-NEXT:    mov r6, r0
83; CHECK-MVE-NEXT:    vmov.u16 r0, q4[1]
84; CHECK-MVE-NEXT:    bl __aeabi_h2f
85; CHECK-MVE-NEXT:    mov r1, r6
86; CHECK-MVE-NEXT:    bl __aeabi_fsub
87; CHECK-MVE-NEXT:    bl __aeabi_f2h
88; CHECK-MVE-NEXT:    vmov.16 q6[0], r5
89; CHECK-MVE-NEXT:    bl __aeabi_h2f
90; CHECK-MVE-NEXT:    bic r0, r0, #-2147483648
91; CHECK-MVE-NEXT:    bl __aeabi_f2h
92; CHECK-MVE-NEXT:    vmov.16 q6[1], r0
93; CHECK-MVE-NEXT:    vmov.u16 r0, q5[2]
94; CHECK-MVE-NEXT:    bl __aeabi_h2f
95; CHECK-MVE-NEXT:    mov r5, r0
96; CHECK-MVE-NEXT:    vmov.u16 r0, q4[2]
97; CHECK-MVE-NEXT:    bl __aeabi_h2f
98; CHECK-MVE-NEXT:    mov r1, r5
99; CHECK-MVE-NEXT:    bl __aeabi_fsub
100; CHECK-MVE-NEXT:    bl __aeabi_f2h
101; CHECK-MVE-NEXT:    bl __aeabi_h2f
102; CHECK-MVE-NEXT:    bic r0, r0, #-2147483648
103; CHECK-MVE-NEXT:    bl __aeabi_f2h
104; CHECK-MVE-NEXT:    vmov.16 q6[2], r0
105; CHECK-MVE-NEXT:    vmov.u16 r0, q5[3]
106; CHECK-MVE-NEXT:    bl __aeabi_h2f
107; CHECK-MVE-NEXT:    mov r5, r0
108; CHECK-MVE-NEXT:    vmov.u16 r0, q4[3]
109; CHECK-MVE-NEXT:    bl __aeabi_h2f
110; CHECK-MVE-NEXT:    mov r1, r5
111; CHECK-MVE-NEXT:    bl __aeabi_fsub
112; CHECK-MVE-NEXT:    bl __aeabi_f2h
113; CHECK-MVE-NEXT:    bl __aeabi_h2f
114; CHECK-MVE-NEXT:    bic r0, r0, #-2147483648
115; CHECK-MVE-NEXT:    bl __aeabi_f2h
116; CHECK-MVE-NEXT:    vmov.16 q6[3], r0
117; CHECK-MVE-NEXT:    vmov.u16 r0, q5[4]
118; CHECK-MVE-NEXT:    bl __aeabi_h2f
119; CHECK-MVE-NEXT:    mov r5, r0
120; CHECK-MVE-NEXT:    vmov.u16 r0, q4[4]
121; CHECK-MVE-NEXT:    bl __aeabi_h2f
122; CHECK-MVE-NEXT:    mov r1, r5
123; CHECK-MVE-NEXT:    bl __aeabi_fsub
124; CHECK-MVE-NEXT:    bl __aeabi_f2h
125; CHECK-MVE-NEXT:    bl __aeabi_h2f
126; CHECK-MVE-NEXT:    bic r0, r0, #-2147483648
127; CHECK-MVE-NEXT:    bl __aeabi_f2h
128; CHECK-MVE-NEXT:    vmov.16 q6[4], r0
129; CHECK-MVE-NEXT:    vmov.u16 r0, q5[5]
130; CHECK-MVE-NEXT:    bl __aeabi_h2f
131; CHECK-MVE-NEXT:    mov r5, r0
132; CHECK-MVE-NEXT:    vmov.u16 r0, q4[5]
133; CHECK-MVE-NEXT:    bl __aeabi_h2f
134; CHECK-MVE-NEXT:    mov r1, r5
135; CHECK-MVE-NEXT:    bl __aeabi_fsub
136; CHECK-MVE-NEXT:    bl __aeabi_f2h
137; CHECK-MVE-NEXT:    bl __aeabi_h2f
138; CHECK-MVE-NEXT:    bic r0, r0, #-2147483648
139; CHECK-MVE-NEXT:    bl __aeabi_f2h
140; CHECK-MVE-NEXT:    vmov.16 q6[5], r0
141; CHECK-MVE-NEXT:    vmov.u16 r0, q5[6]
142; CHECK-MVE-NEXT:    bl __aeabi_h2f
143; CHECK-MVE-NEXT:    mov r5, r0
144; CHECK-MVE-NEXT:    vmov.u16 r0, q4[6]
145; CHECK-MVE-NEXT:    bl __aeabi_h2f
146; CHECK-MVE-NEXT:    mov r1, r5
147; CHECK-MVE-NEXT:    bl __aeabi_fsub
148; CHECK-MVE-NEXT:    bl __aeabi_f2h
149; CHECK-MVE-NEXT:    bl __aeabi_h2f
150; CHECK-MVE-NEXT:    bic r0, r0, #-2147483648
151; CHECK-MVE-NEXT:    bl __aeabi_f2h
152; CHECK-MVE-NEXT:    vmov.16 q6[6], r0
153; CHECK-MVE-NEXT:    vmov.u16 r0, q5[7]
154; CHECK-MVE-NEXT:    bl __aeabi_h2f
155; CHECK-MVE-NEXT:    mov r5, r0
156; CHECK-MVE-NEXT:    vmov.u16 r0, q4[7]
157; CHECK-MVE-NEXT:    bl __aeabi_h2f
158; CHECK-MVE-NEXT:    mov r1, r5
159; CHECK-MVE-NEXT:    bl __aeabi_fsub
160; CHECK-MVE-NEXT:    bl __aeabi_f2h
161; CHECK-MVE-NEXT:    bl __aeabi_h2f
162; CHECK-MVE-NEXT:    bic r0, r0, #-2147483648
163; CHECK-MVE-NEXT:    bl __aeabi_f2h
164; CHECK-MVE-NEXT:    vmov.16 q6[7], r0
165; CHECK-MVE-NEXT:    vstrw.32 q6, [r4]
166; CHECK-MVE-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
167; CHECK-MVE-NEXT:    pop {r4, r5, r6, pc}
168;
169; CHECK-MVEFP-LABEL: vabd_v8f16:
170; CHECK-MVEFP:       @ %bb.0: @ %entry
171; CHECK-MVEFP-NEXT:    vabd.f16 q0, q0, q1
172; CHECK-MVEFP-NEXT:    vstrw.32 q0, [r0]
173; CHECK-MVEFP-NEXT:    bx lr
174
; IR under test: lane-wise difference, absolute value of it, then the store.
175entry:
176  %0 = fsub <8 x half> %x, %y
177  %1 = call <8 x half> @llvm.fabs.v8f16(<8 x half> %0)
178  store <8 x half> %1, ptr %z
179  ret void
180}
181
; Declarations of the fabs intrinsics called above: the <4 x float> form is
; used by @vabd_v4f32 and the <8 x half> form by @vabd_v8f16.
182declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
183declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
184