xref: /llvm-project/llvm/test/CodeGen/ARM/vecreduce-fadd-legalization-soft-float.ll (revision 52864d9c7bd49ca41191bd34fcee47f61cfea743)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK
3
4declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
5declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
6declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
7declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
8
; reassoc v4f16 reduction, soft-float (-neon): fully scalarized via RTABI
; helpers. Each half lane is widened to float (__aeabi_h2f), accumulated with
; __aeabi_fadd, and the running sum is truncated back to half (__aeabi_f2h)
; after every add, then re-widened for the next one. r8 holds the 0xFFFF mask
; used to clear the high half of each i32-passed f16 argument.
9define half @test_v4f16_reassoc(<4 x half> %a) nounwind {
10; CHECK-LABEL: test_v4f16_reassoc:
11; CHECK:       @ %bb.0:
12; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
13; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
14; CHECK-NEXT:    mov r8, #255
15; CHECK-NEXT:    mov r4, r3
16; CHECK-NEXT:    orr r8, r8, #65280
17; CHECK-NEXT:    mov r5, r2
18; CHECK-NEXT:    and r0, r0, r8
19; CHECK-NEXT:    mov r6, r1
20; CHECK-NEXT:    bl __aeabi_h2f
21; CHECK-NEXT:    mov r7, r0
22; CHECK-NEXT:    and r0, r6, r8
23; CHECK-NEXT:    bl __aeabi_h2f
24; CHECK-NEXT:    mov r1, r0
25; CHECK-NEXT:    mov r0, r7
26; CHECK-NEXT:    bl __aeabi_fadd
27; CHECK-NEXT:    bl __aeabi_f2h
28; CHECK-NEXT:    mov r6, r0
29; CHECK-NEXT:    and r0, r5, r8
30; CHECK-NEXT:    bl __aeabi_h2f
31; CHECK-NEXT:    mov r5, r0
32; CHECK-NEXT:    and r0, r6, r8
33; CHECK-NEXT:    bl __aeabi_h2f
34; CHECK-NEXT:    mov r1, r5
35; CHECK-NEXT:    bl __aeabi_fadd
36; CHECK-NEXT:    bl __aeabi_f2h
37; CHECK-NEXT:    mov r5, r0
38; CHECK-NEXT:    and r0, r4, r8
39; CHECK-NEXT:    bl __aeabi_h2f
40; CHECK-NEXT:    mov r4, r0
41; CHECK-NEXT:    and r0, r5, r8
42; CHECK-NEXT:    bl __aeabi_h2f
43; CHECK-NEXT:    mov r1, r4
44; CHECK-NEXT:    bl __aeabi_fadd
45; CHECK-NEXT:    bl __aeabi_f2h
46; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
47; CHECK-NEXT:    mov pc, lr
48  %b = call reassoc half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %a)
49  ret half %b
50}
51
; Sequential (no fast-math flags) v4f16 reduction under soft-float: the
; strictly ordered lane-by-lane chain of __aeabi_h2f / __aeabi_fadd /
; __aeabi_f2h calls, identical in shape to the reassoc variant above since
; scalarization already yields the in-order chain.
52define half @test_v4f16_seq(<4 x half> %a) nounwind {
53; CHECK-LABEL: test_v4f16_seq:
54; CHECK:       @ %bb.0:
55; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
56; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
57; CHECK-NEXT:    mov r8, #255
58; CHECK-NEXT:    mov r4, r3
59; CHECK-NEXT:    orr r8, r8, #65280
60; CHECK-NEXT:    mov r5, r2
61; CHECK-NEXT:    and r0, r0, r8
62; CHECK-NEXT:    mov r6, r1
63; CHECK-NEXT:    bl __aeabi_h2f
64; CHECK-NEXT:    mov r7, r0
65; CHECK-NEXT:    and r0, r6, r8
66; CHECK-NEXT:    bl __aeabi_h2f
67; CHECK-NEXT:    mov r1, r0
68; CHECK-NEXT:    mov r0, r7
69; CHECK-NEXT:    bl __aeabi_fadd
70; CHECK-NEXT:    bl __aeabi_f2h
71; CHECK-NEXT:    mov r6, r0
72; CHECK-NEXT:    and r0, r5, r8
73; CHECK-NEXT:    bl __aeabi_h2f
74; CHECK-NEXT:    mov r5, r0
75; CHECK-NEXT:    and r0, r6, r8
76; CHECK-NEXT:    bl __aeabi_h2f
77; CHECK-NEXT:    mov r1, r5
78; CHECK-NEXT:    bl __aeabi_fadd
79; CHECK-NEXT:    bl __aeabi_f2h
80; CHECK-NEXT:    mov r5, r0
81; CHECK-NEXT:    and r0, r4, r8
82; CHECK-NEXT:    bl __aeabi_h2f
83; CHECK-NEXT:    mov r4, r0
84; CHECK-NEXT:    and r0, r5, r8
85; CHECK-NEXT:    bl __aeabi_h2f
86; CHECK-NEXT:    mov r1, r4
87; CHECK-NEXT:    bl __aeabi_fadd
88; CHECK-NEXT:    bl __aeabi_f2h
89; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
90; CHECK-NEXT:    mov pc, lr
91  %b = call half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %a)
92  ret half %b
93}
94
; reassoc v4f32 reduction: the four lanes arrive in r0-r3 and are chained
; through three __aeabi_fadd calls; the -0.0 start value is folded away
; (only three adds are emitted for four elements). r2/r3 are parked in the
; callee-saved r5/r4 so they survive the earlier libcalls.
95define float @test_v4f32_reassoc(<4 x float> %a) nounwind {
96; CHECK-LABEL: test_v4f32_reassoc:
97; CHECK:       @ %bb.0:
98; CHECK-NEXT:    .save {r4, r5, r11, lr}
99; CHECK-NEXT:    push {r4, r5, r11, lr}
100; CHECK-NEXT:    mov r4, r3
101; CHECK-NEXT:    mov r5, r2
102; CHECK-NEXT:    bl __aeabi_fadd
103; CHECK-NEXT:    mov r1, r5
104; CHECK-NEXT:    bl __aeabi_fadd
105; CHECK-NEXT:    mov r1, r4
106; CHECK-NEXT:    bl __aeabi_fadd
107; CHECK-NEXT:    pop {r4, r5, r11, lr}
108; CHECK-NEXT:    mov pc, lr
109  %b = call reassoc float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %a)
110  ret float %b
111}
112
; Sequential v4f32 reduction: same three-call __aeabi_fadd chain as the
; reassoc case above — the strictly ordered left-to-right evaluation is
; already what scalarization produces, so the fast-math flag changes nothing
; in the emitted code.
113define float @test_v4f32_seq(<4 x float> %a) nounwind {
114; CHECK-LABEL: test_v4f32_seq:
115; CHECK:       @ %bb.0:
116; CHECK-NEXT:    .save {r4, r5, r11, lr}
117; CHECK-NEXT:    push {r4, r5, r11, lr}
118; CHECK-NEXT:    mov r4, r3
119; CHECK-NEXT:    mov r5, r2
120; CHECK-NEXT:    bl __aeabi_fadd
121; CHECK-NEXT:    mov r1, r5
122; CHECK-NEXT:    bl __aeabi_fadd
123; CHECK-NEXT:    mov r1, r4
124; CHECK-NEXT:    bl __aeabi_fadd
125; CHECK-NEXT:    pop {r4, r5, r11, lr}
126; CHECK-NEXT:    mov pc, lr
127  %b = call float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %a)
128  ret float %b
129}
130
; reassoc v2f64 reduction: a two-element f64 vector collapses to a single
; __aeabi_dadd — the two doubles arrive split across r0-r3, which is exactly
; where the libcall expects its operands, so no register shuffling is needed.
131define double @test_v2f64_reassoc(<2 x double> %a) nounwind {
132; CHECK-LABEL: test_v2f64_reassoc:
133; CHECK:       @ %bb.0:
134; CHECK-NEXT:    .save {r11, lr}
135; CHECK-NEXT:    push {r11, lr}
136; CHECK-NEXT:    bl __aeabi_dadd
137; CHECK-NEXT:    pop {r11, lr}
138; CHECK-NEXT:    mov pc, lr
139  %b = call reassoc double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %a)
140  ret double %b
141}
142
; Sequential v2f64 reduction: identical codegen to the reassoc variant —
; with only one add there is no ordering freedom for the flag to exploit.
143define double @test_v2f64_seq(<2 x double> %a) nounwind {
144; CHECK-LABEL: test_v2f64_seq:
145; CHECK:       @ %bb.0:
146; CHECK-NEXT:    .save {r11, lr}
147; CHECK-NEXT:    push {r11, lr}
148; CHECK-NEXT:    bl __aeabi_dadd
149; CHECK-NEXT:    pop {r11, lr}
150; CHECK-NEXT:    mov pc, lr
151  %b = call double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %a)
152  ret double %b
153}
154
; reassoc v2f128 reduction: one __addtf3 call. The first fp128 element sits
; in r0-r3; the second was passed on the caller's stack, so it is copied word
; by word from the incoming area ([sp, #24..#36] after the push and the
; 16-byte pad) into the outgoing argument area at [sp]..[sp, #12].
155define fp128 @test_v2f128_reassoc(<2 x fp128> %a) nounwind {
156; CHECK-LABEL: test_v2f128_reassoc:
157; CHECK:       @ %bb.0:
158; CHECK-NEXT:    .save {r11, lr}
159; CHECK-NEXT:    push {r11, lr}
160; CHECK-NEXT:    .pad #16
161; CHECK-NEXT:    sub sp, sp, #16
162; CHECK-NEXT:    ldr r12, [sp, #36]
163; CHECK-NEXT:    str r12, [sp, #12]
164; CHECK-NEXT:    ldr r12, [sp, #32]
165; CHECK-NEXT:    str r12, [sp, #8]
166; CHECK-NEXT:    ldr r12, [sp, #28]
167; CHECK-NEXT:    str r12, [sp, #4]
168; CHECK-NEXT:    ldr r12, [sp, #24]
169; CHECK-NEXT:    str r12, [sp]
170; CHECK-NEXT:    bl __addtf3
171; CHECK-NEXT:    add sp, sp, #16
172; CHECK-NEXT:    pop {r11, lr}
173; CHECK-NEXT:    mov pc, lr
174  %b = call reassoc fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
175  ret fp128 %b
176}
177
; Sequential v2f128 reduction: same single __addtf3 call and stack-argument
; marshalling as the reassoc variant — the start value here is the fp128
; encoding of -0.0 (0xL...8000000000000000), folded away as the identity.
178define fp128 @test_v2f128_seq(<2 x fp128> %a) nounwind {
179; CHECK-LABEL: test_v2f128_seq:
180; CHECK:       @ %bb.0:
181; CHECK-NEXT:    .save {r11, lr}
182; CHECK-NEXT:    push {r11, lr}
183; CHECK-NEXT:    .pad #16
184; CHECK-NEXT:    sub sp, sp, #16
185; CHECK-NEXT:    ldr r12, [sp, #36]
186; CHECK-NEXT:    str r12, [sp, #12]
187; CHECK-NEXT:    ldr r12, [sp, #32]
188; CHECK-NEXT:    str r12, [sp, #8]
189; CHECK-NEXT:    ldr r12, [sp, #28]
190; CHECK-NEXT:    str r12, [sp, #4]
191; CHECK-NEXT:    ldr r12, [sp, #24]
192; CHECK-NEXT:    str r12, [sp]
193; CHECK-NEXT:    bl __addtf3
194; CHECK-NEXT:    add sp, sp, #16
195; CHECK-NEXT:    pop {r11, lr}
196; CHECK-NEXT:    mov pc, lr
197  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
198  ret fp128 %b
199}
200