xref: /llvm-project/llvm/test/CodeGen/ARM/vecreduce-fmin-legalization-soft-float.ll (revision 52864d9c7bd49ca41191bd34fcee47f61cfea743)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK
3
4declare half @llvm.vector.reduce.fmin.v4f16(<4 x half>)
5declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
6declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
7declare fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128>)
8
; f16 min-reduction under soft-float (-neon): each half lane is widened to
; float with __aeabi_h2f, pairs are combined with the fminf libcall, and the
; running result is narrowed back with __aeabi_f2h before the next step —
; a strictly sequential left-to-right reduction over the 4 lanes.
9define half @test_v4f16(<4 x half> %a) nounwind {
10; CHECK-LABEL: test_v4f16:
11; CHECK:       @ %bb.0:
12; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
13; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
14; CHECK-NEXT:    mov r8, #255
15; CHECK-NEXT:    mov r4, r3
16; CHECK-NEXT:    orr r8, r8, #65280
17; CHECK-NEXT:    mov r5, r2
18; CHECK-NEXT:    and r0, r0, r8
19; CHECK-NEXT:    mov r6, r1
20; CHECK-NEXT:    bl __aeabi_h2f
21; CHECK-NEXT:    mov r7, r0
22; CHECK-NEXT:    and r0, r6, r8
23; CHECK-NEXT:    bl __aeabi_h2f
24; CHECK-NEXT:    mov r1, r0
25; CHECK-NEXT:    mov r0, r7
26; CHECK-NEXT:    bl fminf
27; CHECK-NEXT:    bl __aeabi_f2h
28; CHECK-NEXT:    mov r6, r0
29; CHECK-NEXT:    and r0, r5, r8
30; CHECK-NEXT:    bl __aeabi_h2f
31; CHECK-NEXT:    mov r5, r0
32; CHECK-NEXT:    and r0, r6, r8
33; CHECK-NEXT:    bl __aeabi_h2f
34; CHECK-NEXT:    mov r1, r5
35; CHECK-NEXT:    bl fminf
36; CHECK-NEXT:    bl __aeabi_f2h
37; CHECK-NEXT:    mov r5, r0
38; CHECK-NEXT:    and r0, r4, r8
39; CHECK-NEXT:    bl __aeabi_h2f
40; CHECK-NEXT:    mov r4, r0
41; CHECK-NEXT:    and r0, r5, r8
42; CHECK-NEXT:    bl __aeabi_h2f
43; CHECK-NEXT:    mov r1, r4
44; CHECK-NEXT:    bl fminf
45; CHECK-NEXT:    bl __aeabi_f2h
46; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
47; CHECK-NEXT:    mov pc, lr
48  %b = call fast half @llvm.vector.reduce.fmin.v4f16(<4 x half> %a)
49  ret half %b
50}
51
; f32 min-reduction: because the reduction is `fast`, no fminf libcall is
; needed — each step expands to a compare via the __aeabi_fcmplt AEABI helper
; followed by a conditional move selecting the smaller operand, keeping the
; running minimum in r5 across the three steps.
52define float @test_v4f32(<4 x float> %a) nounwind {
53; CHECK-LABEL: test_v4f32:
54; CHECK:       @ %bb.0:
55; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
56; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
57; CHECK-NEXT:    mov r4, r3
58; CHECK-NEXT:    mov r6, r2
59; CHECK-NEXT:    mov r5, r1
60; CHECK-NEXT:    mov r7, r0
61; CHECK-NEXT:    bl __aeabi_fcmplt
62; CHECK-NEXT:    cmp r0, #0
63; CHECK-NEXT:    mov r1, r6
64; CHECK-NEXT:    movne r5, r7
65; CHECK-NEXT:    mov r0, r5
66; CHECK-NEXT:    bl __aeabi_fcmplt
67; CHECK-NEXT:    cmp r0, #0
68; CHECK-NEXT:    mov r1, r4
69; CHECK-NEXT:    moveq r5, r6
70; CHECK-NEXT:    mov r0, r5
71; CHECK-NEXT:    bl __aeabi_fcmplt
72; CHECK-NEXT:    cmp r0, #0
73; CHECK-NEXT:    moveq r5, r4
74; CHECK-NEXT:    mov r0, r5
75; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, lr}
76; CHECK-NEXT:    mov pc, lr
77  %b = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a)
78  ret float %b
79}
80
; f64 min-reduction: a single compare+select over the two elements. Each
; double occupies a register pair (r0:r1 and r2:r3 on entry); the compare is
; done with the __aeabi_dcmplt AEABI helper and both halves of the result are
; selected with movne keyed on the helper's boolean return in r0.
81define double @test_v2f64(<2 x double> %a) nounwind {
82; CHECK-LABEL: test_v2f64:
83; CHECK:       @ %bb.0:
84; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
85; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
86; CHECK-NEXT:    mov r4, r3
87; CHECK-NEXT:    mov r6, r2
88; CHECK-NEXT:    mov r8, r1
89; CHECK-NEXT:    mov r7, r0
90; CHECK-NEXT:    bl __aeabi_dcmplt
91; CHECK-NEXT:    cmp r0, #0
92; CHECK-NEXT:    mov r5, r6
93; CHECK-NEXT:    mov r0, r7
94; CHECK-NEXT:    mov r1, r8
95; CHECK-NEXT:    mov r2, r6
96; CHECK-NEXT:    mov r3, r4
97; CHECK-NEXT:    movne r5, r7
98; CHECK-NEXT:    bl __aeabi_dcmplt
99; CHECK-NEXT:    cmp r0, #0
100; CHECK-NEXT:    mov r0, r5
101; CHECK-NEXT:    movne r4, r8
102; CHECK-NEXT:    mov r1, r4
103; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
104; CHECK-NEXT:    mov pc, lr
105  %b = call fast double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a)
106  ret double %b
107}
108
; fp128 min-reduction: the first element arrives in r0-r3, the second on the
; caller's stack; the compare uses the compiler-rt __lttf2 routine (negative
; return means a < b). The 128-bit select is split into four independent
; 32-bit movmi selects, and the compare is re-issued per word — hence four
; __lttf2 calls with the first results spilled/reloaded around the later ones.
109define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
110; CHECK-LABEL: test_v2f128:
111; CHECK:       @ %bb.0:
112; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
113; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
114; CHECK-NEXT:    .pad #28
115; CHECK-NEXT:    sub sp, sp, #28
116; CHECK-NEXT:    ldr r5, [sp, #76]
117; CHECK-NEXT:    mov r8, r3
118; CHECK-NEXT:    ldr r6, [sp, #72]
119; CHECK-NEXT:    mov r9, r2
120; CHECK-NEXT:    ldr r4, [sp, #68]
121; CHECK-NEXT:    mov r10, r1
122; CHECK-NEXT:    ldr r7, [sp, #64]
123; CHECK-NEXT:    mov r11, r0
124; CHECK-NEXT:    str r5, [sp, #12]
125; CHECK-NEXT:    str r6, [sp, #8]
126; CHECK-NEXT:    str r4, [sp, #4]
127; CHECK-NEXT:    str r7, [sp]
128; CHECK-NEXT:    bl __lttf2
129; CHECK-NEXT:    str r0, [sp, #24] @ 4-byte Spill
130; CHECK-NEXT:    mov r0, r11
131; CHECK-NEXT:    mov r1, r10
132; CHECK-NEXT:    mov r2, r9
133; CHECK-NEXT:    mov r3, r8
134; CHECK-NEXT:    str r7, [sp]
135; CHECK-NEXT:    stmib sp, {r4, r6}
136; CHECK-NEXT:    str r5, [sp, #12]
137; CHECK-NEXT:    bl __lttf2
138; CHECK-NEXT:    str r0, [sp, #20] @ 4-byte Spill
139; CHECK-NEXT:    mov r0, r11
140; CHECK-NEXT:    mov r1, r10
141; CHECK-NEXT:    mov r2, r9
142; CHECK-NEXT:    mov r3, r8
143; CHECK-NEXT:    str r7, [sp]
144; CHECK-NEXT:    stmib sp, {r4, r6}
145; CHECK-NEXT:    str r5, [sp, #12]
146; CHECK-NEXT:    bl __lttf2
147; CHECK-NEXT:    cmp r0, #0
148; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
149; CHECK-NEXT:    str r7, [sp]
150; CHECK-NEXT:    movmi r7, r11
151; CHECK-NEXT:    cmp r0, #0
152; CHECK-NEXT:    ldr r0, [sp, #24] @ 4-byte Reload
153; CHECK-NEXT:    stmib sp, {r4, r6}
154; CHECK-NEXT:    movmi r4, r10
155; CHECK-NEXT:    cmp r0, #0
156; CHECK-NEXT:    mov r0, r11
157; CHECK-NEXT:    mov r1, r10
158; CHECK-NEXT:    mov r2, r9
159; CHECK-NEXT:    mov r3, r8
160; CHECK-NEXT:    str r5, [sp, #12]
161; CHECK-NEXT:    movmi r6, r9
162; CHECK-NEXT:    bl __lttf2
163; CHECK-NEXT:    cmp r0, #0
164; CHECK-NEXT:    mov r0, r7
165; CHECK-NEXT:    movmi r5, r8
166; CHECK-NEXT:    mov r1, r4
167; CHECK-NEXT:    mov r2, r6
168; CHECK-NEXT:    mov r3, r5
169; CHECK-NEXT:    add sp, sp, #28
170; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
171; CHECK-NEXT:    mov pc, lr
172  %b = call fast fp128 @llvm.vector.reduce.fmin.v2f128(<2 x fp128> %a)
173  ret fp128 %b
174}
175