; xref: /llvm-project/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-soft-float.ll (revision 52864d9c7bd49ca41191bd34fcee47f61cfea743)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK

; Intrinsics under test: fmul reductions with a scalar start value, for half,
; float, double and fp128 elements.
declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>)
declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>)

; Fast v4f16 reduction: each half is widened via __aeabi_h2f, multiplied with
; __aeabi_fmul, and narrowed back with __aeabi_f2h (no NEON, soft float).
define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov r8, #255
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    orr r8, r8, #65280
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    and r0, r0, r8
; CHECK-NEXT:    mov r6, r1
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r7, r0
; CHECK-NEXT:    and r0, r6, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r0
; CHECK-NEXT:    mov r0, r7
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    mov r6, r0
; CHECK-NEXT:    and r0, r5, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    and r0, r6, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    and r0, r4, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r4, r0
; CHECK-NEXT:    and r0, r5, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %a)
  ret half %b
}

; Fast v4f32 reduction: the start value 1.0 is folded away, leaving three
; chained __aeabi_fmul libcalls over the four lanes.
define float @test_v4f32(<4 x float> %a) nounwind {
; CHECK-LABEL: test_v4f32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
  ret float %b
}

; Strict (no fast-math) v4f32 reduction; generated code matches the fast case
; here: three sequential __aeabi_fmul calls.
define float @test_v4f32_strict(<4 x float> %a) nounwind {
; CHECK-LABEL: test_v4f32_strict:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
  ret float %b
}

; Fast v2f64 reduction: both elements arrive in r0-r3, so a single
; __aeabi_dmul suffices.
define double @test_v2f64(<2 x double> %a) nounwind {
; CHECK-LABEL: test_v2f64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a)
  ret double %b
}

; Strict v2f64 reduction; identical lowering to the fast case: one
; __aeabi_dmul libcall.
define double @test_v2f64_strict(<2 x double> %a) nounwind {
; CHECK-LABEL: test_v2f64_strict:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a)
  ret double %b
}

; Fast v2f128 reduction (start value 1.0 encoded as the fp128 hex constant):
; the second element is copied from the incoming stack area to the outgoing
; argument area, then __multf3 multiplies the two fp128 values.
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ldr r12, [sp, #36]
; CHECK-NEXT:    str r12, [sp, #12]
; CHECK-NEXT:    ldr r12, [sp, #32]
; CHECK-NEXT:    str r12, [sp, #8]
; CHECK-NEXT:    ldr r12, [sp, #28]
; CHECK-NEXT:    str r12, [sp, #4]
; CHECK-NEXT:    ldr r12, [sp, #24]
; CHECK-NEXT:    str r12, [sp]
; CHECK-NEXT:    bl __multf3
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fast fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
  ret fp128 %b
}

; Strict v2f128 reduction; identical lowering to the fast case: marshal the
; stack-passed element and call __multf3 once.
define fp128 @test_v2f128_strict(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128_strict:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ldr r12, [sp, #36]
; CHECK-NEXT:    str r12, [sp, #12]
; CHECK-NEXT:    ldr r12, [sp, #32]
; CHECK-NEXT:    str r12, [sp, #8]
; CHECK-NEXT:    ldr r12, [sp, #28]
; CHECK-NEXT:    str r12, [sp, #4]
; CHECK-NEXT:    ldr r12, [sp, #24]
; CHECK-NEXT:    str r12, [sp]
; CHECK-NEXT:    bl __multf3
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
  ret fp128 %b
}
