xref: /llvm-project/llvm/test/CodeGen/ARM/vecreduce-fmul-legalization-strict.ll (revision 52864d9c7bd49ca41191bd34fcee47f61cfea743)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK
3
; Overloads of the llvm.vector.reduce.fmul intrinsic used by the tests in
; this file. Each one takes a scalar start value followed by the vector to
; reduce and returns a scalar of the element type.
;
; Single-element vectors, one per element type (half, float, double, fp128):
4declare half @llvm.vector.reduce.fmul.f16.v1f16(half, <1 x half>)
5declare float @llvm.vector.reduce.fmul.f32.v1f32(float, <1 x float>)
6declare double @llvm.vector.reduce.fmul.f64.v1f64(double, <1 x double>)
7declare fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128, <1 x fp128>)
8
; Multi-element vectors: 3 x float, 2 x fp128 and 16 x float.
9declare float @llvm.vector.reduce.fmul.f32.v3f32(float, <3 x float>)
10declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>)
11declare float @llvm.vector.reduce.fmul.f32.v16f32(float, <16 x float>)
12
; fmul reduction of a <1 x half> vector with a start value of 1.0. The call
; carries no fast-math flags.
; The expected code contains no multiply instruction: it is a single
; __aeabi_f2h libcall wrapped in a push/pop of {r11, lr}, followed by the
; return.
13define half @test_v1f16(<1 x half> %a) nounwind {
14; CHECK-LABEL: test_v1f16:
15; CHECK:       @ %bb.0:
16; CHECK-NEXT:    .save {r11, lr}
17; CHECK-NEXT:    push {r11, lr}
18; CHECK-NEXT:    bl __aeabi_f2h
19; CHECK-NEXT:    pop {r11, lr}
20; CHECK-NEXT:    mov pc, lr
21  %b = call half @llvm.vector.reduce.fmul.f16.v1f16(half 1.0, <1 x half> %a)
22  ret half %b
23}
24
; fmul reduction of a <1 x float> vector with a start value of 1.0. The call
; carries no fast-math flags.
; The expected code is only the return instruction: no multiply and no
; register moves are emitted.
25define float @test_v1f32(<1 x float> %a) nounwind {
26; CHECK-LABEL: test_v1f32:
27; CHECK:       @ %bb.0:
28; CHECK-NEXT:    mov pc, lr
29  %b = call float @llvm.vector.reduce.fmul.f32.v1f32(float 1.0, <1 x float> %a)
30  ret float %b
31}
32
; fmul reduction of a <1 x double> vector with a start value of 1.0. The call
; carries no fast-math flags.
; The expected code is only the return instruction: no multiply and no
; register moves are emitted.
33define double @test_v1f64(<1 x double> %a) nounwind {
34; CHECK-LABEL: test_v1f64:
35; CHECK:       @ %bb.0:
36; CHECK-NEXT:    mov pc, lr
37  %b = call double @llvm.vector.reduce.fmul.f64.v1f64(double 1.0, <1 x double> %a)
38  ret double %b
39}
40
; fmul reduction of a <1 x fp128> vector. The start value is given as a
; hexadecimal fp128 literal (presumably 1.0, matching the other tests in this
; file; TODO confirm the encoding). The call carries no fast-math flags.
; The expected code is only the return instruction: no libcall is emitted.
41define fp128 @test_v1f128(<1 x fp128> %a) nounwind {
42; CHECK-LABEL: test_v1f128:
43; CHECK:       @ %bb.0:
44; CHECK-NEXT:    mov pc, lr
45  %b = call fp128 @llvm.vector.reduce.fmul.f128.v1f128(fp128 0xL00000000000000003fff00000000000000, <1 x fp128> %a)
46  ret fp128 %b
47}
48
; fmul reduction of a <3 x float> vector with a start value of 1.0. The call
; carries no fast-math flags.
; Expected code: the vector is assembled into d0/d1 from r0-r3, then two
; scalar vmul.f32 instructions combine s0 * s1 and multiply that product by
; s2. The result is moved from s0 to r0 before returning. No multiply
; involving the start value appears.
49define float @test_v3f32(<3 x float> %a) nounwind {
50; CHECK-LABEL: test_v3f32:
51; CHECK:       @ %bb.0:
52; CHECK-NEXT:    vmov d1, r2, r3
53; CHECK-NEXT:    vmov d0, r0, r1
54; CHECK-NEXT:    vmul.f32 s4, s0, s1
55; CHECK-NEXT:    vmul.f32 s0, s4, s2
56; CHECK-NEXT:    vmov r0, s0
57; CHECK-NEXT:    mov pc, lr
58  %b = call float @llvm.vector.reduce.fmul.f32.v3f32(float 1.0, <3 x float> %a)
59  ret float %b
60}
61
; fmul reduction of a <2 x fp128> vector. The start value is given as a
; hexadecimal fp128 literal (presumably 1.0, matching the other tests in this
; file; TODO confirm the encoding). The call carries no fast-math flags.
; Expected code: after saving {r4, r5, r11, lr} and reserving 16 bytes of
; stack, four words are loaded from [sp, #32]..[sp, #44] and stored to
; [sp]..[sp, #12], then exactly one __multf3 libcall is made.
; NOTE(review): the copied words are presumably the second vector element
; being forwarded as the libcall's stack argument; confirm against the ARM
; calling convention.
62define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
63; CHECK-LABEL: test_v2f128:
64; CHECK:       @ %bb.0:
65; CHECK-NEXT:    .save {r4, r5, r11, lr}
66; CHECK-NEXT:    push {r4, r5, r11, lr}
67; CHECK-NEXT:    .pad #16
68; CHECK-NEXT:    sub sp, sp, #16
69; CHECK-NEXT:    ldr r12, [sp, #36]
70; CHECK-NEXT:    ldr lr, [sp, #32]
71; CHECK-NEXT:    ldr r4, [sp, #40]
72; CHECK-NEXT:    ldr r5, [sp, #44]
73; CHECK-NEXT:    str lr, [sp]
74; CHECK-NEXT:    str r12, [sp, #4]
75; CHECK-NEXT:    str r4, [sp, #8]
76; CHECK-NEXT:    str r5, [sp, #12]
77; CHECK-NEXT:    bl __multf3
78; CHECK-NEXT:    add sp, sp, #16
79; CHECK-NEXT:    pop {r4, r5, r11, lr}
80; CHECK-NEXT:    mov pc, lr
81  %b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
82  ret fp128 %b
83}
84
; fmul reduction of a <16 x float> vector with a start value of 1.0. The call
; carries no fast-math flags.
; Expected code: d0/d1 are assembled from r0-r3, and three vld1.64 loads from
; sp, sp+16 and sp+32 bring further data into d2/d3. Fifteen scalar vmul.f32
; instructions form a single dependency chain: s0 * s1, then * s2, then * s3,
; and after each load the running product in s0 is multiplied by s4, s5, s6
; and s7 in turn. No pairwise or vector multiply is used. The result is moved
; from s0 to r0 before returning.
85define float @test_v16f32(<16 x float> %a) nounwind {
86; CHECK-LABEL: test_v16f32:
87; CHECK:       @ %bb.0:
88; CHECK-NEXT:    vmov d1, r2, r3
89; CHECK-NEXT:    vmov d0, r0, r1
90; CHECK-NEXT:    mov r0, sp
91; CHECK-NEXT:    vmul.f32 s4, s0, s1
92; CHECK-NEXT:    vmul.f32 s4, s4, s2
93; CHECK-NEXT:    vmul.f32 s0, s4, s3
94; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
95; CHECK-NEXT:    add r0, sp, #16
96; CHECK-NEXT:    vmul.f32 s0, s0, s4
97; CHECK-NEXT:    vmul.f32 s0, s0, s5
98; CHECK-NEXT:    vmul.f32 s0, s0, s6
99; CHECK-NEXT:    vmul.f32 s0, s0, s7
100; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
101; CHECK-NEXT:    add r0, sp, #32
102; CHECK-NEXT:    vmul.f32 s0, s0, s4
103; CHECK-NEXT:    vmul.f32 s0, s0, s5
104; CHECK-NEXT:    vmul.f32 s0, s0, s6
105; CHECK-NEXT:    vmul.f32 s0, s0, s7
106; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
107; CHECK-NEXT:    vmul.f32 s0, s0, s4
108; CHECK-NEXT:    vmul.f32 s0, s0, s5
109; CHECK-NEXT:    vmul.f32 s0, s0, s6
110; CHECK-NEXT:    vmul.f32 s0, s0, s7
111; CHECK-NEXT:    vmov r0, s0
112; CHECK-NEXT:    mov pc, lr
113  %b = call float @llvm.vector.reduce.fmul.f32.v16f32(float 1.0, <16 x float> %a)
114  ret float %b
115}
116