; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>)
declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
declare fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128, <2 x fp128>)

; With fast-math, the multiplicative-identity start value (1.0) is folded away,
; leaving a chain of soft-float libcalls: each half element is widened via
; __aeabi_h2f, multiplied with __aeabi_fmul, and truncated back with __aeabi_f2h.
define half @test_v4f16(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov r8, #255
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    orr r8, r8, #65280
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    and r0, r0, r8
; CHECK-NEXT:    mov r6, r1
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r7, r0
; CHECK-NEXT:    and r0, r6, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r0
; CHECK-NEXT:    mov r0, r7
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    mov r6, r0
; CHECK-NEXT:    and r0, r5, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    and r0, r6, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    and r0, r4, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r4, r0
; CHECK-NEXT:    and r0, r5, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fast half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %a)
  ret half %b
}

define float @test_v4f32(<4 x float> %a) nounwind {
; CHECK-LABEL: test_v4f32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fast float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
  ret float %b
}

; Strict (no fast-math flags) ordered reduction; codegen currently matches the
; fast variant because the 1.0 start value is still a multiplicative identity.
define float @test_v4f32_strict(<4 x float> %a) nounwind {
; CHECK-LABEL: test_v4f32_strict:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fmul
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %a)
  ret float %b
}

define double @test_v2f64(<2 x double> %a) nounwind {
; CHECK-LABEL: test_v2f64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fast double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a)
  ret double %b
}

define double @test_v2f64_strict(<2 x double> %a) nounwind {
; CHECK-LABEL: test_v2f64_strict:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %a)
  ret double %b
}

; fp128 arguments beyond the first are passed on the stack; the second element
; is copied into the outgoing argument area before the __multf3 libcall.
define fp128 @test_v2f128(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ldr r12, [sp, #36]
; CHECK-NEXT:    str r12, [sp, #12]
; CHECK-NEXT:    ldr r12, [sp, #32]
; CHECK-NEXT:    str r12, [sp, #8]
; CHECK-NEXT:    ldr r12, [sp, #28]
; CHECK-NEXT:    str r12, [sp, #4]
; CHECK-NEXT:    ldr r12, [sp, #24]
; CHECK-NEXT:    str r12, [sp]
; CHECK-NEXT:    bl __multf3
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fast fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
  ret fp128 %b
}

define fp128 @test_v2f128_strict(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128_strict:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ldr r12, [sp, #36]
; CHECK-NEXT:    str r12, [sp, #12]
; CHECK-NEXT:    ldr r12, [sp, #32]
; CHECK-NEXT:    str r12, [sp, #8]
; CHECK-NEXT:    ldr r12, [sp, #28]
; CHECK-NEXT:    str r12, [sp, #4]
; CHECK-NEXT:    ldr r12, [sp, #24]
; CHECK-NEXT:    str r12, [sp]
; CHECK-NEXT:    bl __multf3
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fp128 @llvm.vector.reduce.fmul.f128.v2f128(fp128 0xL00000000000000003fff00000000000000, <2 x fp128> %a)
  ret fp128 %b
}