; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=-neon | FileCheck %s --check-prefix=CHECK

declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)

define half @test_v4f16_reassoc(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16_reassoc:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov r8, #255
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    orr r8, r8, #65280
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    and r0, r0, r8
; CHECK-NEXT:    mov r6, r1
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r7, r0
; CHECK-NEXT:    and r0, r6, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r0
; CHECK-NEXT:    mov r0, r7
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    mov r6, r0
; CHECK-NEXT:    and r0, r5, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    and r0, r6, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    and r0, r4, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r4, r0
; CHECK-NEXT:    and r0, r5, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call reassoc half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %a)
  ret half %b
}

define half @test_v4f16_seq(<4 x half> %a) nounwind {
; CHECK-LABEL: test_v4f16_seq:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov r8, #255
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    orr r8, r8, #65280
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    and r0, r0, r8
; CHECK-NEXT:    mov r6, r1
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r7, r0
; CHECK-NEXT:    and r0, r6, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r0
; CHECK-NEXT:    mov r0, r7
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    mov r6, r0
; CHECK-NEXT:    and r0, r5, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    and r0, r6, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    and r0, r4, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r4, r0
; CHECK-NEXT:    and r0, r5, r8
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %a)
  ret half %b
}

define float @test_v4f32_reassoc(<4 x float> %a) nounwind {
; CHECK-LABEL: test_v4f32_reassoc:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call reassoc float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %a)
  ret float %b
}

define float @test_v4f32_seq(<4 x float> %a) nounwind {
; CHECK-LABEL: test_v4f32_seq:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    mov r4, r3
; CHECK-NEXT:    mov r5, r2
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    mov r1, r4
; CHECK-NEXT:    bl __aeabi_fadd
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %a)
  ret float %b
}

define double @test_v2f64_reassoc(<2 x double> %a) nounwind {
; CHECK-LABEL: test_v2f64_reassoc:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call reassoc double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %a)
  ret double %b
}

define double @test_v2f64_seq(<2 x double> %a) nounwind {
; CHECK-LABEL: test_v2f64_seq:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %a)
  ret double %b
}

define fp128 @test_v2f128_reassoc(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128_reassoc:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ldr r12, [sp, #36]
; CHECK-NEXT:    str r12, [sp, #12]
; CHECK-NEXT:    ldr r12, [sp, #32]
; CHECK-NEXT:    str r12, [sp, #8]
; CHECK-NEXT:    ldr r12, [sp, #28]
; CHECK-NEXT:    str r12, [sp, #4]
; CHECK-NEXT:    ldr r12, [sp, #24]
; CHECK-NEXT:    str r12, [sp]
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call reassoc fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
  ret fp128 %b
}

define fp128 @test_v2f128_seq(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128_seq:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ldr r12, [sp, #36]
; CHECK-NEXT:    str r12, [sp, #12]
; CHECK-NEXT:    ldr r12, [sp, #32]
; CHECK-NEXT:    str r12, [sp, #8]
; CHECK-NEXT:    ldr r12, [sp, #28]
; CHECK-NEXT:    str r12, [sp, #4]
; CHECK-NEXT:    ldr r12, [sp, #24]
; CHECK-NEXT:    str r12, [sp]
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
  ret fp128 %b
}