; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon | FileCheck %s --check-prefix=CHECK

; Lowering of llvm.vector.reduce.fadd for arm-none-eabi with NEON enabled.
; None of the calls below carry fast-math flags, and the expected assembly
; accumulates the lanes one at a time, beginning with the start operand.
; Each *_neutral variant passes negative zero as the start operand; its
; expected assembly contains no add for that start value.

; Single-element reductions, one per floating-point type.
declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)

; Multi-element reductions with 3, 5, 2 (fp128) and 16 lanes.
declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
declare float @llvm.vector.reduce.fadd.f32.v5f32(float, <5 x float>)
declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)

; half, one lane, explicit start value: the expected assembly converts through
; __aeabi_h2f / __aeabi_f2h and performs the add as vadd.f32.
define half @test_v1f16(<1 x half> %a, half %s) nounwind {
; CHECK-LABEL: test_v1f16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    .vsave {d8}
; CHECK-NEXT:    vpush {d8}
; CHECK-NEXT:    mov r4, r0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    vmov s16, r5
; CHECK-NEXT:    bl __aeabi_h2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vadd.f32 s0, s16, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    vpop {d8}
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %red = call half @llvm.vector.reduce.fadd.f16.v1f16(half %s, <1 x half> %a)
  ret half %red
}

; half, one lane, -0.0 start value: only a single __aeabi_f2h call is expected.
define half @test_v1f16_neutral(<1 x half> %a) nounwind {
; CHECK-LABEL: test_v1f16_neutral:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r11, lr}
; CHECK-NEXT:    push {r11, lr}
; CHECK-NEXT:    bl __aeabi_f2h
; CHECK-NEXT:    pop {r11, lr}
; CHECK-NEXT:    mov pc, lr
  %red = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
  ret half %red
}

; float, one lane, explicit start value: a single vadd.f32 is expected.
define float @test_v1f32(<1 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v1f32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vmov s2, r1
; CHECK-NEXT:    vadd.f32 s0, s2, s0
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    mov pc, lr
  %red = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
  ret float %red
}

; float, one lane, -0.0 start value: the function is expected to just return.
define float @test_v1f32_neutral(<1 x float> %a) nounwind {
; CHECK-LABEL: test_v1f32_neutral:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov pc, lr
  %red = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
  ret float %red
}

; double, one lane, explicit start value: a single vadd.f64 is expected.
define double @test_v1f64(<1 x double> %a, double %s) nounwind {
; CHECK-LABEL: test_v1f64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d16, r0, r1
; CHECK-NEXT:    vmov d17, r2, r3
; CHECK-NEXT:    vadd.f64 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %red = call double @llvm.vector.reduce.fadd.f64.v1f64(double %s, <1 x double> %a)
  ret double %red
}

; double, one lane, -0.0 start value: the function is expected to just return.
define double @test_v1f64_neutral(<1 x double> %a) nounwind {
; CHECK-LABEL: test_v1f64_neutral:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov pc, lr
  %red = call double @llvm.vector.reduce.fadd.f64.v1f64(double -0.0, <1 x double> %a)
  ret double %red
}

; fp128, one lane, explicit start value: the add is expected to be one call to
; the __addtf3 library routine.
define fp128 @test_v1f128(<1 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v1f128:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ldr r12, [sp, #32]
; CHECK-NEXT:    ldr lr, [sp, #36]
; CHECK-NEXT:    ldr r4, [sp, #40]
; CHECK-NEXT:    ldr r5, [sp, #44]
; CHECK-NEXT:    stm sp, {r0, r1, r2, r3}
; CHECK-NEXT:    mov r0, r12
; CHECK-NEXT:    mov r1, lr
; CHECK-NEXT:    mov r2, r4
; CHECK-NEXT:    mov r3, r5
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %red = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 %s, <1 x fp128> %a)
  ret fp128 %red
}

; fp128, one lane, start value given as a hexadecimal fp128 constant (negative
; zero): no __addtf3 call is expected, the function just returns.
define fp128 @test_v1f128_neutral(<1 x fp128> %a) nounwind {
; CHECK-LABEL: test_v1f128_neutral:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    mov pc, lr
  %red = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 0xL00000000000000008000000000000000, <1 x fp128> %a)
  ret fp128 %red
}

; float, three lanes, explicit start value: three chained vadd.f32, the first
; one taking the start value loaded from the stack.
define float @test_v3f32(<3 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v3f32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d3, r2, r3
; CHECK-NEXT:    vldr s0, [sp]
; CHECK-NEXT:    vmov d2, r0, r1
; CHECK-NEXT:    vadd.f32 s0, s0, s4
; CHECK-NEXT:    vadd.f32 s0, s0, s5
; CHECK-NEXT:    vadd.f32 s0, s0, s6
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    mov pc, lr
  %red = call float @llvm.vector.reduce.fadd.f32.v3f32(float %s, <3 x float> %a)
  ret float %red
}

; float, three lanes, -0.0 start value: two chained vadd.f32 over the lanes.
define float @test_v3f32_neutral(<3 x float> %a) nounwind {
; CHECK-LABEL: test_v3f32_neutral:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    vadd.f32 s4, s0, s1
; CHECK-NEXT:    vadd.f32 s0, s4, s2
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    mov pc, lr
  %red = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
  ret float %red
}

; float, five lanes, explicit start value: five chained vadd.f32, with lanes
; moved in from r0-r3 and the stack one at a time.
define float @test_v5f32(<5 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v5f32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr s0, [sp, #4]
; CHECK-NEXT:    vmov s2, r0
; CHECK-NEXT:    vadd.f32 s0, s0, s2
; CHECK-NEXT:    vmov s2, r1
; CHECK-NEXT:    vadd.f32 s0, s0, s2
; CHECK-NEXT:    vmov s2, r2
; CHECK-NEXT:    vadd.f32 s0, s0, s2
; CHECK-NEXT:    vmov s2, r3
; CHECK-NEXT:    vadd.f32 s0, s0, s2
; CHECK-NEXT:    vldr s2, [sp]
; CHECK-NEXT:    vadd.f32 s0, s0, s2
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    mov pc, lr
  %red = call float @llvm.vector.reduce.fadd.f32.v5f32(float %s, <5 x float> %a)
  ret float %red
}

; float, five lanes, -0.0 start value: four chained vadd.f32 over the lanes.
define float @test_v5f32_neutral(<5 x float> %a) nounwind {
; CHECK-LABEL: test_v5f32_neutral:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov s0, r1
; CHECK-NEXT:    vmov s2, r0
; CHECK-NEXT:    vadd.f32 s0, s2, s0
; CHECK-NEXT:    vmov s2, r2
; CHECK-NEXT:    vadd.f32 s0, s0, s2
; CHECK-NEXT:    vmov s2, r3
; CHECK-NEXT:    vadd.f32 s0, s0, s2
; CHECK-NEXT:    vldr s2, [sp]
; CHECK-NEXT:    vadd.f32 s0, s0, s2
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    mov pc, lr
  %red = call float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
  ret float %red
}

; fp128, two lanes, explicit start value: two consecutive __addtf3 calls.
define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind {
; CHECK-LABEL: test_v2f128:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ldr r12, [sp, #48]
; CHECK-NEXT:    ldr lr, [sp, #52]
; CHECK-NEXT:    ldr r4, [sp, #56]
; CHECK-NEXT:    ldr r5, [sp, #60]
; CHECK-NEXT:    stm sp, {r0, r1, r2, r3}
; CHECK-NEXT:    mov r0, r12
; CHECK-NEXT:    mov r1, lr
; CHECK-NEXT:    mov r2, r4
; CHECK-NEXT:    mov r3, r5
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    ldr r4, [sp, #32]
; CHECK-NEXT:    ldr r5, [sp, #40]
; CHECK-NEXT:    ldr lr, [sp, #44]
; CHECK-NEXT:    ldr r12, [sp, #36]
; CHECK-NEXT:    stm sp, {r4, r12}
; CHECK-NEXT:    str r5, [sp, #8]
; CHECK-NEXT:    str lr, [sp, #12]
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %red = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a)
  ret fp128 %red
}

; fp128, two lanes, negative-zero start value (hexadecimal fp128 constant): a
; single __addtf3 call combining the two lanes is expected.
define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind {
; CHECK-LABEL: test_v2f128_neutral:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r11, lr}
; CHECK-NEXT:    push {r4, r5, r11, lr}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    ldr r12, [sp, #36]
; CHECK-NEXT:    ldr lr, [sp, #32]
; CHECK-NEXT:    ldr r4, [sp, #40]
; CHECK-NEXT:    ldr r5, [sp, #44]
; CHECK-NEXT:    str lr, [sp]
; CHECK-NEXT:    str r12, [sp, #4]
; CHECK-NEXT:    str r4, [sp, #8]
; CHECK-NEXT:    str r5, [sp, #12]
; CHECK-NEXT:    bl __addtf3
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    pop {r4, r5, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %red = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
  ret fp128 %red
}

; float, sixteen lanes, explicit start value: sixteen chained vadd.f32. The
; first four lanes come from r0-r3; the remaining twelve are read from the
; stack with three vld1.64 {d2, d3} loads.
define float @test_v16f32(<16 x float> %a, float %s) nounwind {
; CHECK-LABEL: test_v16f32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d3, r2, r3
; CHECK-NEXT:    vldr s0, [sp, #48]
; CHECK-NEXT:    vmov d2, r0, r1
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vadd.f32 s0, s0, s4
; CHECK-NEXT:    vadd.f32 s0, s0, s5
; CHECK-NEXT:    vadd.f32 s0, s0, s6
; CHECK-NEXT:    vadd.f32 s0, s0, s7
; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vadd.f32 s0, s0, s4
; CHECK-NEXT:    vadd.f32 s0, s0, s5
; CHECK-NEXT:    vadd.f32 s0, s0, s6
; CHECK-NEXT:    vadd.f32 s0, s0, s7
; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vadd.f32 s0, s0, s4
; CHECK-NEXT:    vadd.f32 s0, s0, s5
; CHECK-NEXT:    vadd.f32 s0, s0, s6
; CHECK-NEXT:    vadd.f32 s0, s0, s7
; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
; CHECK-NEXT:    vadd.f32 s0, s0, s4
; CHECK-NEXT:    vadd.f32 s0, s0, s5
; CHECK-NEXT:    vadd.f32 s0, s0, s6
; CHECK-NEXT:    vadd.f32 s0, s0, s7
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    mov pc, lr
  %red = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a)
  ret float %red
}

; float, sixteen lanes, -0.0 start value: fifteen chained vadd.f32 over the
; lanes, with the same three stack loads for lanes 4-15.
define float @test_v16f32_neutral(<16 x float> %a) nounwind {
; CHECK-LABEL: test_v16f32_neutral:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov d1, r2, r3
; CHECK-NEXT:    vmov d0, r0, r1
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vadd.f32 s4, s0, s1
; CHECK-NEXT:    vadd.f32 s4, s4, s2
; CHECK-NEXT:    vadd.f32 s0, s4, s3
; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vadd.f32 s0, s0, s4
; CHECK-NEXT:    vadd.f32 s0, s0, s5
; CHECK-NEXT:    vadd.f32 s0, s0, s6
; CHECK-NEXT:    vadd.f32 s0, s0, s7
; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
; CHECK-NEXT:    add r0, sp, #32
; CHECK-NEXT:    vadd.f32 s0, s0, s4
; CHECK-NEXT:    vadd.f32 s0, s0, s5
; CHECK-NEXT:    vadd.f32 s0, s0, s6
; CHECK-NEXT:    vadd.f32 s0, s0, s7
; CHECK-NEXT:    vld1.64 {d2, d3}, [r0]
; CHECK-NEXT:    vadd.f32 s0, s0, s4
; CHECK-NEXT:    vadd.f32 s0, s0, s5
; CHECK-NEXT:    vadd.f32 s0, s0, s6
; CHECK-NEXT:    vadd.f32 s0, s0, s7
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    mov pc, lr
  %red = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)
  ret float %red
}