; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

; Exercises llvm.vector.reduce.add on AArch64 with NEON enabled, over:
;   * single-element vectors of i1, i8, i16, i24, i32, i64 and i128,
;   * element counts that are not a power of two (3 x i8, 9 x i8, 3 x i32),
;   * element widths i1, i24 and i128 with more than one element,
;   * a 16 x i32 vector.
; The expected assembly below is autogenerated; regenerate it with the script
; named on the first line instead of editing it by hand.

; Single-element reductions.
declare i1 @llvm.vector.reduce.add.v1i1(<1 x i1>)
declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>)
declare i16 @llvm.vector.reduce.add.v1i16(<1 x i16>)
declare i24 @llvm.vector.reduce.add.v1i24(<1 x i24>)
declare i32 @llvm.vector.reduce.add.v1i32(<1 x i32>)
declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
declare i128 @llvm.vector.reduce.add.v1i128(<1 x i128>)

; Multi-element reductions.
declare i8 @llvm.vector.reduce.add.v3i8(<3 x i8>)
declare i8 @llvm.vector.reduce.add.v9i8(<9 x i8>)
declare i32 @llvm.vector.reduce.add.v3i32(<3 x i32>)
declare i1 @llvm.vector.reduce.add.v4i1(<4 x i1>)
declare i24 @llvm.vector.reduce.add.v4i24(<4 x i24>)
declare i128 @llvm.vector.reduce.add.v2i128(<2 x i128>)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)

; <1 x i1>: the expected code only masks w0 down to its low bit.
define i1 @test_v1i1(<1 x i1> %vec) nounwind {
; CHECK-LABEL: test_v1i1:
; CHECK: // %bb.0:
; CHECK-NEXT: and w0, w0, #0x1
; CHECK-NEXT: ret
  %sum = call i1 @llvm.vector.reduce.add.v1i1(<1 x i1> %vec)
  ret i1 %sum
}

; <1 x i8>: the expected code moves byte lane 0 of v0 into w0.
define i8 @test_v1i8(<1 x i8> %vec) nounwind {
; CHECK-LABEL: test_v1i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.b[0]
; CHECK-NEXT: ret
  %sum = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> %vec)
  ret i8 %sum
}

; <1 x i16>: the expected code moves halfword lane 0 of v0 into w0.
define i16 @test_v1i16(<1 x i16> %vec) nounwind {
; CHECK-LABEL: test_v1i16:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: umov w0, v0.h[0]
; CHECK-NEXT: ret
  %sum = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %vec)
  ret i16 %sum
}

; <1 x i24>: the expected body is a bare return.
define i24 @test_v1i24(<1 x i24> %vec) nounwind {
; CHECK-LABEL: test_v1i24:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %sum = call i24 @llvm.vector.reduce.add.v1i24(<1 x i24> %vec)
  ret i24 %sum
}

; <1 x i32>: the expected code is a single fmov from s0 to w0.
define i32 @test_v1i32(<1 x i32> %vec) nounwind {
; CHECK-LABEL: test_v1i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %sum = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %vec)
  ret i32 %sum
}

; <1 x i64>: the expected code is a single fmov from d0 to x0.
define i64 @test_v1i64(<1 x i64> %vec) nounwind {
; CHECK-LABEL: test_v1i64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
  %sum = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %vec)
  ret i64 %sum
}

; <1 x i128>: the expected body is a bare return.
define i128 @test_v1i128(<1 x i128> %vec) nounwind {
; CHECK-LABEL: test_v1i128:
; CHECK: // %bb.0:
; CHECK-NEXT: ret
  %sum = call i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %vec)
  ret i128 %sum
}

; <3 x i8>: the expected code zeroes v0, inserts w0..w2 into halfword lanes
; 0..2 and reduces across four halfword lanes.
define i8 @test_v3i8(<3 x i8> %vec) nounwind {
; CHECK-LABEL: test_v3i8:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov v0.h[0], w0
; CHECK-NEXT: mov v0.h[1], w1
; CHECK-NEXT: mov v0.h[2], w2
; CHECK-NEXT: addv h0, v0.4h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %sum = call i8 @llvm.vector.reduce.add.v3i8(<3 x i8> %vec)
  ret i8 %sum
}

; <9 x i8>: the expected code ANDs v0 with a constant loaded from .LCPI8_0
; before reducing across sixteen byte lanes.
; NOTE(review): the "8" in that label presumably tracks this function's
; position in the file, so keep the function order stable -- confirm.
define i8 @test_v9i8(<9 x i8> %vec) nounwind {
; CHECK-LABEL: test_v9i8:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI8_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: addv b0, v0.16b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %sum = call i8 @llvm.vector.reduce.add.v9i8(<9 x i8> %vec)
  ret i8 %sum
}

; <3 x i32>: the expected code writes wzr into lane 3 before reducing across
; four word lanes.
define i32 @test_v3i32(<3 x i32> %vec) nounwind {
; CHECK-LABEL: test_v3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mov v0.s[3], wzr
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %sum = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> %vec)
  ret i32 %sum
}

; <4 x i1>: the expected code reduces across four halfword lanes and then
; masks the scalar down to its low bit.
define i1 @test_v4i1(<4 x i1> %vec) nounwind {
; CHECK-LABEL: test_v4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: addv h0, v0.4h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
  %sum = call i1 @llvm.vector.reduce.add.v4i1(<4 x i1> %vec)
  ret i1 %sum
}

; <4 x i24>: the expected code reduces across four word lanes.
define i24 @test_v4i24(<4 x i24> %vec) nounwind {
; CHECK-LABEL: test_v4i24:
; CHECK: // %bb.0:
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %sum = call i24 @llvm.vector.reduce.add.v4i24(<4 x i24> %vec)
  ret i24 %sum
}

; <2 x i128>: the expected code is an adds/adc pair on the general-purpose
; registers x0..x3.
define i128 @test_v2i128(<2 x i128> %vec) nounwind {
; CHECK-LABEL: test_v2i128:
; CHECK: // %bb.0:
; CHECK-NEXT: adds x0, x0, x2
; CHECK-NEXT: adc x1, x1, x3
; CHECK-NEXT: ret
  %sum = call i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %vec)
  ret i128 %sum
}

; <16 x i32>: the expected code combines v0..v3 with three vector adds and
; then reduces across four word lanes.
define i32 @test_v16i32(<16 x i32> %vec) nounwind {
; CHECK-LABEL: test_v16i32:
; CHECK: // %bb.0:
; CHECK-NEXT: add v1.4s, v1.4s, v3.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
  %sum = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %vec)
  ret i32 %sum
}