; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve < %s | FileCheck %s
; RUN: llc -mattr=+dotprod,+sve < %s | FileCheck %s -check-prefix=DOT
; RUN: llc -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
; RUN: llc -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE

; Four llc configurations are exercised by the invocations above:
;   1. +sve only (default check prefix)
;   2. +dotprod,+sve (its own prefix)
;   3. +dotprod,+sve with -force-streaming-compatible
;   4. +dotprod,+sme with -force-streaming
; Configurations 3 and 4 share a single check prefix, so the same expected
; instruction sequence is applied to the output of both.
; The expected-output comment lines inside each function are generated; do not
; edit them by hand, regenerate them with the script named on the first line.

target triple = "aarch64-unknown-linux-gnu"

; Unsigned case: zero-extends every i8 lane of %a to i32 and sums the 32 lanes
; with llvm.vector.reduce.add, returning the i32 total.
; Expected lowering per configuration, as recorded below:
;   - +sve only: NEON widening (ushll/ushll2, uaddl/uaddl2) followed by addv.
;   - +dotprod: two udot instructions accumulating into a zeroed register
;     against a vector whose bytes are all 1, followed by addv.
;   - streaming runs: SVE uunpklo/ext unpacking, vector adds, then a predicated
;     uaddv under "ptrue p0.s, vl4".
; NOTE(review): the function name says v16i8 but the argument is <32 x i8>;
; confirm whether that is intentional (the label lines depend on the name).
define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
; CHECK-LABEL: reduce_uaddv_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll2 v2.8h, v1.16b, #0
; CHECK-NEXT: ushll2 v3.8h, v0.16b, #0
; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-NEXT: uaddl2 v4.4s, v3.8h, v2.8h
; CHECK-NEXT: uaddl v2.4s, v3.4h, v2.4h
; CHECK-NEXT: uaddl2 v5.4s, v0.8h, v1.8h
; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT: add v1.4s, v5.4s, v4.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; DOT-LABEL: reduce_uaddv_v16i8:
; DOT: // %bb.0:
; DOT-NEXT: movi v2.16b, #1
; DOT-NEXT: movi v3.2d, #0000000000000000
; DOT-NEXT: udot v3.4s, v1.16b, v2.16b
; DOT-NEXT: udot v3.4s, v0.16b, v2.16b
; DOT-NEXT: addv s0, v3.4s
; DOT-NEXT: fmov w0, s0
; DOT-NEXT: ret
;
; STREAMING-SVE-LABEL: reduce_uaddv_v16i8:
; STREAMING-SVE: // %bb.0:
; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; STREAMING-SVE-NEXT: uunpklo z2.h, z1.b
; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; STREAMING-SVE-NEXT: uunpklo z3.h, z0.b
; STREAMING-SVE-NEXT: ptrue p0.s, vl4
; STREAMING-SVE-NEXT: ext z1.b, z1.b, z1.b, #8
; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
; STREAMING-SVE-NEXT: uunpklo z1.h, z1.b
; STREAMING-SVE-NEXT: uunpklo z0.h, z0.b
; STREAMING-SVE-NEXT: uunpklo z4.s, z2.h
; STREAMING-SVE-NEXT: ext z2.b, z2.b, z2.b, #8
; STREAMING-SVE-NEXT: uunpklo z6.s, z3.h
; STREAMING-SVE-NEXT: ext z3.b, z3.b, z3.b, #8
; STREAMING-SVE-NEXT: mov z5.d, z1.d
; STREAMING-SVE-NEXT: uunpklo z7.s, z0.h
; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
; STREAMING-SVE-NEXT: uunpklo z2.s, z2.h
; STREAMING-SVE-NEXT: uunpklo z3.s, z3.h
; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s
; STREAMING-SVE-NEXT: ext z5.b, z5.b, z1.b, #8
; STREAMING-SVE-NEXT: uunpklo z1.s, z1.h
; STREAMING-SVE-NEXT: uunpklo z0.s, z0.h
; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s
; STREAMING-SVE-NEXT: uunpklo z5.s, z5.h
; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s
; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s
; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s
; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s
; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s
; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s
; STREAMING-SVE-NEXT: fmov w0, s0
; STREAMING-SVE-NEXT: ret
  %1 = zext <32 x i8> %a to <32 x i32>
  %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
  ret i32 %2
}

; Signed case: identical to the function above except that the lanes are
; sign-extended (sext) before the add reduction.
; Expected lowering per configuration, as recorded below:
;   - +sve only: NEON widening (sshll/sshll2, saddl/saddl2) followed by addv.
;   - +dotprod: two sdot instructions accumulating into a zeroed register
;     against a vector whose bytes are all 1, followed by addv.
;   - streaming runs: SVE sunpklo/ext unpacking and vector adds; the final
;     reduction recorded here is still uaddv, as in the unsigned function.
; NOTE(review): the function name says v16i8 but the argument is <32 x i8>;
; confirm whether that is intentional (the label lines depend on the name).
define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
; CHECK-LABEL: reduce_saddv_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: sshll2 v2.8h, v1.16b, #0
; CHECK-NEXT: sshll2 v3.8h, v0.16b, #0
; CHECK-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-NEXT: saddl2 v4.4s, v3.8h, v2.8h
; CHECK-NEXT: saddl v2.4s, v3.4h, v2.4h
; CHECK-NEXT: saddl2 v5.4s, v0.8h, v1.8h
; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
; CHECK-NEXT: add v1.4s, v5.4s, v4.4s
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; DOT-LABEL: reduce_saddv_v16i8:
; DOT: // %bb.0:
; DOT-NEXT: movi v2.16b, #1
; DOT-NEXT: movi v3.2d, #0000000000000000
; DOT-NEXT: sdot v3.4s, v1.16b, v2.16b
; DOT-NEXT: sdot v3.4s, v0.16b, v2.16b
; DOT-NEXT: addv s0, v3.4s
; DOT-NEXT: fmov w0, s0
; DOT-NEXT: ret
;
; STREAMING-SVE-LABEL: reduce_saddv_v16i8:
; STREAMING-SVE: // %bb.0:
; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
; STREAMING-SVE-NEXT: sunpklo z2.h, z1.b
; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
; STREAMING-SVE-NEXT: sunpklo z3.h, z0.b
; STREAMING-SVE-NEXT: ptrue p0.s, vl4
; STREAMING-SVE-NEXT: ext z1.b, z1.b, z1.b, #8
; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
; STREAMING-SVE-NEXT: sunpklo z1.h, z1.b
; STREAMING-SVE-NEXT: sunpklo z0.h, z0.b
; STREAMING-SVE-NEXT: sunpklo z4.s, z2.h
; STREAMING-SVE-NEXT: ext z2.b, z2.b, z2.b, #8
; STREAMING-SVE-NEXT: sunpklo z6.s, z3.h
; STREAMING-SVE-NEXT: ext z3.b, z3.b, z3.b, #8
; STREAMING-SVE-NEXT: mov z5.d, z1.d
; STREAMING-SVE-NEXT: sunpklo z7.s, z0.h
; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
; STREAMING-SVE-NEXT: sunpklo z2.s, z2.h
; STREAMING-SVE-NEXT: sunpklo z3.s, z3.h
; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s
; STREAMING-SVE-NEXT: ext z5.b, z5.b, z1.b, #8
; STREAMING-SVE-NEXT: sunpklo z1.s, z1.h
; STREAMING-SVE-NEXT: sunpklo z0.s, z0.h
; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s
; STREAMING-SVE-NEXT: sunpklo z5.s, z5.h
; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s
; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s
; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s
; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s
; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s
; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s
; STREAMING-SVE-NEXT: fmov w0, s0
; STREAMING-SVE-NEXT: ret
  %1 = sext <32 x i8> %a to <32 x i32>
  %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
  ret i32 %2
}