xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll (revision c2bd5c25b3634e55089d34afe922aa38eee743e2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve < %s | FileCheck %s
3; RUN: llc -mattr=+dotprod,+sve < %s | FileCheck %s -check-prefix=DOT
4; RUN: llc -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
5; RUN: llc -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE
6
7target triple = "aarch64-unknown-linux-gnu"
8
; Unsigned widening add-reduction: every i8 lane of %a is zero-extended to
; i32, all 32 lanes are summed with llvm.vector.reduce.add.v32i32, and the
; i32 total is returned.
;
; Lowerings pinned by the assertions below, one per llc configuration:
;   * plain CHECK run (+sve): ushll/ushll2 and uaddl/uaddl2 widening steps,
;     vector adds, then a single addv.
;   * DOT run (+dotprod,+sve): two udot accumulations of the input halves
;     against an all-ones byte vector into a zeroed accumulator, then addv.
;   * STREAMING-SVE runs: uunpklo/ext widening, vector adds, then uaddv under
;     a "ptrue p0.s, vl4" predicate.
;
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
; NOTE(review): the function name says v16i8 but the operand type is
; <32 x i8> -- confirm whether the name is intentional.
9define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
10; CHECK-LABEL: reduce_uaddv_v16i8:
11; CHECK:       // %bb.0:
12; CHECK-NEXT:    ushll2 v2.8h, v1.16b, #0
13; CHECK-NEXT:    ushll2 v3.8h, v0.16b, #0
14; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
15; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
16; CHECK-NEXT:    uaddl2 v4.4s, v3.8h, v2.8h
17; CHECK-NEXT:    uaddl v2.4s, v3.4h, v2.4h
18; CHECK-NEXT:    uaddl2 v5.4s, v0.8h, v1.8h
19; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
20; CHECK-NEXT:    add v1.4s, v5.4s, v4.4s
21; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
22; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
23; CHECK-NEXT:    addv s0, v0.4s
24; CHECK-NEXT:    fmov w0, s0
25; CHECK-NEXT:    ret
26;
27; DOT-LABEL: reduce_uaddv_v16i8:
28; DOT:       // %bb.0:
29; DOT-NEXT:    movi v2.16b, #1
30; DOT-NEXT:    movi v3.2d, #0000000000000000
31; DOT-NEXT:    udot v3.4s, v1.16b, v2.16b
32; DOT-NEXT:    udot v3.4s, v0.16b, v2.16b
33; DOT-NEXT:    addv s0, v3.4s
34; DOT-NEXT:    fmov w0, s0
35; DOT-NEXT:    ret
36;
37; STREAMING-SVE-LABEL: reduce_uaddv_v16i8:
38; STREAMING-SVE:       // %bb.0:
39; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
40; STREAMING-SVE-NEXT:    uunpklo z2.h, z1.b
41; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
42; STREAMING-SVE-NEXT:    uunpklo z3.h, z0.b
43; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
44; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
45; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
46; STREAMING-SVE-NEXT:    uunpklo z1.h, z1.b
47; STREAMING-SVE-NEXT:    uunpklo z0.h, z0.b
48; STREAMING-SVE-NEXT:    uunpklo z4.s, z2.h
49; STREAMING-SVE-NEXT:    ext z2.b, z2.b, z2.b, #8
50; STREAMING-SVE-NEXT:    uunpklo z6.s, z3.h
51; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z3.b, #8
52; STREAMING-SVE-NEXT:    mov z5.d, z1.d
53; STREAMING-SVE-NEXT:    uunpklo z7.s, z0.h
54; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
55; STREAMING-SVE-NEXT:    uunpklo z2.s, z2.h
56; STREAMING-SVE-NEXT:    uunpklo z3.s, z3.h
57; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
58; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z1.b, #8
59; STREAMING-SVE-NEXT:    uunpklo z1.s, z1.h
60; STREAMING-SVE-NEXT:    uunpklo z0.s, z0.h
61; STREAMING-SVE-NEXT:    add z2.s, z3.s, z2.s
62; STREAMING-SVE-NEXT:    uunpklo z5.s, z5.h
63; STREAMING-SVE-NEXT:    add z1.s, z7.s, z1.s
64; STREAMING-SVE-NEXT:    add z0.s, z0.s, z5.s
65; STREAMING-SVE-NEXT:    add z1.s, z4.s, z1.s
66; STREAMING-SVE-NEXT:    add z0.s, z2.s, z0.s
67; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
68; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
69; STREAMING-SVE-NEXT:    fmov w0, s0
70; STREAMING-SVE-NEXT:    ret
  ; Zero-extend each of the 32 byte lanes to i32 before reducing.
71  %1 = zext <32 x i8> %a to <32 x i32>
  ; Horizontal add across all 32 widened lanes, producing one i32.
72  %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
73  ret i32 %2
74}
75
; Signed widening add-reduction: every i8 lane of %a is sign-extended to
; i32, all 32 lanes are summed with llvm.vector.reduce.add.v32i32, and the
; i32 total is returned.
;
; Lowerings pinned by the assertions below, one per llc configuration:
;   * plain CHECK run (+sve): sshll/sshll2 and saddl/saddl2 widening steps,
;     vector adds, then a single addv.
;   * DOT run (+dotprod,+sve): two sdot accumulations of the input halves
;     against an all-ones byte vector into a zeroed accumulator, then addv.
;   * STREAMING-SVE runs: sunpklo/ext widening, vector adds, then a final
;     uaddv under a "ptrue p0.s, vl4" predicate. The final reduction is the
;     unsigned uaddv even though the widening is signed; only the low 32 bits
;     are moved out via "fmov w0, s0".
;     NOTE(review): presumably signedness is irrelevant once lanes are
;     already i32 -- confirm this is the intended lowering.
;
; The assertion lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
; NOTE(review): the function name says v16i8 but the operand type is
; <32 x i8> -- confirm whether the name is intentional.
76define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
77; CHECK-LABEL: reduce_saddv_v16i8:
78; CHECK:       // %bb.0:
79; CHECK-NEXT:    sshll2 v2.8h, v1.16b, #0
80; CHECK-NEXT:    sshll2 v3.8h, v0.16b, #0
81; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
82; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
83; CHECK-NEXT:    saddl2 v4.4s, v3.8h, v2.8h
84; CHECK-NEXT:    saddl v2.4s, v3.4h, v2.4h
85; CHECK-NEXT:    saddl2 v5.4s, v0.8h, v1.8h
86; CHECK-NEXT:    saddl v0.4s, v0.4h, v1.4h
87; CHECK-NEXT:    add v1.4s, v5.4s, v4.4s
88; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
89; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
90; CHECK-NEXT:    addv s0, v0.4s
91; CHECK-NEXT:    fmov w0, s0
92; CHECK-NEXT:    ret
93;
94; DOT-LABEL: reduce_saddv_v16i8:
95; DOT:       // %bb.0:
96; DOT-NEXT:    movi v2.16b, #1
97; DOT-NEXT:    movi v3.2d, #0000000000000000
98; DOT-NEXT:    sdot v3.4s, v1.16b, v2.16b
99; DOT-NEXT:    sdot v3.4s, v0.16b, v2.16b
100; DOT-NEXT:    addv s0, v3.4s
101; DOT-NEXT:    fmov w0, s0
102; DOT-NEXT:    ret
103;
104; STREAMING-SVE-LABEL: reduce_saddv_v16i8:
105; STREAMING-SVE:       // %bb.0:
106; STREAMING-SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
107; STREAMING-SVE-NEXT:    sunpklo z2.h, z1.b
108; STREAMING-SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
109; STREAMING-SVE-NEXT:    sunpklo z3.h, z0.b
110; STREAMING-SVE-NEXT:    ptrue p0.s, vl4
111; STREAMING-SVE-NEXT:    ext z1.b, z1.b, z1.b, #8
112; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
113; STREAMING-SVE-NEXT:    sunpklo z1.h, z1.b
114; STREAMING-SVE-NEXT:    sunpklo z0.h, z0.b
115; STREAMING-SVE-NEXT:    sunpklo z4.s, z2.h
116; STREAMING-SVE-NEXT:    ext z2.b, z2.b, z2.b, #8
117; STREAMING-SVE-NEXT:    sunpklo z6.s, z3.h
118; STREAMING-SVE-NEXT:    ext z3.b, z3.b, z3.b, #8
119; STREAMING-SVE-NEXT:    mov z5.d, z1.d
120; STREAMING-SVE-NEXT:    sunpklo z7.s, z0.h
121; STREAMING-SVE-NEXT:    ext z0.b, z0.b, z0.b, #8
122; STREAMING-SVE-NEXT:    sunpklo z2.s, z2.h
123; STREAMING-SVE-NEXT:    sunpklo z3.s, z3.h
124; STREAMING-SVE-NEXT:    add z4.s, z6.s, z4.s
125; STREAMING-SVE-NEXT:    ext z5.b, z5.b, z1.b, #8
126; STREAMING-SVE-NEXT:    sunpklo z1.s, z1.h
127; STREAMING-SVE-NEXT:    sunpklo z0.s, z0.h
128; STREAMING-SVE-NEXT:    add z2.s, z3.s, z2.s
129; STREAMING-SVE-NEXT:    sunpklo z5.s, z5.h
130; STREAMING-SVE-NEXT:    add z1.s, z7.s, z1.s
131; STREAMING-SVE-NEXT:    add z0.s, z0.s, z5.s
132; STREAMING-SVE-NEXT:    add z1.s, z4.s, z1.s
133; STREAMING-SVE-NEXT:    add z0.s, z2.s, z0.s
134; STREAMING-SVE-NEXT:    add z0.s, z1.s, z0.s
135; STREAMING-SVE-NEXT:    uaddv d0, p0, z0.s
136; STREAMING-SVE-NEXT:    fmov w0, s0
137; STREAMING-SVE-NEXT:    ret
  ; Sign-extend each of the 32 byte lanes to i32 before reducing.
138  %1 = sext <32 x i8> %a to <32 x i32>
  ; Horizontal add across all 32 widened lanes, producing one i32.
139  %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
140  ret i32 %2
141}
142