; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; Test vector add reduction intrinsics.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z16 | FileCheck %s
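;
; The lowerings exercised below, as the CHECK lines show:
; - i8/i16: VSUMB/VSUMH form four word partial sums, VSUMQF folds them into
;   a quadword, and VLGVF extracts the low word (element 3).
; - i32: VSUMQF reduces the four word elements directly.
; - i64: VREPG replicates the odd element so a single VAG adds both halves,
;   then VLGVG extracts element 0.
; - i128: values are passed in memory, so the parts are loaded with VL and
;   added with VAQ; the <1 x i128> reduction is just a store of the input.
; Inputs wider than one register are first narrowed with pairwise
; VAB/VAH/VAF/VAG adds.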

; 1 vector length
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
declare i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)
; 2 vector lengths
declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a)
declare i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
; TODO
; 4 vector lengths
declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %a)
declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %a)
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a)
declare i128 @llvm.vector.reduce.add.v4i128(<4 x i128> %a)
; Subvector lengths
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)

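; Reductions of a single 128-bit vector register.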
define i8 @f1_1(<16 x i8> %a) {
; CHECK-LABEL: f1_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgbm %v0, 0
; CHECK-NEXT:    vsumb %v1, %v24, %v0
; CHECK-NEXT:    vsumqf %v0, %v1, %v0
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
  ret i8 %redadd
}

define i16 @f1_2(<8 x i16> %a) {
; CHECK-LABEL: f1_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgbm %v0, 0
; CHECK-NEXT:    vsumh %v1, %v24, %v0
; CHECK-NEXT:    vsumqf %v0, %v1, %v0
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
  ret i16 %redadd
}

define i32 @f1_3(<4 x i32> %a) {
; CHECK-LABEL: f1_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vgbm %v0, 0
; CHECK-NEXT:    vsumqf %v0, %v24, %v0
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
  ret i32 %redadd
}

define i64 @f1_4(<2 x i64> %a) {
; CHECK-LABEL: f1_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepg %v0, %v24, 1
; CHECK-NEXT:    vag %v0, %v24, %v0
; CHECK-NEXT:    vlgvg %r2, %v0, 0
; CHECK-NEXT:    br %r14
  %redadd = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
  ret i64 %redadd
}

define i128 @f1_5(<1 x i128> %a) {
; CHECK-LABEL: f1_5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vst %v24, 0(%r2), 3
; CHECK-NEXT:    br %r14
  %redadd = call i128 @llvm.vector.reduce.add.v1i128(<1 x i128> %a)
  ret i128 %redadd
}

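; Reductions over two vector registers (%v24 and %v26): the halves are
; added together first; the <2 x i128> input is passed in memory instead.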
define i8 @f2_1(<32 x i8> %a) {
; CHECK-LABEL: f2_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vab %v0, %v24, %v26
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumb %v0, %v0, %v1
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a)
  ret i8 %redadd
}

define i16 @f2_2(<16 x i16> %a) {
; CHECK-LABEL: f2_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vah %v0, %v24, %v26
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumh %v0, %v0, %v1
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a)
  ret i16 %redadd
}

define i32 @f2_3(<8 x i32> %a) {
; CHECK-LABEL: f2_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaf %v0, %v24, %v26
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
  ret i32 %redadd
}

define i64 @f2_4(<4 x i64> %a) {
; CHECK-LABEL: f2_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vag %v0, %v24, %v26
; CHECK-NEXT:    vrepg %v1, %v0, 1
; CHECK-NEXT:    vag %v0, %v0, %v1
; CHECK-NEXT:    vlgvg %r2, %v0, 0
; CHECK-NEXT:    br %r14
  %redadd = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a)
  ret i64 %redadd
}

define i128 @f2_5(<2 x i128> %a) {
; CHECK-LABEL: f2_5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl %v0, 16(%r3), 3
; CHECK-NEXT:    vl %v1, 0(%r3), 3
; CHECK-NEXT:    vaq %v0, %v1, %v0
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
  %redadd = call i128 @llvm.vector.reduce.add.v2i128(<2 x i128> %a)
  ret i128 %redadd
}

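; Reductions over four vector registers (%v24, %v26, %v28, %v30): pairwise
; adds narrow the input to one register; <4 x i128> is loaded from memory.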
define i8 @f3_1(<64 x i8> %a) {
; CHECK-LABEL: f3_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vab %v0, %v26, %v30
; CHECK-NEXT:    vab %v1, %v24, %v28
; CHECK-NEXT:    vab %v0, %v1, %v0
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumb %v0, %v0, %v1
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> %a)
  ret i8 %redadd
}

define i16 @f3_2(<32 x i16> %a) {
; CHECK-LABEL: f3_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vah %v0, %v26, %v30
; CHECK-NEXT:    vah %v1, %v24, %v28
; CHECK-NEXT:    vah %v0, %v1, %v0
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumh %v0, %v0, %v1
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i16 @llvm.vector.reduce.add.v32i16(<32 x i16> %a)
  ret i16 %redadd
}

define i32 @f3_3(<16 x i32> %a) {
; CHECK-LABEL: f3_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaf %v0, %v26, %v30
; CHECK-NEXT:    vaf %v1, %v24, %v28
; CHECK-NEXT:    vaf %v0, %v1, %v0
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vsumqf %v0, %v0, %v1
; CHECK-NEXT:    vlgvf %r2, %v0, 3
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
  ret i32 %redadd
}

define i64 @f3_4(<8 x i64> %a) {
; CHECK-LABEL: f3_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vag %v0, %v26, %v30
; CHECK-NEXT:    vag %v1, %v24, %v28
; CHECK-NEXT:    vag %v0, %v1, %v0
; CHECK-NEXT:    vrepg %v1, %v0, 1
; CHECK-NEXT:    vag %v0, %v0, %v1
; CHECK-NEXT:    vlgvg %r2, %v0, 0
; CHECK-NEXT:    br %r14
  %redadd = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a)
  ret i64 %redadd
}

define i128 @f3_5(<4 x i128> %a) {
; CHECK-LABEL: f3_5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vl %v0, 32(%r3), 3
; CHECK-NEXT:    vl %v1, 0(%r3), 3
; CHECK-NEXT:    vl %v2, 48(%r3), 3
; CHECK-NEXT:    vl %v3, 16(%r3), 3
; CHECK-NEXT:    vaq %v2, %v3, %v2
; CHECK-NEXT:    vaq %v0, %v1, %v0
; CHECK-NEXT:    vaq %v0, %v0, %v2
; CHECK-NEXT:    vst %v0, 0(%r2), 3
; CHECK-NEXT:    br %r14
  %redadd = call i128 @llvm.vector.reduce.add.v4i128(<4 x i128> %a)
  ret i128 %redadd
}

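; Reductions of vectors shorter than one register: a shuffle-and-add tree
; (VPK*/VREP* plus VA*) sums the defined lanes and VLGV* extracts the
; result; the <1 x i64> reduction is just an element extraction.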
define i8 @f4_1(<8 x i8> %a) {
; CHECK-LABEL: f4_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpkg %v0, %v24, %v24
; CHECK-NEXT:    vab %v0, %v24, %v0
; CHECK-NEXT:    vpkf %v1, %v0, %v0
; CHECK-NEXT:    vab %v0, %v0, %v1
; CHECK-NEXT:    vrepb %v1, %v0, 1
; CHECK-NEXT:    vab %v0, %v0, %v1
; CHECK-NEXT:    vlgvb %r2, %v0, 0
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
  ret i8 %redadd
}

define i16 @f4_2(<4 x i16> %a) {
; CHECK-LABEL: f4_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpkg %v0, %v24, %v24
; CHECK-NEXT:    vah %v0, %v24, %v0
; CHECK-NEXT:    vreph %v1, %v0, 1
; CHECK-NEXT:    vah %v0, %v0, %v1
; CHECK-NEXT:    vlgvh %r2, %v0, 0
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
  ret i16 %redadd
}

define i32 @f4_3(<2 x i32> %a) {
; CHECK-LABEL: f4_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrepf %v0, %v24, 1
; CHECK-NEXT:    vaf %v0, %v24, %v0
; CHECK-NEXT:    vlgvf %r2, %v0, 0
; CHECK-NEXT:    # kill: def $r2l killed $r2l killed $r2d
; CHECK-NEXT:    br %r14
  %redadd = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
  ret i32 %redadd
}

define i64 @f4_4(<1 x i64> %a) {
; CHECK-LABEL: f4_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vlgvg %r2, %v24, 0
; CHECK-NEXT:    br %r14
  %redadd = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> %a)
  ret i64 %redadd
}