xref: /llvm-project/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll (revision a2b5117df75d1be8a65b2a86d5f75a59a8565fe6)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7; RUN:   -mcpu=pwr10 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR10LE
8; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
9; RUN:   -mcpu=pwr10 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR10BE
10
11;;
12;; Vectors of i8
13;;
;; v2i8: returns llvm.vector.reduce.add of the <2 x i8> argument as a plain i8
;; (no signext/zeroext on the return value).
;; Every prefix expects a single splat+add step (vspltb / vaddubm) followed by
;; a byte extract into r3: vextubrx on little-endian, vextublx on big-endian.
14define dso_local i8 @v2i8(<2 x i8> %a) local_unnamed_addr #0 {
15; PWR9LE-LABEL: v2i8:
16; PWR9LE:       # %bb.0: # %entry
17; PWR9LE-NEXT:    vspltb v3, v2, 14
18; PWR9LE-NEXT:    li r3, 0
19; PWR9LE-NEXT:    vaddubm v2, v2, v3
20; PWR9LE-NEXT:    vextubrx r3, r3, v2
21; PWR9LE-NEXT:    blr
22;
23; PWR9BE-LABEL: v2i8:
24; PWR9BE:       # %bb.0: # %entry
25; PWR9BE-NEXT:    vspltb v3, v2, 1
26; PWR9BE-NEXT:    li r3, 0
27; PWR9BE-NEXT:    vaddubm v2, v2, v3
28; PWR9BE-NEXT:    vextublx r3, r3, v2
29; PWR9BE-NEXT:    blr
30;
31; PWR10LE-LABEL: v2i8:
32; PWR10LE:       # %bb.0: # %entry
33; PWR10LE-NEXT:    vspltb v3, v2, 14
34; PWR10LE-NEXT:    li r3, 0
35; PWR10LE-NEXT:    vaddubm v2, v2, v3
36; PWR10LE-NEXT:    vextubrx r3, r3, v2
37; PWR10LE-NEXT:    blr
38;
39; PWR10BE-LABEL: v2i8:
40; PWR10BE:       # %bb.0: # %entry
41; PWR10BE-NEXT:    vspltb v3, v2, 1
42; PWR10BE-NEXT:    li r3, 0
43; PWR10BE-NEXT:    vaddubm v2, v2, v3
44; PWR10BE-NEXT:    vextublx r3, r3, v2
45; PWR10BE-NEXT:    blr
46entry:
47  %0 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a)
48  ret i8 %0
49}
50
;; v4i8: returns llvm.vector.reduce.add of the <4 x i8> argument as a plain i8.
;; Every prefix expects two splat+add steps (vsplth, then vspltb, each followed
;; by vaddubm) and a final byte extract (vextubrx on LE, vextublx on BE).
51define dso_local i8 @v4i8(<4 x i8> %a) local_unnamed_addr #0 {
52; PWR9LE-LABEL: v4i8:
53; PWR9LE:       # %bb.0: # %entry
54; PWR9LE-NEXT:    vsplth v3, v2, 6
55; PWR9LE-NEXT:    li r3, 0
56; PWR9LE-NEXT:    vaddubm v2, v2, v3
57; PWR9LE-NEXT:    vspltb v3, v2, 14
58; PWR9LE-NEXT:    vaddubm v2, v2, v3
59; PWR9LE-NEXT:    vextubrx r3, r3, v2
60; PWR9LE-NEXT:    blr
61;
62; PWR9BE-LABEL: v4i8:
63; PWR9BE:       # %bb.0: # %entry
64; PWR9BE-NEXT:    vsplth v3, v2, 1
65; PWR9BE-NEXT:    li r3, 0
66; PWR9BE-NEXT:    vaddubm v2, v2, v3
67; PWR9BE-NEXT:    vspltb v3, v2, 1
68; PWR9BE-NEXT:    vaddubm v2, v2, v3
69; PWR9BE-NEXT:    vextublx r3, r3, v2
70; PWR9BE-NEXT:    blr
71;
72; PWR10LE-LABEL: v4i8:
73; PWR10LE:       # %bb.0: # %entry
74; PWR10LE-NEXT:    vsplth v3, v2, 6
75; PWR10LE-NEXT:    li r3, 0
76; PWR10LE-NEXT:    vaddubm v2, v2, v3
77; PWR10LE-NEXT:    vspltb v3, v2, 14
78; PWR10LE-NEXT:    vaddubm v2, v2, v3
79; PWR10LE-NEXT:    vextubrx r3, r3, v2
80; PWR10LE-NEXT:    blr
81;
82; PWR10BE-LABEL: v4i8:
83; PWR10BE:       # %bb.0: # %entry
84; PWR10BE-NEXT:    vsplth v3, v2, 1
85; PWR10BE-NEXT:    li r3, 0
86; PWR10BE-NEXT:    vaddubm v2, v2, v3
87; PWR10BE-NEXT:    vspltb v3, v2, 1
88; PWR10BE-NEXT:    vaddubm v2, v2, v3
89; PWR10BE-NEXT:    vextublx r3, r3, v2
90; PWR10BE-NEXT:    blr
91entry:
92  %0 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a)
93  ret i8 %0
94}
95
;; v8i8: returns llvm.vector.reduce.add of the <8 x i8> argument as a plain i8.
;; Every prefix expects three splat+add steps (xxspltw, vsplth, vspltb, each
;; followed by vaddubm) and a final byte extract (vextubrx on LE, vextublx on BE).
96define dso_local i8 @v8i8(<8 x i8> %a) local_unnamed_addr #0 {
97; PWR9LE-LABEL: v8i8:
98; PWR9LE:       # %bb.0: # %entry
99; PWR9LE-NEXT:    xxspltw v3, v2, 2
100; PWR9LE-NEXT:    li r3, 0
101; PWR9LE-NEXT:    vaddubm v2, v2, v3
102; PWR9LE-NEXT:    vsplth v3, v2, 6
103; PWR9LE-NEXT:    vaddubm v2, v2, v3
104; PWR9LE-NEXT:    vspltb v3, v2, 14
105; PWR9LE-NEXT:    vaddubm v2, v2, v3
106; PWR9LE-NEXT:    vextubrx r3, r3, v2
107; PWR9LE-NEXT:    blr
108;
109; PWR9BE-LABEL: v8i8:
110; PWR9BE:       # %bb.0: # %entry
111; PWR9BE-NEXT:    xxspltw v3, v2, 1
112; PWR9BE-NEXT:    li r3, 0
113; PWR9BE-NEXT:    vaddubm v2, v2, v3
114; PWR9BE-NEXT:    vsplth v3, v2, 1
115; PWR9BE-NEXT:    vaddubm v2, v2, v3
116; PWR9BE-NEXT:    vspltb v3, v2, 1
117; PWR9BE-NEXT:    vaddubm v2, v2, v3
118; PWR9BE-NEXT:    vextublx r3, r3, v2
119; PWR9BE-NEXT:    blr
120;
121; PWR10LE-LABEL: v8i8:
122; PWR10LE:       # %bb.0: # %entry
123; PWR10LE-NEXT:    xxspltw v3, v2, 2
124; PWR10LE-NEXT:    li r3, 0
125; PWR10LE-NEXT:    vaddubm v2, v2, v3
126; PWR10LE-NEXT:    vsplth v3, v2, 6
127; PWR10LE-NEXT:    vaddubm v2, v2, v3
128; PWR10LE-NEXT:    vspltb v3, v2, 14
129; PWR10LE-NEXT:    vaddubm v2, v2, v3
130; PWR10LE-NEXT:    vextubrx r3, r3, v2
131; PWR10LE-NEXT:    blr
132;
133; PWR10BE-LABEL: v8i8:
134; PWR10BE:       # %bb.0: # %entry
135; PWR10BE-NEXT:    xxspltw v3, v2, 1
136; PWR10BE-NEXT:    li r3, 0
137; PWR10BE-NEXT:    vaddubm v2, v2, v3
138; PWR10BE-NEXT:    vsplth v3, v2, 1
139; PWR10BE-NEXT:    vaddubm v2, v2, v3
140; PWR10BE-NEXT:    vspltb v3, v2, 1
141; PWR10BE-NEXT:    vaddubm v2, v2, v3
142; PWR10BE-NEXT:    vextublx r3, r3, v2
143; PWR10BE-NEXT:    blr
144entry:
145  %0 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
146  ret i8 %0
147}
148
;; v16i8_sign: returns llvm.vector.reduce.add of the <16 x i8> argument with a
;; signext i8 return.
;; Every prefix expects four permute+add steps (xxswapd, xxspltw, vsplth,
;; vspltb, each followed by vaddubm), a byte extract (vextubrx on LE,
;; vextublx on BE), and then extsb on r3 for the signext return.
149define dso_local signext i8 @v16i8_sign(<16 x i8> %a) local_unnamed_addr #0 {
150; PWR9LE-LABEL: v16i8_sign:
151; PWR9LE:       # %bb.0: # %entry
152; PWR9LE-NEXT:    xxswapd v3, v2
153; PWR9LE-NEXT:    li r3, 0
154; PWR9LE-NEXT:    vaddubm v2, v2, v3
155; PWR9LE-NEXT:    xxspltw v3, v2, 2
156; PWR9LE-NEXT:    vaddubm v2, v2, v3
157; PWR9LE-NEXT:    vsplth v3, v2, 6
158; PWR9LE-NEXT:    vaddubm v2, v2, v3
159; PWR9LE-NEXT:    vspltb v3, v2, 14
160; PWR9LE-NEXT:    vaddubm v2, v2, v3
161; PWR9LE-NEXT:    vextubrx r3, r3, v2
162; PWR9LE-NEXT:    extsb r3, r3
163; PWR9LE-NEXT:    blr
164;
165; PWR9BE-LABEL: v16i8_sign:
166; PWR9BE:       # %bb.0: # %entry
167; PWR9BE-NEXT:    xxswapd v3, v2
168; PWR9BE-NEXT:    li r3, 0
169; PWR9BE-NEXT:    vaddubm v2, v2, v3
170; PWR9BE-NEXT:    xxspltw v3, v2, 1
171; PWR9BE-NEXT:    vaddubm v2, v2, v3
172; PWR9BE-NEXT:    vsplth v3, v2, 1
173; PWR9BE-NEXT:    vaddubm v2, v2, v3
174; PWR9BE-NEXT:    vspltb v3, v2, 1
175; PWR9BE-NEXT:    vaddubm v2, v2, v3
176; PWR9BE-NEXT:    vextublx r3, r3, v2
177; PWR9BE-NEXT:    extsb r3, r3
178; PWR9BE-NEXT:    blr
179;
180; PWR10LE-LABEL: v16i8_sign:
181; PWR10LE:       # %bb.0: # %entry
182; PWR10LE-NEXT:    xxswapd v3, v2
183; PWR10LE-NEXT:    li r3, 0
184; PWR10LE-NEXT:    vaddubm v2, v2, v3
185; PWR10LE-NEXT:    xxspltw v3, v2, 2
186; PWR10LE-NEXT:    vaddubm v2, v2, v3
187; PWR10LE-NEXT:    vsplth v3, v2, 6
188; PWR10LE-NEXT:    vaddubm v2, v2, v3
189; PWR10LE-NEXT:    vspltb v3, v2, 14
190; PWR10LE-NEXT:    vaddubm v2, v2, v3
191; PWR10LE-NEXT:    vextubrx r3, r3, v2
192; PWR10LE-NEXT:    extsb r3, r3
193; PWR10LE-NEXT:    blr
194;
195; PWR10BE-LABEL: v16i8_sign:
196; PWR10BE:       # %bb.0: # %entry
197; PWR10BE-NEXT:    xxswapd v3, v2
198; PWR10BE-NEXT:    li r3, 0
199; PWR10BE-NEXT:    vaddubm v2, v2, v3
200; PWR10BE-NEXT:    xxspltw v3, v2, 1
201; PWR10BE-NEXT:    vaddubm v2, v2, v3
202; PWR10BE-NEXT:    vsplth v3, v2, 1
203; PWR10BE-NEXT:    vaddubm v2, v2, v3
204; PWR10BE-NEXT:    vspltb v3, v2, 1
205; PWR10BE-NEXT:    vaddubm v2, v2, v3
206; PWR10BE-NEXT:    vextublx r3, r3, v2
207; PWR10BE-NEXT:    extsb r3, r3
208; PWR10BE-NEXT:    blr
209entry:
210  %0 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
211  ret i8 %0
212}
213
;; v16i8_zero: returns llvm.vector.reduce.add of the <16 x i8> argument with a
;; zeroext i8 return.
;; The expected sequence matches v16i8_sign (xxswapd, xxspltw, vsplth, vspltb,
;; each followed by vaddubm, then a byte extract), except that the final
;; instruction before blr is `clrldi r3, r3, 56` instead of extsb.
214define dso_local zeroext i8 @v16i8_zero(<16 x i8> %a) local_unnamed_addr #0 {
215; PWR9LE-LABEL: v16i8_zero:
216; PWR9LE:       # %bb.0: # %entry
217; PWR9LE-NEXT:    xxswapd v3, v2
218; PWR9LE-NEXT:    li r3, 0
219; PWR9LE-NEXT:    vaddubm v2, v2, v3
220; PWR9LE-NEXT:    xxspltw v3, v2, 2
221; PWR9LE-NEXT:    vaddubm v2, v2, v3
222; PWR9LE-NEXT:    vsplth v3, v2, 6
223; PWR9LE-NEXT:    vaddubm v2, v2, v3
224; PWR9LE-NEXT:    vspltb v3, v2, 14
225; PWR9LE-NEXT:    vaddubm v2, v2, v3
226; PWR9LE-NEXT:    vextubrx r3, r3, v2
227; PWR9LE-NEXT:    clrldi r3, r3, 56
228; PWR9LE-NEXT:    blr
229;
230; PWR9BE-LABEL: v16i8_zero:
231; PWR9BE:       # %bb.0: # %entry
232; PWR9BE-NEXT:    xxswapd v3, v2
233; PWR9BE-NEXT:    li r3, 0
234; PWR9BE-NEXT:    vaddubm v2, v2, v3
235; PWR9BE-NEXT:    xxspltw v3, v2, 1
236; PWR9BE-NEXT:    vaddubm v2, v2, v3
237; PWR9BE-NEXT:    vsplth v3, v2, 1
238; PWR9BE-NEXT:    vaddubm v2, v2, v3
239; PWR9BE-NEXT:    vspltb v3, v2, 1
240; PWR9BE-NEXT:    vaddubm v2, v2, v3
241; PWR9BE-NEXT:    vextublx r3, r3, v2
242; PWR9BE-NEXT:    clrldi r3, r3, 56
243; PWR9BE-NEXT:    blr
244;
245; PWR10LE-LABEL: v16i8_zero:
246; PWR10LE:       # %bb.0: # %entry
247; PWR10LE-NEXT:    xxswapd v3, v2
248; PWR10LE-NEXT:    li r3, 0
249; PWR10LE-NEXT:    vaddubm v2, v2, v3
250; PWR10LE-NEXT:    xxspltw v3, v2, 2
251; PWR10LE-NEXT:    vaddubm v2, v2, v3
252; PWR10LE-NEXT:    vsplth v3, v2, 6
253; PWR10LE-NEXT:    vaddubm v2, v2, v3
254; PWR10LE-NEXT:    vspltb v3, v2, 14
255; PWR10LE-NEXT:    vaddubm v2, v2, v3
256; PWR10LE-NEXT:    vextubrx r3, r3, v2
257; PWR10LE-NEXT:    clrldi r3, r3, 56
258; PWR10LE-NEXT:    blr
259;
260; PWR10BE-LABEL: v16i8_zero:
261; PWR10BE:       # %bb.0: # %entry
262; PWR10BE-NEXT:    xxswapd v3, v2
263; PWR10BE-NEXT:    li r3, 0
264; PWR10BE-NEXT:    vaddubm v2, v2, v3
265; PWR10BE-NEXT:    xxspltw v3, v2, 1
266; PWR10BE-NEXT:    vaddubm v2, v2, v3
267; PWR10BE-NEXT:    vsplth v3, v2, 1
268; PWR10BE-NEXT:    vaddubm v2, v2, v3
269; PWR10BE-NEXT:    vspltb v3, v2, 1
270; PWR10BE-NEXT:    vaddubm v2, v2, v3
271; PWR10BE-NEXT:    vextublx r3, r3, v2
272; PWR10BE-NEXT:    clrldi r3, r3, 56
273; PWR10BE-NEXT:    blr
274entry:
275  %0 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
276  ret i8 %0
277}
278
;; v32i8: returns llvm.vector.reduce.add of the <32 x i8> argument as a plain i8.
;; The expected code starts with `vaddubm v2, v2, v3` (presumably combining the
;; two registers that carry the 32-byte argument -- confirm against the ABI),
;; then runs the same four permute+add steps as the <16 x i8> tests and ends
;; with a byte extract; no extsb/clrldi follows the extract.
279define dso_local i8 @v32i8(<32 x i8> %a) local_unnamed_addr #0 {
280; PWR9LE-LABEL: v32i8:
281; PWR9LE:       # %bb.0: # %entry
282; PWR9LE-NEXT:    vaddubm v2, v2, v3
283; PWR9LE-NEXT:    li r3, 0
284; PWR9LE-NEXT:    xxswapd v3, v2
285; PWR9LE-NEXT:    vaddubm v2, v2, v3
286; PWR9LE-NEXT:    xxspltw v3, v2, 2
287; PWR9LE-NEXT:    vaddubm v2, v2, v3
288; PWR9LE-NEXT:    vsplth v3, v2, 6
289; PWR9LE-NEXT:    vaddubm v2, v2, v3
290; PWR9LE-NEXT:    vspltb v3, v2, 14
291; PWR9LE-NEXT:    vaddubm v2, v2, v3
292; PWR9LE-NEXT:    vextubrx r3, r3, v2
293; PWR9LE-NEXT:    blr
294;
295; PWR9BE-LABEL: v32i8:
296; PWR9BE:       # %bb.0: # %entry
297; PWR9BE-NEXT:    vaddubm v2, v2, v3
298; PWR9BE-NEXT:    li r3, 0
299; PWR9BE-NEXT:    xxswapd v3, v2
300; PWR9BE-NEXT:    vaddubm v2, v2, v3
301; PWR9BE-NEXT:    xxspltw v3, v2, 1
302; PWR9BE-NEXT:    vaddubm v2, v2, v3
303; PWR9BE-NEXT:    vsplth v3, v2, 1
304; PWR9BE-NEXT:    vaddubm v2, v2, v3
305; PWR9BE-NEXT:    vspltb v3, v2, 1
306; PWR9BE-NEXT:    vaddubm v2, v2, v3
307; PWR9BE-NEXT:    vextublx r3, r3, v2
308; PWR9BE-NEXT:    blr
309;
310; PWR10LE-LABEL: v32i8:
311; PWR10LE:       # %bb.0: # %entry
312; PWR10LE-NEXT:    vaddubm v2, v2, v3
313; PWR10LE-NEXT:    li r3, 0
314; PWR10LE-NEXT:    xxswapd v3, v2
315; PWR10LE-NEXT:    vaddubm v2, v2, v3
316; PWR10LE-NEXT:    xxspltw v3, v2, 2
317; PWR10LE-NEXT:    vaddubm v2, v2, v3
318; PWR10LE-NEXT:    vsplth v3, v2, 6
319; PWR10LE-NEXT:    vaddubm v2, v2, v3
320; PWR10LE-NEXT:    vspltb v3, v2, 14
321; PWR10LE-NEXT:    vaddubm v2, v2, v3
322; PWR10LE-NEXT:    vextubrx r3, r3, v2
323; PWR10LE-NEXT:    blr
324;
325; PWR10BE-LABEL: v32i8:
326; PWR10BE:       # %bb.0: # %entry
327; PWR10BE-NEXT:    vaddubm v2, v2, v3
328; PWR10BE-NEXT:    li r3, 0
329; PWR10BE-NEXT:    xxswapd v3, v2
330; PWR10BE-NEXT:    vaddubm v2, v2, v3
331; PWR10BE-NEXT:    xxspltw v3, v2, 1
332; PWR10BE-NEXT:    vaddubm v2, v2, v3
333; PWR10BE-NEXT:    vsplth v3, v2, 1
334; PWR10BE-NEXT:    vaddubm v2, v2, v3
335; PWR10BE-NEXT:    vspltb v3, v2, 1
336; PWR10BE-NEXT:    vaddubm v2, v2, v3
337; PWR10BE-NEXT:    vextublx r3, r3, v2
338; PWR10BE-NEXT:    blr
339entry:
340  %0 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %a)
341  ret i8 %0
342}
343
344declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>) #0
345declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>) #0
346declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>) #0
347declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>) #0
348declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>) #0
349
350;;
351;; Vectors of i16
352;;
;; v2i16: returns llvm.vector.reduce.add of the <2 x i16> argument as a plain i16.
;; Every prefix expects a single splat+add step (vsplth / vadduhm) followed by
;; a halfword extract into r3: vextuhrx on little-endian, vextuhlx on big-endian.
353define dso_local i16 @v2i16(<2 x i16> %a) local_unnamed_addr #0 {
354; PWR9LE-LABEL: v2i16:
355; PWR9LE:       # %bb.0: # %entry
356; PWR9LE-NEXT:    vsplth v3, v2, 6
357; PWR9LE-NEXT:    li r3, 0
358; PWR9LE-NEXT:    vadduhm v2, v2, v3
359; PWR9LE-NEXT:    vextuhrx r3, r3, v2
360; PWR9LE-NEXT:    blr
361;
362; PWR9BE-LABEL: v2i16:
363; PWR9BE:       # %bb.0: # %entry
364; PWR9BE-NEXT:    vsplth v3, v2, 1
365; PWR9BE-NEXT:    li r3, 0
366; PWR9BE-NEXT:    vadduhm v2, v2, v3
367; PWR9BE-NEXT:    vextuhlx r3, r3, v2
368; PWR9BE-NEXT:    blr
369;
370; PWR10LE-LABEL: v2i16:
371; PWR10LE:       # %bb.0: # %entry
372; PWR10LE-NEXT:    vsplth v3, v2, 6
373; PWR10LE-NEXT:    li r3, 0
374; PWR10LE-NEXT:    vadduhm v2, v2, v3
375; PWR10LE-NEXT:    vextuhrx r3, r3, v2
376; PWR10LE-NEXT:    blr
377;
378; PWR10BE-LABEL: v2i16:
379; PWR10BE:       # %bb.0: # %entry
380; PWR10BE-NEXT:    vsplth v3, v2, 1
381; PWR10BE-NEXT:    li r3, 0
382; PWR10BE-NEXT:    vadduhm v2, v2, v3
383; PWR10BE-NEXT:    vextuhlx r3, r3, v2
384; PWR10BE-NEXT:    blr
385entry:
386  %0 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a)
387  ret i16 %0
388}
389
;; v4i16: returns llvm.vector.reduce.add of the <4 x i16> argument as a plain i16.
;; Every prefix expects two splat+add steps (xxspltw, then vsplth, each followed
;; by vadduhm) and a final halfword extract (vextuhrx on LE, vextuhlx on BE).
390define dso_local i16 @v4i16(<4 x i16> %a) local_unnamed_addr #0 {
391; PWR9LE-LABEL: v4i16:
392; PWR9LE:       # %bb.0: # %entry
393; PWR9LE-NEXT:    xxspltw v3, v2, 2
394; PWR9LE-NEXT:    li r3, 0
395; PWR9LE-NEXT:    vadduhm v2, v2, v3
396; PWR9LE-NEXT:    vsplth v3, v2, 6
397; PWR9LE-NEXT:    vadduhm v2, v2, v3
398; PWR9LE-NEXT:    vextuhrx r3, r3, v2
399; PWR9LE-NEXT:    blr
400;
401; PWR9BE-LABEL: v4i16:
402; PWR9BE:       # %bb.0: # %entry
403; PWR9BE-NEXT:    xxspltw v3, v2, 1
404; PWR9BE-NEXT:    li r3, 0
405; PWR9BE-NEXT:    vadduhm v2, v2, v3
406; PWR9BE-NEXT:    vsplth v3, v2, 1
407; PWR9BE-NEXT:    vadduhm v2, v2, v3
408; PWR9BE-NEXT:    vextuhlx r3, r3, v2
409; PWR9BE-NEXT:    blr
410;
411; PWR10LE-LABEL: v4i16:
412; PWR10LE:       # %bb.0: # %entry
413; PWR10LE-NEXT:    xxspltw v3, v2, 2
414; PWR10LE-NEXT:    li r3, 0
415; PWR10LE-NEXT:    vadduhm v2, v2, v3
416; PWR10LE-NEXT:    vsplth v3, v2, 6
417; PWR10LE-NEXT:    vadduhm v2, v2, v3
418; PWR10LE-NEXT:    vextuhrx r3, r3, v2
419; PWR10LE-NEXT:    blr
420;
421; PWR10BE-LABEL: v4i16:
422; PWR10BE:       # %bb.0: # %entry
423; PWR10BE-NEXT:    xxspltw v3, v2, 1
424; PWR10BE-NEXT:    li r3, 0
425; PWR10BE-NEXT:    vadduhm v2, v2, v3
426; PWR10BE-NEXT:    vsplth v3, v2, 1
427; PWR10BE-NEXT:    vadduhm v2, v2, v3
428; PWR10BE-NEXT:    vextuhlx r3, r3, v2
429; PWR10BE-NEXT:    blr
430entry:
431  %0 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
432  ret i16 %0
433}
434
;; v8i16: returns llvm.vector.reduce.add of the <8 x i16> argument as a plain i16.
;; Every prefix expects three permute+add steps (xxswapd, xxspltw, vsplth, each
;; followed by vadduhm) and a final halfword extract (vextuhrx on LE,
;; vextuhlx on BE).
435define dso_local i16 @v8i16(<8 x i16> %a) local_unnamed_addr #0 {
436; PWR9LE-LABEL: v8i16:
437; PWR9LE:       # %bb.0: # %entry
438; PWR9LE-NEXT:    xxswapd v3, v2
439; PWR9LE-NEXT:    li r3, 0
440; PWR9LE-NEXT:    vadduhm v2, v2, v3
441; PWR9LE-NEXT:    xxspltw v3, v2, 2
442; PWR9LE-NEXT:    vadduhm v2, v2, v3
443; PWR9LE-NEXT:    vsplth v3, v2, 6
444; PWR9LE-NEXT:    vadduhm v2, v2, v3
445; PWR9LE-NEXT:    vextuhrx r3, r3, v2
446; PWR9LE-NEXT:    blr
447;
448; PWR9BE-LABEL: v8i16:
449; PWR9BE:       # %bb.0: # %entry
450; PWR9BE-NEXT:    xxswapd v3, v2
451; PWR9BE-NEXT:    li r3, 0
452; PWR9BE-NEXT:    vadduhm v2, v2, v3
453; PWR9BE-NEXT:    xxspltw v3, v2, 1
454; PWR9BE-NEXT:    vadduhm v2, v2, v3
455; PWR9BE-NEXT:    vsplth v3, v2, 1
456; PWR9BE-NEXT:    vadduhm v2, v2, v3
457; PWR9BE-NEXT:    vextuhlx r3, r3, v2
458; PWR9BE-NEXT:    blr
459;
460; PWR10LE-LABEL: v8i16:
461; PWR10LE:       # %bb.0: # %entry
462; PWR10LE-NEXT:    xxswapd v3, v2
463; PWR10LE-NEXT:    li r3, 0
464; PWR10LE-NEXT:    vadduhm v2, v2, v3
465; PWR10LE-NEXT:    xxspltw v3, v2, 2
466; PWR10LE-NEXT:    vadduhm v2, v2, v3
467; PWR10LE-NEXT:    vsplth v3, v2, 6
468; PWR10LE-NEXT:    vadduhm v2, v2, v3
469; PWR10LE-NEXT:    vextuhrx r3, r3, v2
470; PWR10LE-NEXT:    blr
471;
472; PWR10BE-LABEL: v8i16:
473; PWR10BE:       # %bb.0: # %entry
474; PWR10BE-NEXT:    xxswapd v3, v2
475; PWR10BE-NEXT:    li r3, 0
476; PWR10BE-NEXT:    vadduhm v2, v2, v3
477; PWR10BE-NEXT:    xxspltw v3, v2, 1
478; PWR10BE-NEXT:    vadduhm v2, v2, v3
479; PWR10BE-NEXT:    vsplth v3, v2, 1
480; PWR10BE-NEXT:    vadduhm v2, v2, v3
481; PWR10BE-NEXT:    vextuhlx r3, r3, v2
482; PWR10BE-NEXT:    blr
483entry:
484  %0 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
485  ret i16 %0
486}
487
;; v16i16: returns llvm.vector.reduce.add of the <16 x i16> argument with a
;; zeroext i16 return.
;; The expected code starts with `vadduhm v2, v2, v3` (presumably combining the
;; two registers that carry the 32-byte argument -- confirm against the ABI),
;; then runs three permute+add steps (xxswapd, xxspltw, vsplth), extracts a
;; halfword, and finishes with `clrldi r3, r3, 48` for the zeroext return.
488define dso_local zeroext i16 @v16i16(<16 x i16> %a) local_unnamed_addr #0 {
489; PWR9LE-LABEL: v16i16:
490; PWR9LE:       # %bb.0: # %entry
491; PWR9LE-NEXT:    vadduhm v2, v2, v3
492; PWR9LE-NEXT:    li r3, 0
493; PWR9LE-NEXT:    xxswapd v3, v2
494; PWR9LE-NEXT:    vadduhm v2, v2, v3
495; PWR9LE-NEXT:    xxspltw v3, v2, 2
496; PWR9LE-NEXT:    vadduhm v2, v2, v3
497; PWR9LE-NEXT:    vsplth v3, v2, 6
498; PWR9LE-NEXT:    vadduhm v2, v2, v3
499; PWR9LE-NEXT:    vextuhrx r3, r3, v2
500; PWR9LE-NEXT:    clrldi r3, r3, 48
501; PWR9LE-NEXT:    blr
502;
503; PWR9BE-LABEL: v16i16:
504; PWR9BE:       # %bb.0: # %entry
505; PWR9BE-NEXT:    vadduhm v2, v2, v3
506; PWR9BE-NEXT:    li r3, 0
507; PWR9BE-NEXT:    xxswapd v3, v2
508; PWR9BE-NEXT:    vadduhm v2, v2, v3
509; PWR9BE-NEXT:    xxspltw v3, v2, 1
510; PWR9BE-NEXT:    vadduhm v2, v2, v3
511; PWR9BE-NEXT:    vsplth v3, v2, 1
512; PWR9BE-NEXT:    vadduhm v2, v2, v3
513; PWR9BE-NEXT:    vextuhlx r3, r3, v2
514; PWR9BE-NEXT:    clrldi r3, r3, 48
515; PWR9BE-NEXT:    blr
516;
517; PWR10LE-LABEL: v16i16:
518; PWR10LE:       # %bb.0: # %entry
519; PWR10LE-NEXT:    vadduhm v2, v2, v3
520; PWR10LE-NEXT:    li r3, 0
521; PWR10LE-NEXT:    xxswapd v3, v2
522; PWR10LE-NEXT:    vadduhm v2, v2, v3
523; PWR10LE-NEXT:    xxspltw v3, v2, 2
524; PWR10LE-NEXT:    vadduhm v2, v2, v3
525; PWR10LE-NEXT:    vsplth v3, v2, 6
526; PWR10LE-NEXT:    vadduhm v2, v2, v3
527; PWR10LE-NEXT:    vextuhrx r3, r3, v2
528; PWR10LE-NEXT:    clrldi r3, r3, 48
529; PWR10LE-NEXT:    blr
530;
531; PWR10BE-LABEL: v16i16:
532; PWR10BE:       # %bb.0: # %entry
533; PWR10BE-NEXT:    vadduhm v2, v2, v3
534; PWR10BE-NEXT:    li r3, 0
535; PWR10BE-NEXT:    xxswapd v3, v2
536; PWR10BE-NEXT:    vadduhm v2, v2, v3
537; PWR10BE-NEXT:    xxspltw v3, v2, 1
538; PWR10BE-NEXT:    vadduhm v2, v2, v3
539; PWR10BE-NEXT:    vsplth v3, v2, 1
540; PWR10BE-NEXT:    vadduhm v2, v2, v3
541; PWR10BE-NEXT:    vextuhlx r3, r3, v2
542; PWR10BE-NEXT:    clrldi r3, r3, 48
543; PWR10BE-NEXT:    blr
544entry:
545  %0 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a)
546  ret i16 %0
547}
548
;; v16i8tov16i16_sign: sign-extends the <16 x i8> argument to <16 x i16>, then
;; returns llvm.vector.reduce.add of the widened vector as a signext i16.
;; Expected code: vmrghb/vmrglb of v2 with itself, followed by vslh + vsrah by
;; a splatted shift amount in v4, then the usual vadduhm reduction steps, a
;; halfword extract, and extsh for the signext return.
;; The shift amount is built with `vspltish v4, 8` on pwr9 and with
;; `xxspltiw v4, 524296` (0x00080008) on pwr10; the position of `li r3, 0`
;; relative to the second merge also differs between the two CPUs.
549define dso_local signext i16 @v16i8tov16i16_sign(<16 x i8> %a) local_unnamed_addr #0 {
550; PWR9LE-LABEL: v16i8tov16i16_sign:
551; PWR9LE:       # %bb.0: # %entry
552; PWR9LE-NEXT:    vmrghb v3, v2, v2
553; PWR9LE-NEXT:    vspltish v4, 8
554; PWR9LE-NEXT:    li r3, 0
555; PWR9LE-NEXT:    vmrglb v2, v2, v2
556; PWR9LE-NEXT:    vslh v3, v3, v4
557; PWR9LE-NEXT:    vslh v2, v2, v4
558; PWR9LE-NEXT:    vsrah v3, v3, v4
559; PWR9LE-NEXT:    vsrah v2, v2, v4
560; PWR9LE-NEXT:    vadduhm v2, v2, v3
561; PWR9LE-NEXT:    xxswapd v3, v2
562; PWR9LE-NEXT:    vadduhm v2, v2, v3
563; PWR9LE-NEXT:    xxspltw v3, v2, 2
564; PWR9LE-NEXT:    vadduhm v2, v2, v3
565; PWR9LE-NEXT:    vsplth v3, v2, 6
566; PWR9LE-NEXT:    vadduhm v2, v2, v3
567; PWR9LE-NEXT:    vextuhrx r3, r3, v2
568; PWR9LE-NEXT:    extsh r3, r3
569; PWR9LE-NEXT:    blr
570;
571; PWR9BE-LABEL: v16i8tov16i16_sign:
572; PWR9BE:       # %bb.0: # %entry
573; PWR9BE-NEXT:    vmrglb v3, v2, v2
574; PWR9BE-NEXT:    vspltish v4, 8
575; PWR9BE-NEXT:    li r3, 0
576; PWR9BE-NEXT:    vmrghb v2, v2, v2
577; PWR9BE-NEXT:    vslh v3, v3, v4
578; PWR9BE-NEXT:    vslh v2, v2, v4
579; PWR9BE-NEXT:    vsrah v3, v3, v4
580; PWR9BE-NEXT:    vsrah v2, v2, v4
581; PWR9BE-NEXT:    vadduhm v2, v2, v3
582; PWR9BE-NEXT:    xxswapd v3, v2
583; PWR9BE-NEXT:    vadduhm v2, v2, v3
584; PWR9BE-NEXT:    xxspltw v3, v2, 1
585; PWR9BE-NEXT:    vadduhm v2, v2, v3
586; PWR9BE-NEXT:    vsplth v3, v2, 1
587; PWR9BE-NEXT:    vadduhm v2, v2, v3
588; PWR9BE-NEXT:    vextuhlx r3, r3, v2
589; PWR9BE-NEXT:    extsh r3, r3
590; PWR9BE-NEXT:    blr
591;
592; PWR10LE-LABEL: v16i8tov16i16_sign:
593; PWR10LE:       # %bb.0: # %entry
594; PWR10LE-NEXT:    vmrghb v3, v2, v2
595; PWR10LE-NEXT:    xxspltiw v4, 524296
596; PWR10LE-NEXT:    vmrglb v2, v2, v2
597; PWR10LE-NEXT:    li r3, 0
598; PWR10LE-NEXT:    vslh v3, v3, v4
599; PWR10LE-NEXT:    vslh v2, v2, v4
600; PWR10LE-NEXT:    vsrah v3, v3, v4
601; PWR10LE-NEXT:    vsrah v2, v2, v4
602; PWR10LE-NEXT:    vadduhm v2, v2, v3
603; PWR10LE-NEXT:    xxswapd v3, v2
604; PWR10LE-NEXT:    vadduhm v2, v2, v3
605; PWR10LE-NEXT:    xxspltw v3, v2, 2
606; PWR10LE-NEXT:    vadduhm v2, v2, v3
607; PWR10LE-NEXT:    vsplth v3, v2, 6
608; PWR10LE-NEXT:    vadduhm v2, v2, v3
609; PWR10LE-NEXT:    vextuhrx r3, r3, v2
610; PWR10LE-NEXT:    extsh r3, r3
611; PWR10LE-NEXT:    blr
612;
613; PWR10BE-LABEL: v16i8tov16i16_sign:
614; PWR10BE:       # %bb.0: # %entry
615; PWR10BE-NEXT:    vmrglb v3, v2, v2
616; PWR10BE-NEXT:    xxspltiw v4, 524296
617; PWR10BE-NEXT:    vmrghb v2, v2, v2
618; PWR10BE-NEXT:    li r3, 0
619; PWR10BE-NEXT:    vslh v3, v3, v4
620; PWR10BE-NEXT:    vslh v2, v2, v4
621; PWR10BE-NEXT:    vsrah v3, v3, v4
622; PWR10BE-NEXT:    vsrah v2, v2, v4
623; PWR10BE-NEXT:    vadduhm v2, v2, v3
624; PWR10BE-NEXT:    xxswapd v3, v2
625; PWR10BE-NEXT:    vadduhm v2, v2, v3
626; PWR10BE-NEXT:    xxspltw v3, v2, 1
627; PWR10BE-NEXT:    vadduhm v2, v2, v3
628; PWR10BE-NEXT:    vsplth v3, v2, 1
629; PWR10BE-NEXT:    vadduhm v2, v2, v3
630; PWR10BE-NEXT:    vextuhlx r3, r3, v2
631; PWR10BE-NEXT:    extsh r3, r3
632; PWR10BE-NEXT:    blr
633entry:
634  %0 = sext <16 x i8> %a to <16 x i16>
635  %1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %0)
636  ret i16 %1
637}
638
;; v16i8tov16i16_zero: zero-extends the <16 x i8> argument to <16 x i16>, then
;; returns llvm.vector.reduce.add of the widened vector as a zeroext i16.
;; Expected code: v3 is cleared with xxlxor, vmrghb/vmrglb merge v3 with the
;; input bytes in v2, and the result goes through the usual vadduhm reduction
;; steps, a halfword extract, and `clrldi r3, r3, 48` for the zeroext return.
;; The pwr9 and pwr10 sequences are identical for matching endianness.
639define dso_local zeroext i16 @v16i8tov16i16_zero(<16 x i8> %a) local_unnamed_addr #0 {
640; PWR9LE-LABEL: v16i8tov16i16_zero:
641; PWR9LE:       # %bb.0: # %entry
642; PWR9LE-NEXT:    xxlxor v3, v3, v3
643; PWR9LE-NEXT:    li r3, 0
644; PWR9LE-NEXT:    vmrghb v4, v3, v2
645; PWR9LE-NEXT:    vmrglb v2, v3, v2
646; PWR9LE-NEXT:    vadduhm v2, v2, v4
647; PWR9LE-NEXT:    xxswapd v3, v2
648; PWR9LE-NEXT:    vadduhm v2, v2, v3
649; PWR9LE-NEXT:    xxspltw v3, v2, 2
650; PWR9LE-NEXT:    vadduhm v2, v2, v3
651; PWR9LE-NEXT:    vsplth v3, v2, 6
652; PWR9LE-NEXT:    vadduhm v2, v2, v3
653; PWR9LE-NEXT:    vextuhrx r3, r3, v2
654; PWR9LE-NEXT:    clrldi r3, r3, 48
655; PWR9LE-NEXT:    blr
656;
657; PWR9BE-LABEL: v16i8tov16i16_zero:
658; PWR9BE:       # %bb.0: # %entry
659; PWR9BE-NEXT:    xxlxor v3, v3, v3
660; PWR9BE-NEXT:    li r3, 0
661; PWR9BE-NEXT:    vmrglb v4, v3, v2
662; PWR9BE-NEXT:    vmrghb v2, v3, v2
663; PWR9BE-NEXT:    vadduhm v2, v2, v4
664; PWR9BE-NEXT:    xxswapd v3, v2
665; PWR9BE-NEXT:    vadduhm v2, v2, v3
666; PWR9BE-NEXT:    xxspltw v3, v2, 1
667; PWR9BE-NEXT:    vadduhm v2, v2, v3
668; PWR9BE-NEXT:    vsplth v3, v2, 1
669; PWR9BE-NEXT:    vadduhm v2, v2, v3
670; PWR9BE-NEXT:    vextuhlx r3, r3, v2
671; PWR9BE-NEXT:    clrldi r3, r3, 48
672; PWR9BE-NEXT:    blr
673;
674; PWR10LE-LABEL: v16i8tov16i16_zero:
675; PWR10LE:       # %bb.0: # %entry
676; PWR10LE-NEXT:    xxlxor v3, v3, v3
677; PWR10LE-NEXT:    li r3, 0
678; PWR10LE-NEXT:    vmrghb v4, v3, v2
679; PWR10LE-NEXT:    vmrglb v2, v3, v2
680; PWR10LE-NEXT:    vadduhm v2, v2, v4
681; PWR10LE-NEXT:    xxswapd v3, v2
682; PWR10LE-NEXT:    vadduhm v2, v2, v3
683; PWR10LE-NEXT:    xxspltw v3, v2, 2
684; PWR10LE-NEXT:    vadduhm v2, v2, v3
685; PWR10LE-NEXT:    vsplth v3, v2, 6
686; PWR10LE-NEXT:    vadduhm v2, v2, v3
687; PWR10LE-NEXT:    vextuhrx r3, r3, v2
688; PWR10LE-NEXT:    clrldi r3, r3, 48
689; PWR10LE-NEXT:    blr
690;
691; PWR10BE-LABEL: v16i8tov16i16_zero:
692; PWR10BE:       # %bb.0: # %entry
693; PWR10BE-NEXT:    xxlxor v3, v3, v3
694; PWR10BE-NEXT:    li r3, 0
695; PWR10BE-NEXT:    vmrglb v4, v3, v2
696; PWR10BE-NEXT:    vmrghb v2, v3, v2
697; PWR10BE-NEXT:    vadduhm v2, v2, v4
698; PWR10BE-NEXT:    xxswapd v3, v2
699; PWR10BE-NEXT:    vadduhm v2, v2, v3
700; PWR10BE-NEXT:    xxspltw v3, v2, 1
701; PWR10BE-NEXT:    vadduhm v2, v2, v3
702; PWR10BE-NEXT:    vsplth v3, v2, 1
703; PWR10BE-NEXT:    vadduhm v2, v2, v3
704; PWR10BE-NEXT:    vextuhlx r3, r3, v2
705; PWR10BE-NEXT:    clrldi r3, r3, 48
706; PWR10BE-NEXT:    blr
707entry:
708  %0 = zext <16 x i8> %a to <16 x i16>
709  %1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %0)
710  ret i16 %1
711}
712
713declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>) #0
714declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>) #0
715declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>) #0
716declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>) #0
717
718;;
719;; Vectors of i32
720;;
;; v2i32: returns llvm.vector.reduce.add of the <2 x i32> argument with a
;; zeroext i32 return.
;; Every prefix expects a single splat+add step (xxspltw / vadduwm) followed by
;; a word extract into r3 (vextuwrx on LE, vextuwlx on BE); the expected output
;; contains no separate clearing instruction after the extract.
721define dso_local zeroext i32 @v2i32(<2 x i32> %a) local_unnamed_addr #0 {
722; PWR9LE-LABEL: v2i32:
723; PWR9LE:       # %bb.0: # %entry
724; PWR9LE-NEXT:    xxspltw v3, v2, 2
725; PWR9LE-NEXT:    li r3, 0
726; PWR9LE-NEXT:    vadduwm v2, v2, v3
727; PWR9LE-NEXT:    vextuwrx r3, r3, v2
728; PWR9LE-NEXT:    blr
729;
730; PWR9BE-LABEL: v2i32:
731; PWR9BE:       # %bb.0: # %entry
732; PWR9BE-NEXT:    xxspltw v3, v2, 1
733; PWR9BE-NEXT:    li r3, 0
734; PWR9BE-NEXT:    vadduwm v2, v2, v3
735; PWR9BE-NEXT:    vextuwlx r3, r3, v2
736; PWR9BE-NEXT:    blr
737;
738; PWR10LE-LABEL: v2i32:
739; PWR10LE:       # %bb.0: # %entry
740; PWR10LE-NEXT:    xxspltw v3, v2, 2
741; PWR10LE-NEXT:    li r3, 0
742; PWR10LE-NEXT:    vadduwm v2, v2, v3
743; PWR10LE-NEXT:    vextuwrx r3, r3, v2
744; PWR10LE-NEXT:    blr
745;
746; PWR10BE-LABEL: v2i32:
747; PWR10BE:       # %bb.0: # %entry
748; PWR10BE-NEXT:    xxspltw v3, v2, 1
749; PWR10BE-NEXT:    li r3, 0
750; PWR10BE-NEXT:    vadduwm v2, v2, v3
751; PWR10BE-NEXT:    vextuwlx r3, r3, v2
752; PWR10BE-NEXT:    blr
753entry:
754  %0 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
755  ret i32 %0
756}
757
;; v4i32: returns llvm.vector.reduce.add of the <4 x i32> argument with a
;; zeroext i32 return.
;; Every prefix expects two permute+add steps (xxswapd, then xxspltw, each
;; followed by vadduwm) and a final word extract (vextuwrx on LE, vextuwlx on BE).
758define dso_local zeroext i32 @v4i32(<4 x i32> %a) local_unnamed_addr #0 {
759; PWR9LE-LABEL: v4i32:
760; PWR9LE:       # %bb.0: # %entry
761; PWR9LE-NEXT:    xxswapd v3, v2
762; PWR9LE-NEXT:    li r3, 0
763; PWR9LE-NEXT:    vadduwm v2, v2, v3
764; PWR9LE-NEXT:    xxspltw v3, v2, 2
765; PWR9LE-NEXT:    vadduwm v2, v2, v3
766; PWR9LE-NEXT:    vextuwrx r3, r3, v2
767; PWR9LE-NEXT:    blr
768;
769; PWR9BE-LABEL: v4i32:
770; PWR9BE:       # %bb.0: # %entry
771; PWR9BE-NEXT:    xxswapd v3, v2
772; PWR9BE-NEXT:    li r3, 0
773; PWR9BE-NEXT:    vadduwm v2, v2, v3
774; PWR9BE-NEXT:    xxspltw v3, v2, 1
775; PWR9BE-NEXT:    vadduwm v2, v2, v3
776; PWR9BE-NEXT:    vextuwlx r3, r3, v2
777; PWR9BE-NEXT:    blr
778;
779; PWR10LE-LABEL: v4i32:
780; PWR10LE:       # %bb.0: # %entry
781; PWR10LE-NEXT:    xxswapd v3, v2
782; PWR10LE-NEXT:    li r3, 0
783; PWR10LE-NEXT:    vadduwm v2, v2, v3
784; PWR10LE-NEXT:    xxspltw v3, v2, 2
785; PWR10LE-NEXT:    vadduwm v2, v2, v3
786; PWR10LE-NEXT:    vextuwrx r3, r3, v2
787; PWR10LE-NEXT:    blr
788;
789; PWR10BE-LABEL: v4i32:
790; PWR10BE:       # %bb.0: # %entry
791; PWR10BE-NEXT:    xxswapd v3, v2
792; PWR10BE-NEXT:    li r3, 0
793; PWR10BE-NEXT:    vadduwm v2, v2, v3
794; PWR10BE-NEXT:    xxspltw v3, v2, 1
795; PWR10BE-NEXT:    vadduwm v2, v2, v3
796; PWR10BE-NEXT:    vextuwlx r3, r3, v2
797; PWR10BE-NEXT:    blr
798entry:
799  %0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
800  ret i32 %0
801}
802
;; v8i32: returns llvm.vector.reduce.add of the <8 x i32> argument with a
;; zeroext i32 return.
;; The expected code starts with `vadduwm v2, v2, v3` (presumably combining the
;; two registers that carry the 32-byte argument -- confirm against the ABI),
;; then runs the same two permute+add steps as v4i32 and a word extract.
803define dso_local zeroext i32 @v8i32(<8 x i32> %a) local_unnamed_addr #0 {
804; PWR9LE-LABEL: v8i32:
805; PWR9LE:       # %bb.0: # %entry
806; PWR9LE-NEXT:    vadduwm v2, v2, v3
807; PWR9LE-NEXT:    li r3, 0
808; PWR9LE-NEXT:    xxswapd v3, v2
809; PWR9LE-NEXT:    vadduwm v2, v2, v3
810; PWR9LE-NEXT:    xxspltw v3, v2, 2
811; PWR9LE-NEXT:    vadduwm v2, v2, v3
812; PWR9LE-NEXT:    vextuwrx r3, r3, v2
813; PWR9LE-NEXT:    blr
814;
815; PWR9BE-LABEL: v8i32:
816; PWR9BE:       # %bb.0: # %entry
817; PWR9BE-NEXT:    vadduwm v2, v2, v3
818; PWR9BE-NEXT:    li r3, 0
819; PWR9BE-NEXT:    xxswapd v3, v2
820; PWR9BE-NEXT:    vadduwm v2, v2, v3
821; PWR9BE-NEXT:    xxspltw v3, v2, 1
822; PWR9BE-NEXT:    vadduwm v2, v2, v3
823; PWR9BE-NEXT:    vextuwlx r3, r3, v2
824; PWR9BE-NEXT:    blr
825;
826; PWR10LE-LABEL: v8i32:
827; PWR10LE:       # %bb.0: # %entry
828; PWR10LE-NEXT:    vadduwm v2, v2, v3
829; PWR10LE-NEXT:    li r3, 0
830; PWR10LE-NEXT:    xxswapd v3, v2
831; PWR10LE-NEXT:    vadduwm v2, v2, v3
832; PWR10LE-NEXT:    xxspltw v3, v2, 2
833; PWR10LE-NEXT:    vadduwm v2, v2, v3
834; PWR10LE-NEXT:    vextuwrx r3, r3, v2
835; PWR10LE-NEXT:    blr
836;
837; PWR10BE-LABEL: v8i32:
838; PWR10BE:       # %bb.0: # %entry
839; PWR10BE-NEXT:    vadduwm v2, v2, v3
840; PWR10BE-NEXT:    li r3, 0
841; PWR10BE-NEXT:    xxswapd v3, v2
842; PWR10BE-NEXT:    vadduwm v2, v2, v3
843; PWR10BE-NEXT:    xxspltw v3, v2, 1
844; PWR10BE-NEXT:    vadduwm v2, v2, v3
845; PWR10BE-NEXT:    vextuwlx r3, r3, v2
846; PWR10BE-NEXT:    blr
847entry:
848  %0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a)
849  ret i32 %0
850}
851
;; v16i32: returns llvm.vector.reduce.add of the <16 x i32> argument with a
;; zeroext i32 return.
;; The expected code first folds v2..v5 into v2 with three vadduwm
;; instructions (v3+v5, v2+v4, then v2+v3), then runs the same two permute+add
;; steps as v4i32 (xxswapd, xxspltw) and a word extract.
852define dso_local zeroext i32 @v16i32(<16 x i32> %a) local_unnamed_addr #0 {
853; PWR9LE-LABEL: v16i32:
854; PWR9LE:       # %bb.0: # %entry
855; PWR9LE-NEXT:    vadduwm v3, v3, v5
856; PWR9LE-NEXT:    vadduwm v2, v2, v4
857; PWR9LE-NEXT:    li r3, 0
858; PWR9LE-NEXT:    vadduwm v2, v2, v3
859; PWR9LE-NEXT:    xxswapd v3, v2
860; PWR9LE-NEXT:    vadduwm v2, v2, v3
861; PWR9LE-NEXT:    xxspltw v3, v2, 2
862; PWR9LE-NEXT:    vadduwm v2, v2, v3
863; PWR9LE-NEXT:    vextuwrx r3, r3, v2
864; PWR9LE-NEXT:    blr
865;
866; PWR9BE-LABEL: v16i32:
867; PWR9BE:       # %bb.0: # %entry
868; PWR9BE-NEXT:    vadduwm v3, v3, v5
869; PWR9BE-NEXT:    vadduwm v2, v2, v4
870; PWR9BE-NEXT:    li r3, 0
871; PWR9BE-NEXT:    vadduwm v2, v2, v3
872; PWR9BE-NEXT:    xxswapd v3, v2
873; PWR9BE-NEXT:    vadduwm v2, v2, v3
874; PWR9BE-NEXT:    xxspltw v3, v2, 1
875; PWR9BE-NEXT:    vadduwm v2, v2, v3
876; PWR9BE-NEXT:    vextuwlx r3, r3, v2
877; PWR9BE-NEXT:    blr
878;
879; PWR10LE-LABEL: v16i32:
880; PWR10LE:       # %bb.0: # %entry
881; PWR10LE-NEXT:    vadduwm v3, v3, v5
882; PWR10LE-NEXT:    vadduwm v2, v2, v4
883; PWR10LE-NEXT:    li r3, 0
884; PWR10LE-NEXT:    vadduwm v2, v2, v3
885; PWR10LE-NEXT:    xxswapd v3, v2
886; PWR10LE-NEXT:    vadduwm v2, v2, v3
887; PWR10LE-NEXT:    xxspltw v3, v2, 2
888; PWR10LE-NEXT:    vadduwm v2, v2, v3
889; PWR10LE-NEXT:    vextuwrx r3, r3, v2
890; PWR10LE-NEXT:    blr
891;
892; PWR10BE-LABEL: v16i32:
893; PWR10BE:       # %bb.0: # %entry
894; PWR10BE-NEXT:    vadduwm v3, v3, v5
895; PWR10BE-NEXT:    vadduwm v2, v2, v4
896; PWR10BE-NEXT:    li r3, 0
897; PWR10BE-NEXT:    vadduwm v2, v2, v3
898; PWR10BE-NEXT:    xxswapd v3, v2
899; PWR10BE-NEXT:    vadduwm v2, v2, v3
900; PWR10BE-NEXT:    xxspltw v3, v2, 1
901; PWR10BE-NEXT:    vadduwm v2, v2, v3
902; PWR10BE-NEXT:    vextuwlx r3, r3, v2
903; PWR10BE-NEXT:    blr
904entry:
905  %0 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a)
906  ret i32 %0
907}
908
;; v32i32: returns llvm.vector.reduce.add of the <32 x i32> argument with a
;; zeroext i32 return.
;; The expected code first folds v2..v9 into v2 with seven vadduwm
;; instructions, then runs the same two permute+add steps as v4i32 (xxswapd,
;; xxspltw) and a word extract.
;; The pwr9 and pwr10 sequences differ only in where `li r3, 0` is placed
;; among the initial adds (after the second add on pwr9, after the fourth on
;; pwr10).
909define dso_local zeroext i32 @v32i32(<32 x i32> %a) local_unnamed_addr #0 {
910; PWR9LE-LABEL: v32i32:
911; PWR9LE:       # %bb.0: # %entry
912; PWR9LE-NEXT:    vadduwm v4, v4, v8
913; PWR9LE-NEXT:    vadduwm v2, v2, v6
914; PWR9LE-NEXT:    li r3, 0
915; PWR9LE-NEXT:    vadduwm v5, v5, v9
916; PWR9LE-NEXT:    vadduwm v3, v3, v7
917; PWR9LE-NEXT:    vadduwm v3, v3, v5
918; PWR9LE-NEXT:    vadduwm v2, v2, v4
919; PWR9LE-NEXT:    vadduwm v2, v2, v3
920; PWR9LE-NEXT:    xxswapd v3, v2
921; PWR9LE-NEXT:    vadduwm v2, v2, v3
922; PWR9LE-NEXT:    xxspltw v3, v2, 2
923; PWR9LE-NEXT:    vadduwm v2, v2, v3
924; PWR9LE-NEXT:    vextuwrx r3, r3, v2
925; PWR9LE-NEXT:    blr
926;
927; PWR9BE-LABEL: v32i32:
928; PWR9BE:       # %bb.0: # %entry
929; PWR9BE-NEXT:    vadduwm v4, v4, v8
930; PWR9BE-NEXT:    vadduwm v2, v2, v6
931; PWR9BE-NEXT:    li r3, 0
932; PWR9BE-NEXT:    vadduwm v5, v5, v9
933; PWR9BE-NEXT:    vadduwm v3, v3, v7
934; PWR9BE-NEXT:    vadduwm v3, v3, v5
935; PWR9BE-NEXT:    vadduwm v2, v2, v4
936; PWR9BE-NEXT:    vadduwm v2, v2, v3
937; PWR9BE-NEXT:    xxswapd v3, v2
938; PWR9BE-NEXT:    vadduwm v2, v2, v3
939; PWR9BE-NEXT:    xxspltw v3, v2, 1
940; PWR9BE-NEXT:    vadduwm v2, v2, v3
941; PWR9BE-NEXT:    vextuwlx r3, r3, v2
942; PWR9BE-NEXT:    blr
943;
944; PWR10LE-LABEL: v32i32:
945; PWR10LE:       # %bb.0: # %entry
946; PWR10LE-NEXT:    vadduwm v4, v4, v8
947; PWR10LE-NEXT:    vadduwm v2, v2, v6
948; PWR10LE-NEXT:    vadduwm v5, v5, v9
949; PWR10LE-NEXT:    vadduwm v3, v3, v7
950; PWR10LE-NEXT:    li r3, 0
951; PWR10LE-NEXT:    vadduwm v3, v3, v5
952; PWR10LE-NEXT:    vadduwm v2, v2, v4
953; PWR10LE-NEXT:    vadduwm v2, v2, v3
954; PWR10LE-NEXT:    xxswapd v3, v2
955; PWR10LE-NEXT:    vadduwm v2, v2, v3
956; PWR10LE-NEXT:    xxspltw v3, v2, 2
957; PWR10LE-NEXT:    vadduwm v2, v2, v3
958; PWR10LE-NEXT:    vextuwrx r3, r3, v2
959; PWR10LE-NEXT:    blr
960;
961; PWR10BE-LABEL: v32i32:
962; PWR10BE:       # %bb.0: # %entry
963; PWR10BE-NEXT:    vadduwm v4, v4, v8
964; PWR10BE-NEXT:    vadduwm v2, v2, v6
965; PWR10BE-NEXT:    vadduwm v5, v5, v9
966; PWR10BE-NEXT:    vadduwm v3, v3, v7
967; PWR10BE-NEXT:    li r3, 0
968; PWR10BE-NEXT:    vadduwm v3, v3, v5
969; PWR10BE-NEXT:    vadduwm v2, v2, v4
970; PWR10BE-NEXT:    vadduwm v2, v2, v3
971; PWR10BE-NEXT:    xxswapd v3, v2
972; PWR10BE-NEXT:    vadduwm v2, v2, v3
973; PWR10BE-NEXT:    xxspltw v3, v2, 1
974; PWR10BE-NEXT:    vadduwm v2, v2, v3
975; PWR10BE-NEXT:    vextuwlx r3, r3, v2
976; PWR10BE-NEXT:    blr
977entry:
978  %0 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %a)
979  ret i32 %0
980}
981
; v16i8tov16i32_sign - sign-extend a <16 x i8> argument to <16 x i32>,
; add-reduce it with @llvm.vector.reduce.add.v16i32 and return the i32 sum
; (signext return).
; The assertions expect four vperm shuffles of the input whose control vectors
; are loaded from the .LCPI17_0 to .LCPI17_3 labels, each result widened with
; vextsb2w and then summed with vadduwm; the extracted word is sign-extended
; with extsw before blr. The pwr10 little-endian run loads the control vectors
; with PC-relative plxv, the other runs with an addis/addi/lxv sequence.
982define dso_local signext i32 @v16i8tov16i32_sign(<16 x i8> %a) local_unnamed_addr #0 {
983; PWR9LE-LABEL: v16i8tov16i32_sign:
984; PWR9LE:       # %bb.0: # %entry
985; PWR9LE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
986; PWR9LE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
987; PWR9LE-NEXT:    lxv v3, 0(r3)
988; PWR9LE-NEXT:    addis r3, r2, .LCPI17_1@toc@ha
989; PWR9LE-NEXT:    addi r3, r3, .LCPI17_1@toc@l
990; PWR9LE-NEXT:    lxv v4, 0(r3)
991; PWR9LE-NEXT:    addis r3, r2, .LCPI17_2@toc@ha
992; PWR9LE-NEXT:    vperm v3, v2, v2, v3
993; PWR9LE-NEXT:    addi r3, r3, .LCPI17_2@toc@l
994; PWR9LE-NEXT:    lxv v5, 0(r3)
995; PWR9LE-NEXT:    addis r3, r2, .LCPI17_3@toc@ha
996; PWR9LE-NEXT:    vextsb2w v3, v3
997; PWR9LE-NEXT:    vperm v4, v2, v2, v4
998; PWR9LE-NEXT:    addi r3, r3, .LCPI17_3@toc@l
999; PWR9LE-NEXT:    lxv v0, 0(r3)
1000; PWR9LE-NEXT:    vextsb2w v4, v4
1001; PWR9LE-NEXT:    li r3, 0
1002; PWR9LE-NEXT:    vperm v5, v2, v2, v5
1003; PWR9LE-NEXT:    vadduwm v3, v4, v3
1004; PWR9LE-NEXT:    vextsb2w v5, v5
1005; PWR9LE-NEXT:    vperm v2, v2, v2, v0
1006; PWR9LE-NEXT:    vextsb2w v2, v2
1007; PWR9LE-NEXT:    vadduwm v2, v2, v5
1008; PWR9LE-NEXT:    vadduwm v2, v3, v2
1009; PWR9LE-NEXT:    xxswapd v3, v2
1010; PWR9LE-NEXT:    vadduwm v2, v2, v3
1011; PWR9LE-NEXT:    xxspltw v3, v2, 2
1012; PWR9LE-NEXT:    vadduwm v2, v2, v3
1013; PWR9LE-NEXT:    vextuwrx r3, r3, v2
1014; PWR9LE-NEXT:    extsw r3, r3
1015; PWR9LE-NEXT:    blr
1016;
1017; PWR9BE-LABEL: v16i8tov16i32_sign:
1018; PWR9BE:       # %bb.0: # %entry
1019; PWR9BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
1020; PWR9BE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
1021; PWR9BE-NEXT:    lxv v3, 0(r3)
1022; PWR9BE-NEXT:    addis r3, r2, .LCPI17_1@toc@ha
1023; PWR9BE-NEXT:    addi r3, r3, .LCPI17_1@toc@l
1024; PWR9BE-NEXT:    lxv v4, 0(r3)
1025; PWR9BE-NEXT:    addis r3, r2, .LCPI17_2@toc@ha
1026; PWR9BE-NEXT:    vperm v3, v2, v2, v3
1027; PWR9BE-NEXT:    addi r3, r3, .LCPI17_2@toc@l
1028; PWR9BE-NEXT:    lxv v5, 0(r3)
1029; PWR9BE-NEXT:    addis r3, r2, .LCPI17_3@toc@ha
1030; PWR9BE-NEXT:    vextsb2w v3, v3
1031; PWR9BE-NEXT:    vperm v4, v2, v2, v4
1032; PWR9BE-NEXT:    addi r3, r3, .LCPI17_3@toc@l
1033; PWR9BE-NEXT:    lxv v0, 0(r3)
1034; PWR9BE-NEXT:    vextsb2w v4, v4
1035; PWR9BE-NEXT:    li r3, 0
1036; PWR9BE-NEXT:    vperm v5, v2, v2, v5
1037; PWR9BE-NEXT:    vadduwm v3, v4, v3
1038; PWR9BE-NEXT:    vextsb2w v5, v5
1039; PWR9BE-NEXT:    vperm v2, v2, v2, v0
1040; PWR9BE-NEXT:    vextsb2w v2, v2
1041; PWR9BE-NEXT:    vadduwm v2, v2, v5
1042; PWR9BE-NEXT:    vadduwm v2, v3, v2
1043; PWR9BE-NEXT:    xxswapd v3, v2
1044; PWR9BE-NEXT:    vadduwm v2, v2, v3
1045; PWR9BE-NEXT:    xxspltw v3, v2, 1
1046; PWR9BE-NEXT:    vadduwm v2, v2, v3
1047; PWR9BE-NEXT:    vextuwlx r3, r3, v2
1048; PWR9BE-NEXT:    extsw r3, r3
1049; PWR9BE-NEXT:    blr
1050;
1051; PWR10LE-LABEL: v16i8tov16i32_sign:
1052; PWR10LE:       # %bb.0: # %entry
1053; PWR10LE-NEXT:    plxv v3, .LCPI17_0@PCREL(0), 1
1054; PWR10LE-NEXT:    plxv v4, .LCPI17_1@PCREL(0), 1
1055; PWR10LE-NEXT:    li r3, 0
1056; PWR10LE-NEXT:    vperm v3, v2, v2, v3
1057; PWR10LE-NEXT:    plxv v5, .LCPI17_2@PCREL(0), 1
1058; PWR10LE-NEXT:    plxv v0, .LCPI17_3@PCREL(0), 1
1059; PWR10LE-NEXT:    vperm v4, v2, v2, v4
1060; PWR10LE-NEXT:    vperm v5, v2, v2, v5
1061; PWR10LE-NEXT:    vperm v2, v2, v2, v0
1062; PWR10LE-NEXT:    vextsb2w v3, v3
1063; PWR10LE-NEXT:    vextsb2w v4, v4
1064; PWR10LE-NEXT:    vextsb2w v5, v5
1065; PWR10LE-NEXT:    vextsb2w v2, v2
1066; PWR10LE-NEXT:    vadduwm v2, v2, v5
1067; PWR10LE-NEXT:    vadduwm v3, v4, v3
1068; PWR10LE-NEXT:    vadduwm v2, v3, v2
1069; PWR10LE-NEXT:    xxswapd v3, v2
1070; PWR10LE-NEXT:    vadduwm v2, v2, v3
1071; PWR10LE-NEXT:    xxspltw v3, v2, 2
1072; PWR10LE-NEXT:    vadduwm v2, v2, v3
1073; PWR10LE-NEXT:    vextuwrx r3, r3, v2
1074; PWR10LE-NEXT:    extsw r3, r3
1075; PWR10LE-NEXT:    blr
1076;
1077; PWR10BE-LABEL: v16i8tov16i32_sign:
1078; PWR10BE:       # %bb.0: # %entry
1079; PWR10BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
1080; PWR10BE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
1081; PWR10BE-NEXT:    lxv v3, 0(r3)
1082; PWR10BE-NEXT:    addis r3, r2, .LCPI17_1@toc@ha
1083; PWR10BE-NEXT:    addi r3, r3, .LCPI17_1@toc@l
1084; PWR10BE-NEXT:    lxv v4, 0(r3)
1085; PWR10BE-NEXT:    addis r3, r2, .LCPI17_2@toc@ha
1086; PWR10BE-NEXT:    vperm v3, v2, v2, v3
1087; PWR10BE-NEXT:    addi r3, r3, .LCPI17_2@toc@l
1088; PWR10BE-NEXT:    vextsb2w v3, v3
1089; PWR10BE-NEXT:    lxv v5, 0(r3)
1090; PWR10BE-NEXT:    addis r3, r2, .LCPI17_3@toc@ha
1091; PWR10BE-NEXT:    vperm v4, v2, v2, v4
1092; PWR10BE-NEXT:    addi r3, r3, .LCPI17_3@toc@l
1093; PWR10BE-NEXT:    vextsb2w v4, v4
1094; PWR10BE-NEXT:    lxv v0, 0(r3)
1095; PWR10BE-NEXT:    li r3, 0
1096; PWR10BE-NEXT:    vperm v5, v2, v2, v5
1097; PWR10BE-NEXT:    vadduwm v3, v4, v3
1098; PWR10BE-NEXT:    vextsb2w v5, v5
1099; PWR10BE-NEXT:    vperm v2, v2, v2, v0
1100; PWR10BE-NEXT:    vextsb2w v2, v2
1101; PWR10BE-NEXT:    vadduwm v2, v2, v5
1102; PWR10BE-NEXT:    vadduwm v2, v3, v2
1103; PWR10BE-NEXT:    xxswapd v3, v2
1104; PWR10BE-NEXT:    vadduwm v2, v2, v3
1105; PWR10BE-NEXT:    xxspltw v3, v2, 1
1106; PWR10BE-NEXT:    vadduwm v2, v2, v3
1107; PWR10BE-NEXT:    vextuwlx r3, r3, v2
1108; PWR10BE-NEXT:    extsw r3, r3
1109; PWR10BE-NEXT:    blr
1110entry:
1111  %0 = sext <16 x i8> %a to <16 x i32>
1112  %1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %0)
1113  ret i32 %1
1114}
1115
; v16i8tov16i32_zero - zero-extend a <16 x i8> argument to <16 x i32>,
; add-reduce it with @llvm.vector.reduce.add.v16i32 and return the i32 sum
; (zeroext return).
; The assertions expect v4 to be cleared with xxlxor and passed as the first
; vperm source next to the input, with four control vectors loaded from the
; .LCPI18_0 to .LCPI18_3 labels; the four shuffled vectors are summed with
; vadduwm. No vextsb2w and no trailing extsw appear in the expected output.
1116define dso_local zeroext i32 @v16i8tov16i32_zero(<16 x i8> %a) local_unnamed_addr #0 {
1117; PWR9LE-LABEL: v16i8tov16i32_zero:
1118; PWR9LE:       # %bb.0: # %entry
1119; PWR9LE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
1120; PWR9LE-NEXT:    xxlxor v4, v4, v4
1121; PWR9LE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
1122; PWR9LE-NEXT:    lxv v3, 0(r3)
1123; PWR9LE-NEXT:    addis r3, r2, .LCPI18_1@toc@ha
1124; PWR9LE-NEXT:    addi r3, r3, .LCPI18_1@toc@l
1125; PWR9LE-NEXT:    lxv v5, 0(r3)
1126; PWR9LE-NEXT:    addis r3, r2, .LCPI18_2@toc@ha
1127; PWR9LE-NEXT:    vperm v3, v4, v2, v3
1128; PWR9LE-NEXT:    addi r3, r3, .LCPI18_2@toc@l
1129; PWR9LE-NEXT:    lxv v0, 0(r3)
1130; PWR9LE-NEXT:    addis r3, r2, .LCPI18_3@toc@ha
1131; PWR9LE-NEXT:    vperm v5, v4, v2, v5
1132; PWR9LE-NEXT:    addi r3, r3, .LCPI18_3@toc@l
1133; PWR9LE-NEXT:    lxv v1, 0(r3)
1134; PWR9LE-NEXT:    vadduwm v3, v5, v3
1135; PWR9LE-NEXT:    li r3, 0
1136; PWR9LE-NEXT:    vperm v0, v4, v2, v0
1137; PWR9LE-NEXT:    vperm v2, v4, v2, v1
1138; PWR9LE-NEXT:    vadduwm v2, v2, v0
1139; PWR9LE-NEXT:    vadduwm v2, v3, v2
1140; PWR9LE-NEXT:    xxswapd v3, v2
1141; PWR9LE-NEXT:    vadduwm v2, v2, v3
1142; PWR9LE-NEXT:    xxspltw v3, v2, 2
1143; PWR9LE-NEXT:    vadduwm v2, v2, v3
1144; PWR9LE-NEXT:    vextuwrx r3, r3, v2
1145; PWR9LE-NEXT:    blr
1146;
1147; PWR9BE-LABEL: v16i8tov16i32_zero:
1148; PWR9BE:       # %bb.0: # %entry
1149; PWR9BE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
1150; PWR9BE-NEXT:    xxlxor v4, v4, v4
1151; PWR9BE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
1152; PWR9BE-NEXT:    lxv v3, 0(r3)
1153; PWR9BE-NEXT:    addis r3, r2, .LCPI18_1@toc@ha
1154; PWR9BE-NEXT:    addi r3, r3, .LCPI18_1@toc@l
1155; PWR9BE-NEXT:    lxv v5, 0(r3)
1156; PWR9BE-NEXT:    addis r3, r2, .LCPI18_2@toc@ha
1157; PWR9BE-NEXT:    vperm v3, v4, v2, v3
1158; PWR9BE-NEXT:    addi r3, r3, .LCPI18_2@toc@l
1159; PWR9BE-NEXT:    lxv v0, 0(r3)
1160; PWR9BE-NEXT:    addis r3, r2, .LCPI18_3@toc@ha
1161; PWR9BE-NEXT:    vperm v5, v4, v2, v5
1162; PWR9BE-NEXT:    addi r3, r3, .LCPI18_3@toc@l
1163; PWR9BE-NEXT:    lxv v1, 0(r3)
1164; PWR9BE-NEXT:    vadduwm v3, v5, v3
1165; PWR9BE-NEXT:    li r3, 0
1166; PWR9BE-NEXT:    vperm v0, v4, v2, v0
1167; PWR9BE-NEXT:    vperm v2, v4, v2, v1
1168; PWR9BE-NEXT:    vadduwm v2, v2, v0
1169; PWR9BE-NEXT:    vadduwm v2, v3, v2
1170; PWR9BE-NEXT:    xxswapd v3, v2
1171; PWR9BE-NEXT:    vadduwm v2, v2, v3
1172; PWR9BE-NEXT:    xxspltw v3, v2, 1
1173; PWR9BE-NEXT:    vadduwm v2, v2, v3
1174; PWR9BE-NEXT:    vextuwlx r3, r3, v2
1175; PWR9BE-NEXT:    blr
1176;
1177; PWR10LE-LABEL: v16i8tov16i32_zero:
1178; PWR10LE:       # %bb.0: # %entry
1179; PWR10LE-NEXT:    plxv v3, .LCPI18_0@PCREL(0), 1
1180; PWR10LE-NEXT:    plxv v5, .LCPI18_1@PCREL(0), 1
1181; PWR10LE-NEXT:    xxlxor v4, v4, v4
1182; PWR10LE-NEXT:    li r3, 0
1183; PWR10LE-NEXT:    vperm v3, v4, v2, v3
1184; PWR10LE-NEXT:    plxv v0, .LCPI18_2@PCREL(0), 1
1185; PWR10LE-NEXT:    plxv v1, .LCPI18_3@PCREL(0), 1
1186; PWR10LE-NEXT:    vperm v5, v4, v2, v5
1187; PWR10LE-NEXT:    vperm v0, v4, v2, v0
1188; PWR10LE-NEXT:    vperm v2, v4, v2, v1
1189; PWR10LE-NEXT:    vadduwm v2, v2, v0
1190; PWR10LE-NEXT:    vadduwm v3, v5, v3
1191; PWR10LE-NEXT:    vadduwm v2, v3, v2
1192; PWR10LE-NEXT:    xxswapd v3, v2
1193; PWR10LE-NEXT:    vadduwm v2, v2, v3
1194; PWR10LE-NEXT:    xxspltw v3, v2, 2
1195; PWR10LE-NEXT:    vadduwm v2, v2, v3
1196; PWR10LE-NEXT:    vextuwrx r3, r3, v2
1197; PWR10LE-NEXT:    blr
1198;
1199; PWR10BE-LABEL: v16i8tov16i32_zero:
1200; PWR10BE:       # %bb.0: # %entry
1201; PWR10BE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
1202; PWR10BE-NEXT:    xxlxor v4, v4, v4
1203; PWR10BE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
1204; PWR10BE-NEXT:    lxv v3, 0(r3)
1205; PWR10BE-NEXT:    addis r3, r2, .LCPI18_1@toc@ha
1206; PWR10BE-NEXT:    addi r3, r3, .LCPI18_1@toc@l
1207; PWR10BE-NEXT:    lxv v5, 0(r3)
1208; PWR10BE-NEXT:    addis r3, r2, .LCPI18_2@toc@ha
1209; PWR10BE-NEXT:    vperm v3, v4, v2, v3
1210; PWR10BE-NEXT:    addi r3, r3, .LCPI18_2@toc@l
1211; PWR10BE-NEXT:    lxv v0, 0(r3)
1212; PWR10BE-NEXT:    addis r3, r2, .LCPI18_3@toc@ha
1213; PWR10BE-NEXT:    vperm v5, v4, v2, v5
1214; PWR10BE-NEXT:    addi r3, r3, .LCPI18_3@toc@l
1215; PWR10BE-NEXT:    vadduwm v3, v5, v3
1216; PWR10BE-NEXT:    lxv v1, 0(r3)
1217; PWR10BE-NEXT:    li r3, 0
1218; PWR10BE-NEXT:    vperm v0, v4, v2, v0
1219; PWR10BE-NEXT:    vperm v2, v4, v2, v1
1220; PWR10BE-NEXT:    vadduwm v2, v2, v0
1221; PWR10BE-NEXT:    vadduwm v2, v3, v2
1222; PWR10BE-NEXT:    xxswapd v3, v2
1223; PWR10BE-NEXT:    vadduwm v2, v2, v3
1224; PWR10BE-NEXT:    xxspltw v3, v2, 1
1225; PWR10BE-NEXT:    vadduwm v2, v2, v3
1226; PWR10BE-NEXT:    vextuwlx r3, r3, v2
1227; PWR10BE-NEXT:    blr
1228entry:
1229  %0 = zext <16 x i8> %a to <16 x i32>
1230  %1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %0)
1231  ret i32 %1
1232}
1233
; Declarations of the llvm.vector.reduce.add intrinsics for i32 vectors of
; 2, 4, 8, 16 and 32 lanes; each takes one vector and returns an i32.
1234declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>) #0
1235declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>) #0
1236declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>) #0
1237declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>) #0
1238declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>) #0
1239
1240;;
1241;; Vectors of i64
1242;;
; v2i64 - add-reduce a <2 x i64> argument with @llvm.vector.reduce.add.v2i64
; and return the i64 sum.
; The assertions expect one xxswapd followed by one vaddudm, then the result to
; be moved into r3 with mfvsrld on the little-endian runs or mfvsrd on the
; big-endian runs.
1243define dso_local i64 @v2i64(<2 x i64> %a) local_unnamed_addr #0 {
1244; PWR9LE-LABEL: v2i64:
1245; PWR9LE:       # %bb.0: # %entry
1246; PWR9LE-NEXT:    xxswapd v3, v2
1247; PWR9LE-NEXT:    vaddudm v2, v2, v3
1248; PWR9LE-NEXT:    mfvsrld r3, v2
1249; PWR9LE-NEXT:    blr
1250;
1251; PWR9BE-LABEL: v2i64:
1252; PWR9BE:       # %bb.0: # %entry
1253; PWR9BE-NEXT:    xxswapd v3, v2
1254; PWR9BE-NEXT:    vaddudm v2, v2, v3
1255; PWR9BE-NEXT:    mfvsrd r3, v2
1256; PWR9BE-NEXT:    blr
1257;
1258; PWR10LE-LABEL: v2i64:
1259; PWR10LE:       # %bb.0: # %entry
1260; PWR10LE-NEXT:    xxswapd v3, v2
1261; PWR10LE-NEXT:    vaddudm v2, v2, v3
1262; PWR10LE-NEXT:    mfvsrld r3, v2
1263; PWR10LE-NEXT:    blr
1264;
1265; PWR10BE-LABEL: v2i64:
1266; PWR10BE:       # %bb.0: # %entry
1267; PWR10BE-NEXT:    xxswapd v3, v2
1268; PWR10BE-NEXT:    vaddudm v2, v2, v3
1269; PWR10BE-NEXT:    mfvsrd r3, v2
1270; PWR10BE-NEXT:    blr
1271entry:
1272  %0 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
1273  ret i64 %0
1274}
1275
; v4i64 - add-reduce a <4 x i64> argument with @llvm.vector.reduce.add.v4i64
; and return the i64 sum.
; The assertions expect the two incoming vector registers (v2, v3) to be added
; with vaddudm first, followed by an xxswapd + vaddudm fold and a move to r3
; with mfvsrld on the little-endian runs or mfvsrd on the big-endian runs.
1276define dso_local i64 @v4i64(<4 x i64> %a) local_unnamed_addr #0 {
1277; PWR9LE-LABEL: v4i64:
1278; PWR9LE:       # %bb.0: # %entry
1279; PWR9LE-NEXT:    vaddudm v2, v2, v3
1280; PWR9LE-NEXT:    xxswapd v3, v2
1281; PWR9LE-NEXT:    vaddudm v2, v2, v3
1282; PWR9LE-NEXT:    mfvsrld r3, v2
1283; PWR9LE-NEXT:    blr
1284;
1285; PWR9BE-LABEL: v4i64:
1286; PWR9BE:       # %bb.0: # %entry
1287; PWR9BE-NEXT:    vaddudm v2, v2, v3
1288; PWR9BE-NEXT:    xxswapd v3, v2
1289; PWR9BE-NEXT:    vaddudm v2, v2, v3
1290; PWR9BE-NEXT:    mfvsrd r3, v2
1291; PWR9BE-NEXT:    blr
1292;
1293; PWR10LE-LABEL: v4i64:
1294; PWR10LE:       # %bb.0: # %entry
1295; PWR10LE-NEXT:    vaddudm v2, v2, v3
1296; PWR10LE-NEXT:    xxswapd v3, v2
1297; PWR10LE-NEXT:    vaddudm v2, v2, v3
1298; PWR10LE-NEXT:    mfvsrld r3, v2
1299; PWR10LE-NEXT:    blr
1300;
1301; PWR10BE-LABEL: v4i64:
1302; PWR10BE:       # %bb.0: # %entry
1303; PWR10BE-NEXT:    vaddudm v2, v2, v3
1304; PWR10BE-NEXT:    xxswapd v3, v2
1305; PWR10BE-NEXT:    vaddudm v2, v2, v3
1306; PWR10BE-NEXT:    mfvsrd r3, v2
1307; PWR10BE-NEXT:    blr
1308entry:
1309  %0 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %a)
1310  ret i64 %0
1311}
1312
; v8i64 - add-reduce an <8 x i64> argument with @llvm.vector.reduce.add.v8i64
; and return the i64 sum.
; The assertions expect the four incoming vector registers (v2 to v5) to be
; combined with three vaddudm, followed by an xxswapd + vaddudm fold and a move
; to r3 with mfvsrld on the little-endian runs or mfvsrd on the big-endian runs.
1313define dso_local i64 @v8i64(<8 x i64> %a) local_unnamed_addr #0 {
1314; PWR9LE-LABEL: v8i64:
1315; PWR9LE:       # %bb.0: # %entry
1316; PWR9LE-NEXT:    vaddudm v3, v3, v5
1317; PWR9LE-NEXT:    vaddudm v2, v2, v4
1318; PWR9LE-NEXT:    vaddudm v2, v2, v3
1319; PWR9LE-NEXT:    xxswapd v3, v2
1320; PWR9LE-NEXT:    vaddudm v2, v2, v3
1321; PWR9LE-NEXT:    mfvsrld r3, v2
1322; PWR9LE-NEXT:    blr
1323;
1324; PWR9BE-LABEL: v8i64:
1325; PWR9BE:       # %bb.0: # %entry
1326; PWR9BE-NEXT:    vaddudm v3, v3, v5
1327; PWR9BE-NEXT:    vaddudm v2, v2, v4
1328; PWR9BE-NEXT:    vaddudm v2, v2, v3
1329; PWR9BE-NEXT:    xxswapd v3, v2
1330; PWR9BE-NEXT:    vaddudm v2, v2, v3
1331; PWR9BE-NEXT:    mfvsrd r3, v2
1332; PWR9BE-NEXT:    blr
1333;
1334; PWR10LE-LABEL: v8i64:
1335; PWR10LE:       # %bb.0: # %entry
1336; PWR10LE-NEXT:    vaddudm v3, v3, v5
1337; PWR10LE-NEXT:    vaddudm v2, v2, v4
1338; PWR10LE-NEXT:    vaddudm v2, v2, v3
1339; PWR10LE-NEXT:    xxswapd v3, v2
1340; PWR10LE-NEXT:    vaddudm v2, v2, v3
1341; PWR10LE-NEXT:    mfvsrld r3, v2
1342; PWR10LE-NEXT:    blr
1343;
1344; PWR10BE-LABEL: v8i64:
1345; PWR10BE:       # %bb.0: # %entry
1346; PWR10BE-NEXT:    vaddudm v3, v3, v5
1347; PWR10BE-NEXT:    vaddudm v2, v2, v4
1348; PWR10BE-NEXT:    vaddudm v2, v2, v3
1349; PWR10BE-NEXT:    xxswapd v3, v2
1350; PWR10BE-NEXT:    vaddudm v2, v2, v3
1351; PWR10BE-NEXT:    mfvsrd r3, v2
1352; PWR10BE-NEXT:    blr
1353entry:
1354  %0 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %a)
1355  ret i64 %0
1356}
1357
; v16i64 - add-reduce a <16 x i64> argument with @llvm.vector.reduce.add.v16i64
; and return the i64 sum.
; The assertions expect the eight incoming vector registers (v2 to v9) to be
; combined with seven vaddudm, followed by an xxswapd + vaddudm fold and a move
; to r3 with mfvsrld on the little-endian runs or mfvsrd on the big-endian runs.
; The pwr9 and pwr10 expectations contain the same instructions in a different
; order.
1358define dso_local i64 @v16i64(<16 x i64> %a) local_unnamed_addr #0 {
1359; PWR9LE-LABEL: v16i64:
1360; PWR9LE:       # %bb.0: # %entry
1361; PWR9LE-NEXT:    vaddudm v4, v4, v8
1362; PWR9LE-NEXT:    vaddudm v2, v2, v6
1363; PWR9LE-NEXT:    vaddudm v5, v5, v9
1364; PWR9LE-NEXT:    vaddudm v3, v3, v7
1365; PWR9LE-NEXT:    vaddudm v3, v3, v5
1366; PWR9LE-NEXT:    vaddudm v2, v2, v4
1367; PWR9LE-NEXT:    vaddudm v2, v2, v3
1368; PWR9LE-NEXT:    xxswapd v3, v2
1369; PWR9LE-NEXT:    vaddudm v2, v2, v3
1370; PWR9LE-NEXT:    mfvsrld r3, v2
1371; PWR9LE-NEXT:    blr
1372;
1373; PWR9BE-LABEL: v16i64:
1374; PWR9BE:       # %bb.0: # %entry
1375; PWR9BE-NEXT:    vaddudm v4, v4, v8
1376; PWR9BE-NEXT:    vaddudm v2, v2, v6
1377; PWR9BE-NEXT:    vaddudm v5, v5, v9
1378; PWR9BE-NEXT:    vaddudm v3, v3, v7
1379; PWR9BE-NEXT:    vaddudm v3, v3, v5
1380; PWR9BE-NEXT:    vaddudm v2, v2, v4
1381; PWR9BE-NEXT:    vaddudm v2, v2, v3
1382; PWR9BE-NEXT:    xxswapd v3, v2
1383; PWR9BE-NEXT:    vaddudm v2, v2, v3
1384; PWR9BE-NEXT:    mfvsrd r3, v2
1385; PWR9BE-NEXT:    blr
1386;
1387; PWR10LE-LABEL: v16i64:
1388; PWR10LE:       # %bb.0: # %entry
1389; PWR10LE-NEXT:    vaddudm v4, v4, v8
1390; PWR10LE-NEXT:    vaddudm v5, v5, v9
1391; PWR10LE-NEXT:    vaddudm v3, v3, v7
1392; PWR10LE-NEXT:    vaddudm v3, v3, v5
1393; PWR10LE-NEXT:    vaddudm v2, v2, v6
1394; PWR10LE-NEXT:    vaddudm v2, v2, v4
1395; PWR10LE-NEXT:    vaddudm v2, v2, v3
1396; PWR10LE-NEXT:    xxswapd v3, v2
1397; PWR10LE-NEXT:    vaddudm v2, v2, v3
1398; PWR10LE-NEXT:    mfvsrld r3, v2
1399; PWR10LE-NEXT:    blr
1400;
1401; PWR10BE-LABEL: v16i64:
1402; PWR10BE:       # %bb.0: # %entry
1403; PWR10BE-NEXT:    vaddudm v4, v4, v8
1404; PWR10BE-NEXT:    vaddudm v5, v5, v9
1405; PWR10BE-NEXT:    vaddudm v3, v3, v7
1406; PWR10BE-NEXT:    vaddudm v3, v3, v5
1407; PWR10BE-NEXT:    vaddudm v2, v2, v6
1408; PWR10BE-NEXT:    vaddudm v2, v2, v4
1409; PWR10BE-NEXT:    vaddudm v2, v2, v3
1410; PWR10BE-NEXT:    xxswapd v3, v2
1411; PWR10BE-NEXT:    vaddudm v2, v2, v3
1412; PWR10BE-NEXT:    mfvsrd r3, v2
1413; PWR10BE-NEXT:    blr
1414entry:
1415  %0 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %a)
1416  ret i64 %0
1417}
1418
; v16i8tov16i64_sign - sign-extend a <16 x i8> argument to <16 x i64>,
; add-reduce it with @llvm.vector.reduce.add.v16i64 and return the i64 sum.
; The assertions expect eight vperm shuffles of the input whose control vectors
; are loaded from the .LCPI23_0 to .LCPI23_7 labels, each result widened with
; vextsb2d and then summed with vaddudm; the final xxswapd + vaddudm fold is
; moved to r3 with mfvsrld on the little-endian runs or mfvsrd on the
; big-endian runs. The pwr10 little-endian run loads the control vectors with
; PC-relative plxv, the other runs with an addis/addi/lxv sequence.
1419define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 {
1420; PWR9LE-LABEL: v16i8tov16i64_sign:
1421; PWR9LE:       # %bb.0: # %entry
1422; PWR9LE-NEXT:    addis r3, r2, .LCPI23_0@toc@ha
1423; PWR9LE-NEXT:    addi r3, r3, .LCPI23_0@toc@l
1424; PWR9LE-NEXT:    lxv v3, 0(r3)
1425; PWR9LE-NEXT:    addis r3, r2, .LCPI23_1@toc@ha
1426; PWR9LE-NEXT:    addi r3, r3, .LCPI23_1@toc@l
1427; PWR9LE-NEXT:    lxv v4, 0(r3)
1428; PWR9LE-NEXT:    addis r3, r2, .LCPI23_2@toc@ha
1429; PWR9LE-NEXT:    vperm v3, v2, v2, v3
1430; PWR9LE-NEXT:    addi r3, r3, .LCPI23_2@toc@l
1431; PWR9LE-NEXT:    lxv v5, 0(r3)
1432; PWR9LE-NEXT:    addis r3, r2, .LCPI23_3@toc@ha
1433; PWR9LE-NEXT:    vextsb2d v3, v3
1434; PWR9LE-NEXT:    vperm v4, v2, v2, v4
1435; PWR9LE-NEXT:    addi r3, r3, .LCPI23_3@toc@l
1436; PWR9LE-NEXT:    lxv v0, 0(r3)
1437; PWR9LE-NEXT:    addis r3, r2, .LCPI23_4@toc@ha
1438; PWR9LE-NEXT:    vextsb2d v4, v4
1439; PWR9LE-NEXT:    vperm v5, v2, v2, v5
1440; PWR9LE-NEXT:    addi r3, r3, .LCPI23_4@toc@l
1441; PWR9LE-NEXT:    vaddudm v3, v4, v3
1442; PWR9LE-NEXT:    lxv v1, 0(r3)
1443; PWR9LE-NEXT:    addis r3, r2, .LCPI23_5@toc@ha
1444; PWR9LE-NEXT:    vextsb2d v5, v5
1445; PWR9LE-NEXT:    vperm v0, v2, v2, v0
1446; PWR9LE-NEXT:    addi r3, r3, .LCPI23_5@toc@l
1447; PWR9LE-NEXT:    lxv v6, 0(r3)
1448; PWR9LE-NEXT:    addis r3, r2, .LCPI23_6@toc@ha
1449; PWR9LE-NEXT:    vperm v1, v2, v2, v1
1450; PWR9LE-NEXT:    vextsb2d v0, v0
1451; PWR9LE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
1452; PWR9LE-NEXT:    vaddudm v5, v0, v5
1453; PWR9LE-NEXT:    lxv v7, 0(r3)
1454; PWR9LE-NEXT:    addis r3, r2, .LCPI23_7@toc@ha
1455; PWR9LE-NEXT:    vperm v6, v2, v2, v6
1456; PWR9LE-NEXT:    vextsb2d v1, v1
1457; PWR9LE-NEXT:    vaddudm v3, v3, v5
1458; PWR9LE-NEXT:    addi r3, r3, .LCPI23_7@toc@l
1459; PWR9LE-NEXT:    lxv v8, 0(r3)
1460; PWR9LE-NEXT:    vextsb2d v6, v6
1461; PWR9LE-NEXT:    vperm v7, v2, v2, v7
1462; PWR9LE-NEXT:    vaddudm v1, v6, v1
1463; PWR9LE-NEXT:    vextsb2d v7, v7
1464; PWR9LE-NEXT:    vperm v2, v2, v2, v8
1465; PWR9LE-NEXT:    vextsb2d v2, v2
1466; PWR9LE-NEXT:    vaddudm v2, v2, v7
1467; PWR9LE-NEXT:    vaddudm v2, v1, v2
1468; PWR9LE-NEXT:    vaddudm v2, v2, v3
1469; PWR9LE-NEXT:    xxswapd v3, v2
1470; PWR9LE-NEXT:    vaddudm v2, v2, v3
1471; PWR9LE-NEXT:    mfvsrld r3, v2
1472; PWR9LE-NEXT:    blr
1473;
1474; PWR9BE-LABEL: v16i8tov16i64_sign:
1475; PWR9BE:       # %bb.0: # %entry
1476; PWR9BE-NEXT:    addis r3, r2, .LCPI23_0@toc@ha
1477; PWR9BE-NEXT:    addi r3, r3, .LCPI23_0@toc@l
1478; PWR9BE-NEXT:    lxv v3, 0(r3)
1479; PWR9BE-NEXT:    addis r3, r2, .LCPI23_1@toc@ha
1480; PWR9BE-NEXT:    addi r3, r3, .LCPI23_1@toc@l
1481; PWR9BE-NEXT:    lxv v4, 0(r3)
1482; PWR9BE-NEXT:    addis r3, r2, .LCPI23_2@toc@ha
1483; PWR9BE-NEXT:    vperm v3, v2, v2, v3
1484; PWR9BE-NEXT:    addi r3, r3, .LCPI23_2@toc@l
1485; PWR9BE-NEXT:    lxv v5, 0(r3)
1486; PWR9BE-NEXT:    addis r3, r2, .LCPI23_3@toc@ha
1487; PWR9BE-NEXT:    vextsb2d v3, v3
1488; PWR9BE-NEXT:    vperm v4, v2, v2, v4
1489; PWR9BE-NEXT:    addi r3, r3, .LCPI23_3@toc@l
1490; PWR9BE-NEXT:    lxv v0, 0(r3)
1491; PWR9BE-NEXT:    addis r3, r2, .LCPI23_4@toc@ha
1492; PWR9BE-NEXT:    vextsb2d v4, v4
1493; PWR9BE-NEXT:    vperm v5, v2, v2, v5
1494; PWR9BE-NEXT:    addi r3, r3, .LCPI23_4@toc@l
1495; PWR9BE-NEXT:    vaddudm v3, v4, v3
1496; PWR9BE-NEXT:    lxv v1, 0(r3)
1497; PWR9BE-NEXT:    addis r3, r2, .LCPI23_5@toc@ha
1498; PWR9BE-NEXT:    vextsb2d v5, v5
1499; PWR9BE-NEXT:    vperm v0, v2, v2, v0
1500; PWR9BE-NEXT:    addi r3, r3, .LCPI23_5@toc@l
1501; PWR9BE-NEXT:    lxv v6, 0(r3)
1502; PWR9BE-NEXT:    addis r3, r2, .LCPI23_6@toc@ha
1503; PWR9BE-NEXT:    vperm v1, v2, v2, v1
1504; PWR9BE-NEXT:    vextsb2d v0, v0
1505; PWR9BE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
1506; PWR9BE-NEXT:    vaddudm v5, v0, v5
1507; PWR9BE-NEXT:    lxv v7, 0(r3)
1508; PWR9BE-NEXT:    addis r3, r2, .LCPI23_7@toc@ha
1509; PWR9BE-NEXT:    vperm v6, v2, v2, v6
1510; PWR9BE-NEXT:    vextsb2d v1, v1
1511; PWR9BE-NEXT:    vaddudm v3, v3, v5
1512; PWR9BE-NEXT:    addi r3, r3, .LCPI23_7@toc@l
1513; PWR9BE-NEXT:    lxv v8, 0(r3)
1514; PWR9BE-NEXT:    vextsb2d v6, v6
1515; PWR9BE-NEXT:    vperm v7, v2, v2, v7
1516; PWR9BE-NEXT:    vaddudm v1, v6, v1
1517; PWR9BE-NEXT:    vextsb2d v7, v7
1518; PWR9BE-NEXT:    vperm v2, v2, v2, v8
1519; PWR9BE-NEXT:    vextsb2d v2, v2
1520; PWR9BE-NEXT:    vaddudm v2, v2, v7
1521; PWR9BE-NEXT:    vaddudm v2, v1, v2
1522; PWR9BE-NEXT:    vaddudm v2, v2, v3
1523; PWR9BE-NEXT:    xxswapd v3, v2
1524; PWR9BE-NEXT:    vaddudm v2, v2, v3
1525; PWR9BE-NEXT:    mfvsrd r3, v2
1526; PWR9BE-NEXT:    blr
1527;
1528; PWR10LE-LABEL: v16i8tov16i64_sign:
1529; PWR10LE:       # %bb.0: # %entry
1530; PWR10LE-NEXT:    plxv v3, .LCPI23_0@PCREL(0), 1
1531; PWR10LE-NEXT:    plxv v4, .LCPI23_1@PCREL(0), 1
1532; PWR10LE-NEXT:    vperm v3, v2, v2, v3
1533; PWR10LE-NEXT:    plxv v5, .LCPI23_2@PCREL(0), 1
1534; PWR10LE-NEXT:    plxv v0, .LCPI23_3@PCREL(0), 1
1535; PWR10LE-NEXT:    plxv v1, .LCPI23_4@PCREL(0), 1
1536; PWR10LE-NEXT:    plxv v6, .LCPI23_5@PCREL(0), 1
1537; PWR10LE-NEXT:    plxv v7, .LCPI23_6@PCREL(0), 1
1538; PWR10LE-NEXT:    plxv v8, .LCPI23_7@PCREL(0), 1
1539; PWR10LE-NEXT:    vperm v4, v2, v2, v4
1540; PWR10LE-NEXT:    vperm v5, v2, v2, v5
1541; PWR10LE-NEXT:    vperm v0, v2, v2, v0
1542; PWR10LE-NEXT:    vperm v1, v2, v2, v1
1543; PWR10LE-NEXT:    vperm v6, v2, v2, v6
1544; PWR10LE-NEXT:    vperm v7, v2, v2, v7
1545; PWR10LE-NEXT:    vperm v2, v2, v2, v8
1546; PWR10LE-NEXT:    vextsb2d v5, v5
1547; PWR10LE-NEXT:    vextsb2d v0, v0
1548; PWR10LE-NEXT:    vextsb2d v7, v7
1549; PWR10LE-NEXT:    vextsb2d v2, v2
1550; PWR10LE-NEXT:    vextsb2d v3, v3
1551; PWR10LE-NEXT:    vextsb2d v4, v4
1552; PWR10LE-NEXT:    vextsb2d v1, v1
1553; PWR10LE-NEXT:    vextsb2d v6, v6
1554; PWR10LE-NEXT:    vaddudm v2, v2, v7
1555; PWR10LE-NEXT:    vaddudm v5, v0, v5
1556; PWR10LE-NEXT:    vaddudm v3, v4, v3
1557; PWR10LE-NEXT:    vaddudm v3, v3, v5
1558; PWR10LE-NEXT:    vaddudm v4, v6, v1
1559; PWR10LE-NEXT:    vaddudm v2, v4, v2
1560; PWR10LE-NEXT:    vaddudm v2, v2, v3
1561; PWR10LE-NEXT:    xxswapd v3, v2
1562; PWR10LE-NEXT:    vaddudm v2, v2, v3
1563; PWR10LE-NEXT:    mfvsrld r3, v2
1564; PWR10LE-NEXT:    blr
1565;
1566; PWR10BE-LABEL: v16i8tov16i64_sign:
1567; PWR10BE:       # %bb.0: # %entry
1568; PWR10BE-NEXT:    addis r3, r2, .LCPI23_0@toc@ha
1569; PWR10BE-NEXT:    addi r3, r3, .LCPI23_0@toc@l
1570; PWR10BE-NEXT:    lxv v3, 0(r3)
1571; PWR10BE-NEXT:    addis r3, r2, .LCPI23_1@toc@ha
1572; PWR10BE-NEXT:    addi r3, r3, .LCPI23_1@toc@l
1573; PWR10BE-NEXT:    lxv v4, 0(r3)
1574; PWR10BE-NEXT:    addis r3, r2, .LCPI23_2@toc@ha
1575; PWR10BE-NEXT:    vperm v3, v2, v2, v3
1576; PWR10BE-NEXT:    addi r3, r3, .LCPI23_2@toc@l
1577; PWR10BE-NEXT:    vextsb2d v3, v3
1578; PWR10BE-NEXT:    lxv v5, 0(r3)
1579; PWR10BE-NEXT:    addis r3, r2, .LCPI23_3@toc@ha
1580; PWR10BE-NEXT:    vperm v4, v2, v2, v4
1581; PWR10BE-NEXT:    addi r3, r3, .LCPI23_3@toc@l
1582; PWR10BE-NEXT:    vextsb2d v4, v4
1583; PWR10BE-NEXT:    lxv v0, 0(r3)
1584; PWR10BE-NEXT:    addis r3, r2, .LCPI23_4@toc@ha
1585; PWR10BE-NEXT:    vperm v5, v2, v2, v5
1586; PWR10BE-NEXT:    addi r3, r3, .LCPI23_4@toc@l
1587; PWR10BE-NEXT:    vextsb2d v5, v5
1588; PWR10BE-NEXT:    lxv v1, 0(r3)
1589; PWR10BE-NEXT:    addis r3, r2, .LCPI23_5@toc@ha
1590; PWR10BE-NEXT:    vperm v0, v2, v2, v0
1591; PWR10BE-NEXT:    addi r3, r3, .LCPI23_5@toc@l
1592; PWR10BE-NEXT:    vextsb2d v0, v0
1593; PWR10BE-NEXT:    lxv v6, 0(r3)
1594; PWR10BE-NEXT:    addis r3, r2, .LCPI23_6@toc@ha
1595; PWR10BE-NEXT:    vperm v1, v2, v2, v1
1596; PWR10BE-NEXT:    vaddudm v5, v0, v5
1597; PWR10BE-NEXT:    vaddudm v3, v4, v3
1598; PWR10BE-NEXT:    vaddudm v3, v3, v5
1599; PWR10BE-NEXT:    addi r3, r3, .LCPI23_6@toc@l
1600; PWR10BE-NEXT:    vextsb2d v1, v1
1601; PWR10BE-NEXT:    lxv v7, 0(r3)
1602; PWR10BE-NEXT:    addis r3, r2, .LCPI23_7@toc@ha
1603; PWR10BE-NEXT:    vperm v6, v2, v2, v6
1604; PWR10BE-NEXT:    addi r3, r3, .LCPI23_7@toc@l
1605; PWR10BE-NEXT:    vextsb2d v6, v6
1606; PWR10BE-NEXT:    lxv v8, 0(r3)
1607; PWR10BE-NEXT:    vperm v7, v2, v2, v7
1608; PWR10BE-NEXT:    vextsb2d v7, v7
1609; PWR10BE-NEXT:    vperm v2, v2, v2, v8
1610; PWR10BE-NEXT:    vextsb2d v2, v2
1611; PWR10BE-NEXT:    vaddudm v2, v2, v7
1612; PWR10BE-NEXT:    vaddudm v4, v6, v1
1613; PWR10BE-NEXT:    vaddudm v2, v4, v2
1614; PWR10BE-NEXT:    vaddudm v2, v2, v3
1615; PWR10BE-NEXT:    xxswapd v3, v2
1616; PWR10BE-NEXT:    vaddudm v2, v2, v3
1617; PWR10BE-NEXT:    mfvsrd r3, v2
1618; PWR10BE-NEXT:    blr
1619entry:
1620  %0 = sext <16 x i8> %a to <16 x i64>
1621  %1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %0)
1622  ret i64 %1
1623}
1624
1625define dso_local i64 @v16i8tov16i64_zero(<16 x i8> %a) local_unnamed_addr #0 {
1626; PWR9LE-LABEL: v16i8tov16i64_zero:
1627; PWR9LE:       # %bb.0: # %entry
1628; PWR9LE-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
1629; PWR9LE-NEXT:    xxlxor v4, v4, v4
1630; PWR9LE-NEXT:    addi r3, r3, .LCPI24_0@toc@l
1631; PWR9LE-NEXT:    lxv v3, 0(r3)
1632; PWR9LE-NEXT:    addis r3, r2, .LCPI24_1@toc@ha
1633; PWR9LE-NEXT:    addi r3, r3, .LCPI24_1@toc@l
1634; PWR9LE-NEXT:    lxv v5, 0(r3)
1635; PWR9LE-NEXT:    addis r3, r2, .LCPI24_2@toc@ha
1636; PWR9LE-NEXT:    vperm v3, v4, v2, v3
1637; PWR9LE-NEXT:    addi r3, r3, .LCPI24_2@toc@l
1638; PWR9LE-NEXT:    lxv v0, 0(r3)
1639; PWR9LE-NEXT:    addis r3, r2, .LCPI24_3@toc@ha
1640; PWR9LE-NEXT:    vperm v5, v4, v2, v5
1641; PWR9LE-NEXT:    addi r3, r3, .LCPI24_3@toc@l
1642; PWR9LE-NEXT:    lxv v1, 0(r3)
1643; PWR9LE-NEXT:    addis r3, r2, .LCPI24_4@toc@ha
1644; PWR9LE-NEXT:    vaddudm v3, v5, v3
1645; PWR9LE-NEXT:    vperm v0, v4, v2, v0
1646; PWR9LE-NEXT:    addi r3, r3, .LCPI24_4@toc@l
1647; PWR9LE-NEXT:    lxv v6, 0(r3)
1648; PWR9LE-NEXT:    addis r3, r2, .LCPI24_5@toc@ha
1649; PWR9LE-NEXT:    vperm v1, v4, v2, v1
1650; PWR9LE-NEXT:    addi r3, r3, .LCPI24_5@toc@l
1651; PWR9LE-NEXT:    lxv v7, 0(r3)
1652; PWR9LE-NEXT:    addis r3, r2, .LCPI24_6@toc@ha
1653; PWR9LE-NEXT:    vaddudm v0, v1, v0
1654; PWR9LE-NEXT:    vperm v6, v4, v2, v6
1655; PWR9LE-NEXT:    addi r3, r3, .LCPI24_6@toc@l
1656; PWR9LE-NEXT:    lxv v8, 0(r3)
1657; PWR9LE-NEXT:    addis r3, r2, .LCPI24_7@toc@ha
1658; PWR9LE-NEXT:    vaddudm v3, v3, v0
1659; PWR9LE-NEXT:    vperm v7, v4, v2, v7
1660; PWR9LE-NEXT:    addi r3, r3, .LCPI24_7@toc@l
1661; PWR9LE-NEXT:    lxv v9, 0(r3)
1662; PWR9LE-NEXT:    vperm v8, v4, v2, v8
1663; PWR9LE-NEXT:    vperm v2, v4, v2, v9
1664; PWR9LE-NEXT:    vaddudm v4, v7, v6
1665; PWR9LE-NEXT:    vaddudm v2, v2, v8
1666; PWR9LE-NEXT:    vaddudm v2, v4, v2
1667; PWR9LE-NEXT:    vaddudm v2, v2, v3
1668; PWR9LE-NEXT:    xxswapd v3, v2
1669; PWR9LE-NEXT:    vaddudm v2, v2, v3
1670; PWR9LE-NEXT:    mfvsrld r3, v2
1671; PWR9LE-NEXT:    blr
1672;
1673; PWR9BE-LABEL: v16i8tov16i64_zero:
1674; PWR9BE:       # %bb.0: # %entry
1675; PWR9BE-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
1676; PWR9BE-NEXT:    xxlxor v4, v4, v4
1677; PWR9BE-NEXT:    addi r3, r3, .LCPI24_0@toc@l
1678; PWR9BE-NEXT:    lxv v3, 0(r3)
1679; PWR9BE-NEXT:    addis r3, r2, .LCPI24_1@toc@ha
1680; PWR9BE-NEXT:    addi r3, r3, .LCPI24_1@toc@l
1681; PWR9BE-NEXT:    lxv v5, 0(r3)
1682; PWR9BE-NEXT:    addis r3, r2, .LCPI24_2@toc@ha
1683; PWR9BE-NEXT:    vperm v3, v4, v2, v3
1684; PWR9BE-NEXT:    addi r3, r3, .LCPI24_2@toc@l
1685; PWR9BE-NEXT:    lxv v0, 0(r3)
1686; PWR9BE-NEXT:    addis r3, r2, .LCPI24_3@toc@ha
1687; PWR9BE-NEXT:    vperm v5, v4, v2, v5
1688; PWR9BE-NEXT:    addi r3, r3, .LCPI24_3@toc@l
1689; PWR9BE-NEXT:    lxv v1, 0(r3)
1690; PWR9BE-NEXT:    addis r3, r2, .LCPI24_4@toc@ha
1691; PWR9BE-NEXT:    vaddudm v3, v5, v3
1692; PWR9BE-NEXT:    vperm v0, v4, v2, v0
1693; PWR9BE-NEXT:    addi r3, r3, .LCPI24_4@toc@l
1694; PWR9BE-NEXT:    lxv v6, 0(r3)
1695; PWR9BE-NEXT:    addis r3, r2, .LCPI24_5@toc@ha
1696; PWR9BE-NEXT:    vperm v1, v4, v2, v1
1697; PWR9BE-NEXT:    addi r3, r3, .LCPI24_5@toc@l
1698; PWR9BE-NEXT:    lxv v7, 0(r3)
1699; PWR9BE-NEXT:    addis r3, r2, .LCPI24_6@toc@ha
1700; PWR9BE-NEXT:    vaddudm v0, v1, v0
1701; PWR9BE-NEXT:    vperm v6, v4, v2, v6
1702; PWR9BE-NEXT:    addi r3, r3, .LCPI24_6@toc@l
1703; PWR9BE-NEXT:    lxv v8, 0(r3)
1704; PWR9BE-NEXT:    addis r3, r2, .LCPI24_7@toc@ha
1705; PWR9BE-NEXT:    vaddudm v3, v3, v0
1706; PWR9BE-NEXT:    vperm v7, v4, v2, v7
1707; PWR9BE-NEXT:    addi r3, r3, .LCPI24_7@toc@l
1708; PWR9BE-NEXT:    lxv v9, 0(r3)
1709; PWR9BE-NEXT:    vperm v8, v4, v2, v8
1710; PWR9BE-NEXT:    vperm v2, v4, v2, v9
1711; PWR9BE-NEXT:    vaddudm v4, v7, v6
1712; PWR9BE-NEXT:    vaddudm v2, v2, v8
1713; PWR9BE-NEXT:    vaddudm v2, v4, v2
1714; PWR9BE-NEXT:    vaddudm v2, v2, v3
1715; PWR9BE-NEXT:    xxswapd v3, v2
1716; PWR9BE-NEXT:    vaddudm v2, v2, v3
1717; PWR9BE-NEXT:    mfvsrd r3, v2
1718; PWR9BE-NEXT:    blr
1719;
1720; PWR10LE-LABEL: v16i8tov16i64_zero:
1721; PWR10LE:       # %bb.0: # %entry
1722; PWR10LE-NEXT:    plxv v3, .LCPI24_0@PCREL(0), 1
1723; PWR10LE-NEXT:    plxv v5, .LCPI24_1@PCREL(0), 1
1724; PWR10LE-NEXT:    xxlxor v4, v4, v4
1725; PWR10LE-NEXT:    vperm v3, v4, v2, v3
1726; PWR10LE-NEXT:    plxv v0, .LCPI24_2@PCREL(0), 1
1727; PWR10LE-NEXT:    plxv v1, .LCPI24_3@PCREL(0), 1
1728; PWR10LE-NEXT:    plxv v6, .LCPI24_4@PCREL(0), 1
1729; PWR10LE-NEXT:    plxv v7, .LCPI24_5@PCREL(0), 1
1730; PWR10LE-NEXT:    plxv v8, .LCPI24_6@PCREL(0), 1
1731; PWR10LE-NEXT:    plxv v9, .LCPI24_7@PCREL(0), 1
1732; PWR10LE-NEXT:    vperm v5, v4, v2, v5
1733; PWR10LE-NEXT:    vperm v0, v4, v2, v0
1734; PWR10LE-NEXT:    vperm v1, v4, v2, v1
1735; PWR10LE-NEXT:    vperm v6, v4, v2, v6
1736; PWR10LE-NEXT:    vperm v7, v4, v2, v7
1737; PWR10LE-NEXT:    vperm v8, v4, v2, v8
1738; PWR10LE-NEXT:    vperm v2, v4, v2, v9
1739; PWR10LE-NEXT:    vaddudm v2, v2, v8
1740; PWR10LE-NEXT:    vaddudm v4, v1, v0
1741; PWR10LE-NEXT:    vaddudm v3, v5, v3
1742; PWR10LE-NEXT:    vaddudm v3, v3, v4
1743; PWR10LE-NEXT:    vaddudm v4, v7, v6
1744; PWR10LE-NEXT:    vaddudm v2, v4, v2
1745; PWR10LE-NEXT:    vaddudm v2, v2, v3
1746; PWR10LE-NEXT:    xxswapd v3, v2
1747; PWR10LE-NEXT:    vaddudm v2, v2, v3
1748; PWR10LE-NEXT:    mfvsrld r3, v2
1749; PWR10LE-NEXT:    blr
1750;
1751; PWR10BE-LABEL: v16i8tov16i64_zero:
1752; PWR10BE:       # %bb.0: # %entry
1753; PWR10BE-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
1754; PWR10BE-NEXT:    xxlxor v4, v4, v4
1755; PWR10BE-NEXT:    addi r3, r3, .LCPI24_0@toc@l
1756; PWR10BE-NEXT:    lxv v3, 0(r3)
1757; PWR10BE-NEXT:    addis r3, r2, .LCPI24_1@toc@ha
1758; PWR10BE-NEXT:    addi r3, r3, .LCPI24_1@toc@l
1759; PWR10BE-NEXT:    lxv v5, 0(r3)
1760; PWR10BE-NEXT:    addis r3, r2, .LCPI24_2@toc@ha
1761; PWR10BE-NEXT:    vperm v3, v4, v2, v3
1762; PWR10BE-NEXT:    addi r3, r3, .LCPI24_2@toc@l
1763; PWR10BE-NEXT:    lxv v0, 0(r3)
1764; PWR10BE-NEXT:    addis r3, r2, .LCPI24_3@toc@ha
1765; PWR10BE-NEXT:    vperm v5, v4, v2, v5
1766; PWR10BE-NEXT:    addi r3, r3, .LCPI24_3@toc@l
1767; PWR10BE-NEXT:    lxv v1, 0(r3)
1768; PWR10BE-NEXT:    addis r3, r2, .LCPI24_4@toc@ha
1769; PWR10BE-NEXT:    vperm v0, v4, v2, v0
1770; PWR10BE-NEXT:    addi r3, r3, .LCPI24_4@toc@l
1771; PWR10BE-NEXT:    lxv v6, 0(r3)
1772; PWR10BE-NEXT:    addis r3, r2, .LCPI24_5@toc@ha
1773; PWR10BE-NEXT:    vperm v1, v4, v2, v1
1774; PWR10BE-NEXT:    addi r3, r3, .LCPI24_5@toc@l
1775; PWR10BE-NEXT:    lxv v7, 0(r3)
1776; PWR10BE-NEXT:    addis r3, r2, .LCPI24_6@toc@ha
1777; PWR10BE-NEXT:    vperm v6, v4, v2, v6
1778; PWR10BE-NEXT:    addi r3, r3, .LCPI24_6@toc@l
1779; PWR10BE-NEXT:    lxv v8, 0(r3)
1780; PWR10BE-NEXT:    addis r3, r2, .LCPI24_7@toc@ha
1781; PWR10BE-NEXT:    vperm v7, v4, v2, v7
1782; PWR10BE-NEXT:    addi r3, r3, .LCPI24_7@toc@l
1783; PWR10BE-NEXT:    lxv v9, 0(r3)
1784; PWR10BE-NEXT:    vperm v8, v4, v2, v8
1785; PWR10BE-NEXT:    vperm v2, v4, v2, v9
1786; PWR10BE-NEXT:    vaddudm v4, v1, v0
1787; PWR10BE-NEXT:    vaddudm v3, v5, v3
1788; PWR10BE-NEXT:    vaddudm v3, v3, v4
1789; PWR10BE-NEXT:    vaddudm v2, v2, v8
1790; PWR10BE-NEXT:    vaddudm v4, v7, v6
1791; PWR10BE-NEXT:    vaddudm v2, v4, v2
1792; PWR10BE-NEXT:    vaddudm v2, v2, v3
1793; PWR10BE-NEXT:    xxswapd v3, v2
1794; PWR10BE-NEXT:    vaddudm v2, v2, v3
1795; PWR10BE-NEXT:    mfvsrd r3, v2
1796; PWR10BE-NEXT:    blr
1797entry:
1798  %0 = zext <16 x i8> %a to <16 x i64>
1799  %1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %0)
1800  ret i64 %1
1801}
1802
; Declarations of the llvm.vector.reduce.add intrinsic for <2 x i64>,
; <4 x i64>, <8 x i64> and <16 x i64> operands. Each returns a single i64
; and carries attribute group #0.
; The v16i64 form is called by v16i8tov16i64_zero directly above; the
; narrower forms are presumably used by earlier i64 tests in this file
; (not visible from here, to be confirmed).
1803declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>) #0
1804declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>) #0
1805declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>) #0
1806declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>) #0
1807
; Attribute group #0 (nounwind only), referenced as `#0` by the test
; functions and the intrinsic declarations in this file.
1808attributes #0 = { nounwind }
1809