; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define i8 @explode_2xi8(<2 x i8> %v) {
; CHECK-LABEL: explode_2xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i8> %v, i32 0
  %e1 = extractelement <2 x i8> %v, i32 1
  %add0 = xor i8 %e0, %e1
  ret i8 %add0
}

define i8 @explode_4xi8(<4 x i8> %v) {
; CHECK-LABEL: explode_4xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a2, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a2, a0
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i8> %v, i32 0
  %e1 = extractelement <4 x i8> %v, i32 1
  %e2 = extractelement <4 x i8> %v, i32 2
  %e3 = extractelement <4 x i8> %v, i32 3
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  ret i8 %add2
}


define i8 @explode_8xi8(<8 x i8> %v) {
; CHECK-LABEL: explode_8xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a6, a0
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i8> %v, i32 0
  %e1 = extractelement <8 x i8> %v, i32 1
  %e2 = extractelement <8 x i8> %v, i32 2
  %e3 = extractelement <8 x i8> %v, i32 3
  %e4 = extractelement <8 x i8> %v, i32 4
  %e5 = extractelement <8 x i8> %v, i32 5
  %e6 = extractelement <8 x i8> %v, i32 6
  %e7 = extractelement <8 x i8> %v, i32 7
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  ret i8 %add6
}

define i8 @explode_16xi8(<16 x i8> %v) {
; CHECK-LABEL: explode_16xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 8
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 9
; CHECK-NEXT:    vmv.x.s a7, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 10
; CHECK-NEXT:    vmv.x.s t0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 11
; CHECK-NEXT:    vmv.x.s t1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 12
; CHECK-NEXT:    vmv.x.s t2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 13
; CHECK-NEXT:    vmv.x.s t3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 14
; CHECK-NEXT:    vmv.x.s t4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 15
; CHECK-NEXT:    vmv.x.s t5, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s t6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, t6, a0
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a5, a5, a6
; CHECK-NEXT:    add a5, a5, a7
; CHECK-NEXT:    add a5, a5, t0
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    add t1, t1, t2
; CHECK-NEXT:    add t1, t1, t3
; CHECK-NEXT:    add t1, t1, t4
; CHECK-NEXT:    add t1, t1, t5
; CHECK-NEXT:    add a0, a0, t1
; CHECK-NEXT:    ret
  %e0 = extractelement <16 x i8> %v, i32 0
  %e1 = extractelement <16 x i8> %v, i32 1
  %e2 = extractelement <16 x i8> %v, i32 2
  %e3 = extractelement <16 x i8> %v, i32 3
  %e4 = extractelement <16 x i8> %v, i32 4
  %e5 = extractelement <16 x i8> %v, i32 5
  %e6 = extractelement <16 x i8> %v, i32 6
  %e7 = extractelement <16 x i8> %v, i32 7
  %e8 = extractelement <16 x i8> %v, i32 8
  %e9 = extractelement <16 x i8> %v, i32 9
  %e10 = extractelement <16 x i8> %v, i32 10
  %e11 = extractelement <16 x i8> %v, i32 11
  %e12 = extractelement <16 x i8> %v, i32 12
  %e13 = extractelement <16 x i8> %v, i32 13
  %e14 = extractelement <16 x i8> %v, i32 14
  %e15 = extractelement <16 x i8> %v, i32 15
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  %add7 = add i8 %add6, %e8
  %add8 = add i8 %add7, %e9
  %add9 = add i8 %add8, %e10
  %add10 = add i8 %add9, %e11
  %add11 = add i8 %add10, %e12
  %add12 = add i8 %add11, %e13
  %add13 = add i8 %add12, %e14
  %add14 = add i8 %add13, %e15
  ret i8 %add14
}

define i16 @explode_2xi16(<2 x i16> %v) {
; CHECK-LABEL: explode_2xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i16> %v, i32 0
  %e1 = extractelement <2 x i16> %v, i32 1
  %add0 = xor i16 %e0, %e1
  ret i16 %add0
}

define i16 @explode_4xi16(<4 x i16> %v) {
; CHECK-LABEL: explode_4xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a2, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a2, a0
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i16> %v, i32 0
  %e1 = extractelement <4 x i16> %v, i32 1
  %e2 = extractelement <4 x i16> %v, i32 2
  %e3 = extractelement <4 x i16> %v, i32 3
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  ret i16 %add2
}


define i16 @explode_8xi16(<8 x i16> %v) {
; CHECK-LABEL: explode_8xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a6, a0
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i16> %v, i32 0
  %e1 = extractelement <8 x i16> %v, i32 1
  %e2 = extractelement <8 x i16> %v, i32 2
  %e3 = extractelement <8 x i16> %v, i32 3
  %e4 = extractelement <8 x i16> %v, i32 4
  %e5 = extractelement <8 x i16> %v, i32 5
  %e6 = extractelement <8 x i16> %v, i32 6
  %e7 = extractelement <8 x i16> %v, i32 7
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  ret i16 %add6
}

define i16 @explode_16xi16(<16 x i16> %v) {
; CHECK-LABEL: explode_16xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v10
; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 8
; CHECK-NEXT:    vmv.x.s a6, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 9
; CHECK-NEXT:    vmv.x.s a7, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 10
; CHECK-NEXT:    vmv.x.s t0, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 11
; CHECK-NEXT:    vmv.x.s t1, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 12
; CHECK-NEXT:    vmv.x.s t2, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 13
; CHECK-NEXT:    vmv.x.s t3, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 14
; CHECK-NEXT:    vmv.x.s t4, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 15
; CHECK-NEXT:    vmv.x.s t5, v10
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s t6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, t6, a0
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a5, a5, a6
; CHECK-NEXT:    add a5, a5, a7
; CHECK-NEXT:    add a5, a5, t0
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    add t1, t1, t2
; CHECK-NEXT:    add t1, t1, t3
; CHECK-NEXT:    add t1, t1, t4
; CHECK-NEXT:    add t1, t1, t5
; CHECK-NEXT:    add a0, a0, t1
; CHECK-NEXT:    ret
  %e0 = extractelement <16 x i16> %v, i32 0
  %e1 = extractelement <16 x i16> %v, i32 1
  %e2 = extractelement <16 x i16> %v, i32 2
  %e3 = extractelement <16 x i16> %v, i32 3
  %e4 = extractelement <16 x i16> %v, i32 4
  %e5 = extractelement <16 x i16> %v, i32 5
  %e6 = extractelement <16 x i16> %v, i32 6
  %e7 = extractelement <16 x i16> %v, i32 7
  %e8 = extractelement <16 x i16> %v, i32 8
  %e9 = extractelement <16 x i16> %v, i32 9
  %e10 = extractelement <16 x i16> %v, i32 10
  %e11 = extractelement <16 x i16> %v, i32 11
  %e12 = extractelement <16 x i16> %v, i32 12
  %e13 = extractelement <16 x i16> %v, i32 13
  %e14 = extractelement <16 x i16> %v, i32 14
  %e15 = extractelement <16 x i16> %v, i32 15
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  %add7 = add i16 %add6, %e8
  %add8 = add i16 %add7, %e9
  %add9 = add i16 %add8, %e10
  %add10 = add i16 %add9, %e11
  %add11 = add i16 %add10, %e12
  %add12 = add i16 %add11, %e13
  %add13 = add i16 %add12, %e14
  %add14 = add i16 %add13, %e15
  ret i16 %add14
}

define i32 @explode_2xi32(<2 x i32> %v) {
; CHECK-LABEL: explode_2xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i32> %v, i32 0
  %e1 = extractelement <2 x i32> %v, i32 1
  %add0 = xor i32 %e0, %e1
  ret i32 %add0
}

define i32 @explode_4xi32(<4 x i32> %v) {
; RV32-LABEL: explode_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 3
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a0, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 3
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a2, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    addw a0, a2, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}


define i32 @explode_8xi32(<8 x i32> %v) {
; RV32-LABEL: explode_8xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 7
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a6, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, a6, a0
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a2, a2, a4
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a0, a0, a5
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 7
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a6, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, a6, a0
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    addw a0, a0, a5
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i32> %v, i32 0
  %e1 = extractelement <8 x i32> %v, i32 1
  %e2 = extractelement <8 x i32> %v, i32 2
  %e3 = extractelement <8 x i32> %v, i32 3
  %e4 = extractelement <8 x i32> %v, i32 4
  %e5 = extractelement <8 x i32> %v, i32 5
  %e6 = extractelement <8 x i32> %v, i32 6
  %e7 = extractelement <8 x i32> %v, i32 7
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  ret i32 %add6
}

define i32 @explode_16xi32(<16 x i32> %v) {
; RV32-LABEL: explode_16xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    mv a6, sp
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vse32.v v8, (a6)
; RV32-NEXT:    lw a6, 32(sp)
; RV32-NEXT:    lw a7, 36(sp)
; RV32-NEXT:    lw t0, 40(sp)
; RV32-NEXT:    lw t1, 44(sp)
; RV32-NEXT:    lw t2, 48(sp)
; RV32-NEXT:    lw t3, 52(sp)
; RV32-NEXT:    lw t4, 56(sp)
; RV32-NEXT:    lw t5, 60(sp)
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s t6, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, t6, a0
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a2, a2, a4
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a5, a5, a6
; RV32-NEXT:    add a0, a0, a5
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t2, t2, t3
; RV32-NEXT:    add t2, t2, t4
; RV32-NEXT:    add t2, t2, t5
; RV32-NEXT:    add a0, a0, t2
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 6
; RV64-NEXT:    vmv.x.s a4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 7
; RV64-NEXT:    vmv.x.s a5, v12
; RV64-NEXT:    mv a6, sp
; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT:    vse32.v v8, (a6)
; RV64-NEXT:    lw a6, 32(sp)
; RV64-NEXT:    lw a7, 36(sp)
; RV64-NEXT:    lw t0, 40(sp)
; RV64-NEXT:    lw t1, 44(sp)
; RV64-NEXT:    lw t2, 48(sp)
; RV64-NEXT:    lw t3, 52(sp)
; RV64-NEXT:    lw t4, 56(sp)
; RV64-NEXT:    lw t5, 60(sp)
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s t6, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, t6, a0
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a2, a2, a4
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t2, t2, t3
; RV64-NEXT:    add t2, t2, t4
; RV64-NEXT:    add t2, t2, t5
; RV64-NEXT:    addw a0, a0, t2
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}

define i64 @explode_2xi64(<2 x i64> %v) {
; RV32-LABEL: explode_2xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_2xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e0 = extractelement <2 x i64> %v, i32 0
  %e1 = extractelement <2 x i64> %v, i32 1
  %add0 = xor i64 %e0, %e1
  ret i64 %add0
}

define i64 @explode_4xi64(<4 x i64> %v) {
; RV32-LABEL: explode_4xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s a5, v8
; RV32-NEXT:    add a2, a5, a2
; RV32-NEXT:    sltu a5, a2, a5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, a0, a5
; RV32-NEXT:    add a1, a0, a3
; RV32-NEXT:    add a0, a2, a4
; RV32-NEXT:    sltu a2, a0, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a2, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, a2, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i64> %v, i32 0
  %e1 = extractelement <4 x i64> %v, i32 1
  %e2 = extractelement <4 x i64> %v, i32 2
  %e3 = extractelement <4 x i64> %v, i32 3
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  ret i64 %add2
}


define i64 @explode_8xi64(<8 x i64> %v) {
; RV32-LABEL: explode_8xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a1, v16
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a5, v16
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vmv.x.s t0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vmv.x.s t2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vmv.x.s t4, v12
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s t5, v8
; RV32-NEXT:    add a2, t5, a2
; RV32-NEXT:    sltu t5, a2, t5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, a0, t5
; RV32-NEXT:    add a0, a0, a3
; RV32-NEXT:    add a4, a2, a4
; RV32-NEXT:    sltu a1, a4, a2
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a6, a4, a6
; RV32-NEXT:    sltu a1, a6, a4
; RV32-NEXT:    add a1, a1, a7
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t0, a6, t0
; RV32-NEXT:    sltu a1, t0, a6
; RV32-NEXT:    add a1, a1, t1
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t2, t0, t2
; RV32-NEXT:    sltu a1, t2, t0
; RV32-NEXT:    add a1, a1, t3
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    add a0, t2, t4
; RV32-NEXT:    sltu a2, a0, t2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    mv a2, sp
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ld a2, 32(sp)
; RV64-NEXT:    ld a3, 40(sp)
; RV64-NEXT:    ld a4, 48(sp)
; RV64-NEXT:    ld a5, 56(sp)
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a6, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, a6, a0
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i64> %v, i32 0
  %e1 = extractelement <8 x i64> %v, i32 1
  %e2 = extractelement <8 x i64> %v, i32 2
  %e3 = extractelement <8 x i64> %v, i32 3
  %e4 = extractelement <8 x i64> %v, i32 4
  %e5 = extractelement <8 x i64> %v, i32 5
  %e6 = extractelement <8 x i64> %v, i32 6
  %e7 = extractelement <8 x i64> %v, i32 7
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  ret i64 %add6
}

define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -64
; RV32-NEXT:    .cfi_def_cfa_offset 64
; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    .cfi_offset s2, -16
; RV32-NEXT:    .cfi_offset s3, -20
; RV32-NEXT:    .cfi_offset s4, -24
; RV32-NEXT:    .cfi_offset s5, -28
; RV32-NEXT:    .cfi_offset s6, -32
; RV32-NEXT:    .cfi_offset s7, -36
; RV32-NEXT:    .cfi_offset s8, -40
; RV32-NEXT:    .cfi_offset s9, -44
; RV32-NEXT:    .cfi_offset s10, -48
; RV32-NEXT:    .cfi_offset s11, -52
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v16, v8, 2
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s t6, v24
; RV32-NEXT:    vmv.x.s a1, v16
; RV32-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    vslidedown.vi v16, v8, 3
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s a1, v24
; RV32-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
; RV32-NEXT:    vmv.x.s a2, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 4
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s0, v24
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 5
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s1, v24
; RV32-NEXT:    vmv.x.s a4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 6
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s2, v24
; RV32-NEXT:    vmv.x.s a5, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 7
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s3, v24
; RV32-NEXT:    vmv.x.s a6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 8
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s4, v24
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 9
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s5, v24
; RV32-NEXT:    vmv.x.s t0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 10
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s6, v24
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 11
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s7, v24
; RV32-NEXT:    vmv.x.s t2, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 12
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s8, v24
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 13
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s9, v24
; RV32-NEXT:    vmv.x.s t4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 14
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s10, v24
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 15
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s11, v24
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vmv.x.s ra, v16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    add a1, a0, t6
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    lw t6, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    add t6, a0, t6
; RV32-NEXT:    sltu a0, t6, a0
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    lw a1, 4(sp) # 4-byte Folded Reload
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, t6, a2
; RV32-NEXT:    sltu a1, a2, t6
; RV32-NEXT:    add a1, a1, s0
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a3, a2, a3
; RV32-NEXT:    sltu a1, a3, a2
; RV32-NEXT:    add a1, a1, s1
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a4, a3, a4
; RV32-NEXT:    sltu a1, a4, a3
; RV32-NEXT:    add a1, a1, s2
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a5, a4, a5
; RV32-NEXT:    sltu a1, a5, a4
; RV32-NEXT:    add a1, a1, s3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a6, a5, a6
; RV32-NEXT:    sltu a1, a6, a5
; RV32-NEXT:    add a1, a1, s4
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a7, a6, a7
; RV32-NEXT:    sltu a1, a7, a6
; RV32-NEXT:    add a1, a1, s5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t0, a7, t0
; RV32-NEXT:    sltu a1, t0, a7
; RV32-NEXT:    add a1, a1, s6
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t1, t0, t1
; RV32-NEXT:    sltu a1, t1, t0
; RV32-NEXT:    add a1, a1, s7
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t2, t1, t2
; RV32-NEXT:    sltu a1, t2, t1
; RV32-NEXT:    add a1, a1, s8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t3, t2, t3
; RV32-NEXT:    sltu a1, t3, t2
; RV32-NEXT:    add a1, a1, s9
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t4, t3, t4
; RV32-NEXT:    sltu a1, t4, t3
; RV32-NEXT:    add a1, a1, s10
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t5, t4, t5
; RV32-NEXT:    sltu a1, t5, t4
; RV32-NEXT:    add a1, a1, s11
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    add a0, t5, ra
; RV32-NEXT:    sltu a2, a0, t5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 64
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    .cfi_def_cfa_offset 256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 2
; RV64-NEXT:    vmv.x.s a0, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 3
; RV64-NEXT:    vmv.x.s a1, v16
; RV64-NEXT:    mv a2, sp
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vse64.v v8, (a2)
; RV64-NEXT:    ld a2, 32(sp)
; RV64-NEXT:    ld a3, 40(sp)
; RV64-NEXT:    ld a4, 48(sp)
; RV64-NEXT:    ld a5, 56(sp)
; RV64-NEXT:    ld a6, 64(sp)
; RV64-NEXT:    ld a7, 72(sp)
; RV64-NEXT:    ld t0, 80(sp)
; RV64-NEXT:    ld t1, 88(sp)
; RV64-NEXT:    ld t2, 96(sp)
; RV64-NEXT:    ld t3, 104(sp)
; RV64-NEXT:    ld t4, 112(sp)
; RV64-NEXT:    ld t5, 120(sp)
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s t6, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, t6, a0
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add a5, a5, a7
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add t0, t0, t1
; RV64-NEXT:    add t0, t0, t2
; RV64-NEXT:    add t0, t0, t3
; RV64-NEXT:    add a0, a0, t0
; RV64-NEXT:    add t4, t4, t5
; RV64-NEXT:    add a0, a0, t4
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i64> %v, i32 0
  %e1 = extractelement <16 x i64> %v, i32 1
  %e2 = extractelement <16 x i64> %v, i32 2
  %e3 = extractelement <16 x i64> %v, i32 3
  %e4 = extractelement <16 x i64> %v, i32 4
  %e5 = extractelement <16 x i64> %v, i32 5
  %e6 = extractelement <16 x i64> %v, i32 6
  %e7 = extractelement <16 x i64> %v, i32 7
  %e8 = extractelement <16 x i64> %v, i32 8
  %e9 = extractelement <16 x i64> %v, i32 9
  %e10 = extractelement <16 x i64> %v, i32 10
  %e11 = extractelement <16 x i64> %v, i32 11
  %e12 = extractelement <16 x i64> %v, i32 12
  %e13 = extractelement <16 x i64> %v, i32 13
  %e14 = extractelement <16 x i64> %v, i32 14
  %e15 = extractelement <16 x i64> %v, i32 15
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  %add7 = add i64 %add6, %e8
  %add8 = add i64 %add7, %e9
  %add9 = add i64 %add8, %e10
  %add10 = add i64 %add9, %e11
  %add11 = add i64 %add10, %e12
  %add12 = add i64 %add11, %e13
  %add13 = add i64 %add12, %e14
  %add14 = add i64 %add13, %e15
  ret i64 %add14
}