; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

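; Each function below extracts every element of a fixed-length integer vector,
; combines the first two elements with xor, and accumulates the rest with add.
; The leading xor of elements 0 and 1 is expected to fold into a two-element
; vredxor.vs reduction, while the remaining elements are read back with
; vslidedown.vi/vmv.x.s or, for the wider types, through a stack spill.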
define i8 @explode_2xi8(<2 x i8> %v) {
; CHECK-LABEL: explode_2xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i8> %v, i32 0
  %e1 = extractelement <2 x i8> %v, i32 1
  %add0 = xor i8 %e0, %e1
  ret i8 %add0
}

define i8 @explode_4xi8(<4 x i8> %v) {
; CHECK-LABEL: explode_4xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a2, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a2, a0
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i8> %v, i32 0
  %e1 = extractelement <4 x i8> %v, i32 1
  %e2 = extractelement <4 x i8> %v, i32 2
  %e3 = extractelement <4 x i8> %v, i32 3
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  ret i8 %add2
}


define i8 @explode_8xi8(<8 x i8> %v) {
; CHECK-LABEL: explode_8xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a6, a0
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i8> %v, i32 0
  %e1 = extractelement <8 x i8> %v, i32 1
  %e2 = extractelement <8 x i8> %v, i32 2
  %e3 = extractelement <8 x i8> %v, i32 3
  %e4 = extractelement <8 x i8> %v, i32 4
  %e5 = extractelement <8 x i8> %v, i32 5
  %e6 = extractelement <8 x i8> %v, i32 6
  %e7 = extractelement <8 x i8> %v, i32 7
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  ret i8 %add6
}

define i8 @explode_16xi8(<16 x i8> %v) {
; CHECK-LABEL: explode_16xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 8
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 9
; CHECK-NEXT:    vmv.x.s a7, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 10
; CHECK-NEXT:    vmv.x.s t0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 11
; CHECK-NEXT:    vmv.x.s t1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 12
; CHECK-NEXT:    vmv.x.s t2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 13
; CHECK-NEXT:    vmv.x.s t3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 14
; CHECK-NEXT:    vmv.x.s t4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 15
; CHECK-NEXT:    vmv.x.s t5, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s t6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a5, a5, a6
; CHECK-NEXT:    add t1, t1, t2
; CHECK-NEXT:    add a0, t6, a0
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a5, a5, a7
; CHECK-NEXT:    add t1, t1, t3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a5, a5, t0
; CHECK-NEXT:    add t1, t1, t4
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    add t1, t1, t5
; CHECK-NEXT:    add a0, a0, t1
; CHECK-NEXT:    ret
  %e0 = extractelement <16 x i8> %v, i32 0
  %e1 = extractelement <16 x i8> %v, i32 1
  %e2 = extractelement <16 x i8> %v, i32 2
  %e3 = extractelement <16 x i8> %v, i32 3
  %e4 = extractelement <16 x i8> %v, i32 4
  %e5 = extractelement <16 x i8> %v, i32 5
  %e6 = extractelement <16 x i8> %v, i32 6
  %e7 = extractelement <16 x i8> %v, i32 7
  %e8 = extractelement <16 x i8> %v, i32 8
  %e9 = extractelement <16 x i8> %v, i32 9
  %e10 = extractelement <16 x i8> %v, i32 10
  %e11 = extractelement <16 x i8> %v, i32 11
  %e12 = extractelement <16 x i8> %v, i32 12
  %e13 = extractelement <16 x i8> %v, i32 13
  %e14 = extractelement <16 x i8> %v, i32 14
  %e15 = extractelement <16 x i8> %v, i32 15
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  %add7 = add i8 %add6, %e8
  %add8 = add i8 %add7, %e9
  %add9 = add i8 %add8, %e10
  %add10 = add i8 %add9, %e11
  %add11 = add i8 %add10, %e12
  %add12 = add i8 %add11, %e13
  %add13 = add i8 %add12, %e14
  %add14 = add i8 %add13, %e15
  ret i8 %add14
}

define i16 @explode_2xi16(<2 x i16> %v) {
; CHECK-LABEL: explode_2xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i16> %v, i32 0
  %e1 = extractelement <2 x i16> %v, i32 1
  %add0 = xor i16 %e0, %e1
  ret i16 %add0
}

define i16 @explode_4xi16(<4 x i16> %v) {
; CHECK-LABEL: explode_4xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a2, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a0, a2, a0
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i16> %v, i32 0
  %e1 = extractelement <4 x i16> %v, i32 1
  %e2 = extractelement <4 x i16> %v, i32 2
  %e3 = extractelement <4 x i16> %v, i32 3
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  ret i16 %add2
}


define i16 @explode_8xi16(<8 x i16> %v) {
; CHECK-LABEL: explode_8xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 7
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a6, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a6, a0
; CHECK-NEXT:    add a2, a2, a4
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a0, a0, a5
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i16> %v, i32 0
  %e1 = extractelement <8 x i16> %v, i32 1
  %e2 = extractelement <8 x i16> %v, i32 2
  %e3 = extractelement <8 x i16> %v, i32 3
  %e4 = extractelement <8 x i16> %v, i32 4
  %e5 = extractelement <8 x i16> %v, i32 5
  %e6 = extractelement <8 x i16> %v, i32 6
  %e7 = extractelement <8 x i16> %v, i32 7
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  ret i16 %add6
}

define i16 @explode_16xi16(<16 x i16> %v) {
; CHECK-LABEL: explode_16xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v10, v8, 8
; CHECK-NEXT:    vmv.x.s a0, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 9
; CHECK-NEXT:    vmv.x.s a1, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 10
; CHECK-NEXT:    vmv.x.s a2, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 11
; CHECK-NEXT:    vmv.x.s a3, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 12
; CHECK-NEXT:    vmv.x.s a4, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 13
; CHECK-NEXT:    vmv.x.s a5, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 14
; CHECK-NEXT:    vmv.x.s a6, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 15
; CHECK-NEXT:    vmv.x.s a7, v10
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vslidedown.vi v10, v8, 3
; CHECK-NEXT:    vmv.x.s t0, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s t1, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 5
; CHECK-NEXT:    vmv.x.s t2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s t3, v10
; CHECK-NEXT:    vslidedown.vi v10, v8, 7
; CHECK-NEXT:    vmv.x.s t4, v9
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vmv.x.s t5, v10
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s t6, v8
; CHECK-NEXT:    add t0, t0, t1
; CHECK-NEXT:    add t2, t2, t3
; CHECK-NEXT:    add a0, t5, a0
; CHECK-NEXT:    add a3, a3, a4
; CHECK-NEXT:    add t0, t6, t0
; CHECK-NEXT:    add t2, t2, t4
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a3, a3, a5
; CHECK-NEXT:    add t0, t0, t2
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a3, a3, a6
; CHECK-NEXT:    add a0, t0, a0
; CHECK-NEXT:    add a3, a3, a7
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:    ret
  %e0 = extractelement <16 x i16> %v, i32 0
  %e1 = extractelement <16 x i16> %v, i32 1
  %e2 = extractelement <16 x i16> %v, i32 2
  %e3 = extractelement <16 x i16> %v, i32 3
  %e4 = extractelement <16 x i16> %v, i32 4
  %e5 = extractelement <16 x i16> %v, i32 5
  %e6 = extractelement <16 x i16> %v, i32 6
  %e7 = extractelement <16 x i16> %v, i32 7
  %e8 = extractelement <16 x i16> %v, i32 8
  %e9 = extractelement <16 x i16> %v, i32 9
  %e10 = extractelement <16 x i16> %v, i32 10
  %e11 = extractelement <16 x i16> %v, i32 11
  %e12 = extractelement <16 x i16> %v, i32 12
  %e13 = extractelement <16 x i16> %v, i32 13
  %e14 = extractelement <16 x i16> %v, i32 14
  %e15 = extractelement <16 x i16> %v, i32 15
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  %add7 = add i16 %add6, %e8
  %add8 = add i16 %add7, %e9
  %add9 = add i16 %add8, %e10
  %add10 = add i16 %add9, %e11
  %add11 = add i16 %add10, %e12
  %add12 = add i16 %add11, %e13
  %add13 = add i16 %add12, %e14
  %add14 = add i16 %add13, %e15
  ret i16 %add14
}

define i32 @explode_2xi32(<2 x i32> %v) {
; CHECK-LABEL: explode_2xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.s.x v9, zero
; CHECK-NEXT:    vredxor.vs v8, v8, v9
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i32> %v, i32 0
  %e1 = extractelement <2 x i32> %v, i32 1
  %add0 = xor i32 %e0, %e1
  ret i32 %add0
}

define i32 @explode_4xi32(<4 x i32> %v) {
; RV32-LABEL: explode_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 3
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a0, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 3
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a2, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    addw a0, a2, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}


define i32 @explode_8xi32(<8 x i32> %v) {
; RV32-LABEL: explode_8xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 7
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a4, v9
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a6, v8
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a4, a6, a4
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a0, a4, a0
; RV32-NEXT:    add a0, a0, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 7
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a4, v9
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a6, v8
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a4, a6, a4
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a0, a4, a0
; RV64-NEXT:    addw a0, a0, a3
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i32> %v, i32 0
  %e1 = extractelement <8 x i32> %v, i32 1
  %e2 = extractelement <8 x i32> %v, i32 2
  %e3 = extractelement <8 x i32> %v, i32 3
  %e4 = extractelement <8 x i32> %v, i32 4
  %e5 = extractelement <8 x i32> %v, i32 5
  %e6 = extractelement <8 x i32> %v, i32 6
  %e7 = extractelement <8 x i32> %v, i32 7
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  ret i32 %add6
}

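; For <16 x i32> the source spans an m4 register group; the codegen below
; stores the whole vector to a realigned stack slot with vse32.v and reloads
; elements 8..15 with scalar lw, rather than sliding each element down.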
define i32 @explode_16xi32(<16 x i32> %v) {
; RV32-LABEL: explode_16xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vslidedown.vi v13, v8, 3
; RV32-NEXT:    mv a4, sp
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vmv.s.x v12, zero
; RV32-NEXT:    vmv.x.s a6, v13
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vse32.v v8, (a4)
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v12
; RV32-NEXT:    lw a4, 32(sp)
; RV32-NEXT:    lw a7, 36(sp)
; RV32-NEXT:    lw t0, 40(sp)
; RV32-NEXT:    lw t1, 44(sp)
; RV32-NEXT:    lw t2, 48(sp)
; RV32-NEXT:    lw t3, 52(sp)
; RV32-NEXT:    lw t4, 56(sp)
; RV32-NEXT:    lw t5, 60(sp)
; RV32-NEXT:    vmv.x.s t6, v8
; RV32-NEXT:    add a5, a5, a6
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a5, t6, a5
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a0, a5, a0
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add t2, t2, t3
; RV32-NEXT:    add a0, a0, a3
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add t2, t2, t4
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t2, t2, t5
; RV32-NEXT:    add a0, a0, t2
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    .cfi_def_cfa sp, 128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 6
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 7
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vslidedown.vi v13, v8, 3
; RV64-NEXT:    mv a4, sp
; RV64-NEXT:    vmv.x.s a5, v12
; RV64-NEXT:    vmv.s.x v12, zero
; RV64-NEXT:    vmv.x.s a6, v13
; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT:    vse32.v v8, (a4)
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v12
; RV64-NEXT:    lw a4, 32(sp)
; RV64-NEXT:    lw a7, 36(sp)
; RV64-NEXT:    lw t0, 40(sp)
; RV64-NEXT:    lw t1, 44(sp)
; RV64-NEXT:    lw t2, 48(sp)
; RV64-NEXT:    lw t3, 52(sp)
; RV64-NEXT:    lw t4, 56(sp)
; RV64-NEXT:    lw t5, 60(sp)
; RV64-NEXT:    vmv.x.s t6, v8
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a5, t6, a5
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a0, a5, a0
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add t2, t2, t3
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add t2, t2, t4
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t2, t2, t5
; RV64-NEXT:    addw a0, a0, t2
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    .cfi_def_cfa sp, 128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}

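; For the i64 tests on RV32, each element is returned as a register pair: the
; low half is read with vmv.x.s and the high half with a vsrl.vx by 32 followed
; by vmv.x.s, and the accumulation uses add/sltu carry chains.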
define i64 @explode_2xi64(<2 x i64> %v) {
; RV32-LABEL: explode_2xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v8, a1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_2xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    ret
  %e0 = extractelement <2 x i64> %v, i32 0
  %e1 = extractelement <2 x i64> %v, i32 1
  %add0 = xor i64 %e0, %e1
  ret i64 %add0
}

define i64 @explode_4xi64(<4 x i64> %v) {
; RV32-LABEL: explode_4xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vmv.s.x v12, zero
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vredxor.vs v12, v8, v12
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vsrl.vx v10, v10, a0
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vmv.x.s a4, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v12, a0
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vmv.x.s a5, v8
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    sltu a6, a1, a0
; RV32-NEXT:    add a3, a5, a3
; RV32-NEXT:    add a0, a1, a2
; RV32-NEXT:    add a3, a3, a6
; RV32-NEXT:    add a3, a3, a4
; RV32-NEXT:    sltu a1, a0, a1
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s a2, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a0, a2, a0
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i64> %v, i32 0
  %e1 = extractelement <4 x i64> %v, i32 1
  %e2 = extractelement <4 x i64> %v, i32 2
  %e3 = extractelement <4 x i64> %v, i32 3
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  ret i64 %add2
}


define i64 @explode_8xi64(<8 x i64> %v) {
; RV32-LABEL: explode_8xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vslidedown.vi v16, v8, 3
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vsrl.vx v12, v12, a0
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vmv.x.s a4, v16
; RV32-NEXT:    vsrl.vx v16, v16, a0
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 5
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vsrl.vx v12, v12, a0
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vsrl.vx v16, v16, a0
; RV32-NEXT:    vmv.x.s t0, v16
; RV32-NEXT:    vmv.s.x v16, zero
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vredxor.vs v16, v8, v16
; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 7
; RV32-NEXT:    vmv.x.s t1, v12
; RV32-NEXT:    vsrl.vx v12, v12, a0
; RV32-NEXT:    vmv.x.s t2, v8
; RV32-NEXT:    vsrl.vx v8, v8, a0
; RV32-NEXT:    vmv.x.s t3, v12
; RV32-NEXT:    vmv.x.s t4, v8
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v8, v16, a0
; RV32-NEXT:    vmv.x.s a0, v16
; RV32-NEXT:    vmv.x.s t5, v8
; RV32-NEXT:    add a2, a0, a2
; RV32-NEXT:    sltu a0, a2, a0
; RV32-NEXT:    add a1, t5, a1
; RV32-NEXT:    add a4, a2, a4
; RV32-NEXT:    add a0, a1, a0
; RV32-NEXT:    sltu a1, a4, a2
; RV32-NEXT:    add a5, a4, a5
; RV32-NEXT:    add a0, a0, a3
; RV32-NEXT:    add a1, a1, a6
; RV32-NEXT:    sltu a2, a5, a4
; RV32-NEXT:    add a7, a5, a7
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, t0
; RV32-NEXT:    sltu a1, a7, a5
; RV32-NEXT:    add t1, a7, t1
; RV32-NEXT:    add a2, a0, a2
; RV32-NEXT:    add a1, a1, t3
; RV32-NEXT:    sltu a3, t1, a7
; RV32-NEXT:    add a0, t1, t2
; RV32-NEXT:    add a1, a2, a1
; RV32-NEXT:    add a3, a3, t4
; RV32-NEXT:    add a1, a1, a3
; RV32-NEXT:    sltu a2, a0, t1
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vmv.s.x v12, zero
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vse64.v v8, (a1)
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v12
; RV64-NEXT:    ld a1, 32(sp)
; RV64-NEXT:    ld a3, 40(sp)
; RV64-NEXT:    ld a4, 48(sp)
; RV64-NEXT:    ld a5, 56(sp)
; RV64-NEXT:    vmv.x.s a6, v8
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a0, a6, a0
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    .cfi_def_cfa sp, 128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i64> %v, i32 0
  %e1 = extractelement <8 x i64> %v, i32 1
  %e2 = extractelement <8 x i64> %v, i32 2
  %e3 = extractelement <8 x i64> %v, i32 3
  %e4 = extractelement <8 x i64> %v, i32 4
  %e5 = extractelement <8 x i64> %v, i32 5
  %e6 = extractelement <8 x i64> %v, i32 6
  %e7 = extractelement <8 x i64> %v, i32 7
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  ret i64 %add6
}

define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -64
; RV32-NEXT:    .cfi_def_cfa_offset 64
; RV32-NEXT:    sw s0, 60(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 56(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 52(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 48(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 40(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 36(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s7, 32(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s8, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s9, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s10, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s11, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    .cfi_offset s1, -8
; RV32-NEXT:    .cfi_offset s2, -12
; RV32-NEXT:    .cfi_offset s3, -16
; RV32-NEXT:    .cfi_offset s4, -20
; RV32-NEXT:    .cfi_offset s5, -24
; RV32-NEXT:    .cfi_offset s6, -28
; RV32-NEXT:    .cfi_offset s7, -32
; RV32-NEXT:    .cfi_offset s8, -36
; RV32-NEXT:    .cfi_offset s9, -40
; RV32-NEXT:    .cfi_offset s10, -44
; RV32-NEXT:    .cfi_offset s11, -48
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 8 * vlenb
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v8, 2
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vslidedown.vi v0, v8, 3
; RV32-NEXT:    vslidedown.vi v16, v8, 4
; RV32-NEXT:    vmv.x.s a1, v24
; RV32-NEXT:    vsrl.vx v24, v24, a0
; RV32-NEXT:    vmv.x.s a2, v24
; RV32-NEXT:    vslidedown.vi v24, v8, 5
; RV32-NEXT:    vmv.x.s a3, v0
; RV32-NEXT:    vsrl.vx v0, v0, a0
; RV32-NEXT:    vmv.x.s a4, v0
; RV32-NEXT:    vslidedown.vi v0, v8, 6
; RV32-NEXT:    vmv.x.s a5, v16
; RV32-NEXT:    vsrl.vx v16, v16, a0
; RV32-NEXT:    vmv.x.s a6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 7
; RV32-NEXT:    vmv.x.s a7, v24
; RV32-NEXT:    vsrl.vx v24, v24, a0
; RV32-NEXT:    vmv.x.s t0, v24
; RV32-NEXT:    vslidedown.vi v24, v8, 8
; RV32-NEXT:    vmv.x.s t1, v0
; RV32-NEXT:    vsrl.vx v0, v0, a0
; RV32-NEXT:    vmv.x.s t2, v0
; RV32-NEXT:    vslidedown.vi v0, v8, 9
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vsrl.vx v16, v16, a0
; RV32-NEXT:    vmv.x.s t4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 10
; RV32-NEXT:    vmv.x.s t5, v24
; RV32-NEXT:    vsrl.vx v24, v24, a0
; RV32-NEXT:    vmv.x.s t6, v24
; RV32-NEXT:    vslidedown.vi v24, v8, 11
; RV32-NEXT:    vmv.x.s s0, v0
; RV32-NEXT:    vsrl.vx v0, v0, a0
; RV32-NEXT:    vmv.x.s s1, v0
; RV32-NEXT:    vslidedown.vi v0, v8, 12
; RV32-NEXT:    vmv.x.s s2, v16
; RV32-NEXT:    vsrl.vx v16, v16, a0
; RV32-NEXT:    vmv.x.s s3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 13
; RV32-NEXT:    addi s4, sp, 16
; RV32-NEXT:    vs8r.v v16, (s4) # Unknown-size Folded Spill
; RV32-NEXT:    vmv.x.s s4, v24
; RV32-NEXT:    vsrl.vx v24, v24, a0
; RV32-NEXT:    vmv.x.s s5, v24
; RV32-NEXT:    vslidedown.vi v24, v8, 14
; RV32-NEXT:    vmv.x.s s6, v0
; RV32-NEXT:    vsrl.vx v0, v0, a0
; RV32-NEXT:    vmv.x.s s7, v0
; RV32-NEXT:    vmv.s.x v7, zero
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vredxor.vs v16, v8, v7
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    addi s8, sp, 16
; RV32-NEXT:    vl8r.v v0, (s8) # Unknown-size Folded Reload
; RV32-NEXT:    vmv.x.s s8, v0
; RV32-NEXT:    vsrl.vx v0, v0, a0
; RV32-NEXT:    vmv.x.s s9, v0
; RV32-NEXT:    vsrl.vx v0, v24, a0
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v17, v16, a0
; RV32-NEXT:    vmv.x.s s10, v16
; RV32-NEXT:    vmv.x.s s11, v17
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a0
; RV32-NEXT:    add a2, s11, a2
; RV32-NEXT:    add a1, s10, a1
; RV32-NEXT:    sltu a0, a1, s10
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a3, a1, a3
; RV32-NEXT:    sltu a1, a3, a1
; RV32-NEXT:    add a1, a1, a6
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a5, a3, a5
; RV32-NEXT:    sltu a1, a5, a3
; RV32-NEXT:    add a1, a1, t0
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a7, a5, a7
; RV32-NEXT:    sltu a1, a7, a5
; RV32-NEXT:    add a1, a1, t2
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t1, a7, t1
; RV32-NEXT:    sltu a1, t1, a7
; RV32-NEXT:    add a1, a1, t4
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t3, t1, t3
; RV32-NEXT:    sltu a1, t3, t1
; RV32-NEXT:    add a1, a1, t6
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t5, t3, t5
; RV32-NEXT:    sltu a1, t5, t3
; RV32-NEXT:    add a1, a1, s1
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add s0, t5, s0
; RV32-NEXT:    sltu a1, s0, t5
; RV32-NEXT:    add a1, a1, s3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add s2, s0, s2
; RV32-NEXT:    sltu a1, s2, s0
; RV32-NEXT:    add a1, a1, s5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add s4, s2, s4
; RV32-NEXT:    sltu a1, s4, s2
; RV32-NEXT:    add a1, a1, s7
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add s6, s4, s6
; RV32-NEXT:    sltu a1, s6, s4
; RV32-NEXT:    add a1, a1, s9
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    add s8, s6, s8
; RV32-NEXT:    sltu a2, s8, s6
; RV32-NEXT:    add a1, a2, a1
; RV32-NEXT:    vmv.x.s a2, v24
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    vmv.x.s a1, v16
; RV32-NEXT:    add a2, s8, a2
; RV32-NEXT:    sltu a3, a2, s8
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    sltu a2, a0, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    add sp, sp, a2
; RV32-NEXT:    .cfi_def_cfa sp, 64
; RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 56(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 52(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 48(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 40(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 36(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s7, 32(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s8, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s9, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s10, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s11, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    .cfi_restore s1
; RV32-NEXT:    .cfi_restore s2
; RV32-NEXT:    .cfi_restore s3
; RV32-NEXT:    .cfi_restore s4
; RV32-NEXT:    .cfi_restore s5
; RV32-NEXT:    .cfi_restore s6
; RV32-NEXT:    .cfi_restore s7
; RV32-NEXT:    .cfi_restore s8
; RV32-NEXT:    .cfi_restore s9
; RV32-NEXT:    .cfi_restore s10
; RV32-NEXT:    .cfi_restore s11
; RV32-NEXT:    addi sp, sp, 64
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    .cfi_def_cfa_offset 256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 2
; RV64-NEXT:    vmv.x.s a0, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 3
; RV64-NEXT:    mv a1, sp
; RV64-NEXT:    vmv.x.s a2, v16
; RV64-NEXT:    vmv.s.x v16, zero
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vse64.v v8, (a1)
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v16
; RV64-NEXT:    ld a1, 32(sp)
; RV64-NEXT:    ld a3, 40(sp)
; RV64-NEXT:    ld a4, 48(sp)
; RV64-NEXT:    ld a5, 56(sp)
; RV64-NEXT:    ld a6, 64(sp)
; RV64-NEXT:    ld a7, 72(sp)
; RV64-NEXT:    ld t0, 80(sp)
; RV64-NEXT:    ld t1, 88(sp)
; RV64-NEXT:    ld t2, 96(sp)
; RV64-NEXT:    ld t3, 104(sp)
; RV64-NEXT:    ld t4, 112(sp)
; RV64-NEXT:    ld t5, 120(sp)
; RV64-NEXT:    vmv.x.s t6, v8
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a0, t6, a0
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a3, a3, a4
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add t0, t0, t1
; RV64-NEXT:    add a0, a0, a3
; RV64-NEXT:    add a5, a5, a7
; RV64-NEXT:    add t0, t0, t2
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add t0, t0, t3
; RV64-NEXT:    add a0, a0, t0
; RV64-NEXT:    add t4, t4, t5
; RV64-NEXT:    add a0, a0, t4
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    .cfi_def_cfa sp, 256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i64> %v, i32 0
  %e1 = extractelement <16 x i64> %v, i32 1
  %e2 = extractelement <16 x i64> %v, i32 2
  %e3 = extractelement <16 x i64> %v, i32 3
  %e4 = extractelement <16 x i64> %v, i32 4
  %e5 = extractelement <16 x i64> %v, i32 5
  %e6 = extractelement <16 x i64> %v, i32 6
  %e7 = extractelement <16 x i64> %v, i32 7
  %e8 = extractelement <16 x i64> %v, i32 8
  %e9 = extractelement <16 x i64> %v, i32 9
  %e10 = extractelement <16 x i64> %v, i32 10
  %e11 = extractelement <16 x i64> %v, i32 11
  %e12 = extractelement <16 x i64> %v, i32 12
  %e13 = extractelement <16 x i64> %v, i32 13
  %e14 = extractelement <16 x i64> %v, i32 14
  %e15 = extractelement <16 x i64> %v, i32 15
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  %add7 = add i64 %add6, %e8
  %add8 = add i64 %add7, %e9
  %add9 = add i64 %add8, %e10
  %add10 = add i64 %add9, %e11
  %add11 = add i64 %add10, %e12
  %add12 = add i64 %add11, %e13
  %add13 = add i64 %add12, %e14
  %add14 = add i64 %add13, %e15
  ret i64 %add14
}

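; With vscale_range(2, 2) VLEN is known to be exactly 128, so the <16 x i32>
; argument occupies exactly the four vector registers v8-v11 and each quarter
; can be indexed directly with m1 extracts instead of going through memory.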
define i32 @explode_16xi32_exact_vlen(<16 x i32> %v) vscale_range(2, 2) {
; RV32-LABEL: explode_16xi32_exact_vlen:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v9, 1
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vslidedown.vi v12, v9, 2
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v9, v9, 3
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vmv.x.s a6, v9
; RV32-NEXT:    vslidedown.vi v9, v10, 1
; RV32-NEXT:    vmv.x.s a7, v9
; RV32-NEXT:    vslidedown.vi v9, v10, 2
; RV32-NEXT:    vmv.x.s t0, v9
; RV32-NEXT:    vslidedown.vi v9, v10, 3
; RV32-NEXT:    vmv.x.s t1, v11
; RV32-NEXT:    vmv.x.s t2, v9
; RV32-NEXT:    vslidedown.vi v9, v11, 1
; RV32-NEXT:    vmv.x.s t3, v9
; RV32-NEXT:    vslidedown.vi v9, v11, 2
; RV32-NEXT:    vmv.x.s t4, v9
; RV32-NEXT:    vslidedown.vi v9, v11, 3
; RV32-NEXT:    vmv.x.s t5, v9
; RV32-NEXT:    vmv.s.x v9, zero
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vredxor.vs v8, v8, v9
; RV32-NEXT:    vmv.x.s t6, v8
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a1, a1, a3
; RV32-NEXT:    add a5, a6, a5
; RV32-NEXT:    add t1, t2, t1
; RV32-NEXT:    add a0, t6, a0
; RV32-NEXT:    add a1, a1, a4
; RV32-NEXT:    add a5, a5, a7
; RV32-NEXT:    add t1, t1, t3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a5, a5, t0
; RV32-NEXT:    add t1, t1, t4
; RV32-NEXT:    add a0, a0, a5
; RV32-NEXT:    add t1, t1, t5
; RV32-NEXT:    add a0, a0, t1
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi32_exact_vlen:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a0, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v9, 1
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vslidedown.vi v12, v9, 2
; RV64-NEXT:    vmv.x.s a4, v12
; RV64-NEXT:    vslidedown.vi v9, v9, 3
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vmv.x.s a6, v9
; RV64-NEXT:    vslidedown.vi v9, v10, 1
; RV64-NEXT:    vmv.x.s a7, v9
; RV64-NEXT:    vslidedown.vi v9, v10, 2
; RV64-NEXT:    vmv.x.s t0, v9
; RV64-NEXT:    vslidedown.vi v9, v10, 3
; RV64-NEXT:    vmv.x.s t1, v11
; RV64-NEXT:    vmv.x.s t2, v9
; RV64-NEXT:    vslidedown.vi v9, v11, 1
; RV64-NEXT:    vmv.x.s t3, v9
; RV64-NEXT:    vslidedown.vi v9, v11, 2
; RV64-NEXT:    vmv.x.s t4, v9
; RV64-NEXT:    vslidedown.vi v9, v11, 3
; RV64-NEXT:    vmv.x.s t5, v9
; RV64-NEXT:    vmv.s.x v9, zero
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vredxor.vs v8, v8, v9
; RV64-NEXT:    vmv.x.s t6, v8
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a1, a1, a3
; RV64-NEXT:    add a5, a6, a5
; RV64-NEXT:    add t1, t2, t1
; RV64-NEXT:    add a0, t6, a0
; RV64-NEXT:    add a1, a1, a4
; RV64-NEXT:    add a5, a5, a7
; RV64-NEXT:    add t1, t1, t3
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a5, a5, t0
; RV64-NEXT:    add t1, t1, t4
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add t1, t1, t5
; RV64-NEXT:    addw a0, a0, t1
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}