; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

define i8 @explode_2xi8(<2 x i8> %v) {
; CHECK-LABEL: explode_2xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i8> %v, i32 0
  %e1 = extractelement <2 x i8> %v, i32 1
  %add0 = add i8 %e0, %e1
  ret i8 %add0
}

define i8 @explode_4xi8(<4 x i8> %v) {
; CHECK-LABEL: explode_4xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i8> %v, i32 0
  %e1 = extractelement <4 x i8> %v, i32 1
  %e2 = extractelement <4 x i8> %v, i32 2
  %e3 = extractelement <4 x i8> %v, i32 3
  %add0 = add i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  ret i8 %add2
}


define i8 @explode_8xi8(<8 x i8> %v) {
; CHECK-LABEL: explode_8xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a7, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a4, a4, a5
; CHECK-NEXT:    add a4, a4, a6
; CHECK-NEXT:    add a0, a0, a4
; CHECK-NEXT:    add a0, a0, a7
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i8> %v, i32 0
  %e1 = extractelement <8 x i8> %v, i32 1
  %e2 = extractelement <8 x i8> %v, i32 2
  %e3 = extractelement <8 x i8> %v, i32 3
  %e4 = extractelement <8 x i8> %v, i32 4
  %e5 = extractelement <8 x i8> %v, i32 5
  %e6 = extractelement <8 x i8> %v, i32 6
  %e7 = extractelement <8 x i8> %v, i32 7
  %add0 = add i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  ret i8 %add6
}

define i8 @explode_16xi8(<16 x i8> %v) {
; RV32-LABEL: explode_16xi8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 3
; RV32-NEXT:    vmv.x.s a3, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 4
; RV32-NEXT:    vmv.x.s a4, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 5
; RV32-NEXT:    vmv.x.s a5, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 6
; RV32-NEXT:    vmv.x.s a6, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 7
; RV32-NEXT:    vmv.x.s a7, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 8
; RV32-NEXT:    vmv.x.s t0, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 9
; RV32-NEXT:    vmv.x.s t1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 10
; RV32-NEXT:    vmv.x.s t2, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 11
; RV32-NEXT:    vmv.x.s t3, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 12
; RV32-NEXT:    vmv.x.s t4, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 13
; RV32-NEXT:    vmv.x.s t5, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 14
; RV32-NEXT:    vmv.x.s t6, v9
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a2, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 3
; RV64-NEXT:    vmv.x.s a3, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 4
; RV64-NEXT:    vmv.x.s a4, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 5
; RV64-NEXT:    vmv.x.s a5, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 6
; RV64-NEXT:    vmv.x.s a6, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 7
; RV64-NEXT:    vmv.x.s a7, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 8
; RV64-NEXT:    vmv.x.s t0, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 9
; RV64-NEXT:    vmv.x.s t1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 10
; RV64-NEXT:    vmv.x.s t2, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 11
; RV64-NEXT:    vmv.x.s t3, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 12
; RV64-NEXT:    vmv.x.s t4, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 13
; RV64-NEXT:    vmv.x.s t5, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 14
; RV64-NEXT:    vmv.x.s t6, v9
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i8> %v, i32 0
  %e1 = extractelement <16 x i8> %v, i32 1
  %e2 = extractelement <16 x i8> %v, i32 2
  %e3 = extractelement <16 x i8> %v, i32 3
  %e4 = extractelement <16 x i8> %v, i32 4
  %e5 = extractelement <16 x i8> %v, i32 5
  %e6 = extractelement <16 x i8> %v, i32 6
  %e7 = extractelement <16 x i8> %v, i32 7
  %e8 = extractelement <16 x i8> %v, i32 8
  %e9 = extractelement <16 x i8> %v, i32 9
  %e10 = extractelement <16 x i8> %v, i32 10
  %e11 = extractelement <16 x i8> %v, i32 11
  %e12 = extractelement <16 x i8> %v, i32 12
  %e13 = extractelement <16 x i8> %v, i32 13
  %e14 = extractelement <16 x i8> %v, i32 14
  %e15 = extractelement <16 x i8> %v, i32 15
  %add0 = add i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  %add7 = add i8 %add6, %e8
  %add8 = add i8 %add7, %e9
  %add9 = add i8 %add8, %e10
  %add10 = add i8 %add9, %e11
  %add11 = add i8 %add10, %e12
  %add12 = add i8 %add11, %e13
  %add13 = add i8 %add12, %e14
  %add14 = add i8 %add13, %e15
  ret i8 %add14
}

define i16 @explode_2xi16(<2 x i16> %v) {
; CHECK-LABEL: explode_2xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i16> %v, i32 0
  %e1 = extractelement <2 x i16> %v, i32 1
  %add0 = add i16 %e0, %e1
  ret i16 %add0
}

define i16 @explode_4xi16(<4 x i16> %v) {
; CHECK-LABEL: explode_4xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i16> %v, i32 0
  %e1 = extractelement <4 x i16> %v, i32 1
  %e2 = extractelement <4 x i16> %v, i32 2
  %e3 = extractelement <4 x i16> %v, i32 3
  %add0 = add i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  ret i16 %add2
}


define i16 @explode_8xi16(<8 x i16> %v) {
; CHECK-LABEL: explode_8xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a7, v8
; CHECK-NEXT:    add a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a4, a4, a5
; CHECK-NEXT:    add a4, a4, a6
; CHECK-NEXT:    add a0, a0, a4
; CHECK-NEXT:    add a0, a0, a7
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i16> %v, i32 0
  %e1 = extractelement <8 x i16> %v, i32 1
  %e2 = extractelement <8 x i16> %v, i32 2
  %e3 = extractelement <8 x i16> %v, i32 3
  %e4 = extractelement <8 x i16> %v, i32 4
  %e5 = extractelement <8 x i16> %v, i32 5
  %e6 = extractelement <8 x i16> %v, i32 6
  %e7 = extractelement <8 x i16> %v, i32 7
  %add0 = add i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  ret i16 %add6
}

define i16 @explode_16xi16(<16 x i16> %v) {
; RV32-LABEL: explode_16xi16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 7
; RV32-NEXT:    vmv.x.s a7, v10
; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 8
; RV32-NEXT:    vmv.x.s t0, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 9
; RV32-NEXT:    vmv.x.s t1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 10
; RV32-NEXT:    vmv.x.s t2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 11
; RV32-NEXT:    vmv.x.s t3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 12
; RV32-NEXT:    vmv.x.s t4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 13
; RV32-NEXT:    vmv.x.s t5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 14
; RV32-NEXT:    vmv.x.s t6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi16:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a6, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 7
; RV64-NEXT:    vmv.x.s a7, v10
; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 8
; RV64-NEXT:    vmv.x.s t0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 9
; RV64-NEXT:    vmv.x.s t1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 10
; RV64-NEXT:    vmv.x.s t2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 11
; RV64-NEXT:    vmv.x.s t3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 12
; RV64-NEXT:    vmv.x.s t4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 13
; RV64-NEXT:    vmv.x.s t5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 14
; RV64-NEXT:    vmv.x.s t6, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i16> %v, i32 0
  %e1 = extractelement <16 x i16> %v, i32 1
  %e2 = extractelement <16 x i16> %v, i32 2
  %e3 = extractelement <16 x i16> %v, i32 3
  %e4 = extractelement <16 x i16> %v, i32 4
  %e5 = extractelement <16 x i16> %v, i32 5
  %e6 = extractelement <16 x i16> %v, i32 6
  %e7 = extractelement <16 x i16> %v, i32 7
  %e8 = extractelement <16 x i16> %v, i32 8
  %e9 = extractelement <16 x i16> %v, i32 9
  %e10 = extractelement <16 x i16> %v, i32 10
  %e11 = extractelement <16 x i16> %v, i32 11
  %e12 = extractelement <16 x i16> %v, i32 12
  %e13 = extractelement <16 x i16> %v, i32 13
  %e14 = extractelement <16 x i16> %v, i32 14
  %e15 = extractelement <16 x i16> %v, i32 15
  %add0 = add i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  %add7 = add i16 %add6, %e8
  %add8 = add i16 %add7, %e9
  %add9 = add i16 %add8, %e10
  %add10 = add i16 %add9, %e11
  %add11 = add i16 %add10, %e12
  %add12 = add i16 %add11, %e13
  %add13 = add i16 %add12, %e14
  %add14 = add i16 %add13, %e15
  ret i16 %add14
}

define i32 @explode_2xi32(<2 x i32> %v) {
; RV32-LABEL: explode_2xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vmv.x.s a1, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_2xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    addw a0, a0, a1
; RV64-NEXT:    ret
  %e0 = extractelement <2 x i32> %v, i32 0
  %e1 = extractelement <2 x i32> %v, i32 1
  %add0 = add i32 %e0, %e1
  ret i32 %add0
}

define i32 @explode_4xi32(<4 x i32> %v) {
; RV32-LABEL: explode_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a2, v9
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a3, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    addw a0, a0, a2
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = add i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}


define i32 @explode_8xi32(<8 x i32> %v) {
; RV32-LABEL: explode_8xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 7
; RV32-NEXT:    vmv.x.s a7, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a6, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 7
; RV64-NEXT:    vmv.x.s a7, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a0, a0, a7
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i32> %v, i32 0
  %e1 = extractelement <8 x i32> %v, i32 1
  %e2 = extractelement <8 x i32> %v, i32 2
  %e3 = extractelement <8 x i32> %v, i32 3
  %e4 = extractelement <8 x i32> %v, i32 4
  %e5 = extractelement <8 x i32> %v, i32 5
  %e6 = extractelement <8 x i32> %v, i32 6
  %e7 = extractelement <8 x i32> %v, i32 7
  %add0 = add i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  ret i32 %add6
}

define i32 @explode_16xi32(<16 x i32> %v) {
; RV32-LABEL: explode_16xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v12, v8, 1
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vmv.x.s a7, v12
; RV32-NEXT:    vsetivli zero, 1, e32, m4, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 8
; RV32-NEXT:    vmv.x.s t0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 9
; RV32-NEXT:    vmv.x.s t1, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 10
; RV32-NEXT:    vmv.x.s t2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 11
; RV32-NEXT:    vmv.x.s t3, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 12
; RV32-NEXT:    vmv.x.s t4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 13
; RV32-NEXT:    vmv.x.s t5, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 14
; RV32-NEXT:    vmv.x.s t6, v12
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v12, v8, 1
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmv.x.s a4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a5, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 6
; RV64-NEXT:    vmv.x.s a6, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 7
; RV64-NEXT:    vmv.x.s a7, v12
; RV64-NEXT:    vsetivli zero, 1, e32, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 8
; RV64-NEXT:    vmv.x.s t0, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 9
; RV64-NEXT:    vmv.x.s t1, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 10
; RV64-NEXT:    vmv.x.s t2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 11
; RV64-NEXT:    vmv.x.s t3, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 12
; RV64-NEXT:    vmv.x.s t4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 13
; RV64-NEXT:    vmv.x.s t5, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 14
; RV64-NEXT:    vmv.x.s t6, v12
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    addw a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = add i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}

define i64 @explode_2xi64(<2 x i64> %v) {
; RV32-LABEL: explode_2xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, a2, a3
; RV32-NEXT:    sltu a2, a0, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_2xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    ret
  %e0 = extractelement <2 x i64> %v, i32 0
  %e1 = extractelement <2 x i64> %v, i32 1
  %add0 = add i64 %e0, %e1
  ret i64 %add0
}

define i64 @explode_4xi64(<4 x i64> %v) {
; RV32-LABEL: explode_4xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vmv.x.s a7, v8
; RV32-NEXT:    add a1, a1, a3
; RV32-NEXT:    add a4, a2, a4
; RV32-NEXT:    sltu a2, a4, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a6, a4, a6
; RV32-NEXT:    sltu a2, a6, a4
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, a6, a7
; RV32-NEXT:    sltu a2, a0, a6
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a3, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i64> %v, i32 0
  %e1 = extractelement <4 x i64> %v, i32 1
  %e2 = extractelement <4 x i64> %v, i32 2
  %e3 = extractelement <4 x i64> %v, i32 3
  %add0 = add i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  ret i64 %add2
}


define i64 @explode_8xi64(<8 x i64> %v) {
; RV32-LABEL: explode_8xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v12, v8, 1
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a5, v16
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vmv.x.s t0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vmv.x.s t2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vmv.x.s t4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vmv.x.s t6, v12
; RV32-NEXT:    vslidedown.vi v8, v8, 7
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    add a1, a1, a3
; RV32-NEXT:    add a4, a2, a4
; RV32-NEXT:    sltu a2, a4, a2
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a6, a4, a6
; RV32-NEXT:    sltu a2, a6, a4
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a2, a2, a7
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t0, a6, t0
; RV32-NEXT:    sltu a2, t0, a6
; RV32-NEXT:    add a2, a2, t1
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t2, t0, t2
; RV32-NEXT:    sltu a2, t2, t0
; RV32-NEXT:    add a2, a2, t3
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t4, t2, t4
; RV32-NEXT:    sltu a2, t4, t2
; RV32-NEXT:    add a2, a2, t5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t6, t4, t6
; RV32-NEXT:    sltu a2, t6, t4
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, t6, s0
; RV32-NEXT:    sltu a2, a0, t6
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v12, v8, 1
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmv.x.s a4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a5, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 6
; RV64-NEXT:    vmv.x.s a6, v12
; RV64-NEXT:    vslidedown.vi v8, v8, 7
; RV64-NEXT:    vmv.x.s a7, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i64> %v, i32 0
  %e1 = extractelement <8 x i64> %v, i32 1
  %e2 = extractelement <8 x i64> %v, i32 2
  %e3 = extractelement <8 x i64> %v, i32 3
  %e4 = extractelement <8 x i64> %v, i32 4
  %e5 = extractelement <8 x i64> %v, i32 5
  %e6 = extractelement <8 x i64> %v, i32 6
  %e7 = extractelement <8 x i64> %v, i32 7
  %add0 = add i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  ret i64 %add6
}

define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -64
; RV32-NEXT:    .cfi_def_cfa_offset 64
; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    .cfi_offset s2, -16
; RV32-NEXT:    .cfi_offset s3, -20
; RV32-NEXT:    .cfi_offset s4, -24
; RV32-NEXT:    .cfi_offset s5, -28
; RV32-NEXT:    .cfi_offset s6, -32
; RV32-NEXT:    .cfi_offset s7, -36
; RV32-NEXT:    .cfi_offset s8, -40
; RV32-NEXT:    .cfi_offset s9, -44
; RV32-NEXT:    .cfi_offset s10, -48
; RV32-NEXT:    .cfi_offset s11, -52
; RV32-NEXT:    li a1, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a1
; RV32-NEXT:    vmv.x.s a0, v16
; RV32-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v16, v8, 1
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s a5, v24
; RV32-NEXT:    vmv.x.s a6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 2
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s a3, v24
; RV32-NEXT:    vmv.x.s a4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 3
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s2, v24
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 4
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s3, v24
; RV32-NEXT:    vmv.x.s t0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 5
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s4, v24
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 6
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s5, v24
; RV32-NEXT:    vmv.x.s t2, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 7
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s6, v24
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 8
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s7, v24
; RV32-NEXT:    vmv.x.s t4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 9
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s8, v24
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 10
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s9, v24
; RV32-NEXT:    vmv.x.s t6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 11
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s10, v24
; RV32-NEXT:    vmv.x.s s0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 12
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vmv.x.s s11, v24
; RV32-NEXT:    vmv.x.s s1, v16
; RV32-NEXT:    vslidedown.vi v0, v8, 13
; RV32-NEXT:    vsrl.vx v16, v0, a1
; RV32-NEXT:    vmv.x.s ra, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 14
; RV32-NEXT:    vsrl.vx v24, v16, a1
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s a2, v0
; RV32-NEXT:    vsrl.vx v0, v8, a1
; RV32-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    add a5, a1, a5
; RV32-NEXT:    add a6, a0, a6
; RV32-NEXT:    sltu a0, a6, a0
; RV32-NEXT:    add a0, a5, a0
; RV32-NEXT:    add a0, a0, a3
; RV32-NEXT:    add a4, a6, a4
; RV32-NEXT:    sltu a1, a4, a6
; RV32-NEXT:    add a1, a1, s2
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add a7, a4, a7
; RV32-NEXT:    sltu a1, a7, a4
; RV32-NEXT:    add a1, a1, s3
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t0, a7, t0
; RV32-NEXT:    sltu a1, t0, a7
; RV32-NEXT:    add a1, a1, s4
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t1, t0, t1
; RV32-NEXT:    sltu a1, t1, t0
; RV32-NEXT:    add a1, a1, s5
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t2, t1, t2
; RV32-NEXT:    sltu a1, t2, t1
; RV32-NEXT:    add a1, a1, s6
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t3, t2, t3
; RV32-NEXT:    sltu a1, t3, t2
; RV32-NEXT:    add a1, a1, s7
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t4, t3, t4
; RV32-NEXT:    sltu a1, t4, t3
; RV32-NEXT:    add a1, a1, s8
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t5, t4, t5
; RV32-NEXT:    sltu a1, t5, t4
; RV32-NEXT:    add a1, a1, s9
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add t6, t5, t6
; RV32-NEXT:    sltu a1, t6, t5
; RV32-NEXT:    add a1, a1, s10
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add s0, t6, s0
; RV32-NEXT:    sltu a1, s0, t6
; RV32-NEXT:    add a1, a1, s11
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    add s1, s0, s1
; RV32-NEXT:    sltu a1, s1, s0
; RV32-NEXT:    add a1, a1, ra
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    vmv.x.s a1, v24
; RV32-NEXT:    add a2, s1, a2
; RV32-NEXT:    sltu a3, a2, s1
; RV32-NEXT:    add a1, a3, a1
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    add a0, a0, a1
; RV32-NEXT:    vmv.x.s a1, v0
; RV32-NEXT:    add a3, a2, a3
; RV32-NEXT:    sltu a2, a3, a2
; RV32-NEXT:    add a1, a2, a1
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    add a0, a3, a0
; RV32-NEXT:    sltu a2, a0, a3
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 64
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v16, v8, 1
; RV64-NEXT:    vmv.x.s a1, v16
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 2
; RV64-NEXT:    vmv.x.s a2, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 3
; RV64-NEXT:    vmv.x.s a3, v16
; RV64-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 4
; RV64-NEXT:    vmv.x.s a4, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 5
; RV64-NEXT:    vmv.x.s a5, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 6
; RV64-NEXT:    vmv.x.s a6, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 7
; RV64-NEXT:    vmv.x.s a7, v16
; RV64-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 8
; RV64-NEXT:    vmv.x.s t0, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 9
; RV64-NEXT:    vmv.x.s t1, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 10
; RV64-NEXT:    vmv.x.s t2, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 11
; RV64-NEXT:    vmv.x.s t3, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 12
; RV64-NEXT:    vmv.x.s t4, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 13
; RV64-NEXT:    vmv.x.s t5, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 14
; RV64-NEXT:    vmv.x.s t6, v16
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    add a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i64> %v, i32 0
  %e1 = extractelement <16 x i64> %v, i32 1
  %e2 = extractelement <16 x i64> %v, i32 2
  %e3 = extractelement <16 x i64> %v, i32 3
  %e4 = extractelement <16 x i64> %v, i32 4
  %e5 = extractelement <16 x i64> %v, i32 5
  %e6 = extractelement <16 x i64> %v, i32 6
  %e7 = extractelement <16 x i64> %v, i32 7
  %e8 = extractelement <16 x i64> %v, i32 8
  %e9 = extractelement <16 x i64> %v, i32 9
  %e10 = extractelement <16 x i64> %v, i32 10
  %e11 = extractelement <16 x i64> %v, i32 11
  %e12 = extractelement <16 x i64> %v, i32 12
  %e13 = extractelement <16 x i64> %v, i32 13
  %e14 = extractelement <16 x i64> %v, i32 14
  %e15 = extractelement <16 x i64> %v, i32 15
  %add0 = add i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  %add7 = add i64 %add6, %e8
  %add8 = add i64 %add7, %e9
  %add9 = add i64 %add8, %e10
  %add10 = add i64 %add9, %e11
  %add11 = add i64 %add10, %e12
  %add12 = add i64 %add11, %e13
  %add13 = add i64 %add12, %e14
  %add14 = add i64 %add13, %e15
  ret i64 %add14
}