; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
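; Each test below extracts every element of a fixed-length vector and folds
; the scalars together with an xor at the head of the chain followed by adds
; (the leading xor presumably keeps the chain from being matched as a plain
; add reduction). The checks show how long extractelement chains are lowered:
; vslidedown.vi/vmv.x.s while the scalars fit in registers, and, for the
; wider register groups, a spill of the vector to the stack with scalar
; reloads of the upper elements.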

define i8 @explode_2xi8(<2 x i8> %v) {
; CHECK-LABEL: explode_2xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i8> %v, i32 0
  %e1 = extractelement <2 x i8> %v, i32 1
  %add0 = xor i8 %e0, %e1
  ret i8 %add0
}

define i8 @explode_4xi8(<4 x i8> %v) {
; CHECK-LABEL: explode_4xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i8> %v, i32 0
  %e1 = extractelement <4 x i8> %v, i32 1
  %e2 = extractelement <4 x i8> %v, i32 2
  %e3 = extractelement <4 x i8> %v, i32 3
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  ret i8 %add2
}


define i8 @explode_8xi8(<8 x i8> %v) {
; CHECK-LABEL: explode_8xi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a7, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a4, a4, a5
; CHECK-NEXT:    add a4, a4, a6
; CHECK-NEXT:    add a0, a0, a4
; CHECK-NEXT:    add a0, a0, a7
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i8> %v, i32 0
  %e1 = extractelement <8 x i8> %v, i32 1
  %e2 = extractelement <8 x i8> %v, i32 2
  %e3 = extractelement <8 x i8> %v, i32 3
  %e4 = extractelement <8 x i8> %v, i32 4
  %e5 = extractelement <8 x i8> %v, i32 5
  %e6 = extractelement <8 x i8> %v, i32 6
  %e7 = extractelement <8 x i8> %v, i32 7
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  ret i8 %add6
}

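; With sixteen scalar results, the a0-a7/t0-t6 argument and temporary
; registers run out, so extracting the last element costs a spill and reload
; of s0 on both targets.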
define i8 @explode_16xi8(<16 x i8> %v) {
; RV32-LABEL: explode_16xi8:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 3
; RV32-NEXT:    vmv.x.s a3, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 4
; RV32-NEXT:    vmv.x.s a4, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 5
; RV32-NEXT:    vmv.x.s a5, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 6
; RV32-NEXT:    vmv.x.s a6, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 7
; RV32-NEXT:    vmv.x.s a7, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 8
; RV32-NEXT:    vmv.x.s t0, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 9
; RV32-NEXT:    vmv.x.s t1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 10
; RV32-NEXT:    vmv.x.s t2, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 11
; RV32-NEXT:    vmv.x.s t3, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 12
; RV32-NEXT:    vmv.x.s t4, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 13
; RV32-NEXT:    vmv.x.s t5, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 14
; RV32-NEXT:    vmv.x.s t6, v9
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi8:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a2, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 3
; RV64-NEXT:    vmv.x.s a3, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 4
; RV64-NEXT:    vmv.x.s a4, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 5
; RV64-NEXT:    vmv.x.s a5, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 6
; RV64-NEXT:    vmv.x.s a6, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 7
; RV64-NEXT:    vmv.x.s a7, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 8
; RV64-NEXT:    vmv.x.s t0, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 9
; RV64-NEXT:    vmv.x.s t1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 10
; RV64-NEXT:    vmv.x.s t2, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 11
; RV64-NEXT:    vmv.x.s t3, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 12
; RV64-NEXT:    vmv.x.s t4, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 13
; RV64-NEXT:    vmv.x.s t5, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 14
; RV64-NEXT:    vmv.x.s t6, v9
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i8> %v, i32 0
  %e1 = extractelement <16 x i8> %v, i32 1
  %e2 = extractelement <16 x i8> %v, i32 2
  %e3 = extractelement <16 x i8> %v, i32 3
  %e4 = extractelement <16 x i8> %v, i32 4
  %e5 = extractelement <16 x i8> %v, i32 5
  %e6 = extractelement <16 x i8> %v, i32 6
  %e7 = extractelement <16 x i8> %v, i32 7
  %e8 = extractelement <16 x i8> %v, i32 8
  %e9 = extractelement <16 x i8> %v, i32 9
  %e10 = extractelement <16 x i8> %v, i32 10
  %e11 = extractelement <16 x i8> %v, i32 11
  %e12 = extractelement <16 x i8> %v, i32 12
  %e13 = extractelement <16 x i8> %v, i32 13
  %e14 = extractelement <16 x i8> %v, i32 14
  %e15 = extractelement <16 x i8> %v, i32 15
  %add0 = xor i8 %e0, %e1
  %add1 = add i8 %add0, %e2
  %add2 = add i8 %add1, %e3
  %add3 = add i8 %add2, %e4
  %add4 = add i8 %add3, %e5
  %add5 = add i8 %add4, %e6
  %add6 = add i8 %add5, %e7
  %add7 = add i8 %add6, %e8
  %add8 = add i8 %add7, %e9
  %add9 = add i8 %add8, %e10
  %add10 = add i8 %add9, %e11
  %add11 = add i8 %add10, %e12
  %add12 = add i8 %add11, %e13
  %add13 = add i8 %add12, %e14
  %add14 = add i8 %add13, %e15
  ret i8 %add14
}

define i16 @explode_2xi16(<2 x i16> %v) {
; CHECK-LABEL: explode_2xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i16> %v, i32 0
  %e1 = extractelement <2 x i16> %v, i32 1
  %add0 = xor i16 %e0, %e1
  ret i16 %add0
}

define i16 @explode_4xi16(<4 x i16> %v) {
; CHECK-LABEL: explode_4xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    ret
  %e0 = extractelement <4 x i16> %v, i32 0
  %e1 = extractelement <4 x i16> %v, i32 1
  %e2 = extractelement <4 x i16> %v, i32 2
  %e3 = extractelement <4 x i16> %v, i32 3
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  ret i16 %add2
}


define i16 @explode_8xi16(<8 x i16> %v) {
; CHECK-LABEL: explode_8xi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 2
; CHECK-NEXT:    vmv.x.s a2, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 3
; CHECK-NEXT:    vmv.x.s a3, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 4
; CHECK-NEXT:    vmv.x.s a4, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 5
; CHECK-NEXT:    vmv.x.s a5, v9
; CHECK-NEXT:    vslidedown.vi v9, v8, 6
; CHECK-NEXT:    vmv.x.s a6, v9
; CHECK-NEXT:    vslidedown.vi v8, v8, 7
; CHECK-NEXT:    vmv.x.s a7, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    add a2, a2, a3
; CHECK-NEXT:    add a0, a0, a2
; CHECK-NEXT:    add a4, a4, a5
; CHECK-NEXT:    add a4, a4, a6
; CHECK-NEXT:    add a0, a0, a4
; CHECK-NEXT:    add a0, a0, a7
; CHECK-NEXT:    ret
  %e0 = extractelement <8 x i16> %v, i32 0
  %e1 = extractelement <8 x i16> %v, i32 1
  %e2 = extractelement <8 x i16> %v, i32 2
  %e3 = extractelement <8 x i16> %v, i32 3
  %e4 = extractelement <8 x i16> %v, i32 4
  %e5 = extractelement <8 x i16> %v, i32 5
  %e6 = extractelement <8 x i16> %v, i32 6
  %e7 = extractelement <8 x i16> %v, i32 7
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  ret i16 %add6
}

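; A <16 x i16> source occupies an LMUL=2 register group, so slides past
; element 7 switch to an m2 vsetivli; as in the i8 case, the sixteenth scalar
; spills s0.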
define i16 @explode_16xi16(<16 x i16> %v) {
; RV32-LABEL: explode_16xi16:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 7
; RV32-NEXT:    vmv.x.s a7, v10
; RV32-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 8
; RV32-NEXT:    vmv.x.s t0, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 9
; RV32-NEXT:    vmv.x.s t1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 10
; RV32-NEXT:    vmv.x.s t2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 11
; RV32-NEXT:    vmv.x.s t3, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 12
; RV32-NEXT:    vmv.x.s t4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 13
; RV32-NEXT:    vmv.x.s t5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 14
; RV32-NEXT:    vmv.x.s t6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a7, a7, t1
; RV32-NEXT:    add a7, a7, t2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t3, t3, t4
; RV32-NEXT:    add t3, t3, t5
; RV32-NEXT:    add t3, t3, t6
; RV32-NEXT:    add t3, t3, s0
; RV32-NEXT:    add a0, a0, t3
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi16:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset s0, -8
; RV64-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a6, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 7
; RV64-NEXT:    vmv.x.s a7, v10
; RV64-NEXT:    vsetivli zero, 1, e16, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 8
; RV64-NEXT:    vmv.x.s t0, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 9
; RV64-NEXT:    vmv.x.s t1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 10
; RV64-NEXT:    vmv.x.s t2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 11
; RV64-NEXT:    vmv.x.s t3, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 12
; RV64-NEXT:    vmv.x.s t4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 13
; RV64-NEXT:    vmv.x.s t5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 14
; RV64-NEXT:    vmv.x.s t6, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 15
; RV64-NEXT:    vmv.x.s s0, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a7, a7, t2
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t3, t3, t4
; RV64-NEXT:    add t3, t3, t5
; RV64-NEXT:    add t3, t3, t6
; RV64-NEXT:    add t3, t3, s0
; RV64-NEXT:    add a0, a0, t3
; RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i16> %v, i32 0
  %e1 = extractelement <16 x i16> %v, i32 1
  %e2 = extractelement <16 x i16> %v, i32 2
  %e3 = extractelement <16 x i16> %v, i32 3
  %e4 = extractelement <16 x i16> %v, i32 4
  %e5 = extractelement <16 x i16> %v, i32 5
  %e6 = extractelement <16 x i16> %v, i32 6
  %e7 = extractelement <16 x i16> %v, i32 7
  %e8 = extractelement <16 x i16> %v, i32 8
  %e9 = extractelement <16 x i16> %v, i32 9
  %e10 = extractelement <16 x i16> %v, i32 10
  %e11 = extractelement <16 x i16> %v, i32 11
  %e12 = extractelement <16 x i16> %v, i32 12
  %e13 = extractelement <16 x i16> %v, i32 13
  %e14 = extractelement <16 x i16> %v, i32 14
  %e15 = extractelement <16 x i16> %v, i32 15
  %add0 = xor i16 %e0, %e1
  %add1 = add i16 %add0, %e2
  %add2 = add i16 %add1, %e3
  %add3 = add i16 %add2, %e4
  %add4 = add i16 %add3, %e5
  %add5 = add i16 %add4, %e6
  %add6 = add i16 %add5, %e7
  %add7 = add i16 %add6, %e8
  %add8 = add i16 %add7, %e9
  %add9 = add i16 %add8, %e10
  %add10 = add i16 %add9, %e11
  %add11 = add i16 %add10, %e12
  %add12 = add i16 %add11, %e13
  %add13 = add i16 %add12, %e14
  %add14 = add i16 %add13, %e15
  ret i16 %add14
}

define i32 @explode_2xi32(<2 x i32> %v) {
; CHECK-LABEL: explode_2xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vmv.x.s a0, v8
; CHECK-NEXT:    vslidedown.vi v8, v8, 1
; CHECK-NEXT:    vmv.x.s a1, v8
; CHECK-NEXT:    xor a0, a0, a1
; CHECK-NEXT:    ret
  %e0 = extractelement <2 x i32> %v, i32 0
  %e1 = extractelement <2 x i32> %v, i32 1
  %add0 = xor i32 %e0, %e1
  ret i32 %add0
}

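; RV32 and RV64 differ only in the final combine: RV64 uses addw so the i32
; result is sign-extended into the 64-bit return register.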
define i32 @explode_4xi32(<4 x i32> %v) {
; RV32-LABEL: explode_4xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v9, v8, 1
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vslidedown.vi v9, v8, 2
; RV32-NEXT:    vmv.x.s a2, v9
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v9, v8, 1
; RV64-NEXT:    vmv.x.s a1, v9
; RV64-NEXT:    vslidedown.vi v9, v8, 2
; RV64-NEXT:    vmv.x.s a2, v9
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a3, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    addw a0, a0, a2
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i32> %v, i32 0
  %e1 = extractelement <4 x i32> %v, i32 1
  %e2 = extractelement <4 x i32> %v, i32 2
  %e3 = extractelement <4 x i32> %v, i32 3
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  ret i32 %add2
}


define i32 @explode_8xi32(<8 x i32> %v) {
; RV32-LABEL: explode_8xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vmv.x.s a2, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 3
; RV32-NEXT:    vmv.x.s a3, v10
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v10, v8, 4
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 5
; RV32-NEXT:    vmv.x.s a5, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 6
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 7
; RV32-NEXT:    vmv.x.s a7, v8
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 3
; RV64-NEXT:    vmv.x.s a3, v10
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 4
; RV64-NEXT:    vmv.x.s a4, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 5
; RV64-NEXT:    vmv.x.s a5, v10
; RV64-NEXT:    vslidedown.vi v10, v8, 6
; RV64-NEXT:    vmv.x.s a6, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 7
; RV64-NEXT:    vmv.x.s a7, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    addw a0, a0, a7
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i32> %v, i32 0
  %e1 = extractelement <8 x i32> %v, i32 1
  %e2 = extractelement <8 x i32> %v, i32 2
  %e3 = extractelement <8 x i32> %v, i32 3
  %e4 = extractelement <8 x i32> %v, i32 4
  %e5 = extractelement <8 x i32> %v, i32 5
  %e6 = extractelement <8 x i32> %v, i32 6
  %e7 = extractelement <8 x i32> %v, i32 7
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  ret i32 %add6
}

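; For the LMUL=4 <16 x i32> source, only elements 0-7 are extracted with
; vslidedown; the vector is then stored to a 64-byte-aligned stack slot
; (hence the frame realignment through s0) and elements 8-15 are reloaded
; with scalar lw.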
define i32 @explode_16xi32(<16 x i32> %v) {
; RV32-LABEL: explode_16xi32:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -128
; RV32-NEXT:    .cfi_def_cfa_offset 128
; RV32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 116(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s2, -12
; RV32-NEXT:    addi s0, sp, 128
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    andi sp, sp, -64
; RV32-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    vslidedown.vi v12, v8, 1
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vmv.x.s a2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 7
; RV32-NEXT:    vmv.x.s a7, v12
; RV32-NEXT:    mv t0, sp
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vse32.v v8, (t0)
; RV32-NEXT:    lw t0, 32(sp)
; RV32-NEXT:    lw t1, 36(sp)
; RV32-NEXT:    lw t2, 40(sp)
; RV32-NEXT:    lw t3, 44(sp)
; RV32-NEXT:    lw t4, 48(sp)
; RV32-NEXT:    lw t5, 52(sp)
; RV32-NEXT:    lw t6, 56(sp)
; RV32-NEXT:    lw s2, 60(sp)
; RV32-NEXT:    xor a0, a0, a1
; RV32-NEXT:    add a2, a2, a3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a4, a4, a5
; RV32-NEXT:    add a4, a4, a6
; RV32-NEXT:    add a0, a0, a4
; RV32-NEXT:    add a7, a7, t0
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t1, t1, t2
; RV32-NEXT:    add t1, t1, t3
; RV32-NEXT:    add a0, a0, t1
; RV32-NEXT:    add t4, t4, t5
; RV32-NEXT:    add t4, t4, t6
; RV32-NEXT:    add t4, t4, s2
; RV32-NEXT:    add a0, a0, t4
; RV32-NEXT:    addi sp, s0, -128
; RV32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 116(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 128
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi32:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 104(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    .cfi_offset s2, -24
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v12, v8, 1
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 4
; RV64-NEXT:    vmv.x.s a4, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 5
; RV64-NEXT:    vmv.x.s a5, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 6
; RV64-NEXT:    vmv.x.s a6, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 7
; RV64-NEXT:    vmv.x.s a7, v12
; RV64-NEXT:    mv t0, sp
; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV64-NEXT:    vse32.v v8, (t0)
; RV64-NEXT:    lw t0, 32(sp)
; RV64-NEXT:    lw t1, 36(sp)
; RV64-NEXT:    lw t2, 40(sp)
; RV64-NEXT:    lw t3, 44(sp)
; RV64-NEXT:    lw t4, 48(sp)
; RV64-NEXT:    lw t5, 52(sp)
; RV64-NEXT:    lw t6, 56(sp)
; RV64-NEXT:    lw s2, 60(sp)
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a4, a4, a5
; RV64-NEXT:    add a4, a4, a6
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t1, t1, t2
; RV64-NEXT:    add t1, t1, t3
; RV64-NEXT:    add a0, a0, t1
; RV64-NEXT:    add t4, t4, t5
; RV64-NEXT:    add t4, t4, t6
; RV64-NEXT:    add t4, t4, s2
; RV64-NEXT:    addw a0, a0, t4
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 104(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i32> %v, i32 0
  %e1 = extractelement <16 x i32> %v, i32 1
  %e2 = extractelement <16 x i32> %v, i32 2
  %e3 = extractelement <16 x i32> %v, i32 3
  %e4 = extractelement <16 x i32> %v, i32 4
  %e5 = extractelement <16 x i32> %v, i32 5
  %e6 = extractelement <16 x i32> %v, i32 6
  %e7 = extractelement <16 x i32> %v, i32 7
  %e8 = extractelement <16 x i32> %v, i32 8
  %e9 = extractelement <16 x i32> %v, i32 9
  %e10 = extractelement <16 x i32> %v, i32 10
  %e11 = extractelement <16 x i32> %v, i32 11
  %e12 = extractelement <16 x i32> %v, i32 12
  %e13 = extractelement <16 x i32> %v, i32 13
  %e14 = extractelement <16 x i32> %v, i32 14
  %e15 = extractelement <16 x i32> %v, i32 15
  %add0 = xor i32 %e0, %e1
  %add1 = add i32 %add0, %e2
  %add2 = add i32 %add1, %e3
  %add3 = add i32 %add2, %e4
  %add4 = add i32 %add3, %e5
  %add5 = add i32 %add4, %e6
  %add6 = add i32 %add5, %e7
  %add7 = add i32 %add6, %e8
  %add8 = add i32 %add7, %e9
  %add9 = add i32 %add8, %e10
  %add10 = add i32 %add9, %e11
  %add11 = add i32 %add10, %e12
  %add12 = add i32 %add11, %e13
  %add13 = add i32 %add12, %e14
  %add14 = add i32 %add13, %e15
  ret i32 %add14
}

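; RV32 has no 64-bit GPRs, so every i64 element is split into a low half
; (vmv.x.s) and a high half (vsrl.vx by 32 plus vmv.x.s), and the larger
; tests combine the halves with add/sltu carry chains; RV64 extracts i64
; elements directly.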
define i64 @explode_2xi64(<2 x i64> %v) {
; RV32-LABEL: explode_2xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a1, v9
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v8, v8, 1
; RV32-NEXT:    vsrl.vx v9, v8, a0
; RV32-NEXT:    vmv.x.s a0, v9
; RV32-NEXT:    vmv.x.s a3, v8
; RV32-NEXT:    xor a1, a1, a0
; RV32-NEXT:    xor a0, a2, a3
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_2xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v8, v8, 1
; RV64-NEXT:    vmv.x.s a1, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    ret
  %e0 = extractelement <2 x i64> %v, i32 0
  %e1 = extractelement <2 x i64> %v, i32 1
  %add0 = xor i64 %e0, %e1
  ret i64 %add0
}

define i64 @explode_4xi64(<4 x i64> %v) {
; RV32-LABEL: explode_4xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a1, v10
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v10, v8, 1
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a3, v12
; RV32-NEXT:    vmv.x.s a4, v10
; RV32-NEXT:    vslidedown.vi v10, v8, 2
; RV32-NEXT:    vsrl.vx v12, v10, a0
; RV32-NEXT:    vmv.x.s a5, v12
; RV32-NEXT:    vmv.x.s a6, v10
; RV32-NEXT:    vslidedown.vi v8, v8, 3
; RV32-NEXT:    vsrl.vx v10, v8, a0
; RV32-NEXT:    vmv.x.s a0, v10
; RV32-NEXT:    vmv.x.s a7, v8
; RV32-NEXT:    xor a1, a1, a3
; RV32-NEXT:    xor a2, a2, a4
; RV32-NEXT:    add a6, a2, a6
; RV32-NEXT:    sltu a2, a6, a2
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, a6, a7
; RV32-NEXT:    sltu a2, a0, a6
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_4xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v10, v8, 1
; RV64-NEXT:    vmv.x.s a1, v10
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v10, v8, 2
; RV64-NEXT:    vmv.x.s a2, v10
; RV64-NEXT:    vslidedown.vi v8, v8, 3
; RV64-NEXT:    vmv.x.s a3, v8
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    ret
  %e0 = extractelement <4 x i64> %v, i32 0
  %e1 = extractelement <4 x i64> %v, i32 1
  %e2 = extractelement <4 x i64> %v, i32 2
  %e3 = extractelement <4 x i64> %v, i32 3
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  ret i64 %add2
}


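; At m4, RV32 still extracts all eight elements with vslidedown/vsrl.vx at
; the cost of spilling s0, while RV64 slides out only elements 0-3 and
; reloads elements 4-7 with ld after storing the vector to a 64-byte-aligned
; stack slot.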
define i64 @explode_8xi64(<8 x i64> %v) {
; RV32-LABEL: explode_8xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset s0, -4
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m4, ta, ma
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vmv.x.s a1, v12
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v12, v8, 1
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    vmv.x.s a4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 2
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a5, v16
; RV32-NEXT:    vmv.x.s a6, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 3
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s a7, v16
; RV32-NEXT:    vmv.x.s t0, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 4
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vmv.x.s t2, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 5
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vmv.x.s t4, v12
; RV32-NEXT:    vslidedown.vi v12, v8, 6
; RV32-NEXT:    vsrl.vx v16, v12, a0
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vmv.x.s t6, v12
; RV32-NEXT:    vslidedown.vi v8, v8, 7
; RV32-NEXT:    vsrl.vx v12, v8, a0
; RV32-NEXT:    vmv.x.s a0, v12
; RV32-NEXT:    vmv.x.s s0, v8
; RV32-NEXT:    xor a1, a1, a3
; RV32-NEXT:    xor a2, a2, a4
; RV32-NEXT:    add a6, a2, a6
; RV32-NEXT:    sltu a2, a6, a2
; RV32-NEXT:    add a1, a1, a5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a1, a1, a7
; RV32-NEXT:    add t0, a6, t0
; RV32-NEXT:    sltu a2, t0, a6
; RV32-NEXT:    add a2, a2, t1
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t2, t0, t2
; RV32-NEXT:    sltu a2, t2, t0
; RV32-NEXT:    add a2, a2, t3
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t4, t2, t4
; RV32-NEXT:    sltu a2, t4, t2
; RV32-NEXT:    add a2, a2, t5
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add t6, t4, t6
; RV32-NEXT:    sltu a2, t6, t4
; RV32-NEXT:    add a0, a2, a0
; RV32-NEXT:    add a1, a1, a0
; RV32-NEXT:    add a0, t6, s0
; RV32-NEXT:    sltu a2, a0, t6
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_8xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -128
; RV64-NEXT:    .cfi_def_cfa_offset 128
; RV64-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 128
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -64
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v12, v8, 1
; RV64-NEXT:    vmv.x.s a1, v12
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v12, v8, 2
; RV64-NEXT:    vmv.x.s a2, v12
; RV64-NEXT:    vslidedown.vi v12, v8, 3
; RV64-NEXT:    vmv.x.s a3, v12
; RV64-NEXT:    mv a4, sp
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vse64.v v8, (a4)
; RV64-NEXT:    ld a4, 32(sp)
; RV64-NEXT:    ld a5, 40(sp)
; RV64-NEXT:    ld a6, 48(sp)
; RV64-NEXT:    ld a7, 56(sp)
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    addi sp, s0, -128
; RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 128
; RV64-NEXT:    ret
  %e0 = extractelement <8 x i64> %v, i32 0
  %e1 = extractelement <8 x i64> %v, i32 1
  %e2 = extractelement <8 x i64> %v, i32 2
  %e3 = extractelement <8 x i64> %v, i32 3
  %e4 = extractelement <8 x i64> %v, i32 4
  %e5 = extractelement <8 x i64> %v, i32 5
  %e6 = extractelement <8 x i64> %v, i32 6
  %e7 = extractelement <8 x i64> %v, i32 7
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  ret i64 %add6
}

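; The <16 x i64> case is the worst: on RV32 the low/high halves consume the
; entire callee-saved register file (ra, s0-s11) and one extracted high word
; has to be spilled to the stack, while RV64 stores the m8 vector to a
; 128-byte-aligned slot and reloads elements 4-15 as scalars.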
define i64 @explode_16xi64(<16 x i64> %v) {
; RV32-LABEL: explode_16xi64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -64
; RV32-NEXT:    .cfi_def_cfa_offset 64
; RV32-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    .cfi_offset s1, -12
; RV32-NEXT:    .cfi_offset s2, -16
; RV32-NEXT:    .cfi_offset s3, -20
; RV32-NEXT:    .cfi_offset s4, -24
; RV32-NEXT:    .cfi_offset s5, -28
; RV32-NEXT:    .cfi_offset s6, -32
; RV32-NEXT:    .cfi_offset s7, -36
; RV32-NEXT:    .cfi_offset s8, -40
; RV32-NEXT:    .cfi_offset s9, -44
; RV32-NEXT:    .cfi_offset s10, -48
; RV32-NEXT:    .cfi_offset s11, -52
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 1, e64, m8, ta, ma
; RV32-NEXT:    vsrl.vx v16, v8, a0
; RV32-NEXT:    vmv.x.s a1, v16
; RV32-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    vmv.x.s a2, v8
; RV32-NEXT:    vslidedown.vi v16, v8, 1
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s a3, v24
; RV32-NEXT:    vmv.x.s a4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 2
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s a5, v24
; RV32-NEXT:    vmv.x.s a6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 3
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s a7, v24
; RV32-NEXT:    vmv.x.s t0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 4
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s3, v24
; RV32-NEXT:    vmv.x.s t1, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 5
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s4, v24
; RV32-NEXT:    vmv.x.s t2, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 6
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s5, v24
; RV32-NEXT:    vmv.x.s t3, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 7
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s6, v24
; RV32-NEXT:    vmv.x.s t4, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 8
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s7, v24
; RV32-NEXT:    vmv.x.s t5, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 9
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s8, v24
; RV32-NEXT:    vmv.x.s t6, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 10
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s9, v24
; RV32-NEXT:    vmv.x.s s0, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 11
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s10, v24
; RV32-NEXT:    vmv.x.s s1, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 12
; RV32-NEXT:    vsrl.vx v24, v16, a0
; RV32-NEXT:    vmv.x.s s11, v24
; RV32-NEXT:    vmv.x.s s2, v16
; RV32-NEXT:    vslidedown.vi v24, v8, 13
; RV32-NEXT:    vsrl.vx v16, v24, a0
; RV32-NEXT:    vmv.x.s ra, v16
; RV32-NEXT:    vslidedown.vi v16, v8, 14
; RV32-NEXT:    vsrl.vx v0, v16, a0
; RV32-NEXT:    vslidedown.vi v8, v8, 15
; RV32-NEXT:    vmv.x.s a1, v24
; RV32-NEXT:    vsrl.vx v24, v8, a0
; RV32-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    xor a0, a0, a3
; RV32-NEXT:    xor a2, a2, a4
; RV32-NEXT:    add a0, a0, a5
; RV32-NEXT:    add a6, a2, a6
; RV32-NEXT:    sltu a2, a6, a2
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add a0, a0, a7
; RV32-NEXT:    add t0, a6, t0
; RV32-NEXT:    sltu a2, t0, a6
; RV32-NEXT:    add a2, a2, s3
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t1, t0, t1
; RV32-NEXT:    sltu a2, t1, t0
; RV32-NEXT:    add a2, a2, s4
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t2, t1, t2
; RV32-NEXT:    sltu a2, t2, t1
; RV32-NEXT:    add a2, a2, s5
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t3, t2, t3
; RV32-NEXT:    sltu a2, t3, t2
; RV32-NEXT:    add a2, a2, s6
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t4, t3, t4
; RV32-NEXT:    sltu a2, t4, t3
; RV32-NEXT:    add a2, a2, s7
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t5, t4, t5
; RV32-NEXT:    sltu a2, t5, t4
; RV32-NEXT:    add a2, a2, s8
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add t6, t5, t6
; RV32-NEXT:    sltu a2, t6, t5
; RV32-NEXT:    add a2, a2, s9
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add s0, t6, s0
; RV32-NEXT:    sltu a2, s0, t6
; RV32-NEXT:    add a2, a2, s10
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add s1, s0, s1
; RV32-NEXT:    sltu a2, s1, s0
; RV32-NEXT:    add a2, a2, s11
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    add s2, s1, s2
; RV32-NEXT:    sltu a2, s2, s1
; RV32-NEXT:    add a2, a2, ra
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    vmv.x.s a2, v0
; RV32-NEXT:    add a1, s2, a1
; RV32-NEXT:    sltu a3, a1, s2
; RV32-NEXT:    add a2, a3, a2
; RV32-NEXT:    vmv.x.s a3, v16
; RV32-NEXT:    add a0, a0, a2
; RV32-NEXT:    vmv.x.s a2, v24
; RV32-NEXT:    add a3, a1, a3
; RV32-NEXT:    sltu a1, a3, a1
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    vmv.x.s a0, v8
; RV32-NEXT:    add a0, a3, a0
; RV32-NEXT:    sltu a2, a0, a3
; RV32-NEXT:    add a1, a1, a2
; RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 64
; RV32-NEXT:    ret
;
; RV64-LABEL: explode_16xi64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -256
; RV64-NEXT:    .cfi_def_cfa_offset 256
; RV64-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 232(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    .cfi_offset s2, -24
; RV64-NEXT:    addi s0, sp, 256
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; RV64-NEXT:    vmv.x.s a0, v8
; RV64-NEXT:    vslidedown.vi v16, v8, 1
; RV64-NEXT:    vmv.x.s a1, v16
; RV64-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 2
; RV64-NEXT:    vmv.x.s a2, v16
; RV64-NEXT:    vslidedown.vi v16, v8, 3
; RV64-NEXT:    vmv.x.s a3, v16
; RV64-NEXT:    mv a4, sp
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vse64.v v8, (a4)
; RV64-NEXT:    ld a4, 32(sp)
; RV64-NEXT:    ld a5, 40(sp)
; RV64-NEXT:    ld a6, 48(sp)
; RV64-NEXT:    ld a7, 56(sp)
; RV64-NEXT:    ld t0, 64(sp)
; RV64-NEXT:    ld t1, 72(sp)
; RV64-NEXT:    ld t2, 80(sp)
; RV64-NEXT:    ld t3, 88(sp)
; RV64-NEXT:    ld t4, 96(sp)
; RV64-NEXT:    ld t5, 104(sp)
; RV64-NEXT:    ld t6, 112(sp)
; RV64-NEXT:    ld s2, 120(sp)
; RV64-NEXT:    xor a0, a0, a1
; RV64-NEXT:    add a2, a2, a3
; RV64-NEXT:    add a0, a0, a2
; RV64-NEXT:    add a0, a0, a4
; RV64-NEXT:    add a5, a5, a6
; RV64-NEXT:    add a0, a0, a5
; RV64-NEXT:    add a7, a7, t0
; RV64-NEXT:    add a7, a7, t1
; RV64-NEXT:    add a0, a0, a7
; RV64-NEXT:    add t2, t2, t3
; RV64-NEXT:    add t2, t2, t4
; RV64-NEXT:    add t2, t2, t5
; RV64-NEXT:    add a0, a0, t2
; RV64-NEXT:    add t6, t6, s2
; RV64-NEXT:    add a0, a0, t6
; RV64-NEXT:    addi sp, s0, -256
; RV64-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 232(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 256
; RV64-NEXT:    ret
  %e0 = extractelement <16 x i64> %v, i32 0
  %e1 = extractelement <16 x i64> %v, i32 1
  %e2 = extractelement <16 x i64> %v, i32 2
  %e3 = extractelement <16 x i64> %v, i32 3
  %e4 = extractelement <16 x i64> %v, i32 4
  %e5 = extractelement <16 x i64> %v, i32 5
  %e6 = extractelement <16 x i64> %v, i32 6
  %e7 = extractelement <16 x i64> %v, i32 7
  %e8 = extractelement <16 x i64> %v, i32 8
  %e9 = extractelement <16 x i64> %v, i32 9
  %e10 = extractelement <16 x i64> %v, i32 10
  %e11 = extractelement <16 x i64> %v, i32 11
  %e12 = extractelement <16 x i64> %v, i32 12
  %e13 = extractelement <16 x i64> %v, i32 13
  %e14 = extractelement <16 x i64> %v, i32 14
  %e15 = extractelement <16 x i64> %v, i32 15
  %add0 = xor i64 %e0, %e1
  %add1 = add i64 %add0, %e2
  %add2 = add i64 %add1, %e3
  %add3 = add i64 %add2, %e4
  %add4 = add i64 %add3, %e5
  %add5 = add i64 %add4, %e6
  %add6 = add i64 %add5, %e7
  %add7 = add i64 %add6, %e8
  %add8 = add i64 %add7, %e9
  %add9 = add i64 %add8, %e10
  %add10 = add i64 %add9, %e11
  %add11 = add i64 %add10, %e12
  %add12 = add i64 %add11, %e13
  %add13 = add i64 %add12, %e14
  %add14 = add i64 %add13, %e15
  ret i64 %add14
}