; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+m -O2 | FileCheck -check-prefixes=CHECK,RV32 %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+m -O2 | FileCheck -check-prefixes=CHECK,RV64 %s

; ------------------------------------------------------------------------------
; Loads
; ------------------------------------------------------------------------------

; FIXME: This should be widened to a vlseg2 of <4 x i32> with VL set to 3
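; A minimal sketch of what that widened lowering might look like (hypothetical
; codegen, not what llc currently emits): keep the two <3 x i32> results in
; <4 x i32>-sized registers and run the segment load with vl=3, e.g.:
;   vsetivli zero, 3, e32, m1, ta, ma
;   vlseg2e32.v v8, (a0)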
define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
; RV32-LABEL: load_factor2_v3:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT:    vle32.v v10, (a0)
; RV32-NEXT:    li a0, 32
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v10, 0
; RV32-NEXT:    vnsrl.wx v9, v10, a0
; RV32-NEXT:    ret
;
; RV64-LABEL: load_factor2_v3:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT:    vle32.v v10, (a0)
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wx v9, v10, a0
; RV64-NEXT:    vnsrl.wi v8, v10, 0
; RV64-NEXT:    ret
  %interleaved.vec = load <6 x i32>, ptr %ptr
  %v0 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 0, i32 2, i32 4>
  %v1 = shufflevector <6 x i32> %interleaved.vec, <6 x i32> poison, <3 x i32> <i32 1, i32 3, i32 5>
  %res0 = insertvalue {<3 x i32>, <3 x i32>} undef, <3 x i32> %v0, 0
  %res1 = insertvalue {<3 x i32>, <3 x i32>} %res0, <3 x i32> %v1, 1
  ret {<3 x i32>, <3 x i32>} %res1
}

define {<4 x i32>, <4 x i32>} @load_factor2(ptr %ptr) {
; CHECK-LABEL: load_factor2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <8 x i32>, ptr %ptr
  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %v1 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %res0 = insertvalue {<4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  ret {<4 x i32>, <4 x i32>} %res1
}


define {<4 x i32>, <4 x i32>, <4 x i32>} @load_factor3(ptr %ptr) {
; CHECK-LABEL: load_factor3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg3e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <12 x i32>, ptr %ptr
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  %v1 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %v2 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  ret {<4 x i32>, <4 x i32>, <4 x i32>} %res2
}

define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @load_factor4(ptr %ptr) {
; CHECK-LABEL: load_factor4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg4e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %v1 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
  %v2 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %v3 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
  ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3
}

define {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} @load_factor5(ptr %ptr) {
; CHECK-LABEL: load_factor5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg5e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <20 x i32>, ptr %ptr
  %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
  %v1 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 1, i32 6, i32 11, i32 16>
  %v2 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 2, i32 7, i32 12, i32 17>
  %v3 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 3, i32 8, i32 13, i32 18>
  %v4 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 4, i32 9, i32 14, i32 19>
  %res0 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} undef, <4 x i32> %v0, 0
  %res1 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res0, <4 x i32> %v1, 1
  %res2 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res1, <4 x i32> %v2, 2
  %res3 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res2, <4 x i32> %v3, 3
  %res4 = insertvalue {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res3, <4 x i32> %v4, 4
  ret {<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>} %res4
}

define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor6(ptr %ptr) {
; CHECK-LABEL: load_factor6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vlseg6e16.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <12 x i16>, ptr %ptr
  %v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 0, i32 6>
  %v1 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 1, i32 7>
  %v2 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 2, i32 8>
  %v3 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 3, i32 9>
  %v4 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 4, i32 10>
  %v5 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 5, i32 11>
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5
}

define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor7(ptr %ptr) {
; CHECK-LABEL: load_factor7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vlseg7e16.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <14 x i16>, ptr %ptr
  %v0 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 0, i32 7>
  %v1 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 1, i32 8>
  %v2 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 2, i32 9>
  %v3 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 3, i32 10>
  %v4 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 4, i32 11>
  %v5 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 5, i32 12>
  %v6 = shufflevector <14 x i16> %interleaved.vec, <14 x i16> poison, <2 x i32> <i32 6, i32 13>
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6
}

define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @load_factor8(ptr %ptr) {
; CHECK-LABEL: load_factor8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vlseg8e16.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <16 x i16>, ptr %ptr
  %v0 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 0, i32 8>
  %v1 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 1, i32 9>
  %v2 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 2, i32 10>
  %v3 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 3, i32 11>
  %v4 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 4, i32 12>
  %v5 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 5, i32 13>
  %v6 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 6, i32 14>
  %v7 = shufflevector <16 x i16> %interleaved.vec, <16 x i16> poison, <2 x i32> <i32 7, i32 15>
  %res0 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} undef, <2 x i16> %v0, 0
  %res1 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res0, <2 x i16> %v1, 1
  %res2 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res1, <2 x i16> %v2, 2
  %res3 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res2, <2 x i16> %v3, 3
  %res4 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res3, <2 x i16> %v4, 4
  %res5 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res4, <2 x i16> %v5, 5
  %res6 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res5, <2 x i16> %v6, 6
  %res7 = insertvalue {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res6, <2 x i16> %v7, 7
  ret {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} %res7
}

; LMUL * NF is > 8 here and so shouldn't be lowered to a vlseg
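; (Each <8 x i64> result needs LMUL=4 at e64 under the minimum VLEN of 128
; implied by +v, so NF * LMUL = 6 * 4 = 24, well over the limit of 8 that the
; RVV segment load/store instructions allow.)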
define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_factor6_too_big(ptr %ptr) {
; RV32-LABEL: load_factor6_too_big:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 92
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    sub sp, sp, a2
; RV32-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdc, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 92 * vlenb
; RV32-NEXT:    addi a3, a1, 256
; RV32-NEXT:    addi a4, a1, 128
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    lui a5, 12291
; RV32-NEXT:    lui a6, %hi(.LCPI8_0)
; RV32-NEXT:    addi a6, a6, %lo(.LCPI8_0)
; RV32-NEXT:    li a7, 768
; RV32-NEXT:    lui t0, 49164
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v16, (a1)
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li t1, 76
; RV32-NEXT:    mul a1, a1, t1
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vle32.v v8, (a4)
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a4, 68
; RV32-NEXT:    mul a1, a1, a4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a5, a5, 3
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vle16.v v6, (a6)
; RV32-NEXT:    vmv.s.x v0, a5
; RV32-NEXT:    lui a1, %hi(.LCPI8_1)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI8_1)
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vmerge.vvm v16, v8, v16, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vrgatherei16.vv v24, v16, v6
; RV32-NEXT:    csrr a4, vlenb
; RV32-NEXT:    li a5, 52
; RV32-NEXT:    mul a4, a4, a5
; RV32-NEXT:    add a4, sp, a4
; RV32-NEXT:    addi a4, a4, 16
; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v16, (a3)
; RV32-NEXT:    addi t0, t0, 12
; RV32-NEXT:    vmv.s.x v0, a7
; RV32-NEXT:    vmv.s.x v7, t0
; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT:    vle16.v v4, (a1)
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v24, v16, 16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 60
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 84
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmerge.vvm v20, v24, v16, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 40
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vmv1r.v v0, v7
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 76
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vmerge.vvm v24, v8, v16, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vrgatherei16.vv v8, v24, v4
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 44
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    li a1, 3
; RV32-NEXT:    lui a3, 196656
; RV32-NEXT:    lui a4, %hi(.LCPI8_2)
; RV32-NEXT:    addi a4, a4, %lo(.LCPI8_2)
; RV32-NEXT:    slli a1, a1, 10
; RV32-NEXT:    addi a3, a3, 48
; RV32-NEXT:    vmv.s.x v0, a1
; RV32-NEXT:    vle16.v v14, (a4)
; RV32-NEXT:    vmv.s.x v12, a3
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 84
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vmv4r.v v8, v24
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 60
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vmerge.vvm v8, v24, v8, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 24
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vmv1r.v v0, v12
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 68
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vmerge.vvm v24, v24, v16, v0
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vrgatherei16.vv v16, v24, v14
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 5
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    lui a1, 3
; RV32-NEXT:    lui a3, 786624
; RV32-NEXT:    lui a4, 12
; RV32-NEXT:    lui a5, 768
; RV32-NEXT:    li a6, 48
; RV32-NEXT:    lui a7, 3073
; RV32-NEXT:    li t0, 192
; RV32-NEXT:    addi a1, a1, 3
; RV32-NEXT:    addi a3, a3, 192
; RV32-NEXT:    addi a4, a4, 12
; RV32-NEXT:    addi a5, a5, 768
; RV32-NEXT:    addi a7, a7, -1024
; RV32-NEXT:    vmv.s.x v1, a6
; RV32-NEXT:    vmv.s.x v12, t0
; RV32-NEXT:    vmv.s.x v0, a1
; RV32-NEXT:    vmv.s.x v3, a3
; RV32-NEXT:    vmv.s.x v2, a4
; RV32-NEXT:    vmv.s.x v13, a5
; RV32-NEXT:    vmv.s.x v14, a7
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 60
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vmv4r.v v8, v16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 84
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vmerge.vvm v20, v8, v16, v0
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vmv1r.v v0, v3
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 68
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 76
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vmerge.vvm v24, v16, v24, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vmv1r.v v0, v2
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 84
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmerge.vvm v24, v8, v24, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 12
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vmv1r.v v0, v13
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 76
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vmerge.vvm v24, v16, v24, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vmv1r.v v0, v1
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 84
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmerge.vvm v4, v8, v24, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 28
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs4r.v v4, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vmv1r.v v0, v14
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 76
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a3, 68
; RV32-NEXT:    mul a1, a1, a3
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vmerge.vvm v16, v24, v16, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 76
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    vmv1r.v v0, v12
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 84
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmerge.vvm v8, v8, v16, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 68
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    lui a1, 32
; RV32-NEXT:    addi a1, a1, 4
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v16, a1
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 40
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vrgatherei16.vv v20, v8, v16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 52
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
; RV32-NEXT:    vmv.v.v v20, v8
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 84
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs4r.v v20, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    lui a1, 48
; RV32-NEXT:    lui a2, %hi(.LCPI8_3)
; RV32-NEXT:    addi a2, a2, %lo(.LCPI8_3)
; RV32-NEXT:    addi a1, a1, 5
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vle16.v v28, (a2)
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV32-NEXT:    vmv.v.x v20, a1
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 24
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vrgatherei16.vv v8, v12, v20
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 44
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 52
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vrgatherei16.vv v24, v12, v28
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 5
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT:    vmv.v.v v24, v16
; RV32-NEXT:    lui a1, %hi(.LCPI8_4)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI8_4)
; RV32-NEXT:    lui a2, %hi(.LCPI8_5)
; RV32-NEXT:    addi a2, a2, %lo(.LCPI8_5)
; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT:    vle16.v v12, (a1)
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vle16.v v28, (a2)
; RV32-NEXT:    lui a1, %hi(.LCPI8_6)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI8_6)
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vle16.v v30, (a1)
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 4
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vrgatherei16.vv v16, v0, v12
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 12
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl4r.v v20, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vrgatherei16.vv v12, v20, v28
; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT:    vmv.v.v v12, v16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    slli a1, a1, 2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vrgatherei16.vv v16, v0, v30
; RV32-NEXT:    lui a1, %hi(.LCPI8_7)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI8_7)
; RV32-NEXT:    lui a2, %hi(.LCPI8_8)
; RV32-NEXT:    addi a2, a2, %lo(.LCPI8_8)
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vle16.v v20, (a1)
; RV32-NEXT:    lui a1, %hi(.LCPI8_9)
; RV32-NEXT:    addi a1, a1, %lo(.LCPI8_9)
; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; RV32-NEXT:    vle16.v v8, (a2)
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vle16.v v10, (a1)
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 28
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl4r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vrgatherei16.vv v28, v0, v20
; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT:    vmv.v.v v28, v16
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 76
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT:    vrgatherei16.vv v16, v0, v8
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 60
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 68
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl4r.v v4, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV32-NEXT:    vrgatherei16.vv v16, v4, v10
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 60
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
; RV32-NEXT:    vmv.v.v v16, v0
; RV32-NEXT:    addi a1, a0, 320
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vse32.v v16, (a1)
; RV32-NEXT:    addi a1, a0, 256
; RV32-NEXT:    vse32.v v28, (a1)
; RV32-NEXT:    addi a1, a0, 192
; RV32-NEXT:    vse32.v v12, (a1)
; RV32-NEXT:    addi a1, a0, 128
; RV32-NEXT:    vse32.v v24, (a1)
; RV32-NEXT:    addi a1, a0, 64
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    li a3, 52
; RV32-NEXT:    mul a2, a2, a3
; RV32-NEXT:    add a2, sp, a2
; RV32-NEXT:    addi a2, a2, 16
; RV32-NEXT:    vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV32-NEXT:    vse32.v v8, (a1)
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    li a2, 84
; RV32-NEXT:    mul a1, a1, a2
; RV32-NEXT:    add a1, sp, a1
; RV32-NEXT:    addi a1, a1, 16
; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    li a1, 92
; RV32-NEXT:    mul a0, a0, a1
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: load_factor6_too_big:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    li a3, 88
; RV64-NEXT:    mul a2, a2, a3
; RV64-NEXT:    sub sp, sp, a2
; RV64-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb
; RV64-NEXT:    addi a3, a1, 128
; RV64-NEXT:    addi a6, a1, 256
; RV64-NEXT:    li a4, 128
; RV64-NEXT:    lui a2, 1
; RV64-NEXT:    lui a5, %hi(.LCPI8_0)
; RV64-NEXT:    addi a5, a5, %lo(.LCPI8_0)
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.i v16, 6
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a6)
; RV64-NEXT:    lui a6, 16
; RV64-NEXT:    addi a6, a6, 7
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v17, a6
; RV64-NEXT:    addi a6, a2, 65
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vrgather.vi v24, v8, 4
; RV64-NEXT:    vrgather.vi v20, v8, 5
; RV64-NEXT:    csrr a7, vlenb
; RV64-NEXT:    li t0, 68
; RV64-NEXT:    mul a7, a7, t0
; RV64-NEXT:    add a7, sp, a7
; RV64-NEXT:    addi a7, a7, 16
; RV64-NEXT:    vs4r.v v20, (a7) # Unknown-size Folded Spill
; RV64-NEXT:    vrgatherei16.vv v20, v8, v16
; RV64-NEXT:    csrr a7, vlenb
; RV64-NEXT:    li t0, 84
; RV64-NEXT:    mul a7, a7, t0
; RV64-NEXT:    add a7, sp, a7
; RV64-NEXT:    addi a7, a7, 16
; RV64-NEXT:    vs4r.v v20, (a7) # Unknown-size Folded Spill
; RV64-NEXT:    vrgatherei16.vv v20, v8, v17
; RV64-NEXT:    csrr a7, vlenb
; RV64-NEXT:    li t0, 72
; RV64-NEXT:    mul a7, a7, t0
; RV64-NEXT:    add a7, sp, a7
; RV64-NEXT:    addi a7, a7, 16
; RV64-NEXT:    vs4r.v v20, (a7) # Unknown-size Folded Spill
; RV64-NEXT:    vrgather.vi v16, v8, 2
; RV64-NEXT:    csrr a7, vlenb
; RV64-NEXT:    slli a7, a7, 6
; RV64-NEXT:    add a7, sp, a7
; RV64-NEXT:    addi a7, a7, 16
; RV64-NEXT:    vs4r.v v16, (a7) # Unknown-size Folded Spill
; RV64-NEXT:    vrgather.vi v16, v8, 3
; RV64-NEXT:    csrr a7, vlenb
; RV64-NEXT:    li t0, 56
; RV64-NEXT:    mul a7, a7, t0
; RV64-NEXT:    add a7, sp, a7
; RV64-NEXT:    addi a7, a7, 16
; RV64-NEXT:    vs4r.v v16, (a7) # Unknown-size Folded Spill
; RV64-NEXT:    vsetivli zero, 8, e64, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v8, 8
; RV64-NEXT:    csrr a7, vlenb
; RV64-NEXT:    li t0, 48
; RV64-NEXT:    mul a7, a7, t0
; RV64-NEXT:    add a7, sp, a7
; RV64-NEXT:    addi a7, a7, 16
; RV64-NEXT:    vs8r.v v16, (a7) # Unknown-size Folded Spill
; RV64-NEXT:    vmv.s.x v21, a4
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v8, (a1)
; RV64-NEXT:    vle64.v v0, (a3)
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 40
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vle16.v v2, (a5)
; RV64-NEXT:    vmv.s.x v20, a6
; RV64-NEXT:    vmv1r.v v0, v21
; RV64-NEXT:    vmv1r.v v7, v21
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vrgather.vi v24, v16, 2, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 60
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v20
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 40
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vmerge.vvm v24, v16, v8, v0
; RV64-NEXT:    vmv8r.v v16, v8
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 76
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vrgatherei16.vv v8, v24, v2
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 5
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    lui a1, 2
; RV64-NEXT:    lui a3, %hi(.LCPI8_1)
; RV64-NEXT:    addi a3, a3, %lo(.LCPI8_1)
; RV64-NEXT:    addi a1, a1, 130
; RV64-NEXT:    vle16.v v8, (a3)
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 4
; RV64-NEXT:    add a3, sp, a3
; RV64-NEXT:    addi a3, a3, 16
; RV64-NEXT:    vs2r.v v8, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    vmv.s.x v2, a1
; RV64-NEXT:    vmv1r.v v0, v7
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs1r.v v7, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 68
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 48
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vrgather.vi v24, v8, 3, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 68
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v2
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 40
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vmerge.vvm v24, v8, v16, v0
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl2r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vrgatherei16.vv v0, v24, v16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 24
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v0, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    lui a1, 4
; RV64-NEXT:    lui a3, 8
; RV64-NEXT:    addi a1, a1, 260
; RV64-NEXT:    addi a3, a3, 520
; RV64-NEXT:    vmv.s.x v0, a1
; RV64-NEXT:    vmv.s.x v2, a3
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 76
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vmerge.vvm v24, v8, v16, v0
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vl1r.v v7, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vmv1r.v v0, v7
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 84
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 48
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vrgather.vi v24, v16, 4, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 84
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v2
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 76
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vmerge.vvm v24, v8, v16, v0
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vmv8r.v v16, v8
; RV64-NEXT:    vmv1r.v v0, v7
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 72
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 48
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vmv4r.v v8, v24
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vrgather.vi v12, v24, 5, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 72
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    lui a1, 96
; RV64-NEXT:    li a3, 192
; RV64-NEXT:    vmv.s.x v3, a3
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v12, a1
; RV64-NEXT:    vmv1r.v v0, v3
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 6
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vrgatherei16.vv v24, v8, v12, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 6
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs4r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    lui a1, %hi(.LCPI8_2)
; RV64-NEXT:    addi a1, a1, %lo(.LCPI8_2)
; RV64-NEXT:    li a3, 1040
; RV64-NEXT:    lui a4, 112
; RV64-NEXT:    addi a4, a4, 1
; RV64-NEXT:    vmv.s.x v0, a3
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; RV64-NEXT:    vmv.v.x v12, a4
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle16.v v6, (a1)
; RV64-NEXT:    vmv8r.v v24, v16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 76
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vmerge.vvm v16, v24, v16, v0
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    vmv1r.v v0, v3
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 56
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT:    vrgatherei16.vv v16, v8, v12, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a3, 56
; RV64-NEXT:    mul a1, a1, a3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs4r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    addi a1, a2, -2016
; RV64-NEXT:    vmv.s.x v0, a1
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vrgatherei16.vv v16, v8, v6
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 76
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vmerge.vvm v8, v24, v8, v0
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 76
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    lui a1, %hi(.LCPI8_3)
; RV64-NEXT:    addi a1, a1, %lo(.LCPI8_3)
; RV64-NEXT:    vle16.v v24, (a1)
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 5
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 60
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 6, e64, m4, tu, ma
; RV64-NEXT:    vmv.v.v v8, v0
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 60
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 68
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v0, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 24
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vmv.v.v v0, v8
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 84
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 84
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 4
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vrgatherei16.vv v16, v8, v24
; RV64-NEXT:    lui a1, %hi(.LCPI8_4)
; RV64-NEXT:    addi a1, a1, %lo(.LCPI8_4)
; RV64-NEXT:    vle16.v v8, (a1)
; RV64-NEXT:    lui a1, %hi(.LCPI8_5)
; RV64-NEXT:    addi a1, a1, %lo(.LCPI8_5)
; RV64-NEXT:    vle16.v v6, (a1)
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 72
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v12, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT:    vmv.v.v v12, v16
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vrgatherei16.vv v24, v16, v8
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 6
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT:    vmv.v.v v8, v24
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 76
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vrgatherei16.vv v24, v16, v6
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 56
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetivli zero, 5, e64, m4, tu, ma
; RV64-NEXT:    vmv.v.v v16, v24
; RV64-NEXT:    addi a1, a0, 256
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vse64.v v8, (a1)
; RV64-NEXT:    addi a1, a0, 320
; RV64-NEXT:    vse64.v v16, (a1)
; RV64-NEXT:    addi a1, a0, 192
; RV64-NEXT:    vse64.v v12, (a1)
; RV64-NEXT:    addi a1, a0, 128
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    li a3, 84
; RV64-NEXT:    mul a2, a2, a3
; RV64-NEXT:    add a2, sp, a2
; RV64-NEXT:    addi a2, a2, 16
; RV64-NEXT:    vl4r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT:    vse64.v v8, (a1)
; RV64-NEXT:    addi a1, a0, 64
; RV64-NEXT:    vse64.v v0, (a1)
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    li a2, 60
; RV64-NEXT:    mul a1, a1, a2
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    li a1, 88
; RV64-NEXT:    mul a0, a0, a1
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %interleaved.vec = load <48 x i64>, ptr %ptr
  %v0 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 0, i32 6, i32 12, i32 18, i32 24, i32 30, i32 36, i32 42>
  %v1 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 1, i32 7, i32 13, i32 19, i32 25, i32 31, i32 37, i32 43>
  %v2 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 2, i32 8, i32 14, i32 20, i32 26, i32 32, i32 38, i32 44>
  %v3 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 3, i32 9, i32 15, i32 21, i32 27, i32 33, i32 39, i32 45>
  %v4 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 4, i32 10, i32 16, i32 22, i32 28, i32 34, i32 40, i32 46>
  %v5 = shufflevector <48 x i64> %interleaved.vec, <48 x i64> poison, <8 x i32> <i32 5, i32 11, i32 17, i32 23, i32 29, i32 35, i32 41, i32 47>
  %res0 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} undef, <8 x i64> %v0, 0
  %res1 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res0, <8 x i64> %v1, 1
  %res2 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res1, <8 x i64> %v2, 2
  %res3 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res2, <8 x i64> %v3, 3
  %res4 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res3, <8 x i64> %v4, 4
  %res5 = insertvalue {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res4, <8 x i64> %v5, 5
  ret {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} %res5
}


; ------------------------------------------------------------------------------
; Stores
; ------------------------------------------------------------------------------

define void @store_factor2(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: store_factor2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor3(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: store_factor3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsseg3e32.v v8, (a0)
; CHECK-NEXT:    ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor4(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; CHECK-LABEL: store_factor4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsseg4e32.v v8, (a0)
; CHECK-NEXT:    ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor5(ptr %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4) {
; CHECK-LABEL: store_factor5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsseg5e32.v v8, (a0)
; CHECK-NEXT:    ret
  %s0 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s1 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s2 = shufflevector <8 x i32> %s0, <8 x i32> %s1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %s3 = shufflevector <4 x i32> %v4, <4 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <16 x i32> %s2, <16 x i32> %s3, <20 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 1, i32 5, i32 9, i32 13, i32 17, i32 2, i32 6, i32 10, i32 14, i32 18, i32 3, i32 7, i32 11, i32 15, i32 19>
  store <20 x i32> %interleaved.vec, ptr %ptr
  ret void
}

define void @store_factor6(ptr %ptr, <2 x i16> %v0, <2 x i16> %v1, <2 x i16> %v2, <2 x i16> %v3, <2 x i16> %v4, <2 x i16> %v5) {
; CHECK-LABEL: store_factor6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vsseg6e16.v v8, (a0)
; CHECK-NEXT:    ret
  %s0 = shufflevector <2 x i16> %v0, <2 x i16> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s1 = shufflevector <2 x i16> %v2, <2 x i16> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %s2 = shufflevector <4 x i16> %s0, <4 x i16> %s1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %s3 = shufflevector <2 x i16> %v4, <2 x i16> %v5, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i16> %s2, <8 x i16> %s3, <12 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11>
  store <12 x i16> %interleaved.vec, ptr %ptr
  ret void
}


define <4 x i32> @load_factor2_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor2_one_active:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <8 x i32>, ptr %ptr
  %v0 = shufflevector <8 x i32> %interleaved.vec, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %v0
}


define <4 x i32> @load_factor3_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor3_one_active:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 12
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %interleaved.vec = load <12 x i32>, ptr %ptr
  %v0 = shufflevector <12 x i32> %interleaved.vec, <12 x i32> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  ret <4 x i32> %v0
}

define <4 x i32> @load_factor4_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  ret <4 x i32> %v0
}

define <4 x i32> @load_factor5_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor5_one_active:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 20
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %interleaved.vec = load <20 x i32>, ptr %ptr
  %v0 = shufflevector <20 x i32> %interleaved.vec, <20 x i32> poison, <4 x i32> <i32 0, i32 5, i32 10, i32 15>
  ret <4 x i32> %v0
}

define <2 x i16> @load_factor6_one_active(ptr %ptr) {
; CHECK-LABEL: load_factor6_one_active:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 10
; CHECK-NEXT:    li a1, 12
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1
; CHECK-NEXT:    ret
  %interleaved.vec = load <12 x i16>, ptr %ptr
  %v0 = shufflevector <12 x i16> %interleaved.vec, <12 x i16> poison, <2 x i32> <i32 5, i32 11>
  ret <2 x i16> %v0
}

define <4 x i8> @load_factor7_one_active(ptr %ptr) vscale_range(8,1024) {
; CHECK-LABEL: load_factor7_one_active:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 1
; CHECK-NEXT:    li a1, 7
; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1
; CHECK-NEXT:    ret
  %interleaved.vec = load <32 x i8>, ptr %ptr
  %v0 = shufflevector <32 x i8> %interleaved.vec, <32 x i8> poison, <4 x i32> <i32 1, i32 8, i32 15, i32 22>
  ret <4 x i8> %v0
}

define <4 x i8> @load_factor8_one_active(ptr %ptr) vscale_range(8,1024) {
; CHECK-LABEL: load_factor8_one_active:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 8
; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1
; CHECK-NEXT:    ret
  %interleaved.vec = load <32 x i8>, ptr %ptr
  %v0 = shufflevector <32 x i8> %interleaved.vec, <32 x i8> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
  ret <4 x i8> %v0
}

define void @load_factor4_one_active_storeback(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  store <4 x i32> %v0, ptr %ptr
  ret void
}

; TODO: This should be a strided load
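; A minimal sketch of the strided form this could take (hypothetical codegen,
; reusing the vlse32.v pattern from load_factor4_one_active above):
;   li a1, 16
;   vsetivli zero, 4, e32, m1, ta, ma
;   vlse32.v v8, (a0), a1
; followed by a store of the gathered lanes, since the remaining elements of
; the stored <16 x i32> are undef.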
define void @load_factor4_one_active_storeback_full(ptr %ptr) {
; CHECK-LABEL: load_factor4_one_active_storeback_full:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
; CHECK-NEXT:    vslidedown.vi v12, v8, 4
; CHECK-NEXT:    vmv1r.v v13, v8
; CHECK-NEXT:    vmv1r.v v14, v12
; CHECK-NEXT:    vsetivli zero, 4, e32, m4, ta, ma
; CHECK-NEXT:    vslidedown.vi v16, v8, 8
; CHECK-NEXT:    vmv1r.v v15, v16
; CHECK-NEXT:    vslidedown.vi v16, v8, 12
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsseg4e32.v v13, (a0)
; CHECK-NEXT:    ret
  %interleaved.vec = load <16 x i32>, ptr %ptr
  %v0 = shufflevector <16 x i32> %interleaved.vec, <16 x i32> poison, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define void @store_factor4_one_active(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3,  i32 undef, i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define void @store_factor4_one_active_idx1(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_idx1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi a0, a0, 4
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 undef, i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3,  i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

define void @store_factor4_one_active_fullwidth(ptr %ptr, <16 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_fullwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:    vsetivli zero, 4, e32, m4, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %v0 = shufflevector <16 x i32> %v, <16 x i32> poison, <16 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3,  i32 undef, i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}

; TODO: This could be a vslidedown followed by a strided store
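; A minimal sketch of that alternative (hypothetical codegen, mirroring the
; vsse32.v pattern from store_factor4_one_active above):
;   vsetivli zero, 4, e32, m1, ta, ma
;   vslidedown.vi v8, v8, 1
;   li a1, 16
;   vsse32.v v8, (a0), a1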
define void @store_factor4_one_active_slidedown(ptr %ptr, <4 x i32> %v) {
; CHECK-LABEL: store_factor4_one_active_slidedown:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v9, v8, 1
; CHECK-NEXT:    vslideup.vi v10, v8, 1
; CHECK-NEXT:    vmv.v.v v11, v10
; CHECK-NEXT:    vmv.v.v v12, v10
; CHECK-NEXT:    vsseg4e32.v v9, (a0)
; CHECK-NEXT:    ret
  %v0 = shufflevector <4 x i32> %v, <4 x i32> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 undef, i32 4,  i32 undef, i32 undef, i32 undef>
  store <16 x i32> %v0, ptr %ptr
  ret void
}
1356