; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s

declare <2 x i8> @llvm.vp.load.v2i8.p0(ptr, <2 x i1>, i32)

define <2 x i8> @vpload_v2i8(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <3 x i8> @llvm.vp.load.v3i8.p0(ptr, <3 x i1>, i32)

define <3 x i8> @vpload_v3i8(ptr %ptr, <3 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v3i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <3 x i8> @llvm.vp.load.v3i8.p0(ptr %ptr, <3 x i1> %m, i32 %evl)
  ret <3 x i8> %load
}

declare <4 x i8> @llvm.vp.load.v4i8.p0(ptr, <4 x i1>, i32)

define <4 x i8> @vpload_v4i8(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %load
}

define <4 x i8> @vpload_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i8_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <4 x i8> @llvm.vp.load.v4i8.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %load
}

declare <8 x i8> @llvm.vp.load.v8i8.p0(ptr, <8 x i1>, i32)

define <8 x i8> @vpload_v8i8(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i8> @llvm.vp.load.v8i8.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %load
}

declare <2 x i16> @llvm.vp.load.v2i16.p0(ptr, <2 x i1>, i32)

define <2 x i16> @vpload_v2i16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i16> @llvm.vp.load.v2i16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %load
}

declare <4 x i16> @llvm.vp.load.v4i16.p0(ptr, <4 x i1>, i32)

define <4 x i16> @vpload_v4i16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i16> @llvm.vp.load.v4i16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %load
}

declare <8 x i16> @llvm.vp.load.v8i16.p0(ptr, <8 x i1>, i32)

define <8 x i16> @vpload_v8i16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %load
}

define <8 x i16> @vpload_v8i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <8 x i16> @llvm.vp.load.v8i16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %load
}

declare <2 x i32> @llvm.vp.load.v2i32.p0(ptr, <2 x i1>, i32)

define <2 x i32> @vpload_v2i32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i32> @llvm.vp.load.v2i32.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %load
}

declare <4 x i32> @llvm.vp.load.v4i32.p0(ptr, <4 x i1>, i32)

define <4 x i32> @vpload_v4i32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %load
}

declare <6 x i32> @llvm.vp.load.v6i32.p0(ptr, <6 x i1>, i32)

define <6 x i32> @vpload_v6i32(ptr %ptr, <6 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v6i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <6 x i32> @llvm.vp.load.v6i32.p0(ptr %ptr, <6 x i1> %m, i32 %evl)
  ret <6 x i32> %load
}

define <6 x i32> @vpload_v6i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v6i32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <6 x i32> @llvm.vp.load.v6i32.p0(ptr %ptr, <6 x i1> splat (i1 true), i32 %evl)
  ret <6 x i32> %load
}

declare <8 x i32> @llvm.vp.load.v8i32.p0(ptr, <8 x i1>, i32)

define <8 x i32> @vpload_v8i32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %load
}

define <8 x i32> @vpload_v8i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <8 x i32> @llvm.vp.load.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %load
}

declare <2 x i64> @llvm.vp.load.v2i64.p0(ptr, <2 x i1>, i32)

define <2 x i64> @vpload_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i64> @llvm.vp.load.v2i64.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %load
}

declare <4 x i64> @llvm.vp.load.v4i64.p0(ptr, <4 x i1>, i32)

define <4 x i64> @vpload_v4i64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %load
}

define <4 x i64> @vpload_v4i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4i64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <4 x i64> @llvm.vp.load.v4i64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %load
}

declare <8 x i64> @llvm.vp.load.v8i64.p0(ptr, <8 x i1>, i32)

define <8 x i64> @vpload_v8i64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i64> @llvm.vp.load.v8i64.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %load
}

declare <2 x half> @llvm.vp.load.v2f16.p0(ptr, <2 x i1>, i32)

define <2 x half> @vpload_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x half> @llvm.vp.load.v2f16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x half> %load
}

define <2 x half> @vpload_v2f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2f16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <2 x half> @llvm.vp.load.v2f16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %load
}

declare <4 x half> @llvm.vp.load.v4f16.p0(ptr, <4 x i1>, i32)

define <4 x half> @vpload_v4f16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x half> @llvm.vp.load.v4f16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x half> %load
}

declare <8 x half> @llvm.vp.load.v8f16.p0(ptr, <8 x i1>, i32)

define <8 x half> @vpload_v8f16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x half> @llvm.vp.load.v8f16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x half> %load
}

declare <2 x float> @llvm.vp.load.v2f32.p0(ptr, <2 x i1>, i32)

define <2 x float> @vpload_v2f32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x float> @llvm.vp.load.v2f32.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x float> %load
}

declare <4 x float> @llvm.vp.load.v4f32.p0(ptr, <4 x i1>, i32)

define <4 x float> @vpload_v4f32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x float> @llvm.vp.load.v4f32.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x float> %load
}

declare <8 x float> @llvm.vp.load.v8f32.p0(ptr, <8 x i1>, i32)

define <8 x float> @vpload_v8f32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x float> @llvm.vp.load.v8f32.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x float> %load
}

define <8 x float> @vpload_v8f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8f32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <8 x float> @llvm.vp.load.v8f32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %load
}

declare <2 x double> @llvm.vp.load.v2f64.p0(ptr, <2 x i1>, i32)

define <2 x double> @vpload_v2f64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x double> @llvm.vp.load.v2f64.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
  ret <2 x double> %load
}

declare <4 x double> @llvm.vp.load.v4f64.p0(ptr, <4 x i1>, i32)

define <4 x double> @vpload_v4f64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x double> @llvm.vp.load.v4f64.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
  ret <4 x double> %load
}

define <4 x double> @vpload_v4f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v4f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    ret
  %load = call <4 x double> @llvm.vp.load.v4f64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %load
}

declare <8 x double> @llvm.vp.load.v8f64.p0(ptr, <8 x i1>, i32)

define <8 x double> @vpload_v8f64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x double> @llvm.vp.load.v8f64.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
  ret <8 x double> %load
}

declare <32 x double> @llvm.vp.load.v32f64.p0(ptr, <32 x i1>, i32)

define <32 x double> @vpload_v32f64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:    mv a2, a1
; CHECK-NEXT:    bltu a1, a3, .LBB31_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:  .LBB31_2:
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    addi a2, a1, -16
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v0, 2
; CHECK-NEXT:    sltu a1, a1, a2
; CHECK-NEXT:    addi a1, a1, -1
; CHECK-NEXT:    and a1, a1, a2
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <32 x double> @llvm.vp.load.v32f64.p0(ptr %ptr, <32 x i1> %m, i32 %evl)
  ret <32 x double> %load
}

declare <33 x double> @llvm.vp.load.v33f64.p0(ptr, <33 x i1>, i32)

; Widen to v64f64 then split into 4 x v16f64, of which 1 is empty.
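; (33 elements widened to 64 give four 16-element chunks; the last chunk covers
; elements 48-63, which lie entirely beyond the 33 defined elements, so no load
; is emitted for it and only three masked vle64.v are expected below.)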

define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_v33f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v8, v0
; CHECK-NEXT:    li a4, 32
; CHECK-NEXT:    mv a3, a2
; CHECK-NEXT:    bltu a2, a4, .LBB32_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a3, 32
; CHECK-NEXT:  .LBB32_2:
; CHECK-NEXT:    addi a4, a3, -16
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v8, 2
; CHECK-NEXT:    sltu a3, a3, a4
; CHECK-NEXT:    addi a3, a3, -1
; CHECK-NEXT:    and a3, a3, a4
; CHECK-NEXT:    addi a4, a1, 128
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v16, (a4), v0.t
; CHECK-NEXT:    addi a3, a2, -32
; CHECK-NEXT:    sltu a4, a2, a3
; CHECK-NEXT:    addi a4, a4, -1
; CHECK-NEXT:    and a4, a4, a3
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:    bltu a4, a3, .LBB32_4
; CHECK-NEXT:  # %bb.3:
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:  .LBB32_4:
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v8, 4
; CHECK-NEXT:    addi a5, a1, 256
; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v24, (a5), v0.t
; CHECK-NEXT:    bltu a2, a3, .LBB32_6
; CHECK-NEXT:  # %bb.5:
; CHECK-NEXT:    li a2, 16
; CHECK-NEXT:  .LBB32_6:
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vle64.v v8, (a1), v0.t
; CHECK-NEXT:    addi a1, a0, 128
; CHECK-NEXT:    addi a2, a0, 256
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    vse64.v v16, (a1)
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v24, (a2)
; CHECK-NEXT:    ret
  %load = call <33 x double> @llvm.vp.load.v33f64.p0(ptr %ptr, <33 x i1> %m, i32 %evl)
  ret <33 x double> %load
}
