; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFH
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-OPT
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin,+optimized-zero-stride-load \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-OPT
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,CHECK-NO-OPT,CHECK-NO-OPT-ZVFHMIN

declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr, i8, <2 x i1>, i32)

define <2 x i8> @strided_vpload_v2i8_i8(ptr %ptr, i8 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i8_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i8(ptr %ptr, i8 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i16(ptr, i16, <2 x i1>, i32)

define <2 x i8> @strided_vpload_v2i8_i16(ptr %ptr, i16 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i8_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i16(ptr %ptr, i16 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr, i64, <2 x i1>, i32)

define <2 x i8> @strided_vpload_v2i8_i64(ptr %ptr, i64 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_vpload_v2i8_i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-RV32-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: strided_vpload_v2i8_i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-RV64-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT:    ret
  %load = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %ptr, i64 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x i8> @strided_vpload_v2i8(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %load
}

declare <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x i8> @strided_vpload_v4i8(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf4, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %load
}

define <4 x i8> @strided_vpload_v4i8_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i8_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf4, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1
; CHECK-NEXT:    ret
  %load = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i32(ptr %ptr, i32 %stride, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i8> %load
}

declare <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x i8> @strided_vpload_v8i8(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT:    vlse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %load
}

define <8 x i8> @strided_vpload_v8i8_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i8_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i32(ptr %ptr, i32 1, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %load
}

declare <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x i16> @strided_vpload_v2i16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i16> @llvm.experimental.vp.strided.load.v2i16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %load
}

declare <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x i16> @strided_vpload_v4i16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i16> @llvm.experimental.vp.strided.load.v4i16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %load
}

declare <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x i16> @strided_vpload_v8i16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %load
}

define <8 x i16> @strided_vpload_v8i16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret <8 x i16> %load
}

define <8 x i16> @strided_vpload_v8i16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1
; CHECK-NEXT:    ret
  %load = call <8 x i16> @llvm.experimental.vp.strided.load.v8i16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i16> %load
}

declare <2 x i32> @llvm.experimental.vp.strided.load.v2i32.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x i32> @strided_vpload_v2i32(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i32> @llvm.experimental.vp.strided.load.v2i32.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i32> %load
}

declare <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x i32> @strided_vpload_v4i32(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %load
}

define <4 x i32> @strided_vpload_v4i32_unit_stride(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i32_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i32(ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %load
}

declare <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x i32> @strided_vpload_v8i32(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %load
}

define <8 x i32> @strided_vpload_v8i32_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %load = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x i32> %load
}

declare <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x i64> @strided_vpload_v2i64(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %load
}

define <2 x i64> @strided_vpload_v2i64_unit_stride(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2i64_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x i64> @llvm.experimental.vp.strided.load.v2i64.p0.i32(ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %load
}

declare <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x i64> @strided_vpload_v4i64(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %load
}

define <4 x i64> @strided_vpload_v4i64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4i64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1
; CHECK-NEXT:    ret
  %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x i64> %load
}

declare <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x i64> @strided_vpload_v8i64(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <8 x i64> @llvm.experimental.vp.strided.load.v8i64.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %load
}

declare <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x bfloat> @strided_vpload_v2bf16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x bfloat> %load
}

define <2 x bfloat> @strided_vpload_v2bf16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2bf16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1
; CHECK-NEXT:    ret
  %load = call <2 x bfloat> @llvm.experimental.vp.strided.load.v2bf16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x bfloat> %load
}

declare <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x bfloat> @strided_vpload_v4bf16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <4 x bfloat> @llvm.experimental.vp.strided.load.v4bf16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x bfloat> %load
}

declare <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x bfloat> @strided_vpload_v8bf16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %load
}

define <8 x bfloat> @strided_vpload_v8bf16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8bf16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %load
}

declare <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x half> @strided_vpload_v2f16(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x half> %load
}

define <2 x half> @strided_vpload_v2f16_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f16_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1
; CHECK-NEXT:    ret
  %load = call <2 x half> @llvm.experimental.vp.strided.load.v2f16.p0.i32(ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
  ret <2 x half> %load
}

declare <4 x half> @llvm.experimental.vp.strided.load.v4f16.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x half> @strided_vpload_v4f16(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <4 x half> @llvm.experimental.vp.strided.load.v4f16.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x half> %load
}

declare <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x half> @strided_vpload_v8f16(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vlse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x half> %load
}

define <8 x half> @strided_vpload_v8f16_unit_stride(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vle16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i32(ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret <8 x half> %load
}

declare <2 x float> @llvm.experimental.vp.strided.load.v2f32.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x float> @strided_vpload_v2f32(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x float> @llvm.experimental.vp.strided.load.v2f32.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x float> %load
}

declare <4 x float> @llvm.experimental.vp.strided.load.v4f32.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x float> @strided_vpload_v4f32(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <4 x float> @llvm.experimental.vp.strided.load.v4f32.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x float> %load
}

define <4 x float> @strided_vpload_v4f32_unit_stride(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f32_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vle32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <4 x float> @llvm.experimental.vp.strided.load.v4f32.p0.i32(ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
  ret <4 x float> %load
}

declare <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x float> @strided_vpload_v8f32(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x float> %load
}

define <8 x float> @strided_vpload_v8f32_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vlse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  %load = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i32(ptr %ptr, i32 %stride, <8 x i1> splat (i1 true), i32 %evl)
  ret <8 x float> %load
}

declare <2 x double> @llvm.experimental.vp.strided.load.v2f64.p0.i32(ptr, i32, <2 x i1>, i32)

define <2 x double> @strided_vpload_v2f64(ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <2 x double> @llvm.experimental.vp.strided.load.v2f64.p0.i32(ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret <2 x double> %load
}

define <2 x double> @strided_vpload_v2f64_unit_stride(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v2f64_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vle64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  %load = call <2 x double> @llvm.experimental.vp.strided.load.v2f64.p0.i32(ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
  ret <2 x double> %load
}


declare <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr, i32, <4 x i1>, i32)

define <4 x double> @strided_vpload_v4f64(ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret <4 x double> %load
}

define <4 x double> @strided_vpload_v4f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v4f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1
; CHECK-NEXT:    ret
  %load = call <4 x double> @llvm.experimental.vp.strided.load.v4f64.p0.i32(ptr %ptr, i32 %stride, <4 x i1> splat (i1 true), i32 %evl)
  ret <4 x double> %load
}

declare <8 x double> @llvm.experimental.vp.strided.load.v8f64.p0.i32(ptr, i32, <8 x i1>, i32)

define <8 x double> @strided_vpload_v8f64(ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <8 x double> @llvm.experimental.vp.strided.load.v8f64.p0.i32(ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret <8 x double> %load
}

; Widening
define <3 x double> @strided_vpload_v3f64(ptr %ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v3f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr %ptr, i32 %stride, <3 x i1> %mask, i32 %evl)
  ret <3 x double> %v
}

define <3 x double> @strided_vpload_v3f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpload_v3f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1
; CHECK-NEXT:    ret
  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr %ptr, i32 %stride, <3 x i1> splat (i1 true), i32 %evl)
  ret <3 x double> %v
}

declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0.i32(ptr, i32, <3 x i1>, i32)

; Splitting
define <32 x double> @strided_vpload_v32f64(ptr %ptr, i32 signext %stride, <32 x i1> %m, i32 zeroext %evl) nounwind {
; CHECK-LABEL: strided_vpload_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv1r.v v9, v0
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:    mv a3, a2
; CHECK-NEXT:    bltu a2, a4, .LBB45_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:  .LBB45_2:
; CHECK-NEXT:    mul a4, a3, a1
; CHECK-NEXT:    addi a5, a2, -16
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v8, v9, 2
; CHECK-NEXT:    add a4, a0, a4
; CHECK-NEXT:    sltu a2, a2, a5
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a5
; CHECK-NEXT:    vmv1r.v v0, v8
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vlse64.v v16, (a4), a1, v0.t
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> %m, i32 %evl)
  ret <32 x double> %load
}

define <32 x double> @strided_vpload_v32f64_allones_mask(ptr %ptr, i32 signext %stride, i32 zeroext %evl) nounwind {
; CHECK-LABEL: strided_vpload_v32f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:    mv a3, a2
; CHECK-NEXT:    bltu a2, a4, .LBB46_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:  .LBB46_2:
; CHECK-NEXT:    mul a4, a3, a1
; CHECK-NEXT:    addi a5, a2, -16
; CHECK-NEXT:    add a4, a0, a4
; CHECK-NEXT:    sltu a2, a2, a5
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a5
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vlse64.v v16, (a4), a1
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT:    vlse64.v v8, (a0), a1
; CHECK-NEXT:    ret
  %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> splat (i1 true), i32 %evl)
  ret <32 x double> %load
}

declare <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr, i32, <32 x i1>, i32)

; Widening + splitting (with HiIsEmpty == true)
define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_load_v33f64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV32-NEXT:    vmv1r.v v8, v0
; CHECK-RV32-NEXT:    li a5, 32
; CHECK-RV32-NEXT:    mv a3, a4
; CHECK-RV32-NEXT:    bltu a4, a5, .LBB47_2
; CHECK-RV32-NEXT:  # %bb.1:
; CHECK-RV32-NEXT:    li a3, 32
; CHECK-RV32-NEXT:  .LBB47_2:
; CHECK-RV32-NEXT:    mul a6, a3, a2
; CHECK-RV32-NEXT:    addi a5, a4, -32
; CHECK-RV32-NEXT:    sltu a7, a4, a5
; CHECK-RV32-NEXT:    addi a7, a7, -1
; CHECK-RV32-NEXT:    and a7, a7, a5
; CHECK-RV32-NEXT:    li a5, 16
; CHECK-RV32-NEXT:    add a6, a1, a6
; CHECK-RV32-NEXT:    bltu a7, a5, .LBB47_4
; CHECK-RV32-NEXT:  # %bb.3:
; CHECK-RV32-NEXT:    li a7, 16
; CHECK-RV32-NEXT:  .LBB47_4:
; CHECK-RV32-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV32-NEXT:    vslidedown.vi v0, v8, 4
; CHECK-RV32-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vlse64.v v16, (a6), a2, v0.t
; CHECK-RV32-NEXT:    addi a6, a3, -16
; CHECK-RV32-NEXT:    sltu a3, a3, a6
; CHECK-RV32-NEXT:    addi a3, a3, -1
; CHECK-RV32-NEXT:    and a3, a3, a6
; CHECK-RV32-NEXT:    bltu a4, a5, .LBB47_6
; CHECK-RV32-NEXT:  # %bb.5:
; CHECK-RV32-NEXT:    li a4, 16
; CHECK-RV32-NEXT:  .LBB47_6:
; CHECK-RV32-NEXT:    mul a5, a4, a2
; CHECK-RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV32-NEXT:    vslidedown.vi v0, v8, 2
; CHECK-RV32-NEXT:    add a5, a1, a5
; CHECK-RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vlse64.v v24, (a5), a2, v0.t
; CHECK-RV32-NEXT:    vmv1r.v v0, v8
; CHECK-RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vlse64.v v8, (a1), a2, v0.t
; CHECK-RV32-NEXT:    addi a1, a0, 128
; CHECK-RV32-NEXT:    addi a2, a0, 256
; CHECK-RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV32-NEXT:    vse64.v v8, (a0)
; CHECK-RV32-NEXT:    vse64.v v24, (a1)
; CHECK-RV32-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-RV32-NEXT:    vse64.v v16, (a2)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: strided_load_v33f64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-RV64-NEXT:    vmv1r.v v8, v0
; CHECK-RV64-NEXT:    li a5, 32
; CHECK-RV64-NEXT:    mv a4, a3
; CHECK-RV64-NEXT:    bltu a3, a5, .LBB47_2
; CHECK-RV64-NEXT:  # %bb.1:
; CHECK-RV64-NEXT:    li a4, 32
; CHECK-RV64-NEXT:  .LBB47_2:
; CHECK-RV64-NEXT:    mul a6, a4, a2
; CHECK-RV64-NEXT:    addi a5, a3, -32
; CHECK-RV64-NEXT:    sltu a7, a3, a5
; CHECK-RV64-NEXT:    addi a7, a7, -1
; CHECK-RV64-NEXT:    and a7, a7, a5
; CHECK-RV64-NEXT:    li a5, 16
; CHECK-RV64-NEXT:    add a6, a1, a6
; CHECK-RV64-NEXT:    bltu a7, a5, .LBB47_4
; CHECK-RV64-NEXT:  # %bb.3:
; CHECK-RV64-NEXT:    li a7, 16
; CHECK-RV64-NEXT:  .LBB47_4:
; CHECK-RV64-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-RV64-NEXT:    vslidedown.vi v0, v8, 4
; CHECK-RV64-NEXT:    vsetvli zero, a7, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vlse64.v v16, (a6), a2, v0.t
; CHECK-RV64-NEXT:    addi a6, a4, -16
; CHECK-RV64-NEXT:    sltu a4, a4, a6
; CHECK-RV64-NEXT:    addi a4, a4, -1
; CHECK-RV64-NEXT:    and a4, a4, a6
; CHECK-RV64-NEXT:    bltu a3, a5, .LBB47_6
; CHECK-RV64-NEXT:  # %bb.5:
; CHECK-RV64-NEXT:    li a3, 16
; CHECK-RV64-NEXT:  .LBB47_6:
; CHECK-RV64-NEXT:    mul a5, a3, a2
; CHECK-RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-RV64-NEXT:    vslidedown.vi v0, v8, 2
; CHECK-RV64-NEXT:    add a5, a1, a5
; CHECK-RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vlse64.v v24, (a5), a2, v0.t
; CHECK-RV64-NEXT:    vmv1r.v v0, v8
; CHECK-RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vlse64.v v8, (a1), a2, v0.t
; CHECK-RV64-NEXT:    addi a1, a0, 128
; CHECK-RV64-NEXT:    addi a2, a0, 256
; CHECK-RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-RV64-NEXT:    vse64.v v8, (a0)
; CHECK-RV64-NEXT:    vse64.v v24, (a1)
; CHECK-RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-RV64-NEXT:    vse64.v v16, (a2)
; CHECK-RV64-NEXT:    ret
  %v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 %evl)
  ret <33 x double> %v
}

declare <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr, i64, <33 x i1>, i32)

; Test unmasked integer zero strided
define <4 x i8> @zero_strided_unmasked_vpload_4i8_i8(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
; CHECK-OPT:       # %bb.0:
; CHECK-OPT-NEXT:    vsetivli zero, 3, e8, mf4, ta, ma
; CHECK-OPT-NEXT:    vlse8.v v8, (a0), zero
; CHECK-OPT-NEXT:    ret
;
; CHECK-NO-OPT-LABEL: zero_strided_unmasked_vpload_4i8_i8:
; CHECK-NO-OPT:       # %bb.0:
; CHECK-NO-OPT-NEXT:    lbu a0, 0(a0)
; CHECK-NO-OPT-NEXT:    vsetivli zero, 3, e8, mf4, ta, ma
; CHECK-NO-OPT-NEXT:    vmv.v.x v8, a0
; CHECK-NO-OPT-NEXT:    ret
  %load = call <4 x i8> @llvm.experimental.vp.strided.load.4i8.p0.i8(ptr %ptr, i8 0, <4 x i1> splat (i1 true), i32 3)
  ret <4 x i8> %load
}

; Test unmasked float zero strided
define <4 x half> @zero_strided_unmasked_vpload_4f16(ptr %ptr) {
; CHECK-OPT-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK-OPT:       # %bb.0:
; CHECK-OPT-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-OPT-NEXT:    vlse16.v v8, (a0), zero
; CHECK-OPT-NEXT:    ret
;
; CHECK-NO-OPT-ZVFH-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK-NO-OPT-ZVFH:       # %bb.0:
; CHECK-NO-OPT-ZVFH-NEXT:    flh fa5, 0(a0)
; CHECK-NO-OPT-ZVFH-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-NO-OPT-ZVFH-NEXT:    vfmv.v.f v8, fa5
; CHECK-NO-OPT-ZVFH-NEXT:    ret
;
; CHECK-NO-OPT-ZVFHMIN-LABEL: zero_strided_unmasked_vpload_4f16:
; CHECK-NO-OPT-ZVFHMIN:       # %bb.0:
; CHECK-NO-OPT-ZVFHMIN-NEXT:    lh a0, 0(a0)
; CHECK-NO-OPT-ZVFHMIN-NEXT:    vsetivli zero, 3, e16, mf2, ta, ma
; CHECK-NO-OPT-ZVFHMIN-NEXT:    vmv.v.x v8, a0
; CHECK-NO-OPT-ZVFHMIN-NEXT:    ret
  %load = call <4 x half> @llvm.experimental.vp.strided.load.4f16.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 3)
  ret <4 x half> %load
}

define <4 x i64> @zero_strided_vadd.vx(<4 x i64> %v, ptr %ptr) {
; CHECK-RV32-LABEL: zero_strided_vadd.vx:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-RV32-NEXT:    vlse64.v v10, (a0), zero
; CHECK-RV32-NEXT:    vadd.vv v8, v8, v10
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: zero_strided_vadd.vx:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ld a0, 0(a0)
; CHECK-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-RV64-NEXT:    vadd.vx v8, v8, a0
; CHECK-RV64-NEXT:    ret
  %load = call <4 x i64> @llvm.experimental.vp.strided.load.v4i64.p0.i32(ptr %ptr, i32 0, <4 x i1> splat (i1 true), i32 4)
  %w = add <4 x i64> %v, %load
  ret <4 x i64> %w
}
768