; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfhmin,+zvfbfmin \
; RUN:   -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,CHECK-RV64

declare void @llvm.experimental.vp.strided.store.v2i8.p0.i8(<2 x i8>, ptr, i8, <2 x i1>, i32)

define void @strided_vpstore_v2i8_i8(<2 x i8> %val, ptr %ptr, i8 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i8_i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i8(<2 x i8> %val, ptr %ptr, i8 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i8.p0.i16(<2 x i8>, ptr, i16, <2 x i1>, i32)

define void @strided_vpstore_v2i8_i16(<2 x i8> %val, ptr %ptr, i16 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i8_i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i16(<2 x i8> %val, ptr %ptr, i16 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8>, ptr, i64, <2 x i1>, i32)

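; An i64 stride on RV32 occupies the a1/a2 register pair, pushing the EVL to
; a3; only the low stride word (a1) feeds the store. On RV64 the stride fits
; in a1 and the EVL stays in a2, hence the per-target check prefixes.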
define void @strided_vpstore_v2i8_i64(<2 x i8> %val, ptr %ptr, i64 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-RV32-LABEL: strided_vpstore_v2i8_i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    vsetvli zero, a3, e8, mf8, ta, ma
; CHECK-RV32-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64-LABEL: strided_vpstore_v2i8_i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-RV64-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-RV64-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> %val, ptr %ptr, i64 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i8.p0.i32(<2 x i8>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2i8(<2 x i8> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i32(<2 x i8> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4i8.p0.i32(<4 x i8>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4i8(<4 x i8> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf4, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i8.p0.i32(<4 x i8> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8i8.p0.i32(<8 x i8>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8i8(<8 x i8> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i8.p0.i32(<8 x i8> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

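; A constant stride equal to the element size in bytes is recognized as
; unit-stride and lowered to a plain vse* store instead of vsse*.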
define void @strided_vpstore_v8i8_unit_stride(<8 x i8> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i8_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT:    vse8.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i8.p0.i32(<8 x i8> %val, ptr %ptr, i32 1, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i16.p0.i32(<2 x i16>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2i16(<2 x i16> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i16.p0.i32(<2 x i16> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4i16.p0.i32(<4 x i16>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4i16(<4 x i16> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i16.p0.i32(<4 x i16> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8i16.p0.i32(<8 x i16>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8i16(<8 x i16> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i16.p0.i32(<8 x i16> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v8i16_unit_stride(<8 x i16> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i16.p0.i32(<8 x i16> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i32.p0.i32(<2 x i32>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2i32(<2 x i32> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i32.p0.i32(<2 x i32> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4i32.p0.i32(<4 x i32>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4i32(<4 x i32> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i32.p0.i32(<4 x i32> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v4i32_unit_stride(<4 x i32> %val, ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i32_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i32.p0.i32(<4 x i32> %val, ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8i32.p0.i32(<8 x i32>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8i32(<8 x i32> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i32.p0.i32(<8 x i32> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2i64.p0.i32(<2 x i64>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2i64(<2 x i64> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i64.p0.i32(<2 x i64> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v2i64_unit_stride(<2 x i64> %val, ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i64_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i64.p0.i32(<2 x i64> %val, ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4i64.p0.i32(<4 x i64>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4i64(<4 x i64> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4i64.p0.i32(<4 x i64> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8i64.p0.i32(<8 x i64>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8i64(<8 x i64> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8i64.p0.i32(<8 x i64> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2bf16.p0.i32(<2 x bfloat>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2bf16(<2 x bfloat> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2bf16.p0.i32(<2 x bfloat> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4bf16.p0.i32(<4 x bfloat>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4bf16(<4 x bfloat> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4bf16.p0.i32(<4 x bfloat> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8bf16(<8 x bfloat> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v8bf16_unit_stride(<8 x bfloat> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8bf16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8bf16.p0.i32(<8 x bfloat> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2f16.p0.i32(<2 x half>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2f16(<2 x half> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2f16.p0.i32(<2 x half> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4f16.p0.i32(<4 x half>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4f16(<4 x half> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4f16.p0.i32(<4 x half> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8f16.p0.i32(<8 x half>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8f16(<8 x half> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT:    vsse16.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8f16.p0.i32(<8 x half> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v8f16_unit_stride(<8 x half> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8f16_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8f16.p0.i32(<8 x half> %val, ptr %ptr, i32 2, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2f32.p0.i32(<2 x float>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2f32(<2 x float> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2f32.p0.i32(<2 x float> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4f32.p0.i32(<4 x float>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4f32(<4 x float> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4f32.p0.i32(<4 x float> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v4f32_unit_stride(<4 x float> %val, ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4f32_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4f32.p0.i32(<4 x float> %val, ptr %ptr, i32 4, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8f32.p0.i32(<8 x float>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8f32(<8 x float> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8f32.p0.i32(<8 x float> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v2f64.p0.i32(<2 x double>, ptr, i32, <2 x i1>, i32)

define void @strided_vpstore_v2f64(<2 x double> %val, ptr %ptr, i32 signext %stride, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2f64.p0.i32(<2 x double> %val, ptr %ptr, i32 %stride, <2 x i1> %m, i32 %evl)
  ret void
}

define void @strided_vpstore_v2f64_unit_stride(<2 x double> %val, ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2f64_unit_stride:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2f64.p0.i32(<2 x double> %val, ptr %ptr, i32 8, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v4f64.p0.i32(<4 x double>, ptr, i32, <4 x i1>, i32)

define void @strided_vpstore_v4f64(<4 x double> %val, ptr %ptr, i32 signext %stride, <4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v4f64.p0.i32(<4 x double> %val, ptr %ptr, i32 %stride, <4 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v8f64.p0.i32(<8 x double>, ptr, i32, <8 x i1>, i32)

define void @strided_vpstore_v8f64(<8 x double> %val, ptr %ptr, i32 signext %stride, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v8f64.p0.i32(<8 x double> %val, ptr %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
  ret void
}

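; With an all-ones mask the v0.t masking suffix is dropped from the store.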
define void @strided_vpstore_v2i8_allones_mask(<2 x i8> %val, ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v2i8_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e8, mf8, ta, ma
; CHECK-NEXT:    vsse8.v v8, (a0), a1
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v2i8.p0.i32(<2 x i8> %val, ptr %ptr, i32 %stride, <2 x i1> splat (i1 true), i32 %evl)
  ret void
}

; Widening
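; v3f32 has no exactly-matching legal type, so it is widened to v4f32 (e32,
; m1); the store is still bounded by the caller's EVL in a2.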
define void @strided_vpstore_v3f32(<3 x float> %v, ptr %ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v3f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v3f32.p0.i32(<3 x float> %v, ptr %ptr, i32 %stride, <3 x i1> %mask, i32 %evl)
  ret void
}

define void @strided_vpstore_v3f32_allones_mask(<3 x float> %v, ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_vpstore_v3f32_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsse32.v v8, (a0), a1
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v3f32.p0.i32(<3 x float> %v, ptr %ptr, i32 %stride, <3 x i1> splat (i1 true), i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v3f32.p0.i32(<3 x float>, ptr, i32, <3 x i1>, i32)

; Splitting
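; v32f64 exceeds the largest register group (m8) at e64, so the store is
; split in two: the first half uses min(EVL, 16), the pointer is advanced by
; that many strides, the mask is slid down 16 bits, and the second half uses
; a branchless max(EVL - 16, 0).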
define void @strided_store_v32f64(<32 x double> %v, ptr %ptr, i32 signext %stride, <32 x i1> %mask, i32 zeroext %evl) {
; CHECK-LABEL: strided_store_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:    mv a3, a2
; CHECK-NEXT:    bltu a2, a4, .LBB38_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:  .LBB38_2:
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1, v0.t
; CHECK-NEXT:    mul a3, a3, a1
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:    addi a3, a2, -16
; CHECK-NEXT:    sltu a2, a2, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v0, 2
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vsse64.v v16, (a0), a1, v0.t
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v32f64.p0.i32(<32 x double> %v, ptr %ptr, i32 %stride, <32 x i1> %mask, i32 %evl)
  ret void
}

define void @strided_store_v32f64_allones_mask(<32 x double> %v, ptr %ptr, i32 signext %stride, i32 zeroext %evl) {
; CHECK-LABEL: strided_store_v32f64_allones_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a4, 16
; CHECK-NEXT:    mv a3, a2
; CHECK-NEXT:    bltu a2, a4, .LBB39_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a3, 16
; CHECK-NEXT:  .LBB39_2:
; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; CHECK-NEXT:    vsse64.v v8, (a0), a1
; CHECK-NEXT:    mul a3, a3, a1
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:    addi a3, a2, -16
; CHECK-NEXT:    sltu a2, a2, a3
; CHECK-NEXT:    addi a2, a2, -1
; CHECK-NEXT:    and a2, a2, a3
; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT:    vsse64.v v16, (a0), a1
; CHECK-NEXT:    ret
  call void @llvm.experimental.vp.strided.store.v32f64.p0.i32(<32 x double> %v, ptr %ptr, i32 %stride, <32 x i1> splat (i1 true), i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v32f64.p0.i32(<32 x double>, ptr, i32, <32 x i1>, i32)