; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v,+m \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v,+m \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v,+m \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v,+m \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
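; Note that the +zvfh and +zvfhmin runs share the RV32/RV64 check prefixes:
; a scatter only moves element bits, so the generated code is expected to be
; identical under either extension.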

declare void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

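; Truncating scatters narrow the value with vnsrl.wi by 0 before the indexed
; store. Each vnsrl halves the element width, so the i64-to-i8 case below
; takes three successive narrows.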
define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.vp.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

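; With an all-true mask the scatter is emitted without the v0.t mask operand.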
define void @vpscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

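; GEP index vectors are sign-extended to pointer width before the store:
; vsext.vf4 to e32 on RV32, vsext.vf8 to e64 on RV64.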
define void @vpscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.vp.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

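; A zero-extended i8 index scaled for 2-byte elements fits in 16 bits, so
; both targets can use a widening unsigned add and a 16-bit-indexed store.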
define void @vpscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

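; Even same-width i16 indices must be promoted to pointer width: a single
; widening vwadd.vv suffices on RV32, while RV64 needs vsext.vf4 plus vadd.vv.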
define void @vpscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpsext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10, v0.t
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i16_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10, v0.t
; RV64-NEXT:    vwadd.vv v16, v12, v12
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpzext_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10, v0.t
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i16_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10, v0.t
; RV64-NEXT:    vwadd.vv v16, v12, v12
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i32> @llvm.vp.zext.nxv8i16.nxv8i32(<vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

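; When the index is vp-extended all the way to i64, RV32 narrows it back to
; 32 bits with vnsrl.wi, since 32-bit offsets are enough there.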
declare <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpsext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v16, 0
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpsext_nxv8i32_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vwadd.vv v16, v12, v12, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i64> @llvm.vp.sext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32>, <vscale x 8 x i1>, i32)
define void @vpscatter_baseidx_vpzext_nxv8i32_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v12, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v16, 0
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_vpzext_nxv8i32_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vwaddu.vv v16, v12, v12, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = call <vscale x 8 x i64> @llvm.vp.zext.nxv8i32.nxv8i64(<vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 %evl)
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.vp.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

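; For i32 elements the index is scaled by 4 with vsll.vi 2; the i64 cases
; further down use the same pattern with a shift of 3.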
define void @vpscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 2
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 2
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

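; With native i32 indices RV32 can shift in place, while RV64 must first
; sign-extend them to 64 bits.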
define void @vpscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v18, v16
; RV32-NEXT:    vsll.vi v16, v18, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v18, v16
; RV64-NEXT:    vsll.vi v16, v18, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v20, v16
; RV64-NEXT:    vsll.vi v16, v20, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

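; On RV32 pointers are 32 bits, so a native i64 index vector is first
; truncated to 32 bits with vnsrl.wi before the store.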
define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

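; The bf16 and f16 scatters below lower exactly like the corresponding
; 16-bit integer cases: only the element bits are moved.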
1083declare void @llvm.vp.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)
1084
1085define void @vpscatter_nxv1bf16(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
1086; RV32-LABEL: vpscatter_nxv1bf16:
1087; RV32:       # %bb.0:
1088; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
1089; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1090; RV32-NEXT:    ret
1091;
1092; RV64-LABEL: vpscatter_nxv1bf16:
1093; RV64:       # %bb.0:
1094; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
1095; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
1096; RV64-NEXT:    ret
1097  call void @llvm.vp.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
1098  ret void
1099}
1100
1101declare void @llvm.vp.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)
1102
1103define void @vpscatter_nxv2bf16(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
1104; RV32-LABEL: vpscatter_nxv2bf16:
1105; RV32:       # %bb.0:
1106; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1107; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1108; RV32-NEXT:    ret
1109;
1110; RV64-LABEL: vpscatter_nxv2bf16:
1111; RV64:       # %bb.0:
1112; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1113; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
1114; RV64-NEXT:    ret
1115  call void @llvm.vp.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
1116  ret void
1117}
1118
1119declare void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)
1120
1121define void @vpscatter_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
1122; RV32-LABEL: vpscatter_nxv4bf16:
1123; RV32:       # %bb.0:
1124; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1125; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
1126; RV32-NEXT:    ret
1127;
1128; RV64-LABEL: vpscatter_nxv4bf16:
1129; RV64:       # %bb.0:
1130; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1131; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
1132; RV64-NEXT:    ret
1133  call void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
1134  ret void
1135}
1136
1137define void @vpscatter_truemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
1138; RV32-LABEL: vpscatter_truemask_nxv4bf16:
1139; RV32:       # %bb.0:
1140; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1141; RV32-NEXT:    vsoxei32.v v8, (zero), v10
1142; RV32-NEXT:    ret
1143;
1144; RV64-LABEL: vpscatter_truemask_nxv4bf16:
1145; RV64:       # %bb.0:
1146; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1147; RV64-NEXT:    vsoxei64.v v8, (zero), v12
1148; RV64-NEXT:    ret
1149  call void @llvm.vp.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
1150  ret void
1151}
1152
1153declare void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)
1154
1155define void @vpscatter_nxv8bf16(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1156; RV32-LABEL: vpscatter_nxv8bf16:
1157; RV32:       # %bb.0:
1158; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1159; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
1160; RV32-NEXT:    ret
1161;
1162; RV64-LABEL: vpscatter_nxv8bf16:
1163; RV64:       # %bb.0:
1164; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
1165; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
1166; RV64-NEXT:    ret
1167  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1168  ret void
1169}
1170
1171define void @vpscatter_baseidx_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1172; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16:
1173; RV32:       # %bb.0:
1174; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
1175; RV32-NEXT:    vsext.vf4 v12, v10
1176; RV32-NEXT:    vadd.vv v12, v12, v12
1177; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1178; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
1179; RV32-NEXT:    ret
1180;
1181; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8bf16:
1182; RV64:       # %bb.0:
1183; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1184; RV64-NEXT:    vsext.vf8 v16, v10
1185; RV64-NEXT:    vadd.vv v16, v16, v16
1186; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1187; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1188; RV64-NEXT:    ret
1189  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
1190  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1191  ret void
1192}
1193
1194define void @vpscatter_baseidx_sext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1195; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16:
1196; RV32:       # %bb.0:
1197; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
1198; RV32-NEXT:    vsext.vf4 v12, v10
1199; RV32-NEXT:    vadd.vv v12, v12, v12
1200; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1201; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
1202; RV32-NEXT:    ret
1203;
1204; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8bf16:
1205; RV64:       # %bb.0:
1206; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1207; RV64-NEXT:    vsext.vf8 v16, v10
1208; RV64-NEXT:    vadd.vv v16, v16, v16
1209; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
1210; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
1211; RV64-NEXT:    ret
1212  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
1213  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
1214  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1215  ret void
1216}
1217
define void @vpscatter_baseidx_zext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

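; For native i16 indices, RV32 forms 2*idx as e32 offsets with one widening
; vwadd.vv; RV64 has to sign-extend to e64 before the add.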
define void @vpscatter_baseidx_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

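; An all-true mask allows the v0.t mask operand to be dropped.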
define void @vpscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v12, v10, v10
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v12, v10, v10
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

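; float elements are 4 bytes, so the extended indices are shifted left by 2
; (vsll.vi) rather than doubled.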
define void @vpscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 2
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

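; Zero-extended i16 indices shifted by 2 still fit in 32 bits, so RV64 can
; also index with vsoxei32 here.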
define void @vpscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 2
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define void @vpscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define void @vpscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define void @vpscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double>, <vscale x 6 x ptr>, <vscale x 6 x i1>, i32)

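; nxv6 is not a power-of-two element count; these scatters are lowered in the
; full m8 register group, the same way as nxv8f64.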
define void @vpscatter_nxv6f64(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v18, v16
; RV32-NEXT:    vsll.vi v16, v18, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i8_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v18, v16
; RV64-NEXT:    vsll.vi v16, v18, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

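; As in the f32 case, zero-extended i16 indices shifted by 3 fit in 32 bits,
; so both targets use vsoxei32.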
define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i16_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v20, v16
; RV64-NEXT:    vsll.vi v16, v20, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

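; With i64 indices, RV32 first truncates them to e32 with vnsrl.wi, since
; 32-bit offsets cover the whole 32-bit address space.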
define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
  call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define void @vpscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v18, v16
; RV32-NEXT:    vsll.vi v16, v18, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v18, v16
; RV64-NEXT:    vsll.vi v16, v18, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v20, v16
; RV64-NEXT:    vsll.vi v16, v20, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.vp.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, <vscale x 16 x i1>, i32)

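; nxv16f64 spans two m8 register groups, so the scatter is split at VLMAX:
; the low half stores with v0 and the high half with v0 slid down, each part
; taking its clamped share of the EVL.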
define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl8re32.v v24, (a0)
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    mv a2, a1
; RV32-NEXT:    bltu a1, a0, .LBB108_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a2, a0
; RV32-NEXT:  .LBB108_2:
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT:    sub a2, a1, a0
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    sltu a1, a1, a2
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a0
; RV32-NEXT:    and a1, a1, a2
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a3, a1, 3
; RV64-NEXT:    add a3, a0, a3
; RV64-NEXT:    vl8re64.v v16, (a3)
; RV64-NEXT:    vl8re64.v v24, (a0)
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    bltu a2, a1, .LBB108_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:  .LBB108_2:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT:    sub a0, a2, a1
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    sltu a2, a2, a0
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    and a0, a2, a0
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret void
}

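; The split scatter plus the computed offsets exceed the register budget, so
; some operands are spilled to the stack (vs1r/vs8r) and reloaded before each
; half is stored.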
2307define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
2308; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
2309; RV32:       # %bb.0:
2310; RV32-NEXT:    addi sp, sp, -16
2311; RV32-NEXT:    .cfi_def_cfa_offset 16
2312; RV32-NEXT:    csrr a3, vlenb
2313; RV32-NEXT:    sub sp, sp, a3
2314; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
2315; RV32-NEXT:    addi a3, sp, 16
2316; RV32-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
2317; RV32-NEXT:    vl4re16.v v24, (a1)
2318; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2319; RV32-NEXT:    vsext.vf2 v0, v24
2320; RV32-NEXT:    csrr a1, vlenb
2321; RV32-NEXT:    vsll.vi v24, v0, 3
2322; RV32-NEXT:    mv a3, a2
2323; RV32-NEXT:    bltu a2, a1, .LBB109_2
2324; RV32-NEXT:  # %bb.1:
2325; RV32-NEXT:    mv a3, a1
2326; RV32-NEXT:  .LBB109_2:
2327; RV32-NEXT:    addi a4, sp, 16
2328; RV32-NEXT:    vl1r.v v0, (a4) # Unknown-size Folded Reload
2329; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2330; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
2331; RV32-NEXT:    sub a3, a2, a1
2332; RV32-NEXT:    srli a1, a1, 3
2333; RV32-NEXT:    sltu a2, a2, a3
2334; RV32-NEXT:    addi a2, a2, -1
2335; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
2336; RV32-NEXT:    vslidedown.vx v0, v0, a1
2337; RV32-NEXT:    and a2, a2, a3
2338; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2339; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
2340; RV32-NEXT:    csrr a0, vlenb
2341; RV32-NEXT:    add sp, sp, a0
2342; RV32-NEXT:    .cfi_def_cfa sp, 16
2343; RV32-NEXT:    addi sp, sp, 16
2344; RV32-NEXT:    .cfi_def_cfa_offset 0
2345; RV32-NEXT:    ret
2346;
2347; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
2348; RV64:       # %bb.0:
2349; RV64-NEXT:    addi sp, sp, -16
2350; RV64-NEXT:    .cfi_def_cfa_offset 16
2351; RV64-NEXT:    csrr a3, vlenb
2352; RV64-NEXT:    slli a3, a3, 4
2353; RV64-NEXT:    sub sp, sp, a3
2354; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
2355; RV64-NEXT:    csrr a3, vlenb
2356; RV64-NEXT:    slli a3, a3, 3
2357; RV64-NEXT:    add a3, sp, a3
2358; RV64-NEXT:    addi a3, a3, 16
2359; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
2360; RV64-NEXT:    vl4re16.v v24, (a1)
2361; RV64-NEXT:    csrr a1, vlenb
2362; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
2363; RV64-NEXT:    vsext.vf4 v16, v26
2364; RV64-NEXT:    vsll.vi v16, v16, 3
2365; RV64-NEXT:    addi a3, sp, 16
2366; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
2367; RV64-NEXT:    vsext.vf4 v16, v24
2368; RV64-NEXT:    vsll.vi v24, v16, 3
2369; RV64-NEXT:    mv a3, a2
2370; RV64-NEXT:    bltu a2, a1, .LBB109_2
2371; RV64-NEXT:  # %bb.1:
2372; RV64-NEXT:    mv a3, a1
2373; RV64-NEXT:  .LBB109_2:
2374; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
2375; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
2376; RV64-NEXT:    sub a3, a2, a1
2377; RV64-NEXT:    srli a1, a1, 3
2378; RV64-NEXT:    sltu a2, a2, a3
2379; RV64-NEXT:    addi a2, a2, -1
2380; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
2381; RV64-NEXT:    vslidedown.vx v0, v0, a1
2382; RV64-NEXT:    and a2, a2, a3
2383; RV64-NEXT:    csrr a1, vlenb
2384; RV64-NEXT:    slli a1, a1, 3
2385; RV64-NEXT:    add a1, sp, a1
2386; RV64-NEXT:    addi a1, a1, 16
2387; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
2388; RV64-NEXT:    addi a1, sp, 16
2389; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
2390; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2391; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
2392; RV64-NEXT:    csrr a0, vlenb
2393; RV64-NEXT:    slli a0, a0, 4
2394; RV64-NEXT:    add sp, sp, a0
2395; RV64-NEXT:    .cfi_def_cfa sp, 16
2396; RV64-NEXT:    addi sp, sp, 16
2397; RV64-NEXT:    .cfi_def_cfa_offset 0
2398; RV64-NEXT:    ret
2399  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
2400  call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
2401  ret void
2402}
2403
define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    sub sp, sp, a3
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vl4re16.v v24, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf2 v0, v24
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    vsll.vi v24, v0, 3
; RV32-NEXT:    mv a3, a2
; RV32-NEXT:    bltu a2, a1, .LBB110_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a3, a1
; RV32-NEXT:  .LBB110_2:
; RV32-NEXT:    addi a4, sp, 16
; RV32-NEXT:    vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    sub a3, a2, a1
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    sltu a2, a2, a3
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    and a2, a2, a3
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 4
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 3
; RV64-NEXT:    add a3, sp, a3
; RV64-NEXT:    addi a3, a3, 16
; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    vl4re16.v v24, (a1)
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v26
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    vsext.vf4 v16, v24
; RV64-NEXT:    vsll.vi v24, v16, 3
; RV64-NEXT:    mv a3, a2
; RV64-NEXT:    bltu a2, a1, .LBB110_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a3, a1
; RV64-NEXT:  .LBB110_2:
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    sub a3, a2, a1
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    sltu a2, a2, a3
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    and a2, a2, a3
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, sp, a1
; RV64-NEXT:    addi a1, a1, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret void
}

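; For sign-extended i16 indices the two targets diverge. RV32 widens only
; to e32 (vsext.vf2) and uses vsoxei32, since byte offsets on a 32-bit
; target fit in 32 bits; RV64 must widen to e64 (vsext.vf4) and use
; vsoxei64. Roughly, the RV64 body materializes the byte offsets as
;
;   %off = shl (sext %idxs to i64), 3   ; double is 8 bytes -> vsll.vi 3
;
; which takes two m8 index vectors for nxv16 elements; hence the
; 16 * vlenb spill region in the RV64 body above, versus a single vlenb
; (only the mask spill) on RV32.
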
define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    sub sp, sp, a3
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV32-NEXT:    addi a3, sp, 16
; RV32-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV32-NEXT:    vl4re16.v v24, (a1)
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vzext.vf2 v0, v24
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    vsll.vi v24, v0, 3
; RV32-NEXT:    mv a3, a2
; RV32-NEXT:    bltu a2, a1, .LBB111_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a3, a1
; RV32-NEXT:  .LBB111_2:
; RV32-NEXT:    addi a4, sp, 16
; RV32-NEXT:    vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    sub a3, a2, a1
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    sltu a2, a2, a3
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a1
; RV32-NEXT:    and a2, a2, a3
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    vl4re16.v v24, (a1)
; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v0, v24
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    vsll.vi v24, v0, 3
; RV64-NEXT:    mv a3, a2
; RV64-NEXT:    bltu a2, a1, .LBB111_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a3, a1
; RV64-NEXT:  .LBB111_2:
; RV64-NEXT:    addi a4, sp, 16
; RV64-NEXT:    vl1r.v v0, (a4) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV64-NEXT:    sub a3, a2, a1
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    sltu a2, a2, a3
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    and a2, a2, a3
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  call void @llvm.vp.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %val, <vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret void
}

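; Zero-extended i16 indices always fit in 32 bits, so here RV64 matches
; RV32: both widen with vzext.vf2 and use the EEW=32 indexed store
; vsoxei32, which treats index elements as unsigned byte offsets. This
; halves index register pressure relative to the sext case above, leaving
; only the 1 * vlenb mask spill.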