; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
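
; Test lowering of the fixed-length llvm.vp.scatter intrinsic. The EVL
; operand becomes the AVL of a vsetvli, the mask becomes a v0.t predicate,
; and the pointer vector is used directly as indices with a zero base:
; vsoxei32 on RV32, where pointers are 32 bits, and vsoxei64 on RV64.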

declare void @llvm.vp.scatter.v2i8.v2p0(<2 x i8>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i8(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

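; Truncstore variants: the value is narrowed in registers before the store,
; one vnsrl.wi per halving of the element width.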
define void @vpscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i16_truncstore_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i16_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i16> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32_truncstore_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i32_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i8>
  call void @llvm.vp.scatter.v2i8.v2p0(<2 x i8> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

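; An all-ones mask is dropped: the scatter is emitted without the v0.t
; predicate.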
define void @vpscatter_truemask_v4i8(<4 x i8> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i8(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

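; Baseidx variants scatter relative to a scalar base. The index vector is
; extended to the selected index EEW and scaled by the element size
; (vadd.vv for a 2-byte scale, vsll.vi for 4- and 8-byte scales).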
define void @vpscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i8.v8p0(<8 x i8> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i16.v2p0(<2 x i16>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i16(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32_truncstore_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i32_truncstore_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i32> %val to <2 x i16>
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i16>
  call void @llvm.vp.scatter.v2i16.v2p0(<2 x i16> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v3i16.v3p0(<3 x i16>, <3 x ptr>, <3 x i1>, i32)

define void @vpscatter_v3i16(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v3i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v3i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v3i16(<3 x i16> %val, <3 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v3i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v3i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v3i16.v3p0(<3 x i16> %val, <3 x ptr> %ptrs, <3 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i16(<4 x i16> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i16.v4p0(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i16(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

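; With zero-extended indices the scatter can use an index EEW narrower than
; the pointer width, e.g. vsoxei16 here, where vwaddu.vv zero-extends and
; doubles the i8 indices in one widening add.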
define void @vpscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v9, v9
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v9, v9
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v9
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i16.v8p0(<8 x i16> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i32(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i32.v2p0(<2 x i32> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64_truncstore_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64_truncstore_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  %tval = trunc <2 x i64> %val to <2 x i32>
  call void @llvm.vp.scatter.v2i32.v2p0(<2 x i32> %tval, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i32(<4 x i32> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i32(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v11, v10
; RV32-NEXT:    vsll.vi v10, v11, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v11, v10
; RV64-NEXT:    vsll.vi v10, v11, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v10, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v10, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8i32.v8p0(<8 x i32> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2i64(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2i64.v2p0(<2 x i64> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4i64(<4 x i64> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4i64.v4p0(<4 x i64> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8i64(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v13, v12
; RV32-NEXT:    vsll.vi v12, v13, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v13, v12
; RV64-NEXT:    vsll.vi v12, v13, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

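; On RV32, i64 indices are truncated to the 32-bit pointer width with
; vnsrl.wi before being used as scatter offsets.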
define void @vpscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v16, v12, 0
; RV32-NEXT:    vsll.vi v12, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v12, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
  call void @llvm.vp.scatter.v8i64.v8p0(<8 x i64> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

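; The floating-point cases below mirror the integer lowering for the same
; element width (bf16/f16 as e16, f32 as e32, f64 as e64).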
975declare void @llvm.vp.scatter.v2bf16.v2p0(<2 x bfloat>, <2 x ptr>, <2 x i1>, i32)
976
977define void @vpscatter_v2bf16(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
978; RV32-LABEL: vpscatter_v2bf16:
979; RV32:       # %bb.0:
980; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
981; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
982; RV32-NEXT:    ret
983;
984; RV64-LABEL: vpscatter_v2bf16:
985; RV64:       # %bb.0:
986; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
987; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
988; RV64-NEXT:    ret
989  call void @llvm.vp.scatter.v2bf16.v2p0(<2 x bfloat> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
990  ret void
991}
992
993declare void @llvm.vp.scatter.v4bf16.v4p0(<4 x bfloat>, <4 x ptr>, <4 x i1>, i32)
994
995define void @vpscatter_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
996; RV32-LABEL: vpscatter_v4bf16:
997; RV32:       # %bb.0:
998; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
999; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1000; RV32-NEXT:    ret
1001;
1002; RV64-LABEL: vpscatter_v4bf16:
1003; RV64:       # %bb.0:
1004; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1005; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
1006; RV64-NEXT:    ret
1007  call void @llvm.vp.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1008  ret void
1009}
1010
1011define void @vpscatter_truemask_v4bf16(<4 x bfloat> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
1012; RV32-LABEL: vpscatter_truemask_v4bf16:
1013; RV32:       # %bb.0:
1014; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1015; RV32-NEXT:    vsoxei32.v v8, (zero), v9
1016; RV32-NEXT:    ret
1017;
1018; RV64-LABEL: vpscatter_truemask_v4bf16:
1019; RV64:       # %bb.0:
1020; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1021; RV64-NEXT:    vsoxei64.v v8, (zero), v10
1022; RV64-NEXT:    ret
1023  call void @llvm.vp.scatter.v4bf16.v4p0(<4 x bfloat> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
1024  ret void
1025}
1026
1027declare void @llvm.vp.scatter.v8bf16.v8p0(<8 x bfloat>, <8 x ptr>, <8 x i1>, i32)
1028
1029define void @vpscatter_v8bf16(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1030; RV32-LABEL: vpscatter_v8bf16:
1031; RV32:       # %bb.0:
1032; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1033; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
1034; RV32-NEXT:    ret
1035;
1036; RV64-LABEL: vpscatter_v8bf16:
1037; RV64:       # %bb.0:
1038; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1039; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
1040; RV64-NEXT:    ret
1041  call void @llvm.vp.scatter.v8bf16.v8p0(<8 x bfloat> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1042  ret void
1043}
1044
1045declare void @llvm.vp.scatter.v2f16.v2p0(<2 x half>, <2 x ptr>, <2 x i1>, i32)
1046
1047define void @vpscatter_v2f16(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
1048; RV32-LABEL: vpscatter_v2f16:
1049; RV32:       # %bb.0:
1050; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
1051; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1052; RV32-NEXT:    ret
1053;
1054; RV64-LABEL: vpscatter_v2f16:
1055; RV64:       # %bb.0:
1056; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
1057; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
1058; RV64-NEXT:    ret
1059  call void @llvm.vp.scatter.v2f16.v2p0(<2 x half> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
1060  ret void
1061}
1062
1063declare void @llvm.vp.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, <4 x i1>, i32)
1064
1065define void @vpscatter_v4f16(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1066; RV32-LABEL: vpscatter_v4f16:
1067; RV32:       # %bb.0:
1068; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1069; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1070; RV32-NEXT:    ret
1071;
1072; RV64-LABEL: vpscatter_v4f16:
1073; RV64:       # %bb.0:
1074; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1075; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
1076; RV64-NEXT:    ret
1077  call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1078  ret void
1079}
1080
1081define void @vpscatter_truemask_v4f16(<4 x half> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
1082; RV32-LABEL: vpscatter_truemask_v4f16:
1083; RV32:       # %bb.0:
1084; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1085; RV32-NEXT:    vsoxei32.v v8, (zero), v9
1086; RV32-NEXT:    ret
1087;
1088; RV64-LABEL: vpscatter_truemask_v4f16:
1089; RV64:       # %bb.0:
1090; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1091; RV64-NEXT:    vsoxei64.v v8, (zero), v10
1092; RV64-NEXT:    ret
1093  call void @llvm.vp.scatter.v4f16.v4p0(<4 x half> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
1094  ret void
1095}
1096
1097declare void @llvm.vp.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, <8 x i1>, i32)
1098
1099define void @vpscatter_v8f16(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1100; RV32-LABEL: vpscatter_v8f16:
1101; RV32:       # %bb.0:
1102; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1103; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
1104; RV32-NEXT:    ret
1105;
1106; RV64-LABEL: vpscatter_v8f16:
1107; RV64:       # %bb.0:
1108; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
1109; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
1110; RV64-NEXT:    ret
1111  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1112  ret void
1113}
1114
1115define void @vpscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1116; RV32-LABEL: vpscatter_baseidx_v8i8_v8f16:
1117; RV32:       # %bb.0:
1118; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1119; RV32-NEXT:    vsext.vf4 v10, v9
1120; RV32-NEXT:    vadd.vv v10, v10, v10
1121; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1122; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
1123; RV32-NEXT:    ret
1124;
1125; RV64-LABEL: vpscatter_baseidx_v8i8_v8f16:
1126; RV64:       # %bb.0:
1127; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1128; RV64-NEXT:    vsext.vf8 v12, v9
1129; RV64-NEXT:    vadd.vv v12, v12, v12
1130; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1131; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
1132; RV64-NEXT:    ret
1133  %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
1134  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1135  ret void
1136}
1137
1138define void @vpscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1139; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f16:
1140; RV32:       # %bb.0:
1141; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1142; RV32-NEXT:    vsext.vf4 v10, v9
1143; RV32-NEXT:    vadd.vv v10, v10, v10
1144; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1145; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
1146; RV32-NEXT:    ret
1147;
1148; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f16:
1149; RV64:       # %bb.0:
1150; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1151; RV64-NEXT:    vsext.vf8 v12, v9
1152; RV64-NEXT:    vadd.vv v12, v12, v12
1153; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1154; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
1155; RV64-NEXT:    ret
1156  %eidxs = sext <8 x i8> %idxs to <8 x i16>
1157  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
1158  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1159  ret void
1160}
1161
1162define void @vpscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1163; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f16:
1164; RV32:       # %bb.0:
1165; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
1166; RV32-NEXT:    vwaddu.vv v10, v9, v9
1167; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1168; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
1169; RV32-NEXT:    ret
1170;
1171; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f16:
1172; RV64:       # %bb.0:
1173; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
1174; RV64-NEXT:    vwaddu.vv v10, v9, v9
1175; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1176; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
1177; RV64-NEXT:    ret
1178  %eidxs = zext <8 x i8> %idxs to <8 x i16>
1179  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
1180  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1181  ret void
1182}
1183
1184define void @vpscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1185; RV32-LABEL: vpscatter_baseidx_v8f16:
1186; RV32:       # %bb.0:
1187; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1188; RV32-NEXT:    vwadd.vv v10, v9, v9
1189; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1190; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
1191; RV32-NEXT:    ret
1192;
1193; RV64-LABEL: vpscatter_baseidx_v8f16:
1194; RV64:       # %bb.0:
1195; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1196; RV64-NEXT:    vsext.vf4 v12, v9
1197; RV64-NEXT:    vadd.vv v12, v12, v12
1198; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1199; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
1200; RV64-NEXT:    ret
1201  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
1202  call void @llvm.vp.scatter.v8f16.v8p0(<8 x half> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1203  ret void
1204}
1205
1206declare void @llvm.vp.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, <2 x i1>, i32)
1207
1208define void @vpscatter_v2f32(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
1209; RV32-LABEL: vpscatter_v2f32:
1210; RV32:       # %bb.0:
1211; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1212; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1213; RV32-NEXT:    ret
1214;
1215; RV64-LABEL: vpscatter_v2f32:
1216; RV64:       # %bb.0:
1217; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
1218; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
1219; RV64-NEXT:    ret
1220  call void @llvm.vp.scatter.v2f32.v2p0(<2 x float> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
1221  ret void
1222}
1223
1224declare void @llvm.vp.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, <4 x i1>, i32)
1225
1226define void @vpscatter_v4f32(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1227; RV32-LABEL: vpscatter_v4f32:
1228; RV32:       # %bb.0:
1229; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1230; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1231; RV32-NEXT:    ret
1232;
1233; RV64-LABEL: vpscatter_v4f32:
1234; RV64:       # %bb.0:
1235; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1236; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
1237; RV64-NEXT:    ret
1238  call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1239  ret void
1240}
1241
1242define void @vpscatter_truemask_v4f32(<4 x float> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
1243; RV32-LABEL: vpscatter_truemask_v4f32:
1244; RV32:       # %bb.0:
1245; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1246; RV32-NEXT:    vsoxei32.v v8, (zero), v9
1247; RV32-NEXT:    ret
1248;
1249; RV64-LABEL: vpscatter_truemask_v4f32:
1250; RV64:       # %bb.0:
1251; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
1252; RV64-NEXT:    vsoxei64.v v8, (zero), v10
1253; RV64-NEXT:    ret
1254  call void @llvm.vp.scatter.v4f32.v4p0(<4 x float> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
1255  ret void
1256}
1257
1258declare void @llvm.vp.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, <8 x i1>, i32)
1259
1260define void @vpscatter_v8f32(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
1261; RV32-LABEL: vpscatter_v8f32:
1262; RV32:       # %bb.0:
1263; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1264; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
1265; RV32-NEXT:    ret
1266;
1267; RV64-LABEL: vpscatter_v8f32:
1268; RV64:       # %bb.0:
1269; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
1270; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
1271; RV64-NEXT:    ret
1272  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1273  ret void
1274}
1275
1276define void @vpscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1277; RV32-LABEL: vpscatter_baseidx_v8i8_v8f32:
1278; RV32:       # %bb.0:
1279; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1280; RV32-NEXT:    vsext.vf4 v12, v10
1281; RV32-NEXT:    vsll.vi v10, v12, 2
1282; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
1283; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
1284; RV32-NEXT:    ret
1285;
1286; RV64-LABEL: vpscatter_baseidx_v8i8_v8f32:
1287; RV64:       # %bb.0:
1288; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1289; RV64-NEXT:    vsext.vf8 v12, v10
1290; RV64-NEXT:    vsll.vi v12, v12, 2
1291; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
1292; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
1293; RV64-NEXT:    ret
1294  %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
1295  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1296  ret void
1297}
1298
1299define void @vpscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1300; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f32:
1301; RV32:       # %bb.0:
1302; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
1303; RV32-NEXT:    vsext.vf4 v12, v10
1304; RV32-NEXT:    vsll.vi v10, v12, 2
1305; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
1306; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
1307; RV32-NEXT:    ret
1308;
1309; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f32:
1310; RV64:       # %bb.0:
1311; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1312; RV64-NEXT:    vsext.vf8 v12, v10
1313; RV64-NEXT:    vsll.vi v12, v12, 2
1314; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
1315; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
1316; RV64-NEXT:    ret
1317  %eidxs = sext <8 x i8> %idxs to <8 x i32>
1318  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
1319  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1320  ret void
1321}
1322
define void @vpscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v11, v10
; RV32-NEXT:    vsll.vi v10, v11, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v11, v10
; RV64-NEXT:    vsll.vi v10, v11, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v10
; RV32-NEXT:    vsll.vi v10, v12, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v10, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

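; GEP indices are sign-extended to pointer width, so RV64 widens the i32
; indices to i64 with vsext.vf2 before scaling, while RV32 can shift them in
; place and scatter with vsoxei32.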
define void @vpscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v10, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v10
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8f32.v8p0(<8 x float> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, <2 x i1>, i32)

define void @vpscatter_v2f64(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v2f64.v2p0(<2 x double> %val, <2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, <4 x i1>, i32)

define void @vpscatter_v4f64(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_truemask_v4f64(<4 x double> %val, <4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_truemask_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_truemask_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v4f64.v4p0(<4 x double> %val, <4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, <8 x i1>, i32)

define void @vpscatter_v8f64(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

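; As with f32, zero-extended i8 indices scaled by 8 still fit in 16 bits
; (255 << 3 = 2040), so both targets can scatter with a 16-bit index EEW.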
define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v13, v12
; RV32-NEXT:    vsll.vi v12, v13, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v13, v12
; RV64-NEXT:    vsll.vi v12, v13, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

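; Zero-extended i16 indices scaled by 8 fit in 32 bits (65535 << 3 < 2^32),
; so even RV64 can do the index arithmetic at e32 and use vsoxei32.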
define void @vpscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v14, v12
; RV32-NEXT:    vsll.vi v12, v14, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v14, v12
; RV64-NEXT:    vsll.vi v12, v14, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v12, v16, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

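; With native i64 indices RV32 only needs the low 32 bits of each scaled
; offset, so it truncates with vnsrl.wi and scatters with vsoxei32.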
define void @vpscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v16, v12, 0
; RV32-NEXT:    vsll.vi v12, v16, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v12, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
  call void @llvm.vp.scatter.v8f64.v8p0(<8 x double> %val, <8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.vp.scatter.v32f64.v32p0(<32 x double>, <32 x ptr>, <32 x i1>, i32)

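; A 32-element scatter is split into two halves of at most 16 elements. The
; second half's EVL is computed branchlessly as max(evl - 16, 0) by the
; sltu/addi/and sequence, and its mask is produced by sliding v0 down 2 bytes
; (16 mask bits). On RV64 the values plus the i64 pointer vectors exceed the
; register file, so one m8 group is spilled to the stack.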
define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v24, (a0)
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:    bltu a1, a2, .LBB83_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a0, 16
; RV32-NEXT:  .LBB83_2:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
; RV32-NEXT:    addi a0, a1, -16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    sltu a1, a1, a0
; RV32-NEXT:    addi a1, a1, -1
; RV32-NEXT:    and a0, a1, a0
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    sub sp, sp, a1
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    addi a1, a0, 128
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vle64.v v16, (a1)
; RV64-NEXT:    vle64.v v24, (a0)
; RV64-NEXT:    li a1, 16
; RV64-NEXT:    mv a0, a2
; RV64-NEXT:    bltu a2, a1, .LBB83_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a0, 16
; RV64-NEXT:  .LBB83_2:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
; RV64-NEXT:    addi a0, a2, -16
; RV64-NEXT:    sltu a1, a2, a0
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    and a0, a1, a0
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}

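; On RV64 the 32 i32 indices must also be sign- or zero-extended to i64, which
; takes two m8 register groups of its own, so the mask and one value group are
; spilled around the index computation.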
define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a3, 32
; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v24, (a1)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    bltu a2, a3, .LBB84_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB84_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    addi a1, a2, -16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    sltu a2, a2, a1
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    and a1, a2, a1
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_v32i32_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a4, a3, 3
; RV64-NEXT:    add a3, a4, a3
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x09, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 9 * vlenb
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    add a3, sp, a3
; RV64-NEXT:    addi a3, a3, 16
; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    li a3, 32
; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v24, (a1)
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v16, v24, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v0, v24
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsll.vi v24, v0, 3
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    bltu a2, a3, .LBB84_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB84_2:
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl1r.v v0, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    addi a1, a2, -16
; RV64-NEXT:    sltu a2, a2, a1
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    and a1, a2, a1
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    add a2, sp, a2
; RV64-NEXT:    addi a2, a2, 16
; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a1, a0, 3
; RV64-NEXT:    add a0, a1, a0
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a3, 32
; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v24, (a1)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    bltu a2, a3, .LBB85_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB85_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    addi a1, a2, -16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    sltu a2, a2, a1
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    and a1, a2, a1
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 4
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 3
; RV64-NEXT:    add a3, sp, a3
; RV64-NEXT:    addi a3, a3, 16
; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    li a3, 32
; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v24, (a1)
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v24
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v24, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vsll.vi v24, v16, 3
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    bltu a2, a3, .LBB85_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB85_2:
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v16, (a0), v24, v0.t
; RV64-NEXT:    addi a1, a2, -16
; RV64-NEXT:    sltu a2, a2, a1
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    and a1, a2, a1
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 3
; RV64-NEXT:    add a2, sp, a2
; RV64-NEXT:    addi a2, a2, 16
; RV64-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %eidxs = sext <32 x i32> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}

define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a3, 32
; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV32-NEXT:    vle32.v v24, (a1)
; RV32-NEXT:    li a3, 16
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:    bltu a2, a3, .LBB86_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB86_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    addi a1, a2, -16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    sltu a2, a2, a1
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    and a1, a2, a1
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 4
; RV64-NEXT:    sub sp, sp, a3
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
; RV64-NEXT:    csrr a3, vlenb
; RV64-NEXT:    slli a3, a3, 3
; RV64-NEXT:    add a3, sp, a3
; RV64-NEXT:    addi a3, a3, 16
; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
; RV64-NEXT:    li a3, 32
; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
; RV64-NEXT:    vle32.v v24, (a1)
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v24
; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v24, 16
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vsll.vi v24, v16, 3
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:    bltu a2, a3, .LBB86_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB86_2:
; RV64-NEXT:    addi a3, sp, 16
; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v16, (a0), v24, v0.t
; RV64-NEXT:    addi a1, a2, -16
; RV64-NEXT:    sltu a2, a2, a1
; RV64-NEXT:    addi a2, a2, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    and a1, a2, a1
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 3
; RV64-NEXT:    add a2, sp, a2
; RV64-NEXT:    addi a2, a2, 16
; RV64-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %eidxs = zext <32 x i32> %idxs to <32 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
  call void @llvm.vp.scatter.v32f64.v32p0(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret void
}
