; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfhmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfhmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,RV64

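; llvm.masked.scatter lowers to the RVV indexed-ordered stores vsoxei32.v
; (RV32, 32-bit pointers) and vsoxei64.v (RV64, 64-bit pointers), so the two
; targets differ in the index EEW and in how many registers the pointer
; vector occupies.
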
declare void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %val, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

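; Truncating scatters narrow the stored value with vnsrl.wi (narrowing shift
; by 0) one SEW step at a time before the indexed store.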
define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
  call void @llvm.masked.scatter.nxv2i8.nxv2p0(<vscale x 2 x i8> %tval, <vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %m)
  ret void
}

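; A known all-ones mask folds to an unmasked vsoxei; a known all-zeroes mask
; folds the whole scatter away.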
define void @mscatter_truemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> %val, <vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

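; The baseidx tests cover scatters through a vector GEP: indices are extended
; to pointer width (vsext.vf4 to e32 on RV32, vsext.vf8 to e64 on RV64 for i8
; indices) and, for elements wider than a byte, scaled by the element size.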
define void @mscatter_baseidx_nxv8i8(<vscale x 8 x i8> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v9
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> %val, <vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i16.nxv1p0(<vscale x 1 x i16> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v8, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v8, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vnsrl.wi v8, v11, 0
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vnsrl.wi v8, v12, 0
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
  call void @llvm.masked.scatter.nxv2i16.nxv2p0(<vscale x 2 x i16> %tval, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i16(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i16.nxv4p0(<vscale x 4 x i16> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

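; With zero-extended i8 indices the scaled offsets fit in 16 bits, so both
; targets share one CHECK body: vwaddu.vv widens and doubles in a single op
; and the store can use the narrower vsoxei16.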
define void @mscatter_baseidx_zext_nxv8i8_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

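; Native i16 indices: RV32 widens and doubles in one vwadd.vv, while RV64
; still has to sign-extend the indices to e64 first.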
define void @mscatter_baseidx_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i32.nxv1p0(<vscale x 1 x i32> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i32(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vnsrl.wi v11, v8, 0
; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vnsrl.wi v12, v8, 0
; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
; RV64-NEXT:    ret
  %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
  call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> %tval, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

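; For i32 (and later i64) elements the index scaling becomes a shift,
; vsll.vi by log2 of the element size, instead of a vadd doubling.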
define void @mscatter_baseidx_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v14, v12
; CHECK-NEXT:    vsll.vi v12, v14, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v16, v12
; CHECK-NEXT:    vsll.vi v12, v16, 2
; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i32.nxv8p0(<vscale x 8 x i32> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

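; i64-element scatters: RV32 still uses 32-bit indices (vsoxei32) because the
; pointers are 32 bits wide even though the stored elements are 64 bits.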
declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4i64(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4i64.nxv4p0(<vscale x 4 x i64> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8i64(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v18, v16
; CHECK-NEXT:    vsll.vi v16, v18, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v20, v16
; CHECK-NEXT:    vsll.vi v16, v20, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

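; Native i64 indices exceed the RV32 pointer width, so RV32 truncates them to
; 32 bits with vnsrl.wi before indexing.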
define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

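; bfloat scatters are pure 16-bit element moves, so +zvfbfmin is enough and
; the code matches the i16 patterns.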
declare void @llvm.masked.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1bf16(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1bf16.nxv1p0(<vscale x 1 x bfloat> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2bf16(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2bf16.nxv2p0(<vscale x 2 x bfloat> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4bf16(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4bf16.nxv4p0(<vscale x 4 x bfloat> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8bf16(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8bf16(<vscale x 8 x bfloat> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8bf16.nxv8p0(<vscale x 8 x bfloat> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

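; half scatters are likewise element moves, so the +zvfh and +zvfhmin RUN
; lines produce identical code and can share the RV32/RV64 prefixes.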
1156declare void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
1157
1158define void @mscatter_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
1159; RV32-LABEL: mscatter_nxv1f16:
1160; RV32:       # %bb.0:
1161; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
1162; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1163; RV32-NEXT:    ret
1164;
1165; RV64-LABEL: mscatter_nxv1f16:
1166; RV64:       # %bb.0:
1167; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, ma
1168; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
1169; RV64-NEXT:    ret
1170  call void @llvm.masked.scatter.nxv1f16.nxv1p0(<vscale x 1 x half> %val, <vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m)
1171  ret void
1172}
1173
1174declare void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)
1175
1176define void @mscatter_nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
1177; RV32-LABEL: mscatter_nxv2f16:
1178; RV32:       # %bb.0:
1179; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
1180; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
1181; RV32-NEXT:    ret
1182;
1183; RV64-LABEL: mscatter_nxv2f16:
1184; RV64:       # %bb.0:
1185; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
1186; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
1187; RV64-NEXT:    ret
1188  call void @llvm.masked.scatter.nxv2f16.nxv2p0(<vscale x 2 x half> %val, <vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m)
1189  ret void
1190}
1191
1192declare void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)
1193
1194define void @mscatter_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
1195; RV32-LABEL: mscatter_nxv4f16:
1196; RV32:       # %bb.0:
1197; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
1198; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
1199; RV32-NEXT:    ret
1200;
1201; RV64-LABEL: mscatter_nxv4f16:
1202; RV64:       # %bb.0:
1203; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
1204; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
1205; RV64-NEXT:    ret
1206  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m)
1207  ret void
1208}
1209
1210define void @mscatter_truemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) {
1211; RV32-LABEL: mscatter_truemask_nxv4f16:
1212; RV32:       # %bb.0:
1213; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
1214; RV32-NEXT:    vsoxei32.v v8, (zero), v10
1215; RV32-NEXT:    ret
1216;
1217; RV64-LABEL: mscatter_truemask_nxv4f16:
1218; RV64:       # %bb.0:
1219; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
1220; RV64-NEXT:    vsoxei64.v v8, (zero), v12
1221; RV64-NEXT:    ret
1222  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1))
1223  ret void
1224}
1225
define void @mscatter_falsemask_nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f16.nxv4p0(<vscale x 4 x half> %val, <vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v10
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

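; For zero-extended i8 indices the scaled offsets fit in 16 bits, so RV32 and
; RV64 share one lowering that scales with a widening add and uses vsoxei16.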
define void @mscatter_baseidx_zext_nxv8i8_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v10, v10
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v10, v10
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f16.nxv8p0(<vscale x 8 x half> %val, <vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f32(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f32.nxv1p0(<vscale x 1 x float> %val, <vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f32.nxv2p0(<vscale x 2 x float> %val, <vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f32.nxv4p0(<vscale x 4 x float> %val, <vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f32(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v14, v12
; CHECK-NEXT:    vsll.vi v12, v14, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v16, v12
; RV32-NEXT:    vsll.vi v12, v16, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v16, v12
; CHECK-NEXT:    vsll.vi v12, v16, 2
; CHECK-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8f32(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v12, v12, 2
; RV32-NEXT:    vsoxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v12
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f32.nxv8p0(<vscale x 8 x float> %val, <vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)

define void @mscatter_nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m) {
; RV32-LABEL: mscatter_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv1f64.nxv1p0(<vscale x 1 x double> %val, <vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double>, <vscale x 2 x ptr>, i32, <vscale x 2 x i1>)

define void @mscatter_nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m) {
; RV32-LABEL: mscatter_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> %val, <vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m)
  ret void
}

declare void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double>, <vscale x 4 x ptr>, i32, <vscale x 4 x i1>)

define void @mscatter_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m) {
; RV32-LABEL: mscatter_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m)
  ret void
}

define void @mscatter_truemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) {
; RV32-LABEL: mscatter_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v12
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1))
  ret void
}

define void @mscatter_falsemask_nxv4f64(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs) {
; CHECK-LABEL: mscatter_falsemask_nxv4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret
  call void @llvm.masked.scatter.nxv4f64.nxv4p0(<vscale x 4 x double> %val, <vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer)
  ret void
}

declare void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double>, <vscale x 8 x ptr>, i32, <vscale x 8 x i1>)

define void @mscatter_nxv8f64(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
; RV64-NEXT:    ret
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i8_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v18, v16
; CHECK-NEXT:    vsll.vi v16, v18, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei16.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v20, v16
; RV32-NEXT:    vsll.vi v16, v20, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv8i16_nxv8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v20, v16
; CHECK-NEXT:    vsll.vi v16, v20, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; CHECK-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v24, v16
; RV64-NEXT:    vsll.vi v16, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

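; With native i64 indices, RV32 first narrows them to i32 with vnsrl; only the
; low 32 bits of the index matter for RV32 address arithmetic.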
define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vsll.vi v16, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
  call void @llvm.masked.scatter.nxv8f64.nxv8p0(<vscale x 8 x double> %val, <vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m)
  ret void
}

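; nxv16f64 does not fit in a single vector register group, so the scatter is
; split into two m8 scatters. On RV64 the two value and two pointer operands
; need four m8 register groups, which forces spills to the stack.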
declare void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double>, <vscale x 16 x ptr>, i32, <vscale x 16 x i1>)

declare <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double>, <vscale x 8 x double>, i64)
declare <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr>, <vscale x 8 x ptr>, i64)

define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x ptr> %ptrs0, <vscale x 8 x ptr> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl4re32.v v28, (a1)
; RV32-NEXT:    vl4re32.v v4, (a0)
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    srli a0, a0, 3
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v24, v0, a0
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (zero), v4, v0.t
; RV32-NEXT:    vmv1r.v v0, v24
; RV32-NEXT:    vsoxei32.v v16, (zero), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 5
; RV64-NEXT:    sub sp, sp, a2
; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    li a3, 24
; RV64-NEXT:    mul a2, a2, a3
; RV64-NEXT:    add a2, sp, a2
; RV64-NEXT:    addi a2, a2, 16
; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a2, a2, 4
; RV64-NEXT:    add a2, sp, a2
; RV64-NEXT:    addi a2, a2, 16
; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
; RV64-NEXT:    vl8re64.v v8, (a0)
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add a0, sp, a0
; RV64-NEXT:    addi a0, a0, 16
; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    vl8re64.v v8, (a1)
; RV64-NEXT:    addi a1, sp, 16
; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
; RV64-NEXT:    srli a0, a0, 3
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v24, v0, a0
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    add a0, sp, a0
; RV64-NEXT:    addi a0, a0, 16
; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 3
; RV64-NEXT:    add a0, sp, a0
; RV64-NEXT:    addi a0, a0, 16
; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT:    vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v0, v24
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    li a1, 24
; RV64-NEXT:    mul a0, a0, a1
; RV64-NEXT:    add a0, sp, a0
; RV64-NEXT:    addi a0, a0, 16
; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    addi a0, sp, 16
; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT:    vsoxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 5
; RV64-NEXT:    add sp, sp, a0
; RV64-NEXT:    .cfi_def_cfa sp, 16
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %p0 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> undef, <vscale x 8 x ptr> %ptrs0, i64 0)
  %p1 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> %p0, <vscale x 8 x ptr> %ptrs1, i64 8)
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %p1, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i8_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vl2r.v v4, (a1)
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v7, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v24, v4
; RV32-NEXT:    vsll.vi v24, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    vmv1r.v v0, v7
; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl2r.v v6, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v24, v6
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v8, v7
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i8> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}

define void @mscatter_baseidx_nxv16i16_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    slli a2, a2, 3
; RV32-NEXT:    sub sp, sp, a2
; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV32-NEXT:    addi a2, sp, 16
; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
; RV32-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmv8r.v v16, v8
; RV32-NEXT:    vl4re16.v v8, (a1)
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    srli a1, a1, 3
; RV32-NEXT:    vslidedown.vx v7, v0, a1
; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf2 v24, v8
; RV32-NEXT:    vsll.vi v8, v24, 3
; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT:    vsoxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv1r.v v0, v7
; RV32-NEXT:    addi a1, sp, 16
; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT:    vsoxei32.v v16, (a0), v12, v0.t
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 3
; RV32-NEXT:    add sp, sp, a0
; RV32-NEXT:    .cfi_def_cfa sp, 16
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vl4re16.v v4, (a1)
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v4
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    srli a1, a1, 3
; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a1
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v6
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
  %v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
  %v1 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> %v0, <vscale x 8 x double> %val1, i64 8)
  call void @llvm.masked.scatter.nxv16f64.nxv16p0(<vscale x 16 x double> %v1, <vscale x 16 x ptr> %ptrs, i32 8, <vscale x 16 x i1> %m)
  ret void
}

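; i1 indices are zero-extended by materializing a 0/1 vector with
; vmv.v.i + vmerge; the scatter mask itself is then moved into v0.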
define void @mscatter_baseidx_zext_nxv1i1_nxv1i8(<vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i1> %idxs, <vscale x 1 x i1> %m) {
; CHECK-LABEL: mscatter_baseidx_zext_nxv1i1_nxv1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
; CHECK-NEXT:    vmv.v.i v10, 0
; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
; CHECK-NEXT:    vmv1r.v v0, v9
; CHECK-NEXT:    vsoxei8.v v8, (a0), v10, v0.t
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 1 x i1> %idxs to <vscale x 1 x i8>
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 1 x i8> %eidxs
  call void @llvm.masked.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> %val, <vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m)
  ret void
}
