; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,RV64
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfhmin,+zvfbfmin,+v \
; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfhmin,+zvfbfmin,+v \
; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN:     --check-prefixes=CHECK,RV64

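; Test lowering of the llvm.masked.gather intrinsic for scalable vector
; types. On RV32 the gather is selected to a 32-bit indexed load
; (vluxei32.v); on RV64 it uses a 64-bit indexed load (vluxei64.v). The
; baseidx variants check that narrow index vectors are sign- or
; zero-extended and scaled by the element size first, and that
; zero-extended indices allow a narrower index EEW (e.g. vluxei16.v).
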
declare <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)

define <vscale x 1 x i8> @mgather_nxv1i8(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru) {
; RV32-LABEL: mgather_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru)
  ret <vscale x 1 x i8> %v
}

declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)

define <vscale x 2 x i8> @mgather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  ret <vscale x 2 x i8> %v
}

define <vscale x 2 x i16> @mgather_nxv2i8_sextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i16> @mgather_nxv2i8_zextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i32> @mgather_nxv2i8_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @mgather_nxv2i8_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i8_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vsext.vf8 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vsext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i8_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru) {
; RV32-LABEL: mgather_nxv2i8_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vzext.vf8 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i8_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vzext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %m, <vscale x 2 x i8> %passthru)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i8>)

define <vscale x 4 x i8> @mgather_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i8> %passthru) {
; RV32-LABEL: mgather_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> %m, <vscale x 4 x i8> %passthru)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @mgather_truemask_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i8> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i8> %passthru)
  ret <vscale x 4 x i8> %v
}

define <vscale x 4 x i8> @mgather_falsemask_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i8> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_falsemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.masked.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 1, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i8> %passthru)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i8>)

define <vscale x 8 x i8> @mgather_nxv8i8(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru) {
; RV32-LABEL: mgather_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru)
  ret <vscale x 8 x i8> %v
}

define <vscale x 8 x i8> @mgather_baseidx_nxv8i8(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
; RV64-NEXT:    vluxei64.v v9, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v9
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i8> @llvm.masked.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 1, <vscale x 8 x i1> %m, <vscale x 8 x i8> %passthru)
  ret <vscale x 8 x i8> %v
}

declare <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)

define <vscale x 1 x i16> @mgather_nxv1i16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i16> %passthru) {
; RV32-LABEL: mgather_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.masked.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m, <vscale x 1 x i16> %passthru)
  ret <vscale x 1 x i16> %v
}

declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)

define <vscale x 2 x i16> @mgather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  ret <vscale x 2 x i16> %v
}

define <vscale x 2 x i32> @mgather_nxv2i16_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @mgather_nxv2i16_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i16_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i16_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru) {
; RV32-LABEL: mgather_nxv2i16_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vzext.vf4 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i16_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x i16> %passthru)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)

define <vscale x 4 x i16> @mgather_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i16> %passthru) {
; RV32-LABEL: mgather_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m, <vscale x 4 x i16> %passthru)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @mgather_truemask_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i16> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i16> %passthru)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @mgather_falsemask_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i16> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_falsemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i16> %passthru)
  ret <vscale x 4 x i16> %v
}

declare <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)

define <vscale x 8 x i16> @mgather_nxv8i16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @mgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @mgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @mgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT:    vwaddu.vv v12, v8, v8
; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT:    vluxei16.v v10, (a0), v12, v0.t
; CHECK-NEXT:    vmv.v.v v8, v10
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @mgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
; RV32-NEXT:    vwadd.vv v12, v8, v8
; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i16> @llvm.masked.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x i16> %passthru)
  ret <vscale x 8 x i16> %v
}

declare <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)

define <vscale x 1 x i32> @mgather_nxv1i32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i32> %passthru) {
; RV32-LABEL: mgather_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.masked.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m, <vscale x 1 x i32> %passthru)
  ret <vscale x 1 x i32> %v
}

declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)

define <vscale x 2 x i32> @mgather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru) {
; RV32-LABEL: mgather_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru)
  ret <vscale x 2 x i32> %v
}

define <vscale x 2 x i64> @mgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru) {
; RV32-LABEL: mgather_nxv2i32_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i32_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru)
  %ev = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @mgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru) {
; RV32-LABEL: mgather_nxv2i32_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v9
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i32_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x i32> %passthru)
  %ev = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)

define <vscale x 4 x i32> @mgather_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i32> %passthru) {
; RV32-LABEL: mgather_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @mgather_truemask_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i32> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @mgather_falsemask_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i32> %passthru) {
; RV32-LABEL: mgather_falsemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_falsemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv2r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %passthru)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)

define <vscale x 8 x i32> @mgather_nxv8i32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vsll.vi v8, v10, 2
; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT:    vluxei16.v v12, (a0), v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    vsll.vi v8, v16, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; CHECK-NEXT:    vzext.vf2 v16, v8
; CHECK-NEXT:    vsll.vi v8, v16, 2
; CHECK-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v12
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @mgather_baseidx_nxv8i32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x i32> %passthru)
  ret <vscale x 8 x i32> %v
}

declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)

define <vscale x 1 x i64> @mgather_nxv1i64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x i64> %passthru) {
; RV32-LABEL: mgather_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m, <vscale x 1 x i64> %passthru)
  ret <vscale x 1 x i64> %v
}

declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)

define <vscale x 2 x i64> @mgather_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x i64> %passthru) {
; RV32-LABEL: mgather_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m, <vscale x 2 x i64> %passthru)
  ret <vscale x 2 x i64> %v
}

declare <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)

define <vscale x 4 x i64> @mgather_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x i64> %passthru) {
; RV32-LABEL: mgather_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m, <vscale x 4 x i64> %passthru)
  ret <vscale x 4 x i64> %v
}

define <vscale x 4 x i64> @mgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i64> %passthru) {
; RV32-LABEL: mgather_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x i64> %passthru)
  ret <vscale x 4 x i64> %v
}

define <vscale x 4 x i64> @mgather_falsemask_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i64> %passthru) {
; CHECK-LABEL: mgather_falsemask_nxv4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; CHECK-NEXT:    vmv4r.v v8, v12
; CHECK-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.masked.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i64> %passthru)
  ret <vscale x 4 x i64> %v
}

declare <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)

define <vscale x 8 x i64> @mgather_nxv8i64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf8 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
; CHECK-NEXT:    vzext.vf2 v10, v8
; CHECK-NEXT:    vsll.vi v8, v10, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vluxei16.v v16, (a0), v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; CHECK-NEXT:    vzext.vf2 v12, v8
; CHECK-NEXT:    vsll.vi v8, v12, 3
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; CHECK-NEXT:    vmv.v.v v8, v16
; CHECK-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vzext.vf2 v24, v8
; RV64-NEXT:    vsll.vi v8, v24, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @mgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
; RV32-LABEL: mgather_baseidx_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v24, v8, 0
; RV32-NEXT:    vsll.vi v8, v24, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: mgather_baseidx_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
  ret <vscale x 8 x i64> %v
}

declare <vscale x 16 x i64> @llvm.masked.gather.nxv16i64.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i64>)

declare <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64 %idx)
declare <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr>, <vscale x 8 x ptr>, i64 %idx)

1214define void @mgather_nxv16i64(<vscale x 8 x ptr> %ptrs0, <vscale x 8 x ptr> %ptrs1, <vscale x 16 x i1> %m, <vscale x 8 x i64> %passthru0, <vscale x 8 x i64> %passthru1, ptr %out) {
1215; RV32-LABEL: mgather_nxv16i64:
1216; RV32:       # %bb.0:
1217; RV32-NEXT:    vl8re64.v v24, (a0)
1218; RV32-NEXT:    csrr a0, vlenb
1219; RV32-NEXT:    srli a2, a0, 3
1220; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
1221; RV32-NEXT:    vslidedown.vx v7, v0, a2
1222; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
1223; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
1224; RV32-NEXT:    vmv1r.v v0, v7
1225; RV32-NEXT:    vluxei32.v v24, (zero), v12, v0.t
1226; RV32-NEXT:    slli a0, a0, 3
1227; RV32-NEXT:    add a0, a1, a0
1228; RV32-NEXT:    vs8r.v v24, (a0)
1229; RV32-NEXT:    vs8r.v v16, (a1)
1230; RV32-NEXT:    ret
1231;
1232; RV64-LABEL: mgather_nxv16i64:
1233; RV64:       # %bb.0:
1234; RV64-NEXT:    addi sp, sp, -16
1235; RV64-NEXT:    .cfi_def_cfa_offset 16
1236; RV64-NEXT:    csrr a3, vlenb
1237; RV64-NEXT:    slli a3, a3, 3
1238; RV64-NEXT:    sub sp, sp, a3
1239; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
1240; RV64-NEXT:    addi a3, sp, 16
1241; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
1242; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
1243; RV64-NEXT:    vmv8r.v v16, v8
1244; RV64-NEXT:    vl8re64.v v24, (a0)
1245; RV64-NEXT:    csrr a0, vlenb
1246; RV64-NEXT:    vl8re64.v v8, (a1)
1247; RV64-NEXT:    srli a1, a0, 3
1248; RV64-NEXT:    vslidedown.vx v7, v0, a1
1249; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
1250; RV64-NEXT:    vluxei64.v v24, (zero), v16, v0.t
1251; RV64-NEXT:    vmv1r.v v0, v7
1252; RV64-NEXT:    addi a1, sp, 16
1253; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
1254; RV64-NEXT:    vluxei64.v v8, (zero), v16, v0.t
1255; RV64-NEXT:    slli a0, a0, 3
1256; RV64-NEXT:    add a0, a2, a0
1257; RV64-NEXT:    vs8r.v v8, (a0)
1258; RV64-NEXT:    vs8r.v v24, (a2)
1259; RV64-NEXT:    csrr a0, vlenb
1260; RV64-NEXT:    slli a0, a0, 3
1261; RV64-NEXT:    add sp, sp, a0
1262; RV64-NEXT:    .cfi_def_cfa sp, 16
1263; RV64-NEXT:    addi sp, sp, 16
1264; RV64-NEXT:    .cfi_def_cfa_offset 0
1265; RV64-NEXT:    ret
1266  %p0 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> undef, <vscale x 8 x ptr> %ptrs0, i64 0)
1267  %p1 = call <vscale x 16 x ptr> @llvm.vector.insert.nxv8p0.nxv16p0(<vscale x 16 x ptr> %p0, <vscale x 8 x ptr> %ptrs1, i64 8)
1268
1269  %pt0 = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %passthru0, i64 0)
1270  %pt1 = call <vscale x 16 x i64> @llvm.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> %pt0, <vscale x 8 x i64> %passthru1, i64 8)
1271
1272  %v = call <vscale x 16 x i64> @llvm.masked.gather.nxv16i64.nxv16p0(<vscale x 16 x ptr> %p1, i32 8, <vscale x 16 x i1> %m, <vscale x 16 x i64> %pt1)
1273  store <vscale x 16 x i64> %v, ptr %out
1274  ret void
1275}
1276
1277declare <vscale x 1 x bfloat> @llvm.masked.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x bfloat>)
1278
1279define <vscale x 1 x bfloat> @mgather_nxv1bf16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x bfloat> %passthru) {
1280; RV32-LABEL: mgather_nxv1bf16:
1281; RV32:       # %bb.0:
1282; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
1283; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1284; RV32-NEXT:    vmv1r.v v8, v9
1285; RV32-NEXT:    ret
1286;
1287; RV64-LABEL: mgather_nxv1bf16:
1288; RV64:       # %bb.0:
1289; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
1290; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
1291; RV64-NEXT:    vmv1r.v v8, v9
1292; RV64-NEXT:    ret
1293  %v = call <vscale x 1 x bfloat> @llvm.masked.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m, <vscale x 1 x bfloat> %passthru)
1294  ret <vscale x 1 x bfloat> %v
1295}
1296
1297declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
1298
1299define <vscale x 2 x bfloat> @mgather_nxv2bf16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x bfloat> %passthru) {
1300; RV32-LABEL: mgather_nxv2bf16:
1301; RV32:       # %bb.0:
1302; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
1303; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1304; RV32-NEXT:    vmv1r.v v8, v9
1305; RV32-NEXT:    ret
1306;
1307; RV64-LABEL: mgather_nxv2bf16:
1308; RV64:       # %bb.0:
1309; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
1310; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
1311; RV64-NEXT:    vmv1r.v v8, v10
1312; RV64-NEXT:    ret
1313  %v = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x bfloat> %passthru)
1314  ret <vscale x 2 x bfloat> %v
1315}
1316
1317declare <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x bfloat>)
1318
1319define <vscale x 4 x bfloat> @mgather_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x bfloat> %passthru) {
1320; RV32-LABEL: mgather_nxv4bf16:
1321; RV32:       # %bb.0:
1322; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
1323; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
1324; RV32-NEXT:    vmv.v.v v8, v10
1325; RV32-NEXT:    ret
1326;
1327; RV64-LABEL: mgather_nxv4bf16:
1328; RV64:       # %bb.0:
1329; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
1330; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
1331; RV64-NEXT:    vmv.v.v v8, v12
1332; RV64-NEXT:    ret
1333  %v = call <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m, <vscale x 4 x bfloat> %passthru)
1334  ret <vscale x 4 x bfloat> %v
1335}
1336
1337define <vscale x 4 x bfloat> @mgather_truemask_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x bfloat> %passthru) {
1338; RV32-LABEL: mgather_truemask_nxv4bf16:
1339; RV32:       # %bb.0:
1340; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
1341; RV32-NEXT:    vluxei32.v v10, (zero), v8
1342; RV32-NEXT:    vmv.v.v v8, v10
1343; RV32-NEXT:    ret
1344;
1345; RV64-LABEL: mgather_truemask_nxv4bf16:
1346; RV64:       # %bb.0:
1347; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
1348; RV64-NEXT:    vluxei64.v v12, (zero), v8
1349; RV64-NEXT:    vmv.v.v v8, v12
1350; RV64-NEXT:    ret
1351  %v = call <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x bfloat> %passthru)
1352  ret <vscale x 4 x bfloat> %v
1353}
1354
1355define <vscale x 4 x bfloat> @mgather_falsemask_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x bfloat> %passthru) {
1356; RV32-LABEL: mgather_falsemask_nxv4bf16:
1357; RV32:       # %bb.0:
1358; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1359; RV32-NEXT:    vmv1r.v v8, v10
1360; RV32-NEXT:    ret
1361;
1362; RV64-LABEL: mgather_falsemask_nxv4bf16:
1363; RV64:       # %bb.0:
1364; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1365; RV64-NEXT:    vmv1r.v v8, v12
1366; RV64-NEXT:    ret
1367  %v = call <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x bfloat> %passthru)
1368  ret <vscale x 4 x bfloat> %v
1369}
1370
1371declare <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
1372
1373define <vscale x 8 x bfloat> @mgather_nxv8bf16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
1374; RV32-LABEL: mgather_nxv8bf16:
1375; RV32:       # %bb.0:
1376; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
1377; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
1378; RV32-NEXT:    vmv.v.v v8, v12
1379; RV32-NEXT:    ret
1380;
1381; RV64-LABEL: mgather_nxv8bf16:
1382; RV64:       # %bb.0:
1383; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
1384; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
1385; RV64-NEXT:    vmv.v.v v8, v16
1386; RV64-NEXT:    ret
1387  %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
1388  ret <vscale x 8 x bfloat> %v
1389}
1390
1391define <vscale x 8 x bfloat> @mgather_baseidx_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
1392; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8bf16:
1393; RV32:       # %bb.0:
1394; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
1395; RV32-NEXT:    vsext.vf4 v12, v8
1396; RV32-NEXT:    vadd.vv v12, v12, v12
1397; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1398; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
1399; RV32-NEXT:    vmv.v.v v8, v10
1400; RV32-NEXT:    ret
1401;
1402; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8bf16:
1403; RV64:       # %bb.0:
1404; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1405; RV64-NEXT:    vsext.vf8 v16, v8
1406; RV64-NEXT:    vadd.vv v16, v16, v16
1407; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1408; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
1409; RV64-NEXT:    vmv.v.v v8, v10
1410; RV64-NEXT:    ret
1411  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
1412  %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
1413  ret <vscale x 8 x bfloat> %v
1414}
1415
1416define <vscale x 8 x bfloat> @mgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
1417; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16:
1418; RV32:       # %bb.0:
1419; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
1420; RV32-NEXT:    vsext.vf4 v12, v8
1421; RV32-NEXT:    vadd.vv v12, v12, v12
1422; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1423; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
1424; RV32-NEXT:    vmv.v.v v8, v10
1425; RV32-NEXT:    ret
1426;
1427; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8bf16:
1428; RV64:       # %bb.0:
1429; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1430; RV64-NEXT:    vsext.vf8 v16, v8
1431; RV64-NEXT:    vadd.vv v16, v16, v16
1432; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1433; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
1434; RV64-NEXT:    vmv.v.v v8, v10
1435; RV64-NEXT:    ret
1436  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
1437  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
1438  %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
1439  ret <vscale x 8 x bfloat> %v
1440}
1441
1442define <vscale x 8 x bfloat> @mgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
1443; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8bf16:
1444; CHECK:       # %bb.0:
1445; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
1446; CHECK-NEXT:    vwaddu.vv v12, v8, v8
1447; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1448; CHECK-NEXT:    vluxei16.v v10, (a0), v12, v0.t
1449; CHECK-NEXT:    vmv.v.v v8, v10
1450; CHECK-NEXT:    ret
1451  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
1452  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
1453  %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
1454  ret <vscale x 8 x bfloat> %v
1455}
1456
1457define <vscale x 8 x bfloat> @mgather_baseidx_nxv8bf16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru) {
1458; RV32-LABEL: mgather_baseidx_nxv8bf16:
1459; RV32:       # %bb.0:
1460; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
1461; RV32-NEXT:    vwadd.vv v12, v8, v8
1462; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
1463; RV32-NEXT:    vmv.v.v v8, v10
1464; RV32-NEXT:    ret
1465;
1466; RV64-LABEL: mgather_baseidx_nxv8bf16:
1467; RV64:       # %bb.0:
1468; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1469; RV64-NEXT:    vsext.vf4 v16, v8
1470; RV64-NEXT:    vadd.vv v16, v16, v16
1471; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1472; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
1473; RV64-NEXT:    vmv.v.v v8, v10
1474; RV64-NEXT:    ret
1475  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
1476  %v = call <vscale x 8 x bfloat> @llvm.masked.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x bfloat> %passthru)
1477  ret <vscale x 8 x bfloat> %v
1478}
1479
1480declare <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x half>)
1481
1482define <vscale x 1 x half> @mgather_nxv1f16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x half> %passthru) {
1483; RV32-LABEL: mgather_nxv1f16:
1484; RV32:       # %bb.0:
1485; RV32-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
1486; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1487; RV32-NEXT:    vmv1r.v v8, v9
1488; RV32-NEXT:    ret
1489;
1490; RV64-LABEL: mgather_nxv1f16:
1491; RV64:       # %bb.0:
1492; RV64-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
1493; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
1494; RV64-NEXT:    vmv1r.v v8, v9
1495; RV64-NEXT:    ret
1496  %v = call <vscale x 1 x half> @llvm.masked.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 2, <vscale x 1 x i1> %m, <vscale x 1 x half> %passthru)
1497  ret <vscale x 1 x half> %v
1498}
1499
1500declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
1501
1502define <vscale x 2 x half> @mgather_nxv2f16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x half> %passthru) {
1503; RV32-LABEL: mgather_nxv2f16:
1504; RV32:       # %bb.0:
1505; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
1506; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1507; RV32-NEXT:    vmv1r.v v8, v9
1508; RV32-NEXT:    ret
1509;
1510; RV64-LABEL: mgather_nxv2f16:
1511; RV64:       # %bb.0:
1512; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
1513; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
1514; RV64-NEXT:    vmv1r.v v8, v10
1515; RV64-NEXT:    ret
1516  %v = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %m, <vscale x 2 x half> %passthru)
1517  ret <vscale x 2 x half> %v
1518}
1519
1520declare <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
1521
1522define <vscale x 4 x half> @mgather_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x half> %passthru) {
1523; RV32-LABEL: mgather_nxv4f16:
1524; RV32:       # %bb.0:
1525; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
1526; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
1527; RV32-NEXT:    vmv.v.v v8, v10
1528; RV32-NEXT:    ret
1529;
1530; RV64-LABEL: mgather_nxv4f16:
1531; RV64:       # %bb.0:
1532; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
1533; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
1534; RV64-NEXT:    vmv.v.v v8, v12
1535; RV64-NEXT:    ret
1536  %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %m, <vscale x 4 x half> %passthru)
1537  ret <vscale x 4 x half> %v
1538}
1539
1540define <vscale x 4 x half> @mgather_truemask_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x half> %passthru) {
1541; RV32-LABEL: mgather_truemask_nxv4f16:
1542; RV32:       # %bb.0:
1543; RV32-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
1544; RV32-NEXT:    vluxei32.v v10, (zero), v8
1545; RV32-NEXT:    vmv.v.v v8, v10
1546; RV32-NEXT:    ret
1547;
1548; RV64-LABEL: mgather_truemask_nxv4f16:
1549; RV64:       # %bb.0:
1550; RV64-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
1551; RV64-NEXT:    vluxei64.v v12, (zero), v8
1552; RV64-NEXT:    vmv.v.v v8, v12
1553; RV64-NEXT:    ret
1554  %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x half> %passthru)
1555  ret <vscale x 4 x half> %v
1556}
1557
1558define <vscale x 4 x half> @mgather_falsemask_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x half> %passthru) {
1559; RV32-LABEL: mgather_falsemask_nxv4f16:
1560; RV32:       # %bb.0:
1561; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1562; RV32-NEXT:    vmv1r.v v8, v10
1563; RV32-NEXT:    ret
1564;
1565; RV64-LABEL: mgather_falsemask_nxv4f16:
1566; RV64:       # %bb.0:
1567; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1568; RV64-NEXT:    vmv1r.v v8, v12
1569; RV64-NEXT:    ret
1570  %v = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x half> %passthru)
1571  ret <vscale x 4 x half> %v
1572}
1573
1574declare <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x half>)
1575
1576define <vscale x 8 x half> @mgather_nxv8f16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
1577; RV32-LABEL: mgather_nxv8f16:
1578; RV32:       # %bb.0:
1579; RV32-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
1580; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
1581; RV32-NEXT:    vmv.v.v v8, v12
1582; RV32-NEXT:    ret
1583;
1584; RV64-LABEL: mgather_nxv8f16:
1585; RV64:       # %bb.0:
1586; RV64-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
1587; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
1588; RV64-NEXT:    vmv.v.v v8, v16
1589; RV64-NEXT:    ret
1590  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
1591  ret <vscale x 8 x half> %v
1592}
1593
1594define <vscale x 8 x half> @mgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
1595; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f16:
1596; RV32:       # %bb.0:
1597; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
1598; RV32-NEXT:    vsext.vf4 v12, v8
1599; RV32-NEXT:    vadd.vv v12, v12, v12
1600; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1601; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
1602; RV32-NEXT:    vmv.v.v v8, v10
1603; RV32-NEXT:    ret
1604;
1605; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f16:
1606; RV64:       # %bb.0:
1607; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1608; RV64-NEXT:    vsext.vf8 v16, v8
1609; RV64-NEXT:    vadd.vv v16, v16, v16
1610; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1611; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
1612; RV64-NEXT:    vmv.v.v v8, v10
1613; RV64-NEXT:    ret
1614  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
1615  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
1616  ret <vscale x 8 x half> %v
1617}
1618
1619define <vscale x 8 x half> @mgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
1620; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16:
1621; RV32:       # %bb.0:
1622; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
1623; RV32-NEXT:    vsext.vf4 v12, v8
1624; RV32-NEXT:    vadd.vv v12, v12, v12
1625; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1626; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
1627; RV32-NEXT:    vmv.v.v v8, v10
1628; RV32-NEXT:    ret
1629;
1630; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f16:
1631; RV64:       # %bb.0:
1632; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1633; RV64-NEXT:    vsext.vf8 v16, v8
1634; RV64-NEXT:    vadd.vv v16, v16, v16
1635; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1636; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
1637; RV64-NEXT:    vmv.v.v v8, v10
1638; RV64-NEXT:    ret
1639  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
1640  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
1641  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
1642  ret <vscale x 8 x half> %v
1643}
1644
1645define <vscale x 8 x half> @mgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
1646; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f16:
1647; CHECK:       # %bb.0:
1648; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
1649; CHECK-NEXT:    vwaddu.vv v12, v8, v8
1650; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1651; CHECK-NEXT:    vluxei16.v v10, (a0), v12, v0.t
1652; CHECK-NEXT:    vmv.v.v v8, v10
1653; CHECK-NEXT:    ret
1654  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
1655  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
1656  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
1657  ret <vscale x 8 x half> %v
1658}
1659
1660define <vscale x 8 x half> @mgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru) {
1661; RV32-LABEL: mgather_baseidx_nxv8f16:
1662; RV32:       # %bb.0:
1663; RV32-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
1664; RV32-NEXT:    vwadd.vv v12, v8, v8
1665; RV32-NEXT:    vluxei32.v v10, (a0), v12, v0.t
1666; RV32-NEXT:    vmv.v.v v8, v10
1667; RV32-NEXT:    ret
1668;
1669; RV64-LABEL: mgather_baseidx_nxv8f16:
1670; RV64:       # %bb.0:
1671; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1672; RV64-NEXT:    vsext.vf4 v16, v8
1673; RV64-NEXT:    vadd.vv v16, v16, v16
1674; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
1675; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
1676; RV64-NEXT:    vmv.v.v v8, v10
1677; RV64-NEXT:    ret
1678  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
1679  %v = call <vscale x 8 x half> @llvm.masked.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 2, <vscale x 8 x i1> %m, <vscale x 8 x half> %passthru)
1680  ret <vscale x 8 x half> %v
1681}
1682
1683declare <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x float>)
1684
1685define <vscale x 1 x float> @mgather_nxv1f32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x float> %passthru) {
1686; RV32-LABEL: mgather_nxv1f32:
1687; RV32:       # %bb.0:
1688; RV32-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
1689; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1690; RV32-NEXT:    vmv1r.v v8, v9
1691; RV32-NEXT:    ret
1692;
1693; RV64-LABEL: mgather_nxv1f32:
1694; RV64:       # %bb.0:
1695; RV64-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
1696; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
1697; RV64-NEXT:    vmv1r.v v8, v9
1698; RV64-NEXT:    ret
1699  %v = call <vscale x 1 x float> @llvm.masked.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 4, <vscale x 1 x i1> %m, <vscale x 1 x float> %passthru)
1700  ret <vscale x 1 x float> %v
1701}
1702
1703declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
1704
1705define <vscale x 2 x float> @mgather_nxv2f32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x float> %passthru) {
1706; RV32-LABEL: mgather_nxv2f32:
1707; RV32:       # %bb.0:
1708; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
1709; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1710; RV32-NEXT:    vmv.v.v v8, v9
1711; RV32-NEXT:    ret
1712;
1713; RV64-LABEL: mgather_nxv2f32:
1714; RV64:       # %bb.0:
1715; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
1716; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
1717; RV64-NEXT:    vmv.v.v v8, v10
1718; RV64-NEXT:    ret
1719  %v = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %m, <vscale x 2 x float> %passthru)
1720  ret <vscale x 2 x float> %v
1721}
1722
1723declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
1724
1725define <vscale x 4 x float> @mgather_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x float> %passthru) {
1726; RV32-LABEL: mgather_nxv4f32:
1727; RV32:       # %bb.0:
1728; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
1729; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
1730; RV32-NEXT:    vmv.v.v v8, v10
1731; RV32-NEXT:    ret
1732;
1733; RV64-LABEL: mgather_nxv4f32:
1734; RV64:       # %bb.0:
1735; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
1736; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
1737; RV64-NEXT:    vmv.v.v v8, v12
1738; RV64-NEXT:    ret
1739  %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %m, <vscale x 4 x float> %passthru)
1740  ret <vscale x 4 x float> %v
1741}
1742
1743define <vscale x 4 x float> @mgather_truemask_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x float> %passthru) {
1744; RV32-LABEL: mgather_truemask_nxv4f32:
1745; RV32:       # %bb.0:
1746; RV32-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
1747; RV32-NEXT:    vluxei32.v v8, (zero), v8
1748; RV32-NEXT:    ret
1749;
1750; RV64-LABEL: mgather_truemask_nxv4f32:
1751; RV64:       # %bb.0:
1752; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
1753; RV64-NEXT:    vluxei64.v v12, (zero), v8
1754; RV64-NEXT:    vmv.v.v v8, v12
1755; RV64-NEXT:    ret
1756  %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x float> %passthru)
1757  ret <vscale x 4 x float> %v
1758}
1759
1760define <vscale x 4 x float> @mgather_falsemask_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x float> %passthru) {
1761; RV32-LABEL: mgather_falsemask_nxv4f32:
1762; RV32:       # %bb.0:
1763; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1764; RV32-NEXT:    vmv2r.v v8, v10
1765; RV32-NEXT:    ret
1766;
1767; RV64-LABEL: mgather_falsemask_nxv4f32:
1768; RV64:       # %bb.0:
1769; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
1770; RV64-NEXT:    vmv2r.v v8, v12
1771; RV64-NEXT:    ret
1772  %v = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %passthru)
1773  ret <vscale x 4 x float> %v
1774}
1775
1776declare <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x float>)
1777
1778define <vscale x 8 x float> @mgather_nxv8f32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
1779; RV32-LABEL: mgather_nxv8f32:
1780; RV32:       # %bb.0:
1781; RV32-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
1782; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
1783; RV32-NEXT:    vmv.v.v v8, v12
1784; RV32-NEXT:    ret
1785;
1786; RV64-LABEL: mgather_nxv8f32:
1787; RV64:       # %bb.0:
1788; RV64-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
1789; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
1790; RV64-NEXT:    vmv.v.v v8, v16
1791; RV64-NEXT:    ret
1792  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
1793  ret <vscale x 8 x float> %v
1794}
1795
1796define <vscale x 8 x float> @mgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
1797; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f32:
1798; RV32:       # %bb.0:
1799; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1800; RV32-NEXT:    vsext.vf4 v16, v8
1801; RV32-NEXT:    vsll.vi v8, v16, 2
1802; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
1803; RV32-NEXT:    vmv.v.v v8, v12
1804; RV32-NEXT:    ret
1805;
1806; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f32:
1807; RV64:       # %bb.0:
1808; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1809; RV64-NEXT:    vsext.vf8 v16, v8
1810; RV64-NEXT:    vsll.vi v16, v16, 2
1811; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1812; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
1813; RV64-NEXT:    vmv.v.v v8, v12
1814; RV64-NEXT:    ret
1815  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
1816  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
1817  ret <vscale x 8 x float> %v
1818}
1819
1820define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
1821; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32:
1822; RV32:       # %bb.0:
1823; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1824; RV32-NEXT:    vsext.vf4 v16, v8
1825; RV32-NEXT:    vsll.vi v8, v16, 2
1826; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
1827; RV32-NEXT:    vmv.v.v v8, v12
1828; RV32-NEXT:    ret
1829;
1830; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f32:
1831; RV64:       # %bb.0:
1832; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1833; RV64-NEXT:    vsext.vf8 v16, v8
1834; RV64-NEXT:    vsll.vi v16, v16, 2
1835; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1836; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
1837; RV64-NEXT:    vmv.v.v v8, v12
1838; RV64-NEXT:    ret
1839  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
1840  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1841  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
1842  ret <vscale x 8 x float> %v
1843}
1844
1845define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
1846; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f32:
1847; CHECK:       # %bb.0:
1848; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
1849; CHECK-NEXT:    vzext.vf2 v10, v8
1850; CHECK-NEXT:    vsll.vi v8, v10, 2
1851; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1852; CHECK-NEXT:    vluxei16.v v12, (a0), v8, v0.t
1853; CHECK-NEXT:    vmv.v.v v8, v12
1854; CHECK-NEXT:    ret
1855  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
1856  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1857  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
1858  ret <vscale x 8 x float> %v
1859}
1860
1861define <vscale x 8 x float> @mgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
1862; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
1863; RV32:       # %bb.0:
1864; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1865; RV32-NEXT:    vsext.vf2 v16, v8
1866; RV32-NEXT:    vsll.vi v8, v16, 2
1867; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
1868; RV32-NEXT:    vmv.v.v v8, v12
1869; RV32-NEXT:    ret
1870;
1871; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f32:
1872; RV64:       # %bb.0:
1873; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1874; RV64-NEXT:    vsext.vf4 v16, v8
1875; RV64-NEXT:    vsll.vi v16, v16, 2
1876; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1877; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
1878; RV64-NEXT:    vmv.v.v v8, v12
1879; RV64-NEXT:    ret
1880  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
1881  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
1882  ret <vscale x 8 x float> %v
1883}
1884
1885define <vscale x 8 x float> @mgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
1886; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
1887; RV32:       # %bb.0:
1888; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1889; RV32-NEXT:    vsext.vf2 v16, v8
1890; RV32-NEXT:    vsll.vi v8, v16, 2
1891; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
1892; RV32-NEXT:    vmv.v.v v8, v12
1893; RV32-NEXT:    ret
1894;
1895; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f32:
1896; RV64:       # %bb.0:
1897; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1898; RV64-NEXT:    vsext.vf4 v16, v8
1899; RV64-NEXT:    vsll.vi v16, v16, 2
1900; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1901; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
1902; RV64-NEXT:    vmv.v.v v8, v12
1903; RV64-NEXT:    ret
1904  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
1905  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1906  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
1907  ret <vscale x 8 x float> %v
1908}
1909
1910define <vscale x 8 x float> @mgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
1911; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f32:
1912; CHECK:       # %bb.0:
1913; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1914; CHECK-NEXT:    vzext.vf2 v16, v8
1915; CHECK-NEXT:    vsll.vi v8, v16, 2
1916; CHECK-NEXT:    vluxei32.v v12, (a0), v8, v0.t
1917; CHECK-NEXT:    vmv.v.v v8, v12
1918; CHECK-NEXT:    ret
1919  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
1920  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
1921  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
1922  ret <vscale x 8 x float> %v
1923}
1924
1925define <vscale x 8 x float> @mgather_baseidx_nxv8f32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru) {
1926; RV32-LABEL: mgather_baseidx_nxv8f32:
1927; RV32:       # %bb.0:
1928; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
1929; RV32-NEXT:    vsll.vi v8, v8, 2
1930; RV32-NEXT:    vluxei32.v v12, (a0), v8, v0.t
1931; RV32-NEXT:    vmv.v.v v8, v12
1932; RV32-NEXT:    ret
1933;
1934; RV64-LABEL: mgather_baseidx_nxv8f32:
1935; RV64:       # %bb.0:
1936; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
1937; RV64-NEXT:    vsext.vf2 v16, v8
1938; RV64-NEXT:    vsll.vi v16, v16, 2
1939; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
1940; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
1941; RV64-NEXT:    vmv.v.v v8, v12
1942; RV64-NEXT:    ret
1943  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
1944  %v = call <vscale x 8 x float> @llvm.masked.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 4, <vscale x 8 x i1> %m, <vscale x 8 x float> %passthru)
1945  ret <vscale x 8 x float> %v
1946}
1947
1948declare <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x double>)
1949
1950define <vscale x 1 x double> @mgather_nxv1f64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru) {
1951; RV32-LABEL: mgather_nxv1f64:
1952; RV32:       # %bb.0:
1953; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
1954; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1955; RV32-NEXT:    vmv.v.v v8, v9
1956; RV32-NEXT:    ret
1957;
1958; RV64-LABEL: mgather_nxv1f64:
1959; RV64:       # %bb.0:
1960; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
1961; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
1962; RV64-NEXT:    vmv.v.v v8, v9
1963; RV64-NEXT:    ret
1964  %v = call <vscale x 1 x double> @llvm.masked.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 8, <vscale x 1 x i1> %m, <vscale x 1 x double> %passthru)
1965  ret <vscale x 1 x double> %v
1966}
1967
1968declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
1969
1970define <vscale x 2 x double> @mgather_nxv2f64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru) {
1971; RV32-LABEL: mgather_nxv2f64:
1972; RV32:       # %bb.0:
1973; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
1974; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
1975; RV32-NEXT:    vmv.v.v v8, v10
1976; RV32-NEXT:    ret
1977;
1978; RV64-LABEL: mgather_nxv2f64:
1979; RV64:       # %bb.0:
1980; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
1981; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
1982; RV64-NEXT:    vmv.v.v v8, v10
1983; RV64-NEXT:    ret
1984  %v = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %m, <vscale x 2 x double> %passthru)
1985  ret <vscale x 2 x double> %v
1986}
1987
1988declare <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x double>)
1989
1990define <vscale x 4 x double> @mgather_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru) {
1991; RV32-LABEL: mgather_nxv4f64:
1992; RV32:       # %bb.0:
1993; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
1994; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
1995; RV32-NEXT:    vmv.v.v v8, v12
1996; RV32-NEXT:    ret
1997;
1998; RV64-LABEL: mgather_nxv4f64:
1999; RV64:       # %bb.0:
2000; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
2001; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
2002; RV64-NEXT:    vmv.v.v v8, v12
2003; RV64-NEXT:    ret
2004  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> %m, <vscale x 4 x double> %passthru)
2005  ret <vscale x 4 x double> %v
2006}
2007
2008define <vscale x 4 x double> @mgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x double> %passthru) {
2009; RV32-LABEL: mgather_truemask_nxv4f64:
2010; RV32:       # %bb.0:
2011; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
2012; RV32-NEXT:    vluxei32.v v12, (zero), v8
2013; RV32-NEXT:    vmv.v.v v8, v12
2014; RV32-NEXT:    ret
2015;
2016; RV64-LABEL: mgather_truemask_nxv4f64:
2017; RV64:       # %bb.0:
2018; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
2019; RV64-NEXT:    vluxei64.v v8, (zero), v8
2020; RV64-NEXT:    ret
2021  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> splat (i1 1), <vscale x 4 x double> %passthru)
2022  ret <vscale x 4 x double> %v
2023}
2024
2025define <vscale x 4 x double> @mgather_falsemask_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x double> %passthru) {
2026; CHECK-LABEL: mgather_falsemask_nxv4f64:
2027; CHECK:       # %bb.0:
2028; CHECK-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
2029; CHECK-NEXT:    vmv4r.v v8, v12
2030; CHECK-NEXT:    ret
2031  %v = call <vscale x 4 x double> @llvm.masked.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, i32 8, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x double> %passthru)
2032  ret <vscale x 4 x double> %v
2033}
2034
2035declare <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr>, i32, <vscale x 8 x i1>, <vscale x 8 x double>)
2036
2037define <vscale x 8 x double> @mgather_nxv8f64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2038; RV32-LABEL: mgather_nxv8f64:
2039; RV32:       # %bb.0:
2040; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
2041; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
2042; RV32-NEXT:    vmv.v.v v8, v16
2043; RV32-NEXT:    ret
2044;
2045; RV64-LABEL: mgather_nxv8f64:
2046; RV64:       # %bb.0:
2047; RV64-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
2048; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
2049; RV64-NEXT:    vmv.v.v v8, v16
2050; RV64-NEXT:    ret
2051  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2052  ret <vscale x 8 x double> %v
2053}
2054
2055define <vscale x 8 x double> @mgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2056; RV32-LABEL: mgather_baseidx_nxv8i8_nxv8f64:
2057; RV32:       # %bb.0:
2058; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2059; RV32-NEXT:    vsext.vf4 v12, v8
2060; RV32-NEXT:    vsll.vi v8, v12, 3
2061; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2062; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
2063; RV32-NEXT:    vmv.v.v v8, v16
2064; RV32-NEXT:    ret
2065;
2066; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f64:
2067; RV64:       # %bb.0:
2068; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2069; RV64-NEXT:    vsext.vf8 v24, v8
2070; RV64-NEXT:    vsll.vi v8, v24, 3
2071; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
2072; RV64-NEXT:    vmv.v.v v8, v16
2073; RV64-NEXT:    ret
2074  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
2075  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2076  ret <vscale x 8 x double> %v
2077}
2078
2079define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2080; RV32-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64:
2081; RV32:       # %bb.0:
2082; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2083; RV32-NEXT:    vsext.vf4 v12, v8
2084; RV32-NEXT:    vsll.vi v8, v12, 3
2085; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2086; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
2087; RV32-NEXT:    vmv.v.v v8, v16
2088; RV32-NEXT:    ret
2089;
2090; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64:
2091; RV64:       # %bb.0:
2092; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2093; RV64-NEXT:    vsext.vf8 v24, v8
2094; RV64-NEXT:    vsll.vi v8, v24, 3
2095; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
2096; RV64-NEXT:    vmv.v.v v8, v16
2097; RV64-NEXT:    ret
2098  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
2099  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
2100  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2101  ret <vscale x 8 x double> %v
2102}
2103
2104define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2105; CHECK-LABEL: mgather_baseidx_zext_nxv8i8_nxv8f64:
2106; CHECK:       # %bb.0:
2107; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, ma
2108; CHECK-NEXT:    vzext.vf2 v10, v8
2109; CHECK-NEXT:    vsll.vi v8, v10, 3
2110; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2111; CHECK-NEXT:    vluxei16.v v16, (a0), v8, v0.t
2112; CHECK-NEXT:    vmv.v.v v8, v16
2113; CHECK-NEXT:    ret
2114  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
2115  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
2116  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2117  ret <vscale x 8 x double> %v
2118}
2119
2120define <vscale x 8 x double> @mgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2121; RV32-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
2122; RV32:       # %bb.0:
2123; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2124; RV32-NEXT:    vsext.vf2 v12, v8
2125; RV32-NEXT:    vsll.vi v8, v12, 3
2126; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2127; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
2128; RV32-NEXT:    vmv.v.v v8, v16
2129; RV32-NEXT:    ret
2130;
2131; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
2132; RV64:       # %bb.0:
2133; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2134; RV64-NEXT:    vsext.vf4 v24, v8
2135; RV64-NEXT:    vsll.vi v8, v24, 3
2136; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
2137; RV64-NEXT:    vmv.v.v v8, v16
2138; RV64-NEXT:    ret
2139  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
2140  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2141  ret <vscale x 8 x double> %v
2142}
2143
2144define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2145; RV32-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
2146; RV32:       # %bb.0:
2147; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2148; RV32-NEXT:    vsext.vf2 v12, v8
2149; RV32-NEXT:    vsll.vi v8, v12, 3
2150; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2151; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
2152; RV32-NEXT:    vmv.v.v v8, v16
2153; RV32-NEXT:    ret
2154;
2155; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
2156; RV64:       # %bb.0:
2157; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2158; RV64-NEXT:    vsext.vf4 v24, v8
2159; RV64-NEXT:    vsll.vi v8, v24, 3
2160; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
2161; RV64-NEXT:    vmv.v.v v8, v16
2162; RV64-NEXT:    ret
2163  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
2164  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
2165  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2166  ret <vscale x 8 x double> %v
2167}
2168
2169define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2170; CHECK-LABEL: mgather_baseidx_zext_nxv8i16_nxv8f64:
2171; CHECK:       # %bb.0:
2172; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2173; CHECK-NEXT:    vzext.vf2 v12, v8
2174; CHECK-NEXT:    vsll.vi v8, v12, 3
2175; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2176; CHECK-NEXT:    vluxei32.v v16, (a0), v8, v0.t
2177; CHECK-NEXT:    vmv.v.v v8, v16
2178; CHECK-NEXT:    ret
2179  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
2180  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
2181  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2182  ret <vscale x 8 x double> %v
2183}
2184
2185define <vscale x 8 x double> @mgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2186; RV32-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
2187; RV32:       # %bb.0:
2188; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2189; RV32-NEXT:    vsll.vi v8, v8, 3
2190; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2191; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
2192; RV32-NEXT:    vmv.v.v v8, v16
2193; RV32-NEXT:    ret
2194;
2195; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
2196; RV64:       # %bb.0:
2197; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2198; RV64-NEXT:    vsext.vf2 v24, v8
2199; RV64-NEXT:    vsll.vi v8, v24, 3
2200; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
2201; RV64-NEXT:    vmv.v.v v8, v16
2202; RV64-NEXT:    ret
2203  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
2204  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2205  ret <vscale x 8 x double> %v
2206}
2207
2208define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2209; RV32-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
2210; RV32:       # %bb.0:
2211; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2212; RV32-NEXT:    vsll.vi v8, v8, 3
2213; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2214; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
2215; RV32-NEXT:    vmv.v.v v8, v16
2216; RV32-NEXT:    ret
2217;
2218; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64:
2219; RV64:       # %bb.0:
2220; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2221; RV64-NEXT:    vsext.vf2 v24, v8
2222; RV64-NEXT:    vsll.vi v8, v24, 3
2223; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
2224; RV64-NEXT:    vmv.v.v v8, v16
2225; RV64-NEXT:    ret
2226  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
2227  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
2228  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2229  ret <vscale x 8 x double> %v
2230}
2231
2232define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2233; RV32-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
2234; RV32:       # %bb.0:
2235; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2236; RV32-NEXT:    vsll.vi v8, v8, 3
2237; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2238; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
2239; RV32-NEXT:    vmv.v.v v8, v16
2240; RV32-NEXT:    ret
2241;
2242; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64:
2243; RV64:       # %bb.0:
2244; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2245; RV64-NEXT:    vzext.vf2 v24, v8
2246; RV64-NEXT:    vsll.vi v8, v24, 3
2247; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
2248; RV64-NEXT:    vmv.v.v v8, v16
2249; RV64-NEXT:    ret
2250  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
2251  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
2252  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2253  ret <vscale x 8 x double> %v
2254}
2255
2256define <vscale x 8 x double> @mgather_baseidx_nxv8f64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru) {
2257; RV32-LABEL: mgather_baseidx_nxv8f64:
2258; RV32:       # %bb.0:
2259; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
2260; RV32-NEXT:    vnsrl.wi v24, v8, 0
2261; RV32-NEXT:    vsll.vi v8, v24, 3
2262; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
2263; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
2264; RV32-NEXT:    vmv.v.v v8, v16
2265; RV32-NEXT:    ret
2266;
2267; RV64-LABEL: mgather_baseidx_nxv8f64:
2268; RV64:       # %bb.0:
2269; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
2270; RV64-NEXT:    vsll.vi v8, v8, 3
2271; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
2272; RV64-NEXT:    vmv.v.v v8, v16
2273; RV64-NEXT:    ret
2274  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
2275  %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
2276  ret <vscale x 8 x double> %v
2277}
2278
2279declare <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr>, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)
2280
2281define <vscale x 16 x i8> @mgather_baseidx_nxv16i8(ptr %base, <vscale x 16 x i8> %idxs, <vscale x 16 x i1> %m, <vscale x 16 x i8> %passthru) {
2282; RV32-LABEL: mgather_baseidx_nxv16i8:
2283; RV32:       # %bb.0:
2284; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2285; RV32-NEXT:    vsext.vf4 v16, v8
2286; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
2287; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
2288; RV32-NEXT:    vmv.v.v v8, v10
2289; RV32-NEXT:    ret
2290;
2291; RV64-LABEL: mgather_baseidx_nxv16i8:
2292; RV64:       # %bb.0:
2293; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
2294; RV64-NEXT:    vsext.vf8 v16, v8
2295; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
2296; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
2297; RV64-NEXT:    csrr a1, vlenb
2298; RV64-NEXT:    srli a1, a1, 3
2299; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, ma
2300; RV64-NEXT:    vslidedown.vx v0, v0, a1
2301; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
2302; RV64-NEXT:    vsext.vf8 v16, v9
2303; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
2304; RV64-NEXT:    vluxei64.v v11, (a0), v16, v0.t
2305; RV64-NEXT:    vmv2r.v v8, v10
2306; RV64-NEXT:    ret
2307  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 16 x i8> %idxs
2308  %v = call <vscale x 16 x i8> @llvm.masked.gather.nxv16i8.nxv16p0(<vscale x 16 x ptr> %ptrs, i32 2, <vscale x 16 x i1> %m, <vscale x 16 x i8> %passthru)
2309  ret <vscale x 16 x i8> %v
2310}
2311
2312declare <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr>, i32, <vscale x 32 x i1>, <vscale x 32 x i8>)
2313
2314define <vscale x 32 x i8> @mgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, <vscale x 32 x i8> %passthru) {
2315; RV32-LABEL: mgather_baseidx_nxv32i8:
2316; RV32:       # %bb.0:
2317; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2318; RV32-NEXT:    vsext.vf4 v16, v8
2319; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
2320; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
2321; RV32-NEXT:    csrr a1, vlenb
2322; RV32-NEXT:    srli a1, a1, 2
2323; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
2324; RV32-NEXT:    vslidedown.vx v0, v0, a1
2325; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
2326; RV32-NEXT:    vsext.vf4 v16, v10
2327; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
2328; RV32-NEXT:    vluxei32.v v14, (a0), v16, v0.t
2329; RV32-NEXT:    vmv4r.v v8, v12
2330; RV32-NEXT:    ret
2331;
2332; RV64-LABEL: mgather_baseidx_nxv32i8:
2333; RV64:       # %bb.0:
2334; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
2335; RV64-NEXT:    vmv1r.v v16, v0
2336; RV64-NEXT:    vsext.vf8 v24, v8
2337; RV64-NEXT:    csrr a1, vlenb
2338; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
2339; RV64-NEXT:    vluxei64.v v12, (a0), v24, v0.t
2340; RV64-NEXT:    srli a2, a1, 3
2341; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
2342; RV64-NEXT:    vslidedown.vx v0, v0, a2
2343; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
2344; RV64-NEXT:    vsext.vf8 v24, v9
2345; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
2346; RV64-NEXT:    vluxei64.v v13, (a0), v24, v0.t
2347; RV64-NEXT:    srli a1, a1, 2
2348; RV64-NEXT:    vsetvli a3, zero, e8, mf2, ta, ma
2349; RV64-NEXT:    vslidedown.vx v8, v16, a1
2350; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
2351; RV64-NEXT:    vslidedown.vx v0, v8, a2
2352; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
2353; RV64-NEXT:    vsext.vf8 v16, v11
2354; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
2355; RV64-NEXT:    vluxei64.v v15, (a0), v16, v0.t
2356; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
2357; RV64-NEXT:    vsext.vf8 v16, v10
2358; RV64-NEXT:    vmv1r.v v0, v8
2359; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
2360; RV64-NEXT:    vluxei64.v v14, (a0), v16, v0.t
2361; RV64-NEXT:    vmv4r.v v8, v12
2362; RV64-NEXT:    ret
2363  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 32 x i8> %idxs
2364  %v = call <vscale x 32 x i8> @llvm.masked.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, i32 2, <vscale x 32 x i1> %m, <vscale x 32 x i8> %passthru)
2365  ret <vscale x 32 x i8> %v
2366}
2367
2368define <vscale x 1 x i8> @mgather_baseidx_zext_nxv1i1_nxv1i8(ptr %base, <vscale x 1 x i1> %idxs, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru) {
2369; CHECK-LABEL: mgather_baseidx_zext_nxv1i1_nxv1i8:
2370; CHECK:       # %bb.0:
2371; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
2372; CHECK-NEXT:    vmv.v.i v10, 0
2373; CHECK-NEXT:    vmerge.vim v10, v10, 1, v0
2374; CHECK-NEXT:    vmv1r.v v0, v8
2375; CHECK-NEXT:    vluxei8.v v9, (a0), v10, v0.t
2376; CHECK-NEXT:    vmv1r.v v8, v9
2377; CHECK-NEXT:    ret
2378  %eidxs = zext <vscale x 1 x i1> %idxs to <vscale x 1 x i8>
2379  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 1 x i8> %eidxs
2380  %v = call <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, i32 1, <vscale x 1 x i1> %m, <vscale x 1 x i8> %passthru)
2381  ret <vscale x 1 x i8> %v
2382}
2383