; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
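;
; Tests lowering of llvm.vp.gather to the RVV indexed-unordered loads:
; vluxei32 with 32-bit offsets on RV32 and vluxei64 with 64-bit offsets
; on RV64. The zvfh and zvfhmin runs share the RV32/RV64 check prefixes,
; so both feature sets must produce identical code.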

declare <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i8> @vpgather_nxv1i8(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i8> %v
}

declare <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i8> @vpgather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i8> %v
}

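; Gathers feeding a sign/zero extension stay narrow loads followed by a
; separate vsext/vzext of the result rather than widening the memory access.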
define <vscale x 2 x i16> @vpgather_nxv2i8_sextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i16> @vpgather_nxv2i8_zextload_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i16>
  ret <vscale x 2 x i16> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i8_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i8_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i8_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsext.vf8 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i8_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vzext.vf8 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i8_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vzext.vf8 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i8> @llvm.vp.gather.nxv2i8.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i8> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i8> @vpgather_nxv4i8(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i8> %v
}

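; With a known all-ones mask the gather lowers to an unmasked vluxei
; (no v0.t operand).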
define <vscale x 4 x i8> @vpgather_truemask_nxv4i8(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i8> %v
}

declare <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i8> @vpgather_nxv8i8(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

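; Base+index forms: the index vector is extended to XLEN-wide byte offsets
; (and scaled by the element size when it is larger than one byte) before
; the indexed load.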
define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i8> %v
}

declare <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr>, <vscale x 32 x i1>, i32)

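; The XLEN-wide offset vector for nxv32 elements needs two m8 register
; groups on RV32 and four on RV64, so the gather is split by EVL into
; that many pieces, with the mask slid down for each piece.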
define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(ptr %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv32i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
; RV32-NEXT:    vmv1r.v v12, v0
; RV32-NEXT:    csrr a3, vlenb
; RV32-NEXT:    slli a2, a3, 1
; RV32-NEXT:    srli a3, a3, 2
; RV32-NEXT:    sub a4, a1, a2
; RV32-NEXT:    sltu a5, a1, a4
; RV32-NEXT:    addi a5, a5, -1
; RV32-NEXT:    and a4, a5, a4
; RV32-NEXT:    vslidedown.vx v0, v0, a3
; RV32-NEXT:    vsetvli zero, a4, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v10
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (a0), v16, v0.t
; RV32-NEXT:    bltu a1, a2, .LBB12_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:  .LBB12_2:
; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vmv1r.v v0, v12
; RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv32i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e8, mf2, ta, ma
; RV64-NEXT:    vmv1r.v v12, v0
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    slli a3, a2, 1
; RV64-NEXT:    srli a4, a2, 2
; RV64-NEXT:    sub a5, a1, a3
; RV64-NEXT:    vslidedown.vx v13, v0, a4
; RV64-NEXT:    sltu a4, a1, a5
; RV64-NEXT:    addi a4, a4, -1
; RV64-NEXT:    and a5, a4, a5
; RV64-NEXT:    sub a4, a5, a2
; RV64-NEXT:    sltu a6, a5, a4
; RV64-NEXT:    addi a6, a6, -1
; RV64-NEXT:    and a6, a6, a4
; RV64-NEXT:    srli a4, a2, 3
; RV64-NEXT:    vsetvli a7, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v13, a4
; RV64-NEXT:    vsetvli zero, a6, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v11
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v11, (a0), v16, v0.t
; RV64-NEXT:    bltu a5, a2, .LBB12_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a5, a2
; RV64-NEXT:  .LBB12_2:
; RV64-NEXT:    vsetvli zero, a5, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v10
; RV64-NEXT:    vmv1r.v v0, v13
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    bltu a1, a3, .LBB12_4
; RV64-NEXT:  # %bb.3:
; RV64-NEXT:    mv a1, a3
; RV64-NEXT:  .LBB12_4:
; RV64-NEXT:    sub a3, a1, a2
; RV64-NEXT:    sltu a5, a1, a3
; RV64-NEXT:    addi a5, a5, -1
; RV64-NEXT:    and a3, a5, a3
; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v12, a4
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v9
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v9, (a0), v16, v0.t
; RV64-NEXT:    bltu a1, a2, .LBB12_6
; RV64-NEXT:  # %bb.5:
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:  .LBB12_6:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vmv1r.v v0, v12
; RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <vscale x 32 x i8> %idxs
  %v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0(<vscale x 32 x ptr> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
  ret <vscale x 32 x i8> %v
}

declare <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i16> @vpgather_nxv1i16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i16> %v
}

declare <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i16> @vpgather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i16> %v
}

define <vscale x 2 x i32> @vpgather_nxv2i16_sextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i32> @vpgather_nxv2i16_zextload_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i32>
  ret <vscale x 2 x i32> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i16_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i16_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i16_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i16> @llvm.vp.gather.nxv2i16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i16> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i16> @vpgather_nxv4i16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i16> %v
}

define <vscale x 4 x i16> @vpgather_truemask_nxv4i16(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i16> @llvm.vp.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i16> %v
}

declare <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i16> @vpgather_nxv8i16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_sext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

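; Offsets zero-extended from i8 fit in 16 bits, so a narrower vluxei16
; can be used; vwaddu.vv computes the doubled offsets in one widening op.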
define <vscale x 8 x i16> @vpgather_baseidx_zext_nxv8i8_nxv8i16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v8, v8
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

define <vscale x 8 x i16> @vpgather_baseidx_nxv8i16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v8, v8
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i16, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i16> @llvm.vp.gather.nxv8i16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i16> %v
}

declare <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i32> @vpgather_nxv1i32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i32> @llvm.vp.gather.nxv1i32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i32> %v
}

declare <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i32> @vpgather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i32> %v
}

define <vscale x 2 x i64> @vpgather_nxv2i32_sextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32_sextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = sext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

define <vscale x 2 x i64> @vpgather_nxv2i32_zextload_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i32_zextload_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i32> @llvm.vp.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  %ev = zext <vscale x 2 x i32> %v to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %ev
}

declare <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @vpgather_nxv4i32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %v
}

define <vscale x 4 x i32> @vpgather_truemask_nxv4i32(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i32> @llvm.vp.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i32> %v
}

declare <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i32> @vpgather_nxv8i32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i8_nxv8i32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 2
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v12, v10, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_sext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_zext_nxv8i16_nxv8i32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 2
; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

define <vscale x 8 x i32> @vpgather_baseidx_nxv8i32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i32> @llvm.vp.gather.nxv8i32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i32> %v
}

declare <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x i64> @vpgather_nxv1i64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 1 x i64> @llvm.vp.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x i64> %v
}

declare <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x i64> @vpgather_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 2 x i64> @llvm.vp.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x i64> %v
}

declare <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i64> @vpgather_nxv4i64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i64> %v
}

define <vscale x 4 x i64> @vpgather_truemask_nxv4i64(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %v = call <vscale x 4 x i64> @llvm.vp.gather.nxv4i64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x i64> %v
}

declare <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x i64> @vpgather_nxv8i64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v16, v10, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v16, v10, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v16, v12, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x i64> %v
}

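; On RV32, XLEN-wide offsets suffice, so i64 indices are truncated to
; 32 bits with vnsrl before the shift and indexed load.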
1207define <vscale x 8 x i64> @vpgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
1208; RV32-LABEL: vpgather_baseidx_nxv8i64:
1209; RV32:       # %bb.0:
1210; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
1211; RV32-NEXT:    vnsrl.wi v16, v8, 0
1212; RV32-NEXT:    vsll.vi v16, v16, 3
1213; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
1214; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
1215; RV32-NEXT:    ret
1216;
1217; RV64-LABEL: vpgather_baseidx_nxv8i64:
1218; RV64:       # %bb.0:
1219; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
1220; RV64-NEXT:    vsll.vi v8, v8, 3
1221; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
1222; RV64-NEXT:    ret
1223  %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %idxs
1224  %v = call <vscale x 8 x i64> @llvm.vp.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
1225  ret <vscale x 8 x i64> %v
1226}
1227
declare <vscale x 1 x bfloat> @llvm.vp.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x bfloat> @vpgather_nxv1bf16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x bfloat> @llvm.vp.gather.nxv1bf16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}

declare <vscale x 2 x bfloat> @llvm.vp.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x bfloat> @vpgather_nxv2bf16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x bfloat> @llvm.vp.gather.nxv2bf16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x bfloat> %v
}

declare <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x bfloat> @vpgather_nxv4bf16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

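; A known all-true mask (splat (i1 1)) folds to the unmasked form of the
; gather: no v0.t operand below.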
define <vscale x 4 x bfloat> @vpgather_truemask_nxv4bf16(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x bfloat> @llvm.vp.gather.nxv4bf16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x bfloat> %v
}

declare <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x bfloat> @vpgather_nxv8bf16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

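; i8 indices are extended to XLEN before being scaled by the 2-byte element
; size; the *2 is emitted as vadd.vv x, x, x (x + x == x << 1).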
define <vscale x 8 x bfloat> @vpgather_baseidx_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

define <vscale x 8 x bfloat> @vpgather_baseidx_sext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

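; Zero-extended i8 indices scaled by 2 are known to fit in 16 bits, so both
; targets build the offsets with a single widening vwaddu.vv and use the
; cheaper 16-bit index EEW (vluxei16).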
define <vscale x 8 x bfloat> @vpgather_baseidx_zext_nxv8i8_nxv8bf16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v8, v8
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

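; Native i16 indices: RV32 gets the sign extension and the *2 in one widening
; vwadd.vv to e32; RV64 must first sign extend all the way to e64.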
define <vscale x 8 x bfloat> @vpgather_baseidx_nxv8bf16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v8, v8
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x bfloat> @llvm.vp.gather.nxv8bf16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x bfloat> %v
}

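; The f16 tests mirror the bf16 ones above; since a gather never operates on
; the loaded half values, the same output is expected with zvfh or zvfhmin.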
declare <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x half> @vpgather_nxv1f16(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x half> @llvm.vp.gather.nxv1f16.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x half> %v
}

declare <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x half> @vpgather_nxv2f16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x half> @llvm.vp.gather.nxv2f16.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x half> %v
}

declare <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x half> @vpgather_nxv4f16(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x half> %v
}

define <vscale x 4 x half> @vpgather_truemask_nxv4f16(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x half> @llvm.vp.gather.nxv4f16.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x half> %v
}

declare <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x half> @vpgather_nxv8f16(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_sext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vadd.vv v12, v12, v12
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_zext_nxv8i8_nxv8f16(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV32-NEXT:    vwaddu.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vwaddu.vv v10, v8, v8
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %eidxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

define <vscale x 8 x half> @vpgather_baseidx_nxv8f16(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vwadd.vv v12, v8, v8
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vadd.vv v16, v16, v16
; RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x half> @llvm.vp.gather.nxv8f16.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x half> %v
}

declare <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x float> @vpgather_nxv1f32(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <vscale x 1 x float> @llvm.vp.gather.nxv1f32.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x float> %v
}

declare <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @vpgather_nxv2f32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <vscale x 2 x float> @llvm.vp.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x float> %v
}

declare <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x float> @vpgather_nxv4f32(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x float> %v
}

define <vscale x 4 x float> @vpgather_truemask_nxv4f32(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <vscale x 4 x float> @llvm.vp.gather.nxv4f32.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x float> %v
}

declare <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x float> @vpgather_nxv8f32(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v16
; RV64-NEXT:    ret
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

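; For f32 the element size is 4 bytes, so the index scaling becomes
; vsll.vi ..., 2; zero-extended i8 indices again allow the narrow vluxei16.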
define <vscale x 8 x float> @vpgather_baseidx_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i8_nxv8f32(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 2
; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v12, v10, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_sext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

define <vscale x 8 x float> @vpgather_baseidx_zext_nxv8i16_nxv8f32(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v8, v12, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 2
; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %eidxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

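; Native i32 indices match RV32's pointer width and are shifted in place;
; RV64 still has to sign extend them to e64 first.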
define <vscale x 8 x float> @vpgather_baseidx_nxv8f32(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v16, v16, 2
; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x float> @llvm.vp.gather.nxv8f32.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x float> %v
}

declare <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr>, <vscale x 1 x i1>, i32)

define <vscale x 1 x double> @vpgather_nxv1f64(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv1f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv1f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 1 x double> @llvm.vp.gather.nxv1f64.nxv1p0(<vscale x 1 x ptr> %ptrs, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x double> %v
}

declare <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr>, <vscale x 2 x i1>, i32)

define <vscale x 2 x double> @vpgather_nxv2f64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 2 x double> @llvm.vp.gather.nxv2f64.nxv2p0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %m, i32 %evl)
  ret <vscale x 2 x double> %v
}

declare <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr>, <vscale x 4 x i1>, i32)

define <vscale x 4 x double> @vpgather_nxv4f64(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x double> %v
}

define <vscale x 4 x double> @vpgather_truemask_nxv4f64(<vscale x 4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_nxv4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_nxv4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %v = call <vscale x 4 x double> @llvm.vp.gather.nxv4f64.nxv4p0(<vscale x 4 x ptr> %ptrs, <vscale x 4 x i1> splat (i1 1), i32 %evl)
  ret <vscale x 4 x double> %v
}

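; nxv6f64 is not a power of two; it is widened to nxv8f64 during type
; legalization, so the code below matches the nxv8f64 tests further down.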
declare <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr>, <vscale x 6 x i1>, i32)

define <vscale x 6 x double> @vpgather_nxv6f64(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i8_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i8_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(ptr %base, <vscale x 6 x i8> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v16, v10, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i8_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v16, v10, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i16_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i16_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(ptr %base, <vscale x 6 x i16> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i16_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v16, v12, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv6i32_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv6i32_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(ptr %base, <vscale x 6 x i32> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv6i32_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

define <vscale x 6 x double> @vpgather_baseidx_nxv6f64(ptr %base, <vscale x 6 x i64> %idxs, <vscale x 6 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv6f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v16, v8, 0
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv6f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %idxs
  %v = call <vscale x 6 x double> @llvm.vp.gather.nxv6f64.nxv6p0(<vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
  ret <vscale x 6 x double> %v
}

declare <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr>, <vscale x 8 x i1>, i32)

define <vscale x 8 x double> @vpgather_nxv8f64(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v16
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf4 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(ptr %base, <vscale x 8 x i8> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v16, v10, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i8_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v16, v10, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(ptr %base, <vscale x 8 x i16> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vzext.vf2 v12, v8
; RV32-NEXT:    vsll.vi v16, v12, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i16_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v16, v12, 3
; RV64-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i32> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(ptr %base, <vscale x 8 x i32> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vsll.vi v16, v8, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv8i32_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    vsll.vi v8, v16, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
; RV32-NEXT:    vnsrl.wi v16, v8, 0
; RV32-NEXT:    vsll.vi v16, v16, 3
; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %idxs
  %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
  ret <vscale x 8 x double> %v
}

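; nxv16f64 needs two m8 register groups, so the gather is split in half. The
; EVL for the high half is the saturating difference evl - vlenb (the
; sub/sltu/addi/and sequence), its mask is carved out of v0 with
; vslidedown.vx, and the low half's EVL is clamped to vlenb via the bltu.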
declare <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr>, <vscale x 16 x i1>, i32)

define <vscale x 16 x double> @vpgather_nxv16f64(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV32-NEXT:    vmv1r.v v24, v0
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    sub a2, a0, a1
; RV32-NEXT:    srli a3, a1, 3
; RV32-NEXT:    vslidedown.vx v0, v0, a3
; RV32-NEXT:    sltu a3, a0, a2
; RV32-NEXT:    addi a3, a3, -1
; RV32-NEXT:    and a2, a3, a2
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (zero), v12, v0.t
; RV32-NEXT:    bltu a0, a1, .LBB111_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a0, a1
; RV32-NEXT:  .LBB111_2:
; RV32-NEXT:    vmv1r.v v0, v24
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v24, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v24
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, ma
; RV64-NEXT:    vmv1r.v v24, v0
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    sub a2, a0, a1
; RV64-NEXT:    srli a3, a1, 3
; RV64-NEXT:    vslidedown.vx v0, v0, a3
; RV64-NEXT:    sltu a3, a0, a2
; RV64-NEXT:    addi a3, a3, -1
; RV64-NEXT:    and a2, a3, a2
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v16, v0.t
; RV64-NEXT:    bltu a0, a1, .LBB111_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a0, a1
; RV64-NEXT:  .LBB111_2:
; RV64-NEXT:    vmv1r.v v0, v24
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

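; When the indices need widening too, the offset vectors for both halves are
; materialized around the split: RV32 extends all 16 indices to e32 in one m8
; group, while RV64 needs two separate vsext.vf4 + vsll sequences.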
define <vscale x 16 x double> @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv1r.v v12, v0
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    vsll.vi v24, v16, 3
; RV32-NEXT:    sub a3, a1, a2
; RV32-NEXT:    srli a4, a2, 3
; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a4
; RV32-NEXT:    sltu a4, a1, a3
; RV32-NEXT:    addi a4, a4, -1
; RV32-NEXT:    and a3, a4, a3
; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    bltu a1, a2, .LBB112_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:  .LBB112_2:
; RV32-NEXT:    vmv1r.v v0, v12
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vmv1r.v v12, v0
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    sub a3, a1, a2
; RV64-NEXT:    srli a4, a2, 3
; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a4
; RV64-NEXT:    sltu a4, a1, a3
; RV64-NEXT:    addi a4, a4, -1
; RV64-NEXT:    and a3, a4, a3
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    bltu a1, a2, .LBB112_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:  .LBB112_2:
; RV64-NEXT:    vmv1r.v v0, v12
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

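; Same as above, with the i16 -> i64 sign extension made explicit in the IR.
; RV32 can still use e32 indices (vsext.vf2 plus vluxei32), since only the
; low 32 bits of the byte offset are significant on a 32-bit target.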
define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv1r.v v12, v0
; RV32-NEXT:    vsext.vf2 v16, v8
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    vsll.vi v24, v16, 3
; RV32-NEXT:    sub a3, a1, a2
; RV32-NEXT:    srli a4, a2, 3
; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a4
; RV32-NEXT:    sltu a4, a1, a3
; RV32-NEXT:    addi a4, a4, -1
; RV32-NEXT:    and a3, a4, a3
; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    bltu a1, a2, .LBB113_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:  .LBB113_2:
; RV32-NEXT:    vmv1r.v v0, v12
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
; RV64-NEXT:    vmv1r.v v12, v0
; RV64-NEXT:    vsext.vf4 v16, v10
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    sub a3, a1, a2
; RV64-NEXT:    srli a4, a2, 3
; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a4
; RV64-NEXT:    sltu a4, a1, a3
; RV64-NEXT:    addi a4, a4, -1
; RV64-NEXT:    and a3, a4, a3
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    vsetvli a3, zero, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf4 v24, v8
; RV64-NEXT:    vsll.vi v24, v24, 3
; RV64-NEXT:    bltu a1, a2, .LBB113_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:  .LBB113_2:
; RV64-NEXT:    vmv1r.v v0, v12
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}

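; With zero-extended indices, a zext i16 scaled by 8 always fits in 32 bits,
; so both RV32 and RV64 can use narrow offsets (vzext.vf2 plus vluxei32)
; rather than widening to e64.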
define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv1r.v v12, v0
; RV32-NEXT:    vzext.vf2 v16, v8
; RV32-NEXT:    csrr a2, vlenb
; RV32-NEXT:    vsll.vi v24, v16, 3
; RV32-NEXT:    sub a3, a1, a2
; RV32-NEXT:    srli a4, a2, 3
; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vx v0, v0, a4
; RV32-NEXT:    sltu a4, a1, a3
; RV32-NEXT:    addi a4, a4, -1
; RV32-NEXT:    and a3, a4, a3
; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
; RV32-NEXT:    bltu a1, a2, .LBB114_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    mv a1, a2
; RV32-NEXT:  .LBB114_2:
; RV32-NEXT:    vmv1r.v v0, v12
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV64-NEXT:    vmv1r.v v12, v0
; RV64-NEXT:    vzext.vf2 v16, v8
; RV64-NEXT:    csrr a2, vlenb
; RV64-NEXT:    vsll.vi v24, v16, 3
; RV64-NEXT:    sub a3, a1, a2
; RV64-NEXT:    srli a4, a2, 3
; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vx v0, v0, a4
; RV64-NEXT:    sltu a4, a1, a3
; RV64-NEXT:    addi a4, a4, -1
; RV64-NEXT:    and a3, a4, a3
; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
; RV64-NEXT:    vluxei32.v v16, (a0), v28, v0.t
; RV64-NEXT:    bltu a1, a2, .LBB114_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    mv a1, a2
; RV64-NEXT:  .LBB114_2:
; RV64-NEXT:    vmv1r.v v0, v12
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v24, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0(<vscale x 16 x ptr> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
  ret <vscale x 16 x double> %v
}
