; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64

declare <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i8> @vpgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i8> %v
}

define <2 x i16> @vpgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_sextload_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_sextload_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i16> @vpgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_zextload_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_zextload_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i8> %v to <2 x i16>
  ret <2 x i16> %ev
}

define <2 x i32> @vpgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_sextload_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_sextload_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @vpgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_zextload_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_zextload_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i8> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @vpgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_sextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsext.vf8 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_sextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vsext.vf8 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @vpgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i8_zextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vzext.vf8 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i8_zextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vzext.vf8 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i8> %v to <2 x i64>
  ret <2 x i64> %ev
}

declare <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr>, <3 x i1>, i32)

define <3 x i8> @vpgather_v3i8(<3 x ptr> %ptrs, <3 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v3i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v3i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> %m, i32 %evl)
  ret <3 x i8> %v
}

define <3 x i8> @vpgather_truemask_v3i8(<3 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v3i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v3i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <3 x i8> @llvm.vp.gather.v3i8.v3p0(<3 x ptr> %ptrs, <3 x i1> splat (i1 1), i32 %evl)
  ret <3 x i8> %v
}

declare <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i8> @vpgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i8> %v
}

define <4 x i8> @vpgather_truemask_v4i8(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i8> @llvm.vp.gather.v4i8.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i8> %v
}

declare <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i8> @vpgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

define <8 x i8> @vpgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <8 x i8> %idxs
  %v = call <8 x i8> @llvm.vp.gather.v8i8.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i8> %v
}

declare <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr>, <32 x i1>, i32)

define <32 x i8> @vpgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32i8:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vsext.vf4 v16, v8
; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v32i8:
; RV64:       # %bb.0:
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB13_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:  .LBB13_2:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (a0), v16, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
; RV64-NEXT:    vslidedown.vi v8, v8, 16
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsext.vf8 v16, v8
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
; RV64-NEXT:    li a0, 32
; RV64-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
; RV64-NEXT:    vslideup.vi v10, v8, 16
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i8, ptr %base, <32 x i8> %idxs
  %v = call <32 x i8> @llvm.vp.gather.v32i8.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x i8> %v
}

declare <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i16> @vpgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i16> %v
}

define <2 x i32> @vpgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_sextload_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vsext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16_sextload_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vsext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i32> @vpgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_zextload_v2i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV32-NEXT:    vzext.vf2 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16_zextload_v2i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; RV64-NEXT:    vzext.vf2 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i16> %v to <2 x i32>
  ret <2 x i32> %ev
}

define <2 x i64> @vpgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_sextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vsext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16_sextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vsext.vf4 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = sext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}

define <2 x i64> @vpgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i16_zextload_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT:    vzext.vf4 v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i16_zextload_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT:    vzext.vf4 v8, v9
; RV64-NEXT:    ret
  %v = call <2 x i16> @llvm.vp.gather.v2i16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  %ev = zext <2 x i16> %v to <2 x i64>
  ret <2 x i64> %ev
}

declare <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i16> @vpgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i16> %v
}

define <4 x i16> @vpgather_truemask_v4i16(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i16> @llvm.vp.gather.v4i16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i16> %v
}

declare <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i16> @vpgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
480}
481
482define <8 x i16> @vpgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
483; RV32-LABEL: vpgather_baseidx_v8i8_v8i16:
484; RV32:       # %bb.0:
485; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
486; RV32-NEXT:    vsext.vf4 v10, v8
487; RV32-NEXT:    vadd.vv v10, v10, v10
488; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
489; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
490; RV32-NEXT:    ret
491;
492; RV64-LABEL: vpgather_baseidx_v8i8_v8i16:
493; RV64:       # %bb.0:
494; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
495; RV64-NEXT:    vsext.vf8 v12, v8
496; RV64-NEXT:    vadd.vv v12, v12, v12
497; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
498; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
499; RV64-NEXT:    ret
500  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i8> %idxs
501  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
502  ret <8 x i16> %v
503}
504
505define <8 x i16> @vpgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
506; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i16:
507; RV32:       # %bb.0:
508; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
509; RV32-NEXT:    vsext.vf4 v10, v8
510; RV32-NEXT:    vadd.vv v10, v10, v10
511; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
512; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
513; RV32-NEXT:    ret
514;
515; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i16:
516; RV64:       # %bb.0:
517; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
518; RV64-NEXT:    vsext.vf8 v12, v8
519; RV64-NEXT:    vadd.vv v12, v12, v12
520; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
521; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
522; RV64-NEXT:    ret
523  %eidxs = sext <8 x i8> %idxs to <8 x i16>
524  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
525  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
526  ret <8 x i16> %v
527}
528
529define <8 x i16> @vpgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
530; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i16:
531; RV32:       # %bb.0:
532; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
533; RV32-NEXT:    vwaddu.vv v9, v8, v8
534; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
535; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
536; RV32-NEXT:    ret
537;
538; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i16:
539; RV64:       # %bb.0:
540; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
541; RV64-NEXT:    vwaddu.vv v9, v8, v8
542; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
543; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
544; RV64-NEXT:    ret
545  %eidxs = zext <8 x i8> %idxs to <8 x i16>
546  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %eidxs
547  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
548  ret <8 x i16> %v
549}
550
551define <8 x i16> @vpgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
552; RV32-LABEL: vpgather_baseidx_v8i16:
553; RV32:       # %bb.0:
554; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
555; RV32-NEXT:    vwadd.vv v10, v8, v8
556; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
557; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
558; RV32-NEXT:    ret
559;
560; RV64-LABEL: vpgather_baseidx_v8i16:
561; RV64:       # %bb.0:
562; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
563; RV64-NEXT:    vsext.vf4 v12, v8
564; RV64-NEXT:    vadd.vv v12, v12, v12
565; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
566; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
567; RV64-NEXT:    ret
568  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i16> %idxs
569  %v = call <8 x i16> @llvm.vp.gather.v8i16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
570  ret <8 x i16> %v
571}
572
573declare <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr>, <2 x i1>, i32)
574
575define <2 x i32> @vpgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
576; RV32-LABEL: vpgather_v2i32:
577; RV32:       # %bb.0:
578; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
579; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
580; RV32-NEXT:    ret
581;
582; RV64-LABEL: vpgather_v2i32:
583; RV64:       # %bb.0:
584; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
585; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
586; RV64-NEXT:    vmv1r.v v8, v9
587; RV64-NEXT:    ret
588  %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
589  ret <2 x i32> %v
590}
591
592define <2 x i64> @vpgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
593; RV32-LABEL: vpgather_v2i32_sextload_v2i64:
594; RV32:       # %bb.0:
595; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
596; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
597; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
598; RV32-NEXT:    vsext.vf2 v8, v9
599; RV32-NEXT:    ret
600;
601; RV64-LABEL: vpgather_v2i32_sextload_v2i64:
602; RV64:       # %bb.0:
603; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
604; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
605; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
606; RV64-NEXT:    vsext.vf2 v8, v9
607; RV64-NEXT:    ret
608  %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
609  %ev = sext <2 x i32> %v to <2 x i64>
610  ret <2 x i64> %ev
611}
612
613define <2 x i64> @vpgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
614; RV32-LABEL: vpgather_v2i32_zextload_v2i64:
615; RV32:       # %bb.0:
616; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
617; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
618; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
619; RV32-NEXT:    vzext.vf2 v8, v9
620; RV32-NEXT:    ret
621;
622; RV64-LABEL: vpgather_v2i32_zextload_v2i64:
623; RV64:       # %bb.0:
624; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
625; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
626; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
627; RV64-NEXT:    vzext.vf2 v8, v9
628; RV64-NEXT:    ret
629  %v = call <2 x i32> @llvm.vp.gather.v2i32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
630  %ev = zext <2 x i32> %v to <2 x i64>
631  ret <2 x i64> %ev
632}
633
634declare <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr>, <4 x i1>, i32)
635
636define <4 x i32> @vpgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
637; RV32-LABEL: vpgather_v4i32:
638; RV32:       # %bb.0:
639; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i32> %v
}

define <4 x i32> @vpgather_truemask_v4i32(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x i32> @llvm.vp.gather.v4i32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i32> %v
}

declare <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i32> @vpgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i8> %idxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v10, v9, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v10, v9, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i16> %idxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v8, v10, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

define <8 x i32> @vpgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %idxs
  %v = call <8 x i32> @llvm.vp.gather.v8i32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i32> %v
}

declare <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x i64> @vpgather_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <2 x i64> @llvm.vp.gather.v2i64.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x i64> %v
}

declare <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x i64> @vpgather_v4i64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x i64> %v
}

define <4 x i64> @vpgather_truemask_v4i64(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %v = call <4 x i64> @llvm.vp.gather.v4i64.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x i64> %v
}

declare <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x i64> @vpgather_v8i64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i8> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v12, v9, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v12, v9, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i16> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v12, v10, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i32> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

define <8 x i64> @vpgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v8, 0
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds i64, ptr %base, <8 x i64> %idxs
  %v = call <8 x i64> @llvm.vp.gather.v8i64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x i64> %v
}

declare <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x bfloat> @vpgather_v2bf16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <2 x bfloat> @llvm.vp.gather.v2bf16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x bfloat> %v
}

declare <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x bfloat> @vpgather_v4bf16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x bfloat> %v
}

define <4 x bfloat> @vpgather_truemask_v4bf16(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x bfloat> @llvm.vp.gather.v4bf16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x bfloat> %v
}

declare <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x bfloat> @vpgather_v8bf16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

define <8 x bfloat> @vpgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i8> %idxs
  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x bfloat> %v
}

define <8 x bfloat> @vpgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8bf16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8bf16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
1280  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1281  ret <8 x bfloat> %v
1282}
1283
1284define <8 x bfloat> @vpgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1285; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8bf16:
1286; RV32:       # %bb.0:
1287; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
1288; RV32-NEXT:    vwaddu.vv v9, v8, v8
1289; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1290; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
1291; RV32-NEXT:    ret
1292;
1293; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8bf16:
1294; RV64:       # %bb.0:
1295; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
1296; RV64-NEXT:    vwaddu.vv v9, v8, v8
1297; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1298; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
1299; RV64-NEXT:    ret
1300  %eidxs = zext <8 x i8> %idxs to <8 x i16>
1301  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %eidxs
1302  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1303  ret <8 x bfloat> %v
1304}
1305
1306define <8 x bfloat> @vpgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
1307; RV32-LABEL: vpgather_baseidx_v8bf16:
1308; RV32:       # %bb.0:
1309; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1310; RV32-NEXT:    vwadd.vv v10, v8, v8
1311; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1312; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
1313; RV32-NEXT:    ret
1314;
1315; RV64-LABEL: vpgather_baseidx_v8bf16:
1316; RV64:       # %bb.0:
1317; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
1318; RV64-NEXT:    vsext.vf4 v12, v8
1319; RV64-NEXT:    vadd.vv v12, v12, v12
1320; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
1321; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
1322; RV64-NEXT:    ret
1323  %ptrs = getelementptr inbounds bfloat, ptr %base, <8 x i16> %idxs
1324  %v = call <8 x bfloat> @llvm.vp.gather.v8bf16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
1325  ret <8 x bfloat> %v
1326}
1327
1328declare <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr>, <2 x i1>, i32)
1329
1330define <2 x half> @vpgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
1331; RV32-LABEL: vpgather_v2f16:
1332; RV32:       # %bb.0:
1333; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
1334; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1335; RV32-NEXT:    vmv1r.v v8, v9
1336; RV32-NEXT:    ret
1337;
1338; RV64-LABEL: vpgather_v2f16:
1339; RV64:       # %bb.0:
1340; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
1341; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
1342; RV64-NEXT:    vmv1r.v v8, v9
1343; RV64-NEXT:    ret
1344  %v = call <2 x half> @llvm.vp.gather.v2f16.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
1345  ret <2 x half> %v
1346}
1347
1348declare <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr>, <4 x i1>, i32)
1349
1350define <4 x half> @vpgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
1351; RV32-LABEL: vpgather_v4f16:
1352; RV32:       # %bb.0:
1353; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1354; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
1355; RV32-NEXT:    vmv1r.v v8, v9
1356; RV32-NEXT:    ret
1357;
1358; RV64-LABEL: vpgather_v4f16:
1359; RV64:       # %bb.0:
1360; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
1361; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
1362; RV64-NEXT:    vmv1r.v v8, v10
1363; RV64-NEXT:    ret
1364  %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
1365  ret <4 x half> %v
1366}
1367
define <4 x half> @vpgather_truemask_v4f16(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8
; RV32-NEXT:    vmv1r.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv1r.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x half> @llvm.vp.gather.v4f16.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x half> %v
}

declare <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x half> @vpgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i8> %idxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

define <8 x half> @vpgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vadd.vv v10, v10, v10
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

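; Zero-extended i8 indices stay within 16 bits, so both targets scale with a
; widening add (vwaddu) and use the narrower vluxei16 form.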
define <8 x half> @vpgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT:    vwaddu.vv v9, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT:    vwaddu.vv v9, v8, v8
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v9, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i16>
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %eidxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

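; Unextended i16 indices: RV32 scales and widens to e32 in a single vwadd,
; while RV64 must sign-extend to e64 for vluxei64.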
define <8 x half> @vpgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8f16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vwadd.vv v10, v8, v8
; RV32-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8f16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vadd.vv v12, v12, v12
; RV64-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds half, ptr %base, <8 x i16> %idxs
  %v = call <8 x half> @llvm.vp.gather.v8f16.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x half> %v
}

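; Gathers of f32 elements; byte offsets are formed by shifting the index left
; by 2 (vsll.vi ..., 2).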
declare <2 x float> @llvm.vp.gather.v2f32.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x float> @vpgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, ma
; RV64-NEXT:    vluxei64.v v9, (zero), v8, v0.t
; RV64-NEXT:    vmv1r.v v8, v9
; RV64-NEXT:    ret
  %v = call <2 x float> @llvm.vp.gather.v2f32.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x float> %v
}

declare <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x float> @vpgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x float> %v
}

define <4 x float> @vpgather_truemask_v4f32(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
; RV64-NEXT:    vluxei64.v v10, (zero), v8
; RV64-NEXT:    vmv.v.v v8, v10
; RV64-NEXT:    ret
  %v = call <4 x float> @llvm.vp.gather.v4f32.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x float> %v
}

declare <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x float> @vpgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (zero), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v12, (zero), v8, v0.t
; RV64-NEXT:    vmv.v.v v8, v12
; RV64-NEXT:    ret
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vpgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i8> %idxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vpgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vpgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v10, v9, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v10, v9, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v10, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vpgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i16> %idxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vpgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

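; Zero-extended i16 indices fit in 32 bits, so even RV64 can use vluxei32 here.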
define <8 x float> @vpgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v8, v10, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v8, v10, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %eidxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

define <8 x float> @vpgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8f32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v8, v8, 2
; RV32-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v8, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8f32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v12, v12, 2
; RV64-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds float, ptr %base, <8 x i32> %idxs
  %v = call <8 x float> @llvm.vp.gather.v8f32.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x float> %v
}

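; Gathers of f64 elements; byte offsets are formed with vsll.vi ..., 3.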
declare <2 x double> @llvm.vp.gather.v2f64.v2p0(<2 x ptr>, <2 x i1>, i32)

define <2 x double> @vpgather_v2f64(<2 x ptr> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v2f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT:    vluxei32.v v9, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v9
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v2f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <2 x double> @llvm.vp.gather.v2f64.v2p0(<2 x ptr> %ptrs, <2 x i1> %m, i32 %evl)
  ret <2 x double> %v
}

declare <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr>, <4 x i1>, i32)

define <4 x double> @vpgather_v4f64(<4 x ptr> %ptrs, <4 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> %m, i32 %evl)
  ret <4 x double> %v
}

define <4 x double> @vpgather_truemask_v4f64(<4 x ptr> %ptrs, i32 zeroext %evl) {
; RV32-LABEL: vpgather_truemask_v4f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT:    vluxei32.v v10, (zero), v8
; RV32-NEXT:    vmv.v.v v8, v10
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_truemask_v4f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8
; RV64-NEXT:    ret
  %v = call <4 x double> @llvm.vp.gather.v4f64.v4p0(<4 x ptr> %ptrs, <4 x i1> splat (i1 1), i32 %evl)
  ret <4 x double> %v
}

declare <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr>, <8 x i1>, i32)

define <8 x double> @vpgather_v8f64(<8 x ptr> %ptrs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v12, (zero), v8, v0.t
; RV32-NEXT:    vmv.v.v v8, v12
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    ret
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i8> %idxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf4 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf8 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i8_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV32-NEXT:    vzext.vf2 v9, v8
; RV32-NEXT:    vsll.vi v12, v9, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i8_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; RV64-NEXT:    vzext.vf2 v9, v8
; RV64-NEXT:    vsll.vi v12, v9, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei16.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i16> %idxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf4 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i16_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vzext.vf2 v10, v8
; RV32-NEXT:    vsll.vi v12, v10, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i16_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV64-NEXT:    vzext.vf2 v10, v8
; RV64-NEXT:    vsll.vi v12, v10, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i16> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i32> %idxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_sext_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_sext_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = sext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

define <8 x double> @vpgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_zext_v8i32_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vsll.vi v12, v8, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_zext_v8i32_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vzext.vf2 v12, v8
; RV64-NEXT:    vsll.vi v8, v12, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %eidxs = zext <8 x i32> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %eidxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

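; With native i64 indices, RV32 first truncates them to 32 bits with vnsrl so
; a vluxei32 can be used.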
define <8 x double> @vpgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v8f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; RV32-NEXT:    vnsrl.wi v12, v8, 0
; RV32-NEXT:    vsll.vi v12, v12, 3
; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v8f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <8 x i64> %idxs
  %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0(<8 x ptr> %ptrs, <8 x i1> %m, i32 %evl)
  ret <8 x double> %v
}

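; 32-element f64 gathers are split in two: the first half gathers min(evl, 16)
; elements, the second gathers the remainder with the mask slid down by two
; bytes (16 mask bits).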
declare <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr>, <32 x i1>, i32)

define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    mv a1, a0
; RV32-NEXT:    bltu a0, a2, .LBB94_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB94_2:
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v24, (zero), v8, v0.t
; RV32-NEXT:    addi a1, a0, -16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    sltu a0, a0, a1
; RV32-NEXT:    addi a0, a0, -1
; RV32-NEXT:    and a0, a0, a1
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v8, 16
; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (zero), v8, v0.t
; RV32-NEXT:    vmv8r.v v8, v24
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:    mv a1, a0
; RV64-NEXT:    bltu a0, a2, .LBB94_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a1, 16
; RV64-NEXT:  .LBB94_2:
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
; RV64-NEXT:    addi a1, a0, -16
; RV64-NEXT:    sltu a0, a0, a1
; RV64-NEXT:    addi a0, a0, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    and a0, a0, a1
; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (zero), v16, v0.t
; RV64-NEXT:    ret
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}

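; Split gathers with computed offsets: RV32 scales all 32 indices at once,
; while RV64 extends and scales each 16-element half separately.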
2089
2090define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2091; RV32-LABEL: vpgather_baseidx_v32i8_v32f64:
2092; RV32:       # %bb.0:
2093; RV32-NEXT:    li a2, 32
2094; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2095; RV32-NEXT:    vsext.vf4 v16, v8
2096; RV32-NEXT:    li a3, 16
2097; RV32-NEXT:    vsll.vi v16, v16, 3
2098; RV32-NEXT:    mv a2, a1
2099; RV32-NEXT:    bltu a1, a3, .LBB95_2
2100; RV32-NEXT:  # %bb.1:
2101; RV32-NEXT:    li a2, 16
2102; RV32-NEXT:  .LBB95_2:
2103; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2104; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
2105; RV32-NEXT:    addi a2, a1, -16
2106; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2107; RV32-NEXT:    vslidedown.vi v0, v0, 2
2108; RV32-NEXT:    sltu a1, a1, a2
2109; RV32-NEXT:    addi a1, a1, -1
2110; RV32-NEXT:    and a1, a1, a2
2111; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2112; RV32-NEXT:    vslidedown.vi v24, v16, 16
2113; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2114; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
2115; RV32-NEXT:    ret
2116;
2117; RV64-LABEL: vpgather_baseidx_v32i8_v32f64:
2118; RV64:       # %bb.0:
2119; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
2120; RV64-NEXT:    vslidedown.vi v10, v8, 16
2121; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2122; RV64-NEXT:    vsext.vf8 v24, v8
2123; RV64-NEXT:    li a3, 16
2124; RV64-NEXT:    vsext.vf8 v16, v10
2125; RV64-NEXT:    vsll.vi v16, v16, 3
2126; RV64-NEXT:    vsll.vi v8, v24, 3
2127; RV64-NEXT:    mv a2, a1
2128; RV64-NEXT:    bltu a1, a3, .LBB95_2
2129; RV64-NEXT:  # %bb.1:
2130; RV64-NEXT:    li a2, 16
2131; RV64-NEXT:  .LBB95_2:
2132; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2133; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
2134; RV64-NEXT:    addi a2, a1, -16
2135; RV64-NEXT:    sltu a1, a1, a2
2136; RV64-NEXT:    addi a1, a1, -1
2137; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2138; RV64-NEXT:    vslidedown.vi v0, v0, 2
2139; RV64-NEXT:    and a1, a1, a2
2140; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2141; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
2142; RV64-NEXT:    ret
2143  %ptrs = getelementptr inbounds double, ptr %base, <32 x i8> %idxs
2144  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2145  ret <32 x double> %v
2146}
2147
2148define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2149; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
2150; RV32:       # %bb.0:
2151; RV32-NEXT:    li a2, 32
2152; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2153; RV32-NEXT:    vsext.vf4 v16, v8
2154; RV32-NEXT:    li a3, 16
2155; RV32-NEXT:    vsll.vi v16, v16, 3
2156; RV32-NEXT:    mv a2, a1
2157; RV32-NEXT:    bltu a1, a3, .LBB96_2
2158; RV32-NEXT:  # %bb.1:
2159; RV32-NEXT:    li a2, 16
2160; RV32-NEXT:  .LBB96_2:
2161; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2162; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
2163; RV32-NEXT:    addi a2, a1, -16
2164; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2165; RV32-NEXT:    vslidedown.vi v0, v0, 2
2166; RV32-NEXT:    sltu a1, a1, a2
2167; RV32-NEXT:    addi a1, a1, -1
2168; RV32-NEXT:    and a1, a1, a2
2169; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2170; RV32-NEXT:    vslidedown.vi v24, v16, 16
2171; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2172; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
2173; RV32-NEXT:    ret
2174;
2175; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
2176; RV64:       # %bb.0:
2177; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2178; RV64-NEXT:    vsext.vf8 v24, v8
2179; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, ma
2180; RV64-NEXT:    vslidedown.vi v8, v8, 16
2181; RV64-NEXT:    li a3, 16
2182; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2183; RV64-NEXT:    vsext.vf8 v16, v8
2184; RV64-NEXT:    vsll.vi v16, v16, 3
2185; RV64-NEXT:    vsll.vi v8, v24, 3
2186; RV64-NEXT:    mv a2, a1
2187; RV64-NEXT:    bltu a1, a3, .LBB96_2
2188; RV64-NEXT:  # %bb.1:
2189; RV64-NEXT:    li a2, 16
2190; RV64-NEXT:  .LBB96_2:
2191; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2192; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
2193; RV64-NEXT:    addi a2, a1, -16
2194; RV64-NEXT:    sltu a1, a1, a2
2195; RV64-NEXT:    addi a1, a1, -1
2196; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2197; RV64-NEXT:    vslidedown.vi v0, v0, 2
2198; RV64-NEXT:    and a1, a1, a2
2199; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2200; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
2201; RV64-NEXT:    ret
2202  %eidxs = sext <32 x i8> %idxs to <32 x i64>
2203  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2204  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2205  ret <32 x double> %v
2206}
2207
2208define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2209; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
2210; RV32:       # %bb.0:
2211; RV32-NEXT:    li a2, 32
2212; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
2213; RV32-NEXT:    vzext.vf2 v12, v8
2214; RV32-NEXT:    li a3, 16
2215; RV32-NEXT:    vsll.vi v16, v12, 3
2216; RV32-NEXT:    mv a2, a1
2217; RV32-NEXT:    bltu a1, a3, .LBB97_2
2218; RV32-NEXT:  # %bb.1:
2219; RV32-NEXT:    li a2, 16
2220; RV32-NEXT:  .LBB97_2:
2221; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2222; RV32-NEXT:    vluxei16.v v8, (a0), v16, v0.t
2223; RV32-NEXT:    addi a2, a1, -16
2224; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2225; RV32-NEXT:    vslidedown.vi v0, v0, 2
2226; RV32-NEXT:    sltu a1, a1, a2
2227; RV32-NEXT:    addi a1, a1, -1
2228; RV32-NEXT:    and a1, a1, a2
2229; RV32-NEXT:    vsetivli zero, 16, e16, m4, ta, ma
2230; RV32-NEXT:    vslidedown.vi v24, v16, 16
2231; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2232; RV32-NEXT:    vluxei16.v v16, (a0), v24, v0.t
2233; RV32-NEXT:    ret
2234;
2235; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
2236; RV64:       # %bb.0:
2237; RV64-NEXT:    li a2, 32
2238; RV64-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
2239; RV64-NEXT:    vzext.vf2 v12, v8
2240; RV64-NEXT:    li a3, 16
2241; RV64-NEXT:    vsll.vi v16, v12, 3
2242; RV64-NEXT:    mv a2, a1
2243; RV64-NEXT:    bltu a1, a3, .LBB97_2
2244; RV64-NEXT:  # %bb.1:
2245; RV64-NEXT:    li a2, 16
2246; RV64-NEXT:  .LBB97_2:
2247; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2248; RV64-NEXT:    vluxei16.v v8, (a0), v16, v0.t
2249; RV64-NEXT:    addi a2, a1, -16
2250; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2251; RV64-NEXT:    vslidedown.vi v0, v0, 2
2252; RV64-NEXT:    sltu a1, a1, a2
2253; RV64-NEXT:    addi a1, a1, -1
2254; RV64-NEXT:    and a1, a1, a2
2255; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, ma
2256; RV64-NEXT:    vslidedown.vi v24, v16, 16
2257; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2258; RV64-NEXT:    vluxei16.v v16, (a0), v24, v0.t
2259; RV64-NEXT:    ret
2260  %eidxs = zext <32 x i8> %idxs to <32 x i64>
2261  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2262  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2263  ret <32 x double> %v
2264}
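; i16 indices, 32 elements: RV32 widens all 32 lanes to e32, while RV64
; sign-extends each 16-element half to e64.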
2265
2266define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2267; RV32-LABEL: vpgather_baseidx_v32i16_v32f64:
2268; RV32:       # %bb.0:
2269; RV32-NEXT:    li a2, 32
2270; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2271; RV32-NEXT:    vsext.vf2 v16, v8
2272; RV32-NEXT:    li a3, 16
2273; RV32-NEXT:    vsll.vi v16, v16, 3
2274; RV32-NEXT:    mv a2, a1
2275; RV32-NEXT:    bltu a1, a3, .LBB98_2
2276; RV32-NEXT:  # %bb.1:
2277; RV32-NEXT:    li a2, 16
2278; RV32-NEXT:  .LBB98_2:
2279; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2280; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
2281; RV32-NEXT:    addi a2, a1, -16
2282; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2283; RV32-NEXT:    vslidedown.vi v0, v0, 2
2284; RV32-NEXT:    sltu a1, a1, a2
2285; RV32-NEXT:    addi a1, a1, -1
2286; RV32-NEXT:    and a1, a1, a2
2287; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2288; RV32-NEXT:    vslidedown.vi v24, v16, 16
2289; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2290; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
2291; RV32-NEXT:    ret
2292;
2293; RV64-LABEL: vpgather_baseidx_v32i16_v32f64:
2294; RV64:       # %bb.0:
2295; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, ma
2296; RV64-NEXT:    vslidedown.vi v12, v8, 16
2297; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2298; RV64-NEXT:    vsext.vf4 v24, v8
2299; RV64-NEXT:    li a3, 16
2300; RV64-NEXT:    vsext.vf4 v16, v12
2301; RV64-NEXT:    vsll.vi v16, v16, 3
2302; RV64-NEXT:    vsll.vi v8, v24, 3
2303; RV64-NEXT:    mv a2, a1
2304; RV64-NEXT:    bltu a1, a3, .LBB98_2
2305; RV64-NEXT:  # %bb.1:
2306; RV64-NEXT:    li a2, 16
2307; RV64-NEXT:  .LBB98_2:
2308; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2309; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
2310; RV64-NEXT:    addi a2, a1, -16
2311; RV64-NEXT:    sltu a1, a1, a2
2312; RV64-NEXT:    addi a1, a1, -1
2313; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2314; RV64-NEXT:    vslidedown.vi v0, v0, 2
2315; RV64-NEXT:    and a1, a1, a2
2316; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2317; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
2318; RV64-NEXT:    ret
2319  %ptrs = getelementptr inbounds double, ptr %base, <32 x i16> %idxs
2320  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2321  ret <32 x double> %v
2322}
2323
2324define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2325; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
2326; RV32:       # %bb.0:
2327; RV32-NEXT:    li a2, 32
2328; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2329; RV32-NEXT:    vsext.vf2 v16, v8
2330; RV32-NEXT:    li a3, 16
2331; RV32-NEXT:    vsll.vi v16, v16, 3
2332; RV32-NEXT:    mv a2, a1
2333; RV32-NEXT:    bltu a1, a3, .LBB99_2
2334; RV32-NEXT:  # %bb.1:
2335; RV32-NEXT:    li a2, 16
2336; RV32-NEXT:  .LBB99_2:
2337; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2338; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
2339; RV32-NEXT:    addi a2, a1, -16
2340; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2341; RV32-NEXT:    vslidedown.vi v0, v0, 2
2342; RV32-NEXT:    sltu a1, a1, a2
2343; RV32-NEXT:    addi a1, a1, -1
2344; RV32-NEXT:    and a1, a1, a2
2345; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2346; RV32-NEXT:    vslidedown.vi v24, v16, 16
2347; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2348; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
2349; RV32-NEXT:    ret
2350;
2351; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
2352; RV64:       # %bb.0:
2353; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2354; RV64-NEXT:    vsext.vf4 v24, v8
2355; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, ma
2356; RV64-NEXT:    vslidedown.vi v8, v8, 16
2357; RV64-NEXT:    li a3, 16
2358; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2359; RV64-NEXT:    vsext.vf4 v16, v8
2360; RV64-NEXT:    vsll.vi v16, v16, 3
2361; RV64-NEXT:    vsll.vi v8, v24, 3
2362; RV64-NEXT:    mv a2, a1
2363; RV64-NEXT:    bltu a1, a3, .LBB99_2
2364; RV64-NEXT:  # %bb.1:
2365; RV64-NEXT:    li a2, 16
2366; RV64-NEXT:  .LBB99_2:
2367; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2368; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
2369; RV64-NEXT:    addi a2, a1, -16
2370; RV64-NEXT:    sltu a1, a1, a2
2371; RV64-NEXT:    addi a1, a1, -1
2372; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2373; RV64-NEXT:    vslidedown.vi v0, v0, 2
2374; RV64-NEXT:    and a1, a1, a2
2375; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2376; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
2377; RV64-NEXT:    ret
2378  %eidxs = sext <32 x i16> %idxs to <32 x i64>
2379  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2380  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2381  ret <32 x double> %v
2382}
2383
2384define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2385; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
2386; RV32:       # %bb.0:
2387; RV32-NEXT:    li a2, 32
2388; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2389; RV32-NEXT:    vzext.vf2 v16, v8
2390; RV32-NEXT:    li a3, 16
2391; RV32-NEXT:    vsll.vi v16, v16, 3
2392; RV32-NEXT:    mv a2, a1
2393; RV32-NEXT:    bltu a1, a3, .LBB100_2
2394; RV32-NEXT:  # %bb.1:
2395; RV32-NEXT:    li a2, 16
2396; RV32-NEXT:  .LBB100_2:
2397; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2398; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
2399; RV32-NEXT:    addi a2, a1, -16
2400; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2401; RV32-NEXT:    vslidedown.vi v0, v0, 2
2402; RV32-NEXT:    sltu a1, a1, a2
2403; RV32-NEXT:    addi a1, a1, -1
2404; RV32-NEXT:    and a1, a1, a2
2405; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2406; RV32-NEXT:    vslidedown.vi v24, v16, 16
2407; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2408; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
2409; RV32-NEXT:    ret
2410;
2411; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
2412; RV64:       # %bb.0:
2413; RV64-NEXT:    li a2, 32
2414; RV64-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2415; RV64-NEXT:    vzext.vf2 v16, v8
2416; RV64-NEXT:    li a3, 16
2417; RV64-NEXT:    vsll.vi v16, v16, 3
2418; RV64-NEXT:    mv a2, a1
2419; RV64-NEXT:    bltu a1, a3, .LBB100_2
2420; RV64-NEXT:  # %bb.1:
2421; RV64-NEXT:    li a2, 16
2422; RV64-NEXT:  .LBB100_2:
2423; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2424; RV64-NEXT:    vluxei32.v v8, (a0), v16, v0.t
2425; RV64-NEXT:    addi a2, a1, -16
2426; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2427; RV64-NEXT:    vslidedown.vi v0, v0, 2
2428; RV64-NEXT:    sltu a1, a1, a2
2429; RV64-NEXT:    addi a1, a1, -1
2430; RV64-NEXT:    and a1, a1, a2
2431; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2432; RV64-NEXT:    vslidedown.vi v24, v16, 16
2433; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2434; RV64-NEXT:    vluxei32.v v16, (a0), v24, v0.t
2435; RV64-NEXT:    ret
2436  %eidxs = zext <32 x i16> %idxs to <32 x i64>
2437  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2438  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2439  ret <32 x double> %v
2440}
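; i32 indices, 32 elements: RV32 scales the indices in place; RV64 extends
; each 16-element half to e64 before scaling.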
2441
2442define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2443; RV32-LABEL: vpgather_baseidx_v32i32_v32f64:
2444; RV32:       # %bb.0:
2445; RV32-NEXT:    li a2, 32
2446; RV32-NEXT:    li a3, 16
2447; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2448; RV32-NEXT:    vsll.vi v16, v8, 3
2449; RV32-NEXT:    mv a2, a1
2450; RV32-NEXT:    bltu a1, a3, .LBB101_2
2451; RV32-NEXT:  # %bb.1:
2452; RV32-NEXT:    li a2, 16
2453; RV32-NEXT:  .LBB101_2:
2454; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2455; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
2456; RV32-NEXT:    addi a2, a1, -16
2457; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2458; RV32-NEXT:    vslidedown.vi v0, v0, 2
2459; RV32-NEXT:    sltu a1, a1, a2
2460; RV32-NEXT:    addi a1, a1, -1
2461; RV32-NEXT:    and a1, a1, a2
2462; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2463; RV32-NEXT:    vslidedown.vi v24, v16, 16
2464; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2465; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
2466; RV32-NEXT:    ret
2467;
2468; RV64-LABEL: vpgather_baseidx_v32i32_v32f64:
2469; RV64:       # %bb.0:
2470; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2471; RV64-NEXT:    vslidedown.vi v16, v8, 16
2472; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2473; RV64-NEXT:    vsext.vf2 v24, v8
2474; RV64-NEXT:    li a3, 16
2475; RV64-NEXT:    vsext.vf2 v8, v16
2476; RV64-NEXT:    vsll.vi v16, v8, 3
2477; RV64-NEXT:    vsll.vi v8, v24, 3
2478; RV64-NEXT:    mv a2, a1
2479; RV64-NEXT:    bltu a1, a3, .LBB101_2
2480; RV64-NEXT:  # %bb.1:
2481; RV64-NEXT:    li a2, 16
2482; RV64-NEXT:  .LBB101_2:
2483; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2484; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
2485; RV64-NEXT:    addi a2, a1, -16
2486; RV64-NEXT:    sltu a1, a1, a2
2487; RV64-NEXT:    addi a1, a1, -1
2488; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2489; RV64-NEXT:    vslidedown.vi v0, v0, 2
2490; RV64-NEXT:    and a1, a1, a2
2491; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2492; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
2493; RV64-NEXT:    ret
2494  %ptrs = getelementptr inbounds double, ptr %base, <32 x i32> %idxs
2495  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2496  ret <32 x double> %v
2497}
2498
2499define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2500; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
2501; RV32:       # %bb.0:
2502; RV32-NEXT:    li a2, 32
2503; RV32-NEXT:    li a3, 16
2504; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2505; RV32-NEXT:    vsll.vi v16, v8, 3
2506; RV32-NEXT:    mv a2, a1
2507; RV32-NEXT:    bltu a1, a3, .LBB102_2
2508; RV32-NEXT:  # %bb.1:
2509; RV32-NEXT:    li a2, 16
2510; RV32-NEXT:  .LBB102_2:
2511; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2512; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
2513; RV32-NEXT:    addi a2, a1, -16
2514; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2515; RV32-NEXT:    vslidedown.vi v0, v0, 2
2516; RV32-NEXT:    sltu a1, a1, a2
2517; RV32-NEXT:    addi a1, a1, -1
2518; RV32-NEXT:    and a1, a1, a2
2519; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2520; RV32-NEXT:    vslidedown.vi v24, v16, 16
2521; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2522; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
2523; RV32-NEXT:    ret
2524;
2525; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
2526; RV64:       # %bb.0:
2527; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2528; RV64-NEXT:    vsext.vf2 v24, v8
2529; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2530; RV64-NEXT:    vslidedown.vi v8, v8, 16
2531; RV64-NEXT:    li a3, 16
2532; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2533; RV64-NEXT:    vsext.vf2 v16, v8
2534; RV64-NEXT:    vsll.vi v16, v16, 3
2535; RV64-NEXT:    vsll.vi v8, v24, 3
2536; RV64-NEXT:    mv a2, a1
2537; RV64-NEXT:    bltu a1, a3, .LBB102_2
2538; RV64-NEXT:  # %bb.1:
2539; RV64-NEXT:    li a2, 16
2540; RV64-NEXT:  .LBB102_2:
2541; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2542; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
2543; RV64-NEXT:    addi a2, a1, -16
2544; RV64-NEXT:    sltu a1, a1, a2
2545; RV64-NEXT:    addi a1, a1, -1
2546; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2547; RV64-NEXT:    vslidedown.vi v0, v0, 2
2548; RV64-NEXT:    and a1, a1, a2
2549; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2550; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
2551; RV64-NEXT:    ret
2552  %eidxs = sext <32 x i32> %idxs to <32 x i64>
2553  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2554  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2555  ret <32 x double> %v
2556}
2557
2558define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
2559; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
2560; RV32:       # %bb.0:
2561; RV32-NEXT:    li a2, 32
2562; RV32-NEXT:    li a3, 16
2563; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
2564; RV32-NEXT:    vsll.vi v16, v8, 3
2565; RV32-NEXT:    mv a2, a1
2566; RV32-NEXT:    bltu a1, a3, .LBB103_2
2567; RV32-NEXT:  # %bb.1:
2568; RV32-NEXT:    li a2, 16
2569; RV32-NEXT:  .LBB103_2:
2570; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2571; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
2572; RV32-NEXT:    addi a2, a1, -16
2573; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2574; RV32-NEXT:    vslidedown.vi v0, v0, 2
2575; RV32-NEXT:    sltu a1, a1, a2
2576; RV32-NEXT:    addi a1, a1, -1
2577; RV32-NEXT:    and a1, a1, a2
2578; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2579; RV32-NEXT:    vslidedown.vi v24, v16, 16
2580; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2581; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
2582; RV32-NEXT:    ret
2583;
2584; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
2585; RV64:       # %bb.0:
2586; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2587; RV64-NEXT:    vzext.vf2 v24, v8
2588; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
2589; RV64-NEXT:    vslidedown.vi v8, v8, 16
2590; RV64-NEXT:    li a3, 16
2591; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
2592; RV64-NEXT:    vzext.vf2 v16, v8
2593; RV64-NEXT:    vsll.vi v16, v16, 3
2594; RV64-NEXT:    vsll.vi v8, v24, 3
2595; RV64-NEXT:    mv a2, a1
2596; RV64-NEXT:    bltu a1, a3, .LBB103_2
2597; RV64-NEXT:  # %bb.1:
2598; RV64-NEXT:    li a2, 16
2599; RV64-NEXT:  .LBB103_2:
2600; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
2601; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
2602; RV64-NEXT:    addi a2, a1, -16
2603; RV64-NEXT:    sltu a1, a1, a2
2604; RV64-NEXT:    addi a1, a1, -1
2605; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
2606; RV64-NEXT:    vslidedown.vi v0, v0, 2
2607; RV64-NEXT:    and a1, a1, a2
2608; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
2609; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
2610; RV64-NEXT:    ret
2611  %eidxs = zext <32 x i32> %idxs to <32 x i64>
2612  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
2613  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
2614  ret <32 x double> %v
2615}
2616
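; i64 indices with 32 elements: RV32 narrows both register groups with vnsrl
; and recombines them with vslideup before splitting the gather.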
define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32f64:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT:    vmv1r.v v7, v0
; RV32-NEXT:    vnsrl.wi v24, v16, 0
; RV32-NEXT:    vnsrl.wi v16, v8, 0
; RV32-NEXT:    li a2, 32
; RV32-NEXT:    addi a3, a1, -16
; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV32-NEXT:    vslidedown.vi v0, v0, 2
; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; RV32-NEXT:    vslideup.vi v16, v24, 16
; RV32-NEXT:    vsll.vi v24, v16, 3
; RV32-NEXT:    sltu a2, a1, a3
; RV32-NEXT:    addi a2, a2, -1
; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
; RV32-NEXT:    vslidedown.vi v8, v24, 16
; RV32-NEXT:    and a2, a2, a3
; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
; RV32-NEXT:    li a2, 16
; RV32-NEXT:    bltu a1, a2, .LBB104_2
; RV32-NEXT:  # %bb.1:
; RV32-NEXT:    li a1, 16
; RV32-NEXT:  .LBB104_2:
; RV32-NEXT:    vmv1r.v v0, v7
; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
; RV32-NEXT:    ret
;
; RV64-LABEL: vpgather_baseidx_v32f64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT:    vsll.vi v16, v16, 3
; RV64-NEXT:    li a3, 16
; RV64-NEXT:    vsll.vi v8, v8, 3
; RV64-NEXT:    mv a2, a1
; RV64-NEXT:    bltu a1, a3, .LBB104_2
; RV64-NEXT:  # %bb.1:
; RV64-NEXT:    li a2, 16
; RV64-NEXT:  .LBB104_2:
; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT:    addi a2, a1, -16
; RV64-NEXT:    sltu a1, a1, a2
; RV64-NEXT:    addi a1, a1, -1
; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; RV64-NEXT:    vslidedown.vi v0, v0, 2
; RV64-NEXT:    and a1, a1, a2
; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
; RV64-NEXT:    ret
  %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %idxs
  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
  ret <32 x double> %v
}
